├── LLaMA-Factory
├── .dockerignore
├── .env.local
├── .gitattributes
├── .github
│ ├── CODE_OF_CONDUCT.md
│ ├── CONTRIBUTING.md
│ ├── ISSUE_TEMPLATE
│ │ └── bug-report.yml
│ ├── PULL_REQUEST_TEMPLATE.md
│ ├── SECURITY.md
│ └── workflows
│ │ ├── label_issue.yml
│ │ ├── publish.yml
│ │ └── tests.yml
├── .gitignore
├── .pre-commit-config.yaml
├── CITATION.cff
├── LICENSE
├── MANIFEST.in
├── Makefile
├── README.md
├── README_zh.md
├── assets
│ ├── benchmark.svg
│ ├── logo.png
│ ├── wechat.jpg
│ └── wechat_npu.jpg
├── data
│ ├── README.md
│ ├── README_zh.md
│ ├── alpaca_en_demo.json
│ ├── alpaca_zh_demo.json
│ ├── belle_multiturn
│ │ └── belle_multiturn.py
│ ├── c4_demo.json
│ ├── dataset_info.json
│ ├── dpo_en_demo.json
│ ├── dpo_zh_demo.json
│ ├── glaive_toolcall_en_demo.json
│ ├── glaive_toolcall_zh_demo.json
│ ├── hh_rlhf_en
│ │ └── hh_rlhf_en.py
│ ├── identity.json
│ ├── kto_en_demo.json
│ ├── mllm_demo.json
│ ├── mllm_demo_data
│ │ ├── 1.jpg
│ │ ├── 1.mp4
│ │ ├── 2.avi
│ │ ├── 2.jpg
│ │ ├── 3.jpg
│ │ └── 3.mp4
│ ├── mllm_video_demo.json
│ ├── train_sft_Base.json
│ ├── ultra_chat
│ │ └── ultra_chat.py
│ └── wiki_demo.txt
├── docker
│ ├── docker-cuda
│ │ ├── Dockerfile
│ │ └── docker-compose.yml
│ ├── docker-npu
│ │ ├── Dockerfile
│ │ └── docker-compose.yml
│ └── docker-rocm
│ │ ├── Dockerfile
│ │ └── docker-compose.yml
├── evaluation
│ ├── ceval
│ │ ├── ceval.py
│ │ ├── ceval.zip
│ │ └── mapping.json
│ ├── cmmlu
│ │ ├── cmmlu.py
│ │ ├── cmmlu.zip
│ │ └── mapping.json
│ └── mmlu
│ │ ├── mapping.json
│ │ ├── mmlu.py
│ │ └── mmlu.zip
├── examples
│ ├── README.md
│ ├── README_zh.md
│ ├── accelerate
│ │ └── fsdp_config.yaml
│ ├── deepspeed
│ │ ├── ds_z0_config.json
│ │ ├── ds_z2_config.json
│ │ ├── ds_z2_offload_config.json
│ │ ├── ds_z3_config.json
│ │ └── ds_z3_offload_config.json
│ ├── extras
│ │ ├── adam_mini
│ │ │ └── qwen2_full_sft.yaml
│ │ ├── badam
│ │ │ └── llama3_full_sft.yaml
│ │ ├── fsdp_qlora
│ │ │ ├── llama3_lora_sft.yaml
│ │ │ └── train.sh
│ │ ├── galore
│ │ │ └── llama3_full_sft.yaml
│ │ ├── llama_pro
│ │ │ ├── expand.sh
│ │ │ └── llama3_freeze_sft.yaml
│ │ ├── loraplus
│ │ │ └── llama3_lora_sft.yaml
│ │ ├── mod
│ │ │ └── llama3_full_sft.yaml
│ │ └── pissa
│ │ │ ├── init.sh
│ │ │ └── llama3_lora_sft.yaml
│ ├── inference
│ │ ├── llama3.yaml
│ │ ├── llama3_lora_sft.yaml
│ │ ├── llama3_vllm.yaml
│ │ ├── llava1_5.yaml
│ │ └── qwen2_vl.yaml
│ ├── merge_lora
│ │ ├── llama3_gptq.yaml
│ │ ├── llama3_lora_sft.yaml
│ │ └── qwen2vl_lora_sft.yaml
│ ├── train_full
│ │ ├── llama3_full_predict.yaml
│ │ ├── llama3_full_sft_ds3.yaml
│ │ └── qwen2vl_full_sft.yaml
│ ├── train_lora
│ │ ├── llama3_lora_dpo.yaml
│ │ ├── llama3_lora_eval.yaml
│ │ ├── llama3_lora_kto.yaml
│ │ ├── llama3_lora_ppo.yaml
│ │ ├── llama3_lora_predict.yaml
│ │ ├── llama3_lora_pretrain.yaml
│ │ ├── llama3_lora_reward.yaml
│ │ ├── llama3_lora_sft.yaml
│ │ ├── llama3_lora_sft_ds0.yaml
│ │ ├── llama3_lora_sft_ds3.yaml
│ │ ├── llama3_preprocess.yaml
│ │ ├── llava1_5_lora_sft.yaml
│ │ ├── qwen2vl_lora_dpo.yaml
│ │ └── qwen2vl_lora_sft.yaml
│ └── train_qlora
│ │ ├── llama3_lora_sft_aqlm.yaml
│ │ ├── llama3_lora_sft_awq.yaml
│ │ ├── llama3_lora_sft_gptq.yaml
│ │ └── llama3_lora_sft_otfq.yaml
├── pyproject.toml
├── requirements.txt
├── scripts
│ ├── cal_flops.py
│ ├── cal_lr.py
│ ├── cal_mfu.py
│ ├── cal_ppl.py
│ ├── length_cdf.py
│ ├── llama_pro.py
│ ├── llamafy_baichuan2.py
│ ├── llamafy_qwen.py
│ ├── loftq_init.py
│ ├── pissa_init.py
│ ├── test_image.py
│ └── test_toolcall.py
├── setup.py
├── src
│ ├── api.py
│ ├── llamafactory
│ │ ├── __init__.py
│ │ ├── api
│ │ │ ├── __init__.py
│ │ │ ├── app.py
│ │ │ ├── chat.py
│ │ │ ├── common.py
│ │ │ └── protocol.py
│ │ ├── chat
│ │ │ ├── __init__.py
│ │ │ ├── base_engine.py
│ │ │ ├── chat_model.py
│ │ │ ├── hf_engine.py
│ │ │ └── vllm_engine.py
│ │ ├── cli.py
│ │ ├── data
│ │ │ ├── __init__.py
│ │ │ ├── aligner.py
│ │ │ ├── collator.py
│ │ │ ├── data_utils.py
│ │ │ ├── formatter.py
│ │ │ ├── loader.py
│ │ │ ├── mm_plugin.py
│ │ │ ├── parser.py
│ │ │ ├── preprocess.py
│ │ │ ├── processors
│ │ │ │ ├── __init__.py
│ │ │ │ ├── feedback.py
│ │ │ │ ├── pairwise.py
│ │ │ │ ├── pretrain.py
│ │ │ │ ├── processor_utils.py
│ │ │ │ ├── supervised.py
│ │ │ │ └── unsupervised.py
│ │ │ ├── template.py
│ │ │ └── tool_utils.py
│ │ ├── eval
│ │ │ ├── __init__.py
│ │ │ ├── evaluator.py
│ │ │ └── template.py
│ │ ├── extras
│ │ │ ├── __init__.py
│ │ │ ├── constants.py
│ │ │ ├── env.py
│ │ │ ├── logging.py
│ │ │ ├── misc.py
│ │ │ ├── packages.py
│ │ │ └── ploting.py
│ │ ├── hparams
│ │ │ ├── __init__.py
│ │ │ ├── data_args.py
│ │ │ ├── evaluation_args.py
│ │ │ ├── finetuning_args.py
│ │ │ ├── generating_args.py
│ │ │ ├── model_args.py
│ │ │ └── parser.py
│ │ ├── launcher.py
│ │ ├── model
│ │ │ ├── __init__.py
│ │ │ ├── adapter.py
│ │ │ ├── loader.py
│ │ │ ├── model_utils
│ │ │ │ ├── __init__.py
│ │ │ │ ├── attention.py
│ │ │ │ ├── checkpointing.py
│ │ │ │ ├── embedding.py
│ │ │ │ ├── liger_kernel.py
│ │ │ │ ├── longlora.py
│ │ │ │ ├── misc.py
│ │ │ │ ├── mod.py
│ │ │ │ ├── moe.py
│ │ │ │ ├── packing.py
│ │ │ │ ├── quantization.py
│ │ │ │ ├── rope.py
│ │ │ │ ├── unsloth.py
│ │ │ │ ├── valuehead.py
│ │ │ │ └── visual.py
│ │ │ └── patcher.py
│ │ ├── train
│ │ │ ├── __init__.py
│ │ │ ├── callbacks.py
│ │ │ ├── dpo
│ │ │ │ ├── __init__.py
│ │ │ │ ├── trainer.py
│ │ │ │ └── workflow.py
│ │ │ ├── kto
│ │ │ │ ├── __init__.py
│ │ │ │ ├── trainer.py
│ │ │ │ └── workflow.py
│ │ │ ├── ppo
│ │ │ │ ├── __init__.py
│ │ │ │ ├── ppo_utils.py
│ │ │ │ ├── trainer.py
│ │ │ │ └── workflow.py
│ │ │ ├── pt
│ │ │ │ ├── __init__.py
│ │ │ │ ├── trainer.py
│ │ │ │ └── workflow.py
│ │ │ ├── rm
│ │ │ │ ├── __init__.py
│ │ │ │ ├── metric.py
│ │ │ │ ├── trainer.py
│ │ │ │ └── workflow.py
│ │ │ ├── sft
│ │ │ │ ├── __init__.py
│ │ │ │ ├── metric.py
│ │ │ │ ├── trainer.py
│ │ │ │ └── workflow.py
│ │ │ ├── test_utils.py
│ │ │ ├── trainer_utils.py
│ │ │ └── tuner.py
│ │ └── webui
│ │ │ ├── __init__.py
│ │ │ ├── chatter.py
│ │ │ ├── common.py
│ │ │ ├── components
│ │ │ │ ├── __init__.py
│ │ │ │ ├── chatbot.py
│ │ │ │ ├── data.py
│ │ │ │ ├── eval.py
│ │ │ │ ├── export.py
│ │ │ │ ├── infer.py
│ │ │ │ ├── top.py
│ │ │ │ └── train.py
│ │ │ ├── css.py
│ │ │ ├── engine.py
│ │ │ ├── interface.py
│ │ │ ├── locales.py
│ │ │ ├── manager.py
│ │ │ ├── runner.py
│ │ │ └── utils.py
│ ├── train.py
│ └── webui.py
└── tests
│ ├── data
│ ├── processors
│ │ ├── test_feedback.py
│ │ ├── test_pairwise.py
│ │ ├── test_processor_utils.py
│ │ ├── test_supervised.py
│ │ └── test_unsupervised.py
│ ├── test_collator.py
│ ├── test_formatter.py
│ ├── test_mm_plugin.py
│ └── test_template.py
│ ├── e2e
│ ├── test_chat.py
│ └── test_train.py
│ ├── eval
│ └── test_eval_template.py
│ └── model
│ ├── model_utils
│ ├── test_attention.py
│ ├── test_checkpointing.py
│ └── test_packing.py
│ ├── test_base.py
│ ├── test_freeze.py
│ ├── test_full.py
│ ├── test_lora.py
│ └── test_pissa.py
├── License.txt
├── README.md
├── assets
├── AutoMathCritique.jpg
├── case_study_1.pdf-1-10.jpg
└── train-test-flowchart.png
└── selfimprove
├── critic.py
├── data_filter.py
├── evaluate-all.sh
├── inference-all.sh
├── inference.py
├── meta-llama
├── Meta-Llama-3-8B-all-sftdata_0_new.json
├── Meta-Llama-3-8B-all-sftdata_0_origin.json
├── Meta-Llama-3-8B-gsm8k-sftdata_0_new.json
├── Meta-Llama-3-8B-gsm8k-sftdata_0_origin.json
├── Meta-Llama-3-8B-math-sftdata_0_new.json
└── Meta-Llama-3-8B-math-sftdata_0_origin.json
├── process.py
└── test_filter.py

/LLaMA-Factory/.dockerignore:
--------------------------------------------------------------------------------
1 | .vscode
2 | .git
3 | .github
4 | .venv
5 | cache
6 | data
7 | docker
8 | saves
9 | hf_cache
10 | ms_cache
11 | om_cache
12 | output
13 | .dockerignore
14 | .gitattributes
15 | .gitignore
16 |
--------------------------------------------------------------------------------
/LLaMA-Factory/.env.local:
--------------------------------------------------------------------------------
1 | # Note: .env files are not actually supported; the variables below are listed for reference only.
2 | # api
3 | API_HOST=
4 | API_PORT=
5 | API_KEY=
6 | API_MODEL_NAME=
7 | FASTAPI_ROOT_PATH=
8 | MAX_CONCURRENT=
9 | # general
10 | DISABLE_VERSION_CHECK=
11 | FORCE_CHECK_IMPORTS=
12 | LLAMAFACTORY_VERBOSITY=
13 | USE_MODELSCOPE_HUB=
14 | USE_OPENMIND_HUB=
15 | RECORD_VRAM=
16 | # torchrun
17 | FORCE_TORCHRUN=
18 | MASTER_ADDR=
19 | MASTER_PORT=
20 | NNODES=
21 | NODE_RANK=
22 | NPROC_PER_NODE=
23 | # wandb
24 | WANDB_DISABLED=
25 | WANDB_PROJECT=
26 | WANDB_API_KEY=
27 | # gradio ui
28 | GRADIO_SHARE=
29 | GRADIO_SERVER_NAME=
30 | GRADIO_SERVER_PORT=
31 | GRADIO_ROOT_PATH=
32 | GRADIO_IPV6=
33 | # setup
34 | ENABLE_SHORT_CONSOLE=1
35 | # reserved (do not use)
36 | LLAMABOARD_ENABLED=
37 | LLAMABOARD_WORKDIR=
38 |
--------------------------------------------------------------------------------
/LLaMA-Factory/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 |
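As a brief illustration of how the variables listed in `.env.local` above are meant to be used: they are read from the process environment at runtime rather than from the file itself (the file is reference-only, as its first line notes). A minimal sketch; the fallback values here are illustrative assumptions for this sketch, not LLaMA-Factory's actual defaults:

```python
import os

# Read the API server settings named in .env.local from the environment.
# The fallback values below are assumptions for this sketch only.
api_host = os.getenv("API_HOST", "0.0.0.0")
api_port = int(os.getenv("API_PORT", "8000"))
max_concurrent = int(os.getenv("MAX_CONCURRENT", "1"))

print(f"API server would bind to {api_host}:{api_port} "
      f"with at most {max_concurrent} concurrent requests")
```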
--------------------------------------------------------------------------------
/LLaMA-Factory/.github/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing to LLaMA Factory
2 |
3 | Everyone is welcome to contribute, and we value everybody's contribution. Code contributions are not the only way to help the community. Answering questions, helping others, and improving the documentation are also immensely valuable.
4 |
5 | It also helps us if you spread the word! Reference the library in blog posts about the awesome projects it made possible, shout out on Twitter every time it has helped you, or simply ⭐️ the repository to say thank you.
6 |
7 | However you choose to contribute, please be mindful and respect our [code of conduct](CODE_OF_CONDUCT.md).
8 |
9 | **This guide was heavily inspired by the [transformers guide to contributing](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md).**
10 |
11 | ## Ways to contribute
12 |
13 | There are several ways you can contribute to LLaMA Factory:
14 |
15 | * Fix outstanding issues with the existing code.
16 | * Submit issues related to bugs or desired new features.
17 | * Contribute to the examples or to the documentation.
18 |
19 | ### Style guide
20 |
21 | LLaMA Factory follows the [Google Python Style Guide](https://google.github.io/styleguide/pyguide.html); check it for details.
22 |
23 | ### Create a Pull Request
24 |
25 | 1. Fork the [repository](https://github.com/hiyouga/LLaMA-Factory) by clicking on the [Fork](https://github.com/hiyouga/LLaMA-Factory/fork) button on the repository's page. This creates a copy of the code under your GitHub user account.
26 |
27 | 2. Clone your fork to your local disk, and add the base repository as a remote:
28 |
29 |    ```bash
30 |    git clone git@github.com:[username]/LLaMA-Factory.git
31 |    cd LLaMA-Factory
32 |    git remote add upstream https://github.com/hiyouga/LLaMA-Factory.git
33 |    ```
34 |
35 | 3. Create a new branch to hold your development changes:
36 |
37 |    ```bash
38 |    git checkout -b dev_your_branch
39 |    ```
40 |
41 | 4. Set up a development environment by running the following command in a virtual environment:
42 |
43 |    ```bash
44 |    pip install -e ".[dev]"
45 |    ```
46 |
47 |    If LLaMA Factory was already installed in the virtual environment, remove it with `pip uninstall llamafactory` before reinstalling it in editable mode with the `-e` flag.
48 |
49 | 5. Check your code before committing:
50 |
51 |    ```bash
52 |    make commit
53 |    make style && make quality
54 |    make test
55 |    ```
56 |
57 | 6. Submit your changes:
58 |
59 |    ```bash
60 |    git add .
61 |    git commit -m "commit message"
62 |    git fetch upstream
63 |    git rebase upstream/main
64 |    git push -u origin dev_your_branch
65 |    ```
66 |
67 | 7. Open a pull request from your branch `dev_your_branch` against the main branch of the [upstream repository](https://github.com/hiyouga/LLaMA-Factory).
68 |
--------------------------------------------------------------------------------
/LLaMA-Factory/.github/ISSUE_TEMPLATE/bug-report.yml:
--------------------------------------------------------------------------------
1 | name: "\U0001F41B Bug / Help"
2 | description: Create a report to help us improve LLaMA Factory
3 | body:
4 |   - type: markdown
5 |     attributes:
6 |       value: |
7 |         Issues included in **FAQs** or those with **insufficient** information may be closed without a response.
8 |         包含在**常见问题**内或提供信息**不完整**的 issues 可能不会被回复。
9 |
10 |   - type: checkboxes
11 |     id: reminder
12 |     attributes:
13 |       label: Reminder
14 |       description: |
15 |         Please ensure you have read the README carefully and searched the existing issues (including FAQs).
16 |         请确保您已经认真阅读了 README 并且搜索过现有的 issues(包括常见问题)。
17 |
18 |       options:
19 |         - label: I have read the README and searched the existing issues.
20 |           required: true
21 |
22 |   - type: textarea
23 |     id: system-info
24 |     validations:
25 |       required: true
26 |     attributes:
27 |       label: System Info
28 |       description: |
29 |         Please share your system info with us. You can run the command **llamafactory-cli env** and copy-paste its output below.
30 |         请提供您的系统信息。您可以在命令行运行 **llamafactory-cli env** 并将其输出复制到该文本框中。
31 |
32 |       placeholder: llamafactory version, platform, python version, ...
33 |
34 |   - type: textarea
35 |     id: reproduction
36 |     validations:
37 |       required: true
38 |     attributes:
39 |       label: Reproduction
40 |       description: |
41 |         Please provide code snippets, error messages, and stack traces that reproduce the problem.
42 |         请提供运行参数,错误信息以及异常堆栈以便于我们复现该问题。
43 |         Remember to use Markdown tags to correctly format your code.
44 |         请合理使用 Markdown 标签来格式化您的文本。
45 |
46 |       placeholder: |
47 |         ```bash
48 |         llamafactory-cli train ...
49 |         ```
50 |
51 |   - type: textarea
52 |     id: expected-behavior
53 |     validations:
54 |       required: false
55 |     attributes:
56 |       label: Expected behavior
57 |       description: |
58 |         Please provide a clear and concise description of what you would expect to happen.
59 |         请提供您原本的目的,即这段代码的期望行为。
60 |
61 |   - type: textarea
62 |     id: others
63 |     validations:
64 |       required: false
65 |     attributes:
66 |       label: Others
67 |
--------------------------------------------------------------------------------
/LLaMA-Factory/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
1 | # What does this PR do?
2 |
3 | Fixes # (issue)
4 |
5 | ## Before submitting
6 |
7 | - [ ] Did you read the [contributor guideline](https://github.com/hiyouga/LLaMA-Factory/blob/main/.github/CONTRIBUTING.md)?
8 | - [ ] Did you write any new necessary tests?
--------------------------------------------------------------------------------
/LLaMA-Factory/.github/SECURITY.md:
--------------------------------------------------------------------------------
1 | # Reporting Security Issues
2 |
3 | To report a security issue, please use the GitHub Security Advisory ["Report a Vulnerability"](https://github.com/hiyouga/LLaMA-Factory/security/advisories/new) tab.
4 |
5 | We will send a response indicating the next steps in handling your report. After the initial reply to your report, the security team will keep you informed of the progress towards a fix and full announcement, and may ask for additional information or guidance.
6 |
7 | Report security bugs in third-party modules to the person or team maintaining the module.
8 |
--------------------------------------------------------------------------------
/LLaMA-Factory/.github/workflows/label_issue.yml:
--------------------------------------------------------------------------------
1 | name: label_issue
2 |
3 | on:
4 |   issues:
5 |     types:
6 |       - opened
7 |
8 | jobs:
9 |   label_issue:
10 |     runs-on: ubuntu-latest
11 |
12 |     permissions:
13 |       issues: write
14 |
15 |     steps:
16 |       - env:
17 |           GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
18 |           ISSUE_URL: ${{ github.event.issue.html_url }}
19 |           ISSUE_TITLE: ${{ github.event.issue.title }}
20 |         run: |
21 |           LABEL=pending
22 |           NPU_KEYWORDS=(npu huawei ascend 华为 昇腾)
23 |           ISSUE_TITLE_LOWER=$(echo "$ISSUE_TITLE" | tr '[:upper:]' '[:lower:]')
24 |           for KEYWORD in "${NPU_KEYWORDS[@]}"; do
25 |             if [[ $ISSUE_TITLE_LOWER == *$KEYWORD* ]] && [[ $ISSUE_TITLE_LOWER != *input* ]]; then
26 |               LABEL=pending,npu
27 |               break
28 |             fi
29 |           done
30 |           gh issue edit "$ISSUE_URL" --add-label "$LABEL"
31 |
--------------------------------------------------------------------------------
/LLaMA-Factory/.github/workflows/publish.yml:
--------------------------------------------------------------------------------
1 | name: publish
2 |
3 | on:
4 |   release:
5 |     types:
6 |       - published
7 |
8 | jobs:
9 |   publish:
10 |     name: Upload release to PyPI
11 |
12 |     runs-on: ubuntu-latest
13 |
14 |     environment:
15 |       name: release
16 |       url: https://pypi.org/p/llamafactory
17 |
18 |     permissions:
19 |       id-token: write
20 |
21 |     steps:
22 |       - name: Checkout
23 |         uses: actions/checkout@v4
24 |
25 |       - name: Set up Python
26 |         uses: actions/setup-python@v5
27 |         with:
28 |           python-version: "3.8"
29 |
30 |       - name: Install dependencies
31 |         run: |
32 |           python -m pip install --upgrade pip
33 |           python -m pip install build
34 |
35 |       - name: Build package
36 |         run: |
37 |           python -m build
38 |
39 |       - name: Publish package
40 |         uses: pypa/gh-action-pypi-publish@release/v1
--------------------------------------------------------------------------------
/LLaMA-Factory/.github/workflows/tests.yml:
--------------------------------------------------------------------------------
1 | name: tests
2 |
3 | on:
4 |   push:
5 |     branches:
6 |       - "main"
7 |     paths:
8 |       - "**.py"
9 |       - "requirements.txt"
10 |       - ".github/workflows/*.yml"
11 |   pull_request:
12 |     branches:
13 |       - "main"
14 |     paths:
15 |       - "**.py"
16 |       - "requirements.txt"
17 |       - ".github/workflows/*.yml"
18 |
19 | jobs:
20 |   tests:
21 |     strategy:
22 |       fail-fast: false
23 |       matrix:
24 |         python-version:
25 |           - "3.8" # TODO: remove py38 in next transformers release
26 |           - "3.9"
27 |           - "3.10"
28 |           - "3.11"
29 |         os:
30 |           - "ubuntu-latest"
31 |           - "windows-latest"
32 |           - "macos-13"
33 |
34 |     runs-on: ${{ matrix.os }}
35 |
36 |     environment:
37 |       name: tests
38 |
39 |     env:
40 |       HF_TOKEN: ${{ secrets.HF_TOKEN }}
41 |       OS_NAME: ${{ matrix.os }}
42 |
43 |     steps:
44 |       - name: Checkout
45 |         uses: actions/checkout@v4
46 |
47 |       - name: Set up Python
48 |         uses: actions/setup-python@v5
49 |         with:
50 |           python-version: ${{ matrix.python-version }}
51 |           cache: "pip"
52 |           cache-dependency-path: "setup.py"
53 |
54 |       - name: Install dependencies
55 |         run: |
56 |           python -m pip install --upgrade pip
57 |           python -m pip install ".[torch,dev]"
58 |
59 |       - name: Check quality
60 |         run: |
61 |           make style && make quality
62 |
63 |       - name: Test with pytest
64 |         run: |
65 |           make test
66 |
--------------------------------------------------------------------------------
/LLaMA-Factory/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 |   - repo: https://github.com/pre-commit/pre-commit-hooks
3 |     rev: v5.0.0
4 |     hooks:
5 |       - id: check-ast
6 |       - id: check-added-large-files
7 |         args: ['--maxkb=25000']
8 |       - id: check-merge-conflict
9 |       - id: check-yaml
10 |       - id: debug-statements
11 |       - id: end-of-file-fixer
12 |       - id: trailing-whitespace
13 |         args: [--markdown-linebreak-ext=md]
14 |       - id: no-commit-to-branch
15 |         args: ['--branch', 'main']
16 |
17 |   - repo: https://github.com/asottile/pyupgrade
18 |     rev: v3.17.0
19 |     hooks:
20 |       - id: pyupgrade
21 |         args: [--py38-plus]
22 |
23 |   - repo: https://github.com/astral-sh/ruff-pre-commit
24 |     rev: v0.6.9
25 |     hooks:
26 |       - id: ruff
27 |         args: [--fix]
28 |       - id: ruff-format
29 |
--------------------------------------------------------------------------------
/LLaMA-Factory/CITATION.cff:
--------------------------------------------------------------------------------
1 | cff-version: 1.2.0
2 | date-released: 2024-03
3 | message: "If you use this software, please cite it as below."
4 | authors:
5 |   - family-names: "Zheng"
6 |     given-names: "Yaowei"
7 |   - family-names: "Zhang"
8 |     given-names: "Richong"
9 |   - family-names: "Zhang"
10 |     given-names: "Junhao"
11 |   - family-names: "Ye"
12 |     given-names: "Yanhan"
13 |   - family-names: "Luo"
14 |     given-names: "Zheyan"
15 |   - family-names: "Feng"
16 |     given-names: "Zhangchi"
17 |   - family-names: "Ma"
18 |     given-names: "Yongqiang"
19 | title: "LlamaFactory: Unified Efficient Fine-Tuning of 100+ Language Models"
20 | url: "https://arxiv.org/abs/2403.13372"
21 | preferred-citation:
22 |   type: conference-paper
23 |   conference:
24 |     name: "Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 3: System Demonstrations)"
25 |   authors:
26 |     - family-names: "Zheng"
27 |       given-names: "Yaowei"
28 |     - family-names: "Zhang"
29 |       given-names: "Richong"
30 |     - family-names: "Zhang"
31 |       given-names: "Junhao"
32 |     - family-names: "Ye"
33 |       given-names: "Yanhan"
34 |     - family-names: "Luo"
35 |       given-names: "Zheyan"
36 |     - family-names: "Feng"
37 |       given-names: "Zhangchi"
38 |     - family-names: "Ma"
39 |       given-names: "Yongqiang"
40 |   title: "LlamaFactory: Unified Efficient Fine-Tuning of 100+ Language Models"
41 |   url: "https://arxiv.org/abs/2403.13372"
42 |   year: 2024
43 |   publisher: "Association for Computational Linguistics"
44 |   address: "Bangkok, Thailand"
45 |
--------------------------------------------------------------------------------
/LLaMA-Factory/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include LICENSE requirements.txt
2 |
--------------------------------------------------------------------------------
/LLaMA-Factory/Makefile:
--------------------------------------------------------------------------------
1 | .PHONY: build commit quality style test
2 |
3 | check_dirs := scripts src tests setup.py
4 |
5 | build:
6 | 	pip install build && python -m build
7 |
8 | commit:
9 | 	pre-commit install
10 | 	pre-commit run --all-files
11 |
12 | quality:
13 | 	ruff check $(check_dirs)
14 | 	ruff format --check $(check_dirs)
15 |
16 | style:
17 | 	ruff check $(check_dirs) --fix
18 | 	ruff format $(check_dirs)
19 |
20 | test:
21 | 	CUDA_VISIBLE_DEVICES= WANDB_DISABLED=true pytest -vv tests/
22 |
--------------------------------------------------------------------------------
/LLaMA-Factory/assets/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WooooDyy/MathCritique/569e62eb4864610b4190c37ff4ea28364e474563/LLaMA-Factory/assets/logo.png
--------------------------------------------------------------------------------
/LLaMA-Factory/assets/wechat.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WooooDyy/MathCritique/569e62eb4864610b4190c37ff4ea28364e474563/LLaMA-Factory/assets/wechat.jpg
--------------------------------------------------------------------------------
/LLaMA-Factory/assets/wechat_npu.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WooooDyy/MathCritique/569e62eb4864610b4190c37ff4ea28364e474563/LLaMA-Factory/assets/wechat_npu.jpg
os.getenv("HF_ENDPOINT", "https://huggingface.co") 8 | 9 | _DESCRIPTION = "BELLE multiturn chat dataset." 10 | 11 | _CITATION = """\ 12 | @article{belle2023exploring, 13 | title={Exploring the Impact of Instruction Data Scaling on Large Language Models: An Empirical Study on Real-World Use Cases}, 14 | author={Yunjie Ji, Yong Deng, Yan Gong, Yiping Peng, Qiang Niu, Lei Zhang, Baochang Ma, Xiangang Li}, 15 | journal={arXiv preprint arXiv:2303.14742}, 16 | year={2023} 17 | } 18 | """ 19 | 20 | _HOMEPAGE = f"{_HF_ENDPOINT}/datasets/BelleGroup/multiturn_chat_0.8M" 21 | _LICENSE = "gpl-3.0" 22 | _URL = f"{_HF_ENDPOINT}/datasets/BelleGroup/multiturn_chat_0.8M/resolve/main/multiturn_chat_0.8M.json" 23 | 24 | 25 | class BelleMultiturn(datasets.GeneratorBasedBuilder): 26 | VERSION = datasets.Version("0.0.0") 27 | 28 | def _info(self): 29 | features = datasets.Features( 30 | {"conversations": [{"from": datasets.Value("string"), "value": datasets.Value("string")}]} 31 | ) 32 | return datasets.DatasetInfo( 33 | description=_DESCRIPTION, features=features, homepage=_HOMEPAGE, license=_LICENSE, citation=_CITATION 34 | ) 35 | 36 | def _split_generators(self, dl_manager: datasets.DownloadManager): 37 | file_path = dl_manager.download(_URL) 38 | return [datasets.SplitGenerator(name=datasets.Split.TRAIN, gen_kwargs={"filepath": file_path})] 39 | 40 | def _generate_examples(self, filepath: str): 41 | with open(filepath, encoding="utf-8") as f: 42 | for key, row in enumerate(f): 43 | data = json.loads(row) 44 | conversations = [] 45 | prompt = data["instruction"].strip() 46 | response = data["output"].strip() 47 | 48 | assist_idx = prompt.rfind("Assistant:") 49 | human_idx = prompt.rfind("Human:") 50 | query = prompt[human_idx + 6 : assist_idx].strip() 51 | prompt = prompt[:human_idx].strip() 52 | conversations.insert(0, {"from": "gpt", "value": response}) 53 | conversations.insert(0, {"from": "human", "value": query}) 54 | 55 | while prompt.rfind("Assistant:") != -1: 56 | assist_idx = prompt.rfind("Assistant:") 57 | human_idx = prompt.rfind("Human:") 58 | if human_idx != -1: 59 | old_query = prompt[human_idx + 6 : assist_idx].strip() 60 | old_resp = prompt[assist_idx + 10 :].strip() 61 | conversations.insert(0, {"from": "gpt", "value": old_resp}) 62 | conversations.insert(0, {"from": "human", "value": old_query}) 63 | else: 64 | break 65 | prompt = prompt[:human_idx].strip() 66 | 67 | yield key, {"conversations": conversations} 68 | -------------------------------------------------------------------------------- /LLaMA-Factory/data/mllm_demo.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "messages": [ 4 | { 5 | "content": "Who are they?", 6 | "role": "user" 7 | }, 8 | { 9 | "content": "They're Kane and Gretzka from Bayern Munich.", 10 | "role": "assistant" 11 | }, 12 | { 13 | "content": "What are they doing?", 14 | "role": "user" 15 | }, 16 | { 17 | "content": "They are celebrating on the soccer field.", 18 | "role": "assistant" 19 | } 20 | ], 21 | "images": [ 22 | "mllm_demo_data/1.jpg" 23 | ] 24 | }, 25 | { 26 | "messages": [ 27 | { 28 | "content": "Who is he?", 29 | "role": "user" 30 | }, 31 | { 32 | "content": "He's Thomas Muller from Bayern Munich.", 33 | "role": "assistant" 34 | }, 35 | { 36 | "content": "Why is he on the ground?", 37 | "role": "user" 38 | }, 39 | { 40 | "content": "Because he's sliding on his knees to celebrate.", 41 | "role": "assistant" 42 | } 43 | ], 44 | "images": [ 45 | "mllm_demo_data/2.jpg" 46 | ] 47 | }, 48 | { 49 | 
"messages": [ 50 | { 51 | "content": "Please describe this image", 52 | "role": "user" 53 | }, 54 | { 55 | "content": "Chinese astronaut Gui Haichao is giving a speech.", 56 | "role": "assistant" 57 | }, 58 | { 59 | "content": "What has he accomplished?", 60 | "role": "user" 61 | }, 62 | { 63 | "content": "He was appointed to be a payload specialist on Shenzhou 16 mission in June 2022, thus becoming the first Chinese civilian of Group 3 in space on 30 May 2023. He is responsible for the on-orbit operation of space science experimental payloads.", 64 | "role": "assistant" 65 | } 66 | ], 67 | "images": [ 68 | "mllm_demo_data/3.jpg" 69 | ] 70 | }, 71 | { 72 | "messages": [ 73 | { 74 | "content": "他们是谁?", 75 | "role": "user" 76 | }, 77 | { 78 | "content": "他们是拜仁慕尼黑的凯恩和格雷茨卡。", 79 | "role": "assistant" 80 | }, 81 | { 82 | "content": "他们在做什么?", 83 | "role": "user" 84 | }, 85 | { 86 | "content": "他们在足球场上庆祝。", 87 | "role": "assistant" 88 | } 89 | ], 90 | "images": [ 91 | "mllm_demo_data/1.jpg" 92 | ] 93 | }, 94 | { 95 | "messages": [ 96 | { 97 | "content": "他是谁?", 98 | "role": "user" 99 | }, 100 | { 101 | "content": "他是来自拜仁慕尼黑的托马斯·穆勒。", 102 | "role": "assistant" 103 | }, 104 | { 105 | "content": "他为什么在地上?", 106 | "role": "user" 107 | }, 108 | { 109 | "content": "因为他正在双膝跪地滑行庆祝。", 110 | "role": "assistant" 111 | } 112 | ], 113 | "images": [ 114 | "mllm_demo_data/2.jpg" 115 | ] 116 | }, 117 | { 118 | "messages": [ 119 | { 120 | "content": "请描述这张图片", 121 | "role": "user" 122 | }, 123 | { 124 | "content": "中国宇航员桂海潮正在讲话。", 125 | "role": "assistant" 126 | }, 127 | { 128 | "content": "他取得过哪些成就?", 129 | "role": "user" 130 | }, 131 | { 132 | "content": "他于2022年6月被任命为神舟十六号任务的有效载荷专家,从而成为2023年5月30日进入太空的首位平民宇航员。他负责在轨操作空间科学实验有效载荷。", 133 | "role": "assistant" 134 | } 135 | ], 136 | "images": [ 137 | "mllm_demo_data/3.jpg" 138 | ] 139 | } 140 | ] 141 | -------------------------------------------------------------------------------- /LLaMA-Factory/data/mllm_demo_data/1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WooooDyy/MathCritique/569e62eb4864610b4190c37ff4ea28364e474563/LLaMA-Factory/data/mllm_demo_data/1.jpg -------------------------------------------------------------------------------- /LLaMA-Factory/data/mllm_demo_data/1.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WooooDyy/MathCritique/569e62eb4864610b4190c37ff4ea28364e474563/LLaMA-Factory/data/mllm_demo_data/1.mp4 -------------------------------------------------------------------------------- /LLaMA-Factory/data/mllm_demo_data/2.avi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WooooDyy/MathCritique/569e62eb4864610b4190c37ff4ea28364e474563/LLaMA-Factory/data/mllm_demo_data/2.avi -------------------------------------------------------------------------------- /LLaMA-Factory/data/mllm_demo_data/2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WooooDyy/MathCritique/569e62eb4864610b4190c37ff4ea28364e474563/LLaMA-Factory/data/mllm_demo_data/2.jpg -------------------------------------------------------------------------------- /LLaMA-Factory/data/mllm_demo_data/3.jpg: -------------------------------------------------------------------------------- 
--------------------------------------------------------------------------------
/LLaMA-Factory/data/mllm_demo_data/1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WooooDyy/MathCritique/569e62eb4864610b4190c37ff4ea28364e474563/LLaMA-Factory/data/mllm_demo_data/1.jpg
--------------------------------------------------------------------------------
/LLaMA-Factory/data/mllm_demo_data/1.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WooooDyy/MathCritique/569e62eb4864610b4190c37ff4ea28364e474563/LLaMA-Factory/data/mllm_demo_data/1.mp4
--------------------------------------------------------------------------------
/LLaMA-Factory/data/mllm_demo_data/2.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WooooDyy/MathCritique/569e62eb4864610b4190c37ff4ea28364e474563/LLaMA-Factory/data/mllm_demo_data/2.avi
--------------------------------------------------------------------------------
/LLaMA-Factory/data/mllm_demo_data/2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WooooDyy/MathCritique/569e62eb4864610b4190c37ff4ea28364e474563/LLaMA-Factory/data/mllm_demo_data/2.jpg
--------------------------------------------------------------------------------
/LLaMA-Factory/data/mllm_demo_data/3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WooooDyy/MathCritique/569e62eb4864610b4190c37ff4ea28364e474563/LLaMA-Factory/data/mllm_demo_data/3.jpg
--------------------------------------------------------------------------------
/LLaMA-Factory/data/mllm_demo_data/3.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WooooDyy/MathCritique/569e62eb4864610b4190c37ff4ea28364e474563/LLaMA-Factory/data/mllm_demo_data/3.mp4
--------------------------------------------------------------------------------
/LLaMA-Factory/data/mllm_video_demo.json:
--------------------------------------------------------------------------------
1 | [
2 |   {
3 |     "messages": [
4 |       {
5 |         "content": "