├── .github ├── CONTRIBUTING.md └── workflows │ ├── deploy.yml │ └── lint.yml ├── .gitignore ├── .owners.yml ├── .pre-commit-config.yaml ├── LICENSE ├── MANIFEST.in ├── README.md ├── README_zh-CN.md ├── docs ├── en │ ├── .readthedocs.yaml │ ├── Makefile │ ├── _static │ │ ├── css │ │ │ └── readthedocs.css │ │ └── image │ │ │ └── logo.png │ ├── acceleration │ │ ├── benchmark.rst │ │ ├── deepspeed.rst │ │ ├── flash_attn.rst │ │ ├── hyper_parameters.rst │ │ ├── length_grouped_sampler.rst │ │ ├── pack_to_max_length.rst │ │ ├── train_extreme_long_sequence.rst │ │ ├── train_large_scale_dataset.rst │ │ └── varlen_flash_attn.rst │ ├── chat │ │ ├── agent.md │ │ ├── llm.md │ │ ├── lmdeploy.md │ │ └── vlm.md │ ├── conf.py │ ├── dpo │ │ ├── modify_settings.md │ │ ├── overview.md │ │ └── quick_start.md │ ├── evaluation │ │ ├── hook.md │ │ ├── mmbench.md │ │ ├── mmlu.md │ │ └── opencompass.md │ ├── get_started │ │ ├── installation.md │ │ ├── overview.md │ │ └── quickstart.md │ ├── index.rst │ ├── internevo_migration │ │ ├── ftdp_dataset │ │ │ ├── Case1.rst │ │ │ ├── Case2.rst │ │ │ ├── Case3.rst │ │ │ ├── Case4.rst │ │ │ └── ftdp.rst │ │ └── internevo_migration.rst │ ├── make.bat │ ├── models │ │ └── supported.md │ ├── notes │ │ └── changelog.md │ ├── preparation │ │ ├── pretrained_model.rst │ │ └── prompt_template.rst │ ├── reward_model │ │ ├── modify_settings.md │ │ ├── overview.md │ │ ├── preference_data.md │ │ └── quick_start.md │ ├── switch_language.md │ ├── training │ │ ├── custom_agent_dataset.rst │ │ ├── custom_pretrain_dataset.rst │ │ ├── custom_sft_dataset.rst │ │ ├── modify_settings.rst │ │ ├── multi_modal_dataset.rst │ │ ├── open_source_dataset.rst │ │ └── visualization.rst │ └── user_guides │ │ ├── chat.md │ │ ├── dataset_format.md │ │ ├── dataset_prepare.md │ │ ├── finetune.md │ │ ├── incremental_pretraining.md │ │ ├── intern_repo_dataset.md │ │ ├── multi_turn_conversation.md │ │ ├── prompt_template.md │ │ └── single_turn_conversation.md └── zh_cn │ ├── .readthedocs.yaml │ ├── Makefile │ ├── _static │ └── image │ │ └── logo.png │ ├── acceleration │ ├── benchmark.rst │ ├── deepspeed.rst │ ├── flash_attn.rst │ ├── hyper_parameters.rst │ ├── length_grouped_sampler.rst │ ├── pack_to_max_length.rst │ ├── train_extreme_long_sequence.rst │ ├── train_large_scale_dataset.rst │ └── varlen_flash_attn.rst │ ├── chat │ ├── agent.md │ ├── llm.md │ ├── lmdeploy.md │ └── vlm.md │ ├── conf.py │ ├── dpo │ ├── modify_settings.md │ ├── overview.md │ └── quick_start.md │ ├── evaluation │ ├── hook.md │ ├── mmbench.md │ ├── mmlu.md │ └── opencompass.md │ ├── get_started │ ├── installation.rst │ └── quickstart.rst │ ├── index.rst │ ├── internevo_migration │ ├── differences.rst │ └── ftdp_dataset │ │ ├── processed_and_internlm2.rst │ │ ├── processed_and_others.rst │ │ ├── processed_normal_chat.rst │ │ └── tokenized_and_internlm2.rst │ ├── make.bat │ ├── models │ └── supported.md │ ├── notes │ └── changelog.md │ ├── preparation │ ├── pretrained_model.rst │ └── prompt_template.rst │ ├── reward_model │ ├── images │ │ ├── preference_data.png │ │ ├── sequence_parallel.png │ │ └── var_len_atten.png │ ├── modify_settings.md │ ├── overview.md │ ├── preference_data.md │ └── quick_start.md │ ├── switch_language.md │ ├── training │ ├── custom_pretrain_dataset.rst │ ├── custom_sft_dataset.rst │ ├── modify_settings.rst │ ├── multi_modal_dataset.rst │ ├── open_source_dataset.rst │ └── visualization.rst │ └── user_guides │ ├── ceph.md │ ├── chat.md │ ├── config.md │ ├── custom_dataset │ ├── Offline.md │ └── Online.md │ ├── 
dataset_format.md │ ├── dataset_prepare.md │ ├── finetune.md │ ├── ftdp_dataset │ ├── Case1.md │ ├── Case2.md │ ├── Case3.md │ ├── Case4.md │ └── README.md │ ├── incremental_pretraining.md │ ├── intern_repo_dataset.md │ ├── llava_offline.md │ ├── multi_turn_conversation.md │ ├── prompt_template.md │ ├── sequence_parallel.md │ ├── single_turn_conversation.md │ └── varlen_attention.md ├── examples ├── demo_data │ ├── multi_turn_1 │ │ ├── README.md │ │ ├── config.py │ │ ├── data.json │ │ └── map_fn.py │ ├── multi_turn_2 │ │ ├── README.md │ │ ├── config.py │ │ ├── data.json │ │ └── map_fn.py │ ├── pretrain │ │ ├── README.md │ │ ├── config.py │ │ ├── data.json │ │ └── map_fn.py │ └── single_turn │ │ ├── README.md │ │ ├── config.py │ │ ├── data.json │ │ └── map_fn.py └── huggingface_trainer │ ├── README.md │ ├── train_hf.py │ ├── train_lora_hf.py │ └── train_qlora_hf.py ├── requirements.txt ├── requirements ├── deepspeed.txt ├── docs.txt ├── lmdeploy.txt ├── modelscope.txt └── runtime.txt ├── setup.cfg ├── setup.py └── xtuner ├── __init__.py ├── _lite ├── __init__.py ├── accelerate │ ├── __init__.py │ ├── lora.py │ ├── ops │ │ ├── __init__.py │ │ └── moe_permute.py │ ├── packed.py │ └── utils.py ├── algorithms │ ├── __init__.py │ ├── ppo │ │ ├── __init__.py │ │ ├── dataset.py │ │ ├── loss.py │ │ └── model.py │ └── sft │ │ ├── __init__.py │ │ └── dataset.py ├── chat │ ├── __init__.py │ ├── backends │ │ └── __init__.py │ ├── messages │ │ ├── __init__.py │ │ ├── base.py │ │ └── chat.py │ └── templates │ │ ├── __init__.py │ │ ├── chat.py │ │ └── hybrid.py ├── datasets │ ├── __init__.py │ ├── json.py │ ├── jsonl.py │ ├── pack.py │ ├── streaming.py │ └── utils │ │ ├── __init__.py │ │ ├── convert.py │ │ ├── load.py │ │ └── utils.py ├── device.py ├── modelings │ ├── __init__.py │ ├── internlm2 │ │ ├── __init__.py │ │ ├── configuration_internlm2.py │ │ └── modeling_internlm2.py │ ├── internlm3 │ │ ├── __init__.py │ │ ├── configuration_internlm3.py │ │ ├── modeling_internlm3.py │ │ └── tokenization_internlm3.py │ ├── internvl2 │ │ ├── __init__.py │ │ ├── configuration_intern_vit.py │ │ └── modeling_intern_vit.py │ └── llava │ │ ├── __init__.py │ │ ├── configuration_internlm2.py │ │ ├── configuration_llava.py │ │ ├── modeling_internlm2.py │ │ ├── modeling_llava.py │ │ └── processing_llava.py ├── parallel │ ├── __init__.py │ ├── comm.py │ ├── sampler.py │ ├── sequence │ │ ├── __init__.py │ │ ├── attention.py │ │ └── ops.py │ └── setup.py └── patches │ ├── __init__.py │ ├── auto.py │ ├── base.py │ ├── internlm3.py │ ├── llama.py │ ├── mixins │ ├── __init__.py │ └── generate.py │ ├── qwen2.py │ └── utils.py ├── apis ├── __init__.py ├── datasets │ ├── __init__.py │ ├── alpaca.py │ ├── arxiv.py │ ├── code_alpaca.py │ ├── colorist.py │ ├── lawyer.py │ ├── medical.py │ ├── moss_003_sft.py │ ├── oasst1.py │ ├── open_orca.py │ ├── sql.py │ ├── tiny_codes.py │ └── wizardlm.py ├── model.py └── training_args.py ├── configs ├── __init__.py ├── baichuan │ ├── baichuan2_13b_base │ │ ├── baichuan2_13b_base_qlora_alpaca_e3.py │ │ ├── baichuan2_13b_base_qlora_alpaca_enzh_e3.py │ │ ├── baichuan2_13b_base_qlora_alpaca_enzh_oasst1_e3.py │ │ ├── baichuan2_13b_base_qlora_alpaca_zh_e3.py │ │ ├── baichuan2_13b_base_qlora_arxiv_gentitle_e3.py │ │ ├── baichuan2_13b_base_qlora_code_alpaca_e3.py │ │ ├── baichuan2_13b_base_qlora_colorist_e5.py │ │ ├── baichuan2_13b_base_qlora_lawyer_e3.py │ │ ├── baichuan2_13b_base_qlora_oasst1_512_e3.py │ │ ├── baichuan2_13b_base_qlora_oasst1_e3.py │ │ ├── baichuan2_13b_base_qlora_open_platypus_e3.py 
│ │ └── baichuan2_13b_base_qlora_sql_e3.py │ ├── baichuan2_13b_chat │ │ ├── baichuan2_13b_chat_qlora_alpaca_e3.py │ │ ├── baichuan2_13b_chat_qlora_alpaca_enzh_e3.py │ │ ├── baichuan2_13b_chat_qlora_alpaca_enzh_oasst1_e3.py │ │ ├── baichuan2_13b_chat_qlora_alpaca_zh_e3.py │ │ ├── baichuan2_13b_chat_qlora_code_alpaca_e3.py │ │ ├── baichuan2_13b_chat_qlora_lawyer_e3.py │ │ ├── baichuan2_13b_chat_qlora_oasst1_512_e3.py │ │ ├── baichuan2_13b_chat_qlora_oasst1_e3.py │ │ └── baichuan2_13b_chat_qlora_open_platypus_e3.py │ ├── baichuan2_7b_base │ │ ├── baichuan2_7b_base_qlora_alpaca_e3.py │ │ ├── baichuan2_7b_base_qlora_alpaca_enzh_e3.py │ │ ├── baichuan2_7b_base_qlora_alpaca_enzh_oasst1_e3.py │ │ ├── baichuan2_7b_base_qlora_alpaca_zh_e3.py │ │ ├── baichuan2_7b_base_qlora_arxiv_gentitle_e3.py │ │ ├── baichuan2_7b_base_qlora_code_alpaca_e3.py │ │ ├── baichuan2_7b_base_qlora_colorist_e5.py │ │ ├── baichuan2_7b_base_qlora_lawyer_e3.py │ │ ├── baichuan2_7b_base_qlora_oasst1_512_e3.py │ │ ├── baichuan2_7b_base_qlora_oasst1_e3.py │ │ ├── baichuan2_7b_base_qlora_open_platypus_e3.py │ │ └── baichuan2_7b_base_qlora_sql_e3.py │ ├── baichuan2_7b_chat │ │ ├── baichuan2_7b_chat_qlora_alpaca_e3.py │ │ ├── baichuan2_7b_chat_qlora_alpaca_enzh_e3.py │ │ ├── baichuan2_7b_chat_qlora_alpaca_enzh_oasst1_e3.py │ │ ├── baichuan2_7b_chat_qlora_alpaca_zh_e3.py │ │ ├── baichuan2_7b_chat_qlora_code_alpaca_e3.py │ │ ├── baichuan2_7b_chat_qlora_lawyer_e3.py │ │ ├── baichuan2_7b_chat_qlora_oasst1_512_e3.py │ │ ├── baichuan2_7b_chat_qlora_oasst1_e3.py │ │ └── baichuan2_7b_chat_qlora_open_platypus_e3.py │ ├── baichuan_13b_base │ │ ├── baichuan_13b_base_qlora_alpaca_e3.py │ │ ├── baichuan_13b_base_qlora_alpaca_enzh_e3.py │ │ ├── baichuan_13b_base_qlora_alpaca_enzh_oasst1_e3.py │ │ ├── baichuan_13b_base_qlora_alpaca_zh_e3.py │ │ ├── baichuan_13b_base_qlora_arxiv_gentitle_e3.py │ │ ├── baichuan_13b_base_qlora_code_alpaca_e3.py │ │ ├── baichuan_13b_base_qlora_colorist_e5.py │ │ ├── baichuan_13b_base_qlora_lawyer_e3.py │ │ ├── baichuan_13b_base_qlora_medical_e1.py │ │ ├── baichuan_13b_base_qlora_moss_sft_all_e1.py │ │ ├── baichuan_13b_base_qlora_moss_sft_all_e2_gpu8.py │ │ ├── baichuan_13b_base_qlora_moss_sft_plugins_e1.py │ │ ├── baichuan_13b_base_qlora_oasst1_512_e3.py │ │ ├── baichuan_13b_base_qlora_oasst1_e3.py │ │ ├── baichuan_13b_base_qlora_open_platypus_e3.py │ │ ├── baichuan_13b_base_qlora_openorca_e1.py │ │ ├── baichuan_13b_base_qlora_sql_e3.py │ │ └── baichuan_13b_base_qlora_tiny_codes_e1.py │ ├── baichuan_13b_chat │ │ ├── baichuan_13b_chat_qlora_alpaca_e3.py │ │ ├── baichuan_13b_chat_qlora_alpaca_enzh_e3.py │ │ ├── baichuan_13b_chat_qlora_alpaca_enzh_oasst1_e3.py │ │ ├── baichuan_13b_chat_qlora_alpaca_zh_e3.py │ │ ├── baichuan_13b_chat_qlora_arxiv_gentitle_e3.py │ │ ├── baichuan_13b_chat_qlora_code_alpaca_e3.py │ │ ├── baichuan_13b_chat_qlora_colorist_e5.py │ │ ├── baichuan_13b_chat_qlora_lawyer_e3.py │ │ ├── baichuan_13b_chat_qlora_medical_e1.py │ │ ├── baichuan_13b_chat_qlora_oasst1_512_e3.py │ │ ├── baichuan_13b_chat_qlora_oasst1_e3.py │ │ ├── baichuan_13b_chat_qlora_open_platypus_e3.py │ │ ├── baichuan_13b_chat_qlora_openorca_e1.py │ │ ├── baichuan_13b_chat_qlora_sql_e3.py │ │ └── baichuan_13b_chat_qlora_tiny_codes_e1.py │ └── baichuan_7b │ │ ├── baichuan_7b_qlora_alpaca_e3.py │ │ ├── baichuan_7b_qlora_alpaca_enzh_e3.py │ │ ├── baichuan_7b_qlora_alpaca_enzh_oasst1_e3.py │ │ ├── baichuan_7b_qlora_alpaca_zh_e3.py │ │ ├── baichuan_7b_qlora_arxiv_gentitle_e3.py │ │ ├── baichuan_7b_qlora_code_alpaca_e3.py │ │ ├── 
baichuan_7b_qlora_colorist_e5.py │ │ ├── baichuan_7b_qlora_lawyer_e3.py │ │ ├── baichuan_7b_qlora_medical_e1.py │ │ ├── baichuan_7b_qlora_moss_sft_all_e1.py │ │ ├── baichuan_7b_qlora_moss_sft_all_e2_gpu8.py │ │ ├── baichuan_7b_qlora_moss_sft_plugins_e1.py │ │ ├── baichuan_7b_qlora_oasst1_512_e3.py │ │ ├── baichuan_7b_qlora_oasst1_e3.py │ │ ├── baichuan_7b_qlora_open_platypus_e3.py │ │ ├── baichuan_7b_qlora_openorca_e1.py │ │ ├── baichuan_7b_qlora_sql_e3.py │ │ └── baichuan_7b_qlora_tiny_codes_e1.py ├── chatglm │ ├── chatglm2_6b │ │ ├── chatglm2_6b_qlora_alpaca_e3.py │ │ ├── chatglm2_6b_qlora_alpaca_enzh_e3.py │ │ ├── chatglm2_6b_qlora_alpaca_enzh_oasst1_e3.py │ │ ├── chatglm2_6b_qlora_alpaca_zh_e3.py │ │ ├── chatglm2_6b_qlora_arxiv_gentitle_e3.py │ │ ├── chatglm2_6b_qlora_code_alpaca_e3.py │ │ ├── chatglm2_6b_qlora_colorist_e5.py │ │ ├── chatglm2_6b_qlora_lawyer_e3.py │ │ ├── chatglm2_6b_qlora_medical_e1.py │ │ ├── chatglm2_6b_qlora_oasst1_512_e3.py │ │ ├── chatglm2_6b_qlora_oasst1_e3.py │ │ ├── chatglm2_6b_qlora_open_platypus_e3.py │ │ ├── chatglm2_6b_qlora_openorca_e1.py │ │ ├── chatglm2_6b_qlora_sql_e3.py │ │ └── chatglm2_6b_qlora_tiny_codes_e1.py │ ├── chatglm3_6b │ │ ├── chatglm3_6b_qlora_alpaca_e3.py │ │ ├── chatglm3_6b_qlora_alpaca_enzh_e3.py │ │ ├── chatglm3_6b_qlora_alpaca_enzh_oasst1_e3.py │ │ ├── chatglm3_6b_qlora_alpaca_zh_e3.py │ │ ├── chatglm3_6b_qlora_arxiv_gentitle_e3.py │ │ ├── chatglm3_6b_qlora_code_alpaca_e3.py │ │ ├── chatglm3_6b_qlora_colorist_e5.py │ │ ├── chatglm3_6b_qlora_lawyer_e3.py │ │ ├── chatglm3_6b_qlora_medical_e1.py │ │ ├── chatglm3_6b_qlora_oasst1_512_e3.py │ │ ├── chatglm3_6b_qlora_oasst1_e3.py │ │ ├── chatglm3_6b_qlora_open_platypus_e3.py │ │ ├── chatglm3_6b_qlora_openorca_e1.py │ │ ├── chatglm3_6b_qlora_sql_e3.py │ │ └── chatglm3_6b_qlora_tiny_codes_e1.py │ └── chatglm3_6b_base │ │ ├── chatglm3_6b_base_qlora_alpaca_e3.py │ │ ├── chatglm3_6b_base_qlora_alpaca_enzh_e3.py │ │ ├── chatglm3_6b_base_qlora_alpaca_enzh_oasst1_e3.py │ │ ├── chatglm3_6b_base_qlora_alpaca_zh_e3.py │ │ ├── chatglm3_6b_base_qlora_arxiv_gentitle_e3.py │ │ ├── chatglm3_6b_base_qlora_code_alpaca_e3.py │ │ ├── chatglm3_6b_base_qlora_colorist_e5.py │ │ ├── chatglm3_6b_base_qlora_lawyer_e3.py │ │ ├── chatglm3_6b_base_qlora_medical_e1.py │ │ ├── chatglm3_6b_base_qlora_oasst1_512_e3.py │ │ ├── chatglm3_6b_base_qlora_oasst1_e3.py │ │ ├── chatglm3_6b_base_qlora_open_platypus_e3.py │ │ ├── chatglm3_6b_base_qlora_openorca_e1.py │ │ ├── chatglm3_6b_base_qlora_sql_e3.py │ │ └── chatglm3_6b_base_qlora_tiny_codes_e1.py ├── cohere │ ├── README.md │ └── cohere_104b │ │ └── cohere_100b_128k_sp32.py ├── custom_dataset │ ├── pretrain │ │ ├── baichuan │ │ │ ├── baichuan2_13b_base_full_custom_pretrain_e1.py │ │ │ └── baichuan2_7b_base_full_custom_pretrain_e1.py │ │ ├── chatglm │ │ │ ├── chatglm2_6b_full_custom_pretrain_e1.py │ │ │ └── chatglm3_6b_full_custom_pretrain_e1.py │ │ ├── deepseek │ │ │ └── deepseek_moe_16b_base_full_custom_pretrain_e1.py │ │ ├── gemma │ │ │ ├── gemma_2b_full_custom_pretrain_e1.py │ │ │ └── gemma_7b_full_custom_pretrain_e1.py │ │ ├── internlm │ │ │ ├── internlm2_1_8b_full_custom_pretrain_e1.py │ │ │ ├── internlm2_20b_full_custom_pretrain_e1.py │ │ │ └── internlm2_7b_full_custom_pretrain_e1.py │ │ ├── llama │ │ │ ├── llama2_70b_full_custom_pretrain_e1.py │ │ │ └── llama2_7b_full_custom_pretrain_e1.py │ │ ├── minicpm │ │ │ ├── minicpm3_4b_full_custom_pretrain_e1.py │ │ │ ├── minicpm_1b_full_custom_pretrain_e1.py │ │ │ └── minicpm_2b_full_custom_pretrain_e1.py │ │ ├── mistral │ │ │ 
└── mistral_7b_full_custom_pretrain_e1.py │ │ ├── mixtral │ │ │ └── mixtral_8x7b_full_custom_pretrain_e1.py │ │ ├── qwen │ │ │ ├── qwen1_5_0_5b_full_custom_pretrain_e1.py │ │ │ ├── qwen1_5_14b_full_custom_pretrain_e1.py │ │ │ ├── qwen1_5_1_8b_full_custom_pretrain_e1.py │ │ │ ├── qwen1_5_4b_full_custom_pretrain_e1.py │ │ │ ├── qwen1_5_72b_full_custom_pretrain_e1.py │ │ │ ├── qwen1_5_7b_full_custom_pretrain_e1.py │ │ │ ├── qwen_1_8b_full_custom_pretrain_e1.py │ │ │ ├── qwen_72b_full_custom_pretrain_e1.py │ │ │ └── qwen_7b_full_custom_pretrain_e1.py │ │ ├── starcoder │ │ │ └── starcoder_full_custom_pretrain_e1.py │ │ ├── yi │ │ │ ├── yi_34b_full_custom_pretrain_e1.py │ │ │ └── yi_6b_full_custom_pretrain_e1.py │ │ └── zephyr │ │ │ └── zephyr_7b_beta_full_custom_pretrain_e1.py │ └── sft │ │ ├── baichuan │ │ ├── baichuan2_13b_chat_qlora_custom_sft_e1.py │ │ ├── baichuan2_7b_chat_qlora_custom_sft_e1.py │ │ ├── baichuan_13b_chat_qlora_custom_sft_e1.py │ │ └── baichuan_7b_qlora_custom_sft_e1.py │ │ ├── chatglm │ │ ├── chatglm2_6b_qlora_custom_sft_e1.py │ │ └── chatglm3_6b_qlora_custom_sft_e1.py │ │ ├── deepseek │ │ ├── deepseek_moe_16b_chat_qlora_custom_sft_e1.py │ │ └── deepseekcoder_6_7b_instruct_qlora_custom_sft_e1.py │ │ ├── gemma │ │ ├── gemma_2b_it_qlora_custom_sft_e1.py │ │ ├── gemma_2b_qlora_custom_sft_e1.py │ │ ├── gemma_7b_it_qlora_custom_sft_e1.py │ │ └── gemma_7b_qlora_custom_sft_e1.py │ │ ├── internlm │ │ ├── internlm2_chat_1_8b_qlora_custom_sft_e1.py │ │ ├── internlm2_chat_20b_qlora_custom_sft_e1.py │ │ └── internlm2_chat_7b_qlora_custom_sft_e1.py │ │ ├── llama │ │ ├── llama2_70b_qlora_custom_sft_e1.py │ │ └── llama2_7b_chat_qlora_custom_sft_e1.py │ │ ├── minicpm │ │ ├── minicpm3_4b_chat_qlora_custom_sft_e1.py │ │ ├── minicpm_1b_full_custom_pretrain_e1.py │ │ └── minicpm_2b_full_custom_pretrain_e1.py │ │ ├── mistral │ │ └── mistral_7b_full_finetune_custom_sft_e1.py │ │ ├── mixtral │ │ └── mixtral_8x7b_instruct_qlora_custom_sft_e1.py │ │ ├── qwen │ │ ├── qwen1_5_0_5b_chat_qlora_custom_sft_e1.py │ │ ├── qwen1_5_14b_chat_qlora_custom_sft_e1.py │ │ ├── qwen1_5_1_8b_chat_qlora_custom_sft_e1.py │ │ ├── qwen1_5_4b_chat_qlora_custom_sft_e1.py │ │ ├── qwen1_5_72b_chat_qlora_custom_sft_e1.py │ │ ├── qwen1_5_7b_chat_qlora_custom_sft_e1.py │ │ ├── qwen_1_8b_chat_qlora_custom_sft_e1.py │ │ ├── qwen_72b_qlora_custom_sft_e1.py │ │ └── qwen_7b_chat_qlora_custom_sft_e1.py │ │ ├── starcoder │ │ └── starcoder_qlora_custom_sft_e1.py │ │ ├── yi │ │ ├── yi_34b_qlora_custom_sft_e1.py │ │ └── yi_6b_qlora_custom_sft_e1.py │ │ └── zephyr │ │ └── zephyr_7b_beta_qlora_custom_sft_e1.py ├── deepseek │ ├── README.md │ ├── deepseek_coder_6_7b_base │ │ └── deepseek_coder_6_7b_base_qlora_code_alpaca_e3.py │ ├── deepseek_coder_6_7b_instruct │ │ └── deepseekcoder_6_7b_instruct_qlora_code_alpaca_e3.py │ ├── deepseek_moe_16b_base │ │ ├── deepseek_moe_16b_base_full_oasst1_e3.py │ │ └── deepseek_moe_16b_base_qlora_oasst1_e3.py │ ├── deepseek_moe_16b_chat │ │ ├── deepseek_moe_16b_chat_full_oasst1_e3.py │ │ └── deepseek_moe_16b_chat_qlora_oasst1_e3.py │ ├── deepseek_v2_chat │ │ └── deepseek_v2_chat_full_alpaca_e3.py │ └── deepseek_v2_lite_chat │ │ ├── deepseek_v2_lite_chat_full_alpaca_e3.py │ │ └── deepseek_v2_lite_chat_full_alpaca_e3_32k_varlen.py ├── deepspeed │ ├── deepspeed_zero1.json │ ├── deepspeed_zero2.json │ ├── deepspeed_zero2_offload.json │ ├── deepspeed_zero3.json │ └── deepspeed_zero3_offload.json ├── dpo │ ├── internlm │ │ ├── internlm2_chat_1_8b_dpo_full.py │ │ ├── internlm2_chat_1_8b_dpo_full_varlenattn.py │ │ 
├── internlm2_chat_1_8b_dpo_full_varlenattn_jsonl_dataset.py │ │ └── internlm2_chat_7b_dpo_qlora_varlenattn.py │ └── llama │ │ └── llama3_8b_instruct_dpo_qlora_varlenattn.py ├── gemma │ ├── gemma_2b │ │ ├── gemma_2b_full_alpaca_e3.py │ │ └── gemma_2b_qlora_alpaca_e3.py │ ├── gemma_2b_it │ │ ├── gemma_2b_it_full_alpaca_e3.py │ │ └── gemma_2b_it_qlora_alpaca_e3.py │ ├── gemma_7b │ │ ├── gemma_7b_full_alpaca_e3.py │ │ └── gemma_7b_qlora_alpaca_e3.py │ └── gemma_7b_it │ │ ├── gemma_7b_it_full_alpaca_e3.py │ │ └── gemma_7b_it_qlora_alpaca_e3.py ├── internlm │ ├── internlm2_1_8b │ │ ├── internlm2_1_8b_full_alpaca_e3.py │ │ └── internlm2_1_8b_qlora_alpaca_e3.py │ ├── internlm2_20b │ │ ├── internlm2_20b_full_finetune_custom_dataset_e1.py │ │ ├── internlm2_20b_qlora_alpaca_e3.py │ │ ├── internlm2_20b_qlora_arxiv_gentitle_e3.py │ │ ├── internlm2_20b_qlora_code_alpaca_e3.py │ │ ├── internlm2_20b_qlora_colorist_e5.py │ │ ├── internlm2_20b_qlora_lawyer_e3.py │ │ ├── internlm2_20b_qlora_msagent_react_e3_gpu8.py │ │ ├── internlm2_20b_qlora_oasst1_512_e3.py │ │ ├── internlm2_20b_qlora_oasst1_e3.py │ │ └── internlm2_20b_qlora_sql_e3.py │ ├── internlm2_5_chat_20b │ │ ├── internlm2_5_chat_20b_alpaca_e3.py │ │ └── internlm2_5_chat_20b_qlora_alpaca_e3.py │ ├── internlm2_5_chat_7b │ │ ├── internlm2_5_chat_7b_full_finetune_custom_dataset_e1.py │ │ ├── internlm2_5_chat_7b_qlora_alpaca_e3.py │ │ └── internlm2_5_chat_7b_qlora_oasst1_e3.py │ ├── internlm2_7b │ │ ├── internlm2_7b_full_finetune_custom_dataset_e1.py │ │ ├── internlm2_7b_full_finetune_custom_dataset_e1_sequence_parallel_4.py │ │ ├── internlm2_7b_qlora_alpaca_e3.py │ │ ├── internlm2_7b_qlora_arxiv_gentitle_e3.py │ │ ├── internlm2_7b_qlora_code_alpaca_e3.py │ │ ├── internlm2_7b_qlora_colorist_e5.py │ │ ├── internlm2_7b_qlora_json_e3.py │ │ ├── internlm2_7b_qlora_lawyer_e3.py │ │ ├── internlm2_7b_qlora_msagent_react_e3_gpu8.py │ │ ├── internlm2_7b_qlora_oasst1_512_e3.py │ │ ├── internlm2_7b_qlora_oasst1_e3.py │ │ ├── internlm2_7b_qlora_sql_e3.py │ │ ├── internlm2_7b_w_internevo_dataset.py │ │ ├── internlm2_7b_w_tokenized_dataset.py │ │ └── internlm2_7b_w_untokenized_dataset.py │ ├── internlm2_chat_1_8b │ │ ├── internlm2_chat_1_8b_full_alpaca_e3.py │ │ └── internlm2_chat_1_8b_qlora_alpaca_e3.py │ ├── internlm2_chat_20b │ │ ├── internlm2_chat_20b_full_finetune_custom_dataset_e1.py │ │ ├── internlm2_chat_20b_qlora_alpaca_e3.py │ │ ├── internlm2_chat_20b_qlora_code_alpaca_e3.py │ │ ├── internlm2_chat_20b_qlora_lawyer_e3.py │ │ ├── internlm2_chat_20b_qlora_oasst1_512_e3.py │ │ └── internlm2_chat_20b_qlora_oasst1_e3.py │ ├── internlm2_chat_7b │ │ ├── internlm2_chat_7b_full_finetune_custom_dataset_e1.py │ │ ├── internlm2_chat_7b_qlora_alpaca_e3.py │ │ ├── internlm2_chat_7b_qlora_code_alpaca_e3.py │ │ ├── internlm2_chat_7b_qlora_lawyer_e3.py │ │ ├── internlm2_chat_7b_qlora_oasst1_512_e3.py │ │ └── internlm2_chat_7b_qlora_oasst1_e3.py │ ├── internlm_20b │ │ ├── internlm_20b_qlora_alpaca_e3.py │ │ ├── internlm_20b_qlora_alpaca_enzh_e3.py │ │ ├── internlm_20b_qlora_alpaca_enzh_oasst1_e3.py │ │ ├── internlm_20b_qlora_alpaca_zh_e3.py │ │ ├── internlm_20b_qlora_arxiv_gentitle_e3.py │ │ ├── internlm_20b_qlora_code_alpaca_e3.py │ │ ├── internlm_20b_qlora_colorist_e5.py │ │ ├── internlm_20b_qlora_lawyer_e3.py │ │ ├── internlm_20b_qlora_msagent_react_e3_gpu8.py │ │ ├── internlm_20b_qlora_oasst1_512_e3.py │ │ ├── internlm_20b_qlora_oasst1_e3.py │ │ ├── internlm_20b_qlora_open_platypus_e3.py │ │ └── internlm_20b_qlora_sql_e3.py │ ├── internlm_7b │ │ ├── 
internlm_7b_full_alpaca_e3.py │ │ ├── internlm_7b_full_alpaca_enzh_e3.py │ │ ├── internlm_7b_full_alpaca_enzh_oasst1_e3.py │ │ ├── internlm_7b_full_alpaca_zh_e3.py │ │ ├── internlm_7b_full_intern_repo_dataset_template.py │ │ ├── internlm_7b_full_oasst1_e3.py │ │ ├── internlm_7b_qlora_alpaca_e3.py │ │ ├── internlm_7b_qlora_alpaca_enzh_e3.py │ │ ├── internlm_7b_qlora_alpaca_enzh_oasst1_e3.py │ │ ├── internlm_7b_qlora_alpaca_zh_e3.py │ │ ├── internlm_7b_qlora_arxiv_gentitle_e3.py │ │ ├── internlm_7b_qlora_code_alpaca_e3.py │ │ ├── internlm_7b_qlora_colorist_e5.py │ │ ├── internlm_7b_qlora_json_e3.py │ │ ├── internlm_7b_qlora_lawyer_e3.py │ │ ├── internlm_7b_qlora_medical_e1.py │ │ ├── internlm_7b_qlora_moss_sft_all_e1.py │ │ ├── internlm_7b_qlora_moss_sft_all_e2_gpu8.py │ │ ├── internlm_7b_qlora_moss_sft_plugins_e1.py │ │ ├── internlm_7b_qlora_msagent_react_e3_gpu8.py │ │ ├── internlm_7b_qlora_oasst1_512_e3.py │ │ ├── internlm_7b_qlora_oasst1_e3.py │ │ ├── internlm_7b_qlora_oasst1_e3_hf.py │ │ ├── internlm_7b_qlora_oasst1_mmlu_e3.py │ │ ├── internlm_7b_qlora_open_platypus_e3.py │ │ ├── internlm_7b_qlora_openorca_e1.py │ │ ├── internlm_7b_qlora_sql_e3.py │ │ └── internlm_7b_qlora_tiny_codes_e1.py │ ├── internlm_chat_20b │ │ ├── internlm_chat_20b_qlora_alpaca_e3.py │ │ ├── internlm_chat_20b_qlora_alpaca_enzh_e3.py │ │ ├── internlm_chat_20b_qlora_alpaca_enzh_oasst1_e3.py │ │ ├── internlm_chat_20b_qlora_alpaca_zh_e3.py │ │ ├── internlm_chat_20b_qlora_code_alpaca_e3.py │ │ ├── internlm_chat_20b_qlora_lawyer_e3.py │ │ ├── internlm_chat_20b_qlora_oasst1_512_e3.py │ │ ├── internlm_chat_20b_qlora_oasst1_e3.py │ │ └── internlm_chat_20b_qlora_open_platypus_e3.py │ └── internlm_chat_7b │ │ ├── internlm_chat_7b_qlora_alpaca_e3.py │ │ ├── internlm_chat_7b_qlora_alpaca_enzh_e3.py │ │ ├── internlm_chat_7b_qlora_alpaca_enzh_oasst1_e3.py │ │ ├── internlm_chat_7b_qlora_alpaca_zh_e3.py │ │ ├── internlm_chat_7b_qlora_arxiv_gentitle_e3.py │ │ ├── internlm_chat_7b_qlora_code_alpaca_e3.py │ │ ├── internlm_chat_7b_qlora_colorist_e5.py │ │ ├── internlm_chat_7b_qlora_lawyer_e3.py │ │ ├── internlm_chat_7b_qlora_medical_e1.py │ │ ├── internlm_chat_7b_qlora_oasst1_512_e3.py │ │ ├── internlm_chat_7b_qlora_oasst1_e3.py │ │ ├── internlm_chat_7b_qlora_open_platypus_e3.py │ │ ├── internlm_chat_7b_qlora_openorca_e1.py │ │ ├── internlm_chat_7b_qlora_sql_e3.py │ │ └── internlm_chat_7b_qlora_tiny_codes_e1.py ├── internvl │ ├── README.md │ ├── README_zh-CN.md │ ├── v1_5 │ │ ├── convert_to_official.py │ │ ├── internvl_v1_5_internlm2_26b_finetune.py │ │ ├── internvl_v1_5_internlm2_26b_lora_finetune.py │ │ ├── internvl_v1_5_internlm2_26b_qlora_finetune.py │ │ ├── internvl_v1_5_internlm2_2b_finetune.py │ │ ├── internvl_v1_5_internlm2_2b_lora_finetune.py │ │ ├── internvl_v1_5_internlm2_2b_qlora_finetune.py │ │ ├── internvl_v1_5_phi3_4b_finetune.py │ │ ├── internvl_v1_5_phi3_4b_lora_finetune.py │ │ └── internvl_v1_5_phi3_4b_qlora_finetune.py │ └── v2 │ │ ├── internvl_v2_internlm2_26b_finetune.py │ │ ├── internvl_v2_internlm2_26b_lora_finetune.py │ │ ├── internvl_v2_internlm2_26b_qlora_finetune.py │ │ ├── internvl_v2_internlm2_2b_finetune.py │ │ ├── internvl_v2_internlm2_2b_lora_finetune.py │ │ ├── internvl_v2_internlm2_2b_qlora_finetune.py │ │ ├── internvl_v2_internlm2_5_8b_finetune.py │ │ ├── internvl_v2_internlm2_5_8b_lora_finetune.py │ │ ├── internvl_v2_internlm2_5_8b_qlora_finetune.py │ │ ├── internvl_v2_phi3_4b_finetune.py │ │ ├── internvl_v2_phi3_4b_lora_finetune.py │ │ └── internvl_v2_phi3_4b_qlora_finetune.py ├── llama │ ├── 
llama2_70b │ │ ├── llama2_70b_full_wizardlm_e1.py │ │ ├── llama2_70b_int8_lora_open_platypus_e1.py │ │ ├── llama2_70b_int8_lora_open_platypus_e1_hf.py │ │ ├── llama2_70b_qlora_open_platypus_e1.py │ │ └── llama2_70b_qlora_open_platypus_e1_hf.py │ ├── llama2_7b │ │ ├── llama2_7b_full_pgbooks_400iters_sp1.py │ │ ├── llama2_7b_full_pgbooks_400iters_sp4.py │ │ ├── llama2_7b_full_wizardlm_e1.py │ │ ├── llama2_7b_qlora_alpaca_e3.py │ │ ├── llama2_7b_qlora_alpaca_enzh_e3.py │ │ ├── llama2_7b_qlora_alpaca_enzh_oasst1_e3.py │ │ ├── llama2_7b_qlora_alpaca_zh_e3.py │ │ ├── llama2_7b_qlora_arxiv_gentitle_e3.py │ │ ├── llama2_7b_qlora_code_alpaca_e3.py │ │ ├── llama2_7b_qlora_colorist_e5.py │ │ ├── llama2_7b_qlora_lawyer_e3.py │ │ ├── llama2_7b_qlora_medical_e1.py │ │ ├── llama2_7b_qlora_moss_sft_all_e1.py │ │ ├── llama2_7b_qlora_moss_sft_all_e2_gpu8.py │ │ ├── llama2_7b_qlora_moss_sft_plugins_e1.py │ │ ├── llama2_7b_qlora_msagent_react_e3_gpu8.py │ │ ├── llama2_7b_qlora_oasst1_512_e3.py │ │ ├── llama2_7b_qlora_oasst1_e3.py │ │ ├── llama2_7b_qlora_open_platypus_e3.py │ │ ├── llama2_7b_qlora_openorca_e1.py │ │ ├── llama2_7b_qlora_sql_e3.py │ │ └── llama2_7b_qlora_tiny_codes_e1.py │ ├── llama2_7b_chat │ │ ├── llama2_7b_chat_qlora_alpaca_e3.py │ │ ├── llama2_7b_chat_qlora_alpaca_enzh_e3.py │ │ ├── llama2_7b_chat_qlora_alpaca_enzh_oasst1_e3.py │ │ ├── llama2_7b_chat_qlora_alpaca_zh_e3.py │ │ ├── llama2_7b_chat_qlora_arxiv_gentitle_e3.py │ │ ├── llama2_7b_chat_qlora_code_alpaca_e3.py │ │ ├── llama2_7b_chat_qlora_colorist_e5.py │ │ ├── llama2_7b_chat_qlora_lawyer_e3.py │ │ ├── llama2_7b_chat_qlora_medical_e1.py │ │ ├── llama2_7b_chat_qlora_oasst1_512_e3.py │ │ ├── llama2_7b_chat_qlora_oasst1_e3.py │ │ ├── llama2_7b_chat_qlora_open_platypus_e3.py │ │ ├── llama2_7b_chat_qlora_openorca_e1.py │ │ ├── llama2_7b_chat_qlora_sql_e3.py │ │ └── llama2_7b_chat_qlora_tiny_codes_e1.py │ ├── llama3_70b_instruct │ │ └── llama3_70b_instruct_qlora_alpaca_e3_2k_gpu8.py │ ├── llama3_8b │ │ ├── README.md │ │ └── llama3_8b_full_alpaca_e3.py │ ├── llama3_8b_instruct │ │ ├── llama3_8b_instruct_full_alpaca_e3.py │ │ └── llama3_8b_instruct_qlora_alpaca_e3.py │ └── llama_7b │ │ ├── llama_7b_qlora_alpaca_e3.py │ │ ├── llama_7b_qlora_alpaca_enzh_e3.py │ │ ├── llama_7b_qlora_alpaca_enzh_oasst1_e3.py │ │ ├── llama_7b_qlora_alpaca_zh_e3.py │ │ ├── llama_7b_qlora_arxiv_gentitle_e3.py │ │ ├── llama_7b_qlora_code_alpaca_e3.py │ │ ├── llama_7b_qlora_colorist_e5.py │ │ ├── llama_7b_qlora_lawyer_e3.py │ │ ├── llama_7b_qlora_medical_e1.py │ │ ├── llama_7b_qlora_moss_sft_all_e1.py │ │ ├── llama_7b_qlora_moss_sft_all_e2_gpu8.py │ │ ├── llama_7b_qlora_moss_sft_plugins_e1.py │ │ ├── llama_7b_qlora_oasst1_512_e3.py │ │ ├── llama_7b_qlora_oasst1_e3.py │ │ ├── llama_7b_qlora_open_platypus_e3.py │ │ ├── llama_7b_qlora_openorca_e1.py │ │ ├── llama_7b_qlora_sql_e3.py │ │ └── llama_7b_qlora_tiny_codes_e1.py ├── llama_speed_benchmark │ ├── llama2_70b │ │ ├── llama2_70b_full_alpaca_enzh_128k_sp8.py │ │ ├── llama2_70b_full_alpaca_enzh_256k_sp16.py │ │ ├── llama2_70b_full_alpaca_enzh_32k_sp4.py │ │ └── llama2_70b_full_alpaca_enzh_8k_sp1.py │ ├── llama2_7b │ │ ├── llama2_7b_full_alpaca_enzh_128k_sp8.py │ │ ├── llama2_7b_full_alpaca_enzh_1M_sp16.py │ │ ├── llama2_7b_full_alpaca_enzh_256k_sp8.py │ │ ├── llama2_7b_full_alpaca_enzh_32k_sp1.py │ │ └── llama2_7b_full_alpaca_enzh_8k_sp1.py │ └── yi_34b │ │ ├── yi_34b_200k_full_alpaca_enzh_128k_sp8.py │ │ ├── yi_34b_200k_full_alpaca_enzh_256k_sp8.py │ │ ├── yi_34b_200k_full_alpaca_enzh_32k_sp2.py │ │ └── 
yi_34b_200k_full_alpaca_enzh_8k_sp1.py ├── llava │ ├── README.md │ ├── README_zh-CN.md │ ├── internlm2_chat_1_8b_clip_vit_large_p14_336 │ │ ├── finetune │ │ │ └── llava_internlm2_chat_1_8b_qlora_clip_vit_large_p14_336_lora_e1_gpu8_finetune.py │ │ └── pretrain │ │ │ └── llava_internlm2_chat_1_8b_clip_vit_large_p14_336_e1_gpu8_pretrain.py │ ├── internlm2_chat_20b_clip_vit_large_p14_336 │ │ ├── finetune │ │ │ ├── llava_internlm2_chat_20b_clip_vit_large_p14_336_e1_gpu8_finetune.py │ │ │ └── llava_internlm2_chat_20b_qlora_clip_vit_large_p14_336_lora_e1_gpu8_finetune.py │ │ └── pretrain │ │ │ └── llava_internlm2_chat_20b_clip_vit_large_p14_336_e1_gpu8_pretrain.py │ ├── internlm2_chat_7b_clip_vit_large_p14_336 │ │ ├── finetune │ │ │ ├── llava_internlm2_chat_7b_clip_vit_large_p14_336_e1_gpu8_finetune.py │ │ │ └── llava_internlm2_chat_7b_qlora_clip_vit_large_p14_336_lora_e1_gpu8_finetune.py │ │ └── pretrain │ │ │ └── llava_internlm2_chat_7b_clip_vit_large_p14_336_e1_gpu8_pretrain.py │ ├── internlm_chat_7b_clip_vit_large_p14_336 │ │ ├── finetune │ │ │ └── llava_internlm_chat_7b_qlora_clip_vit_large_p14_336_lora_e1_gpu8_finetune.py │ │ └── pretrain │ │ │ └── llava_internlm_chat_7b_clip_vit_large_p14_336_e1_gpu8_pretrain.py │ ├── llama3_70b_instruct_clip_vit_large_p14_336 │ │ └── pretrain │ │ │ └── llava_llama3_70b_instruct_quant_clip_vit_large_p14_336_e1_gpu8_pretrain.py │ ├── llama3_8b_instruct_clip_vit_large_p14_336 │ │ ├── README.md │ │ ├── convert_xtuner_weights_to_hf.py │ │ ├── convert_xtuner_weights_to_llava.py │ │ ├── finetune │ │ │ ├── llava_llama3_8b_instruct_full_clip_vit_large_p14_336_e1_gpu8_finetune.py │ │ │ ├── llava_llama3_8b_instruct_full_clip_vit_large_p14_336_lora_e1_gpu8_finetune.py │ │ │ ├── llava_llama3_8b_instruct_full_clip_vit_large_p14_336_lora_e1_gpu8_internvl_finetune.py │ │ │ └── llava_llama3_8b_instruct_qlora_clip_vit_large_p14_336_e1_gpu1_finetune.py │ │ └── pretrain │ │ │ ├── llava_llama3_8b_instruct_clip_vit_large_p14_336_e1_gpu8_pretrain.py │ │ │ ├── llava_llama3_8b_instruct_clip_vit_large_p14_336_e1_gpu8_sharegpt4v_pretrain.py │ │ │ └── llava_llama3_8b_instruct_quant_clip_vit_large_p14_336_e1_gpu1_pretrain.py │ ├── official │ │ ├── llava_v15_13b │ │ │ ├── llava_v15_13b_finetune.py │ │ │ ├── llava_v15_13b_finetune_lora.py │ │ │ └── llava_v15_13b_pretrain.py │ │ └── llava_v15_7b │ │ │ ├── llava_v15_7b_finetune.py │ │ │ ├── llava_v15_7b_finetune_lora.py │ │ │ └── llava_v15_7b_pretrain.py │ ├── phi3_mini_4k_instruct_clip_vit_large_p14_336 │ │ ├── README.md │ │ ├── convert_phi_to_llama.py │ │ ├── convert_xtuner_weights_to_hf.py │ │ ├── convert_xtuner_weights_to_llava.py │ │ ├── finetune │ │ │ ├── llava_phi3_mini_4k_instruct_full_clip_vit_large_p14_336_e1_gpu8_finetune.py │ │ │ └── llava_phi3_mini_4k_instruct_full_clip_vit_large_p14_336_full_e2_gpu8_internvl_finetune.py │ │ └── pretrain │ │ │ ├── llava_phi3_mini_4k_instruct_clip_vit_large_p14_336_e1_gpu8_pretrain.py │ │ │ └── llava_phi3_mini_4k_instruct_clip_vit_large_p14_336_e1_gpu8_sharegpt4v_pretrain.py │ ├── vicuna_13b_v15_clip_vit_large_p14_336 │ │ ├── finetune │ │ │ └── llava_vicuna_13b_v15_qlora_clip_vit_large_p14_336_lora_e1_gpu8_finetune.py │ │ └── pretrain │ │ │ └── llava_vicuna_13b_v15_clip_vit_large_p14_336_e1_gpu8_pretrain.py │ └── vicuna_7b_v15_clip_vit_large_p14_336 │ │ ├── finetune │ │ ├── llava_vicuna_7b_v15_qlora_clip_vit_large_p14_336_lora_e1_gpu8_finetune.py │ │ └── llava_vicuna_7b_v15_qlora_clip_vit_large_p14_336_lora_e1_gpu8_finetune_refcoco.py │ │ └── pretrain │ │ └── 
llava_vicuna_7b_v15_clip_vit_large_p14_336_e1_gpu8_pretrain.py ├── minicpm │ ├── 1_2b │ │ ├── minicpm_1b_dpo_qlora.py │ │ ├── minicpm_1b_full_alpaca_zh_e3.py │ │ ├── minicpm_1b_lora_alpaca_zh_e3.py │ │ ├── minicpm_1b_qlora_alpaca_enzh_e3.py │ │ └── minicpm_1b_qlora_alpaca_zh_e3.py │ ├── 2b │ │ ├── minicpm_2b_dpo_qlora.py │ │ ├── minicpm_2b_full_alpaca_zh_e3.py │ │ ├── minicpm_2b_lora_alpaca_zh_e3.py │ │ ├── minicpm_2b_qlora_alpaca_enzh_e3.py │ │ └── minicpm_2b_qlora_alpaca_zh_e3.py │ └── minicpm3_4b │ │ ├── minicpm3_4b_dpo_qlora.py │ │ └── minicpm3_4b_full_alpaca_zh_e3.py ├── mistral │ ├── mistral_7b_full_finetune_custom_dataset_e1.py │ ├── mistral_7b_qlora_skypile_pretrain_e1.py │ ├── mistral_7b_w_tokenized_dataset.py │ └── mistral_7b_w_untokenized_dataset.py ├── mixtral │ ├── README.md │ ├── mixtral_8x7b │ │ ├── mixtral_8x7b_full_oasst1_e3.py │ │ └── mixtral_8x7b_qlora_oasst1_e3.py │ └── mixtral_8x7b_instruct │ │ ├── mixtral_8x7b_instruct_full_oasst1_e3.py │ │ └── mixtral_8x7b_instruct_qlora_oasst1_e3.py ├── orpo │ ├── internlm │ │ ├── internlm2_chat_1_8b_orpo_full.py │ │ ├── internlm2_chat_1_8b_orpo_full_varlenattn.py │ │ ├── internlm2_chat_1_8b_orpo_full_varlenattn_jsonl_dataset.py │ │ └── internlm2_chat_7b_orpo_qlora_varlenattn_ultrafeedback_e5.py │ └── llama │ │ └── llama3_8b_instruct_orpo_qlora_varlenattn_ultrafeedback_e5.py ├── phi │ └── phi3 │ │ ├── phi3_mini_128k_instruct_full_alpaca_e3.py │ │ ├── phi3_mini_128k_instruct_qlora_alpaca_e3.py │ │ ├── phi3_mini_4k_instruct_full_alpaca_e3.py │ │ └── phi3_mini_4k_instruct_qlora_alpaca_e3.py ├── qwen │ ├── qwen1 │ │ ├── qwen_1_8b │ │ │ ├── qwen_1_8b_qlora_alpaca_e3.py │ │ │ ├── qwen_1_8b_qlora_alpaca_enzh_e3.py │ │ │ ├── qwen_1_8b_qlora_alpaca_enzh_oasst1_e3.py │ │ │ ├── qwen_1_8b_qlora_alpaca_zh_e3.py │ │ │ └── qwen_1_8b_qlora_code_alpaca_e3.py │ │ ├── qwen_1_8b_chat │ │ │ ├── qwen_1_8b_chat_qlora_alpaca_e3.py │ │ │ ├── qwen_1_8b_chat_qlora_alpaca_enzh_e3.py │ │ │ ├── qwen_1_8b_chat_qlora_alpaca_enzh_oasst1_e3.py │ │ │ ├── qwen_1_8b_chat_qlora_alpaca_zh_e3.py │ │ │ └── qwen_1_8b_chat_qlora_code_alpaca_e3.py │ │ ├── qwen_72b │ │ │ ├── qwen_72b_qlora_alpaca_e3.py │ │ │ ├── qwen_72b_qlora_alpaca_enzh_e3.py │ │ │ ├── qwen_72b_qlora_alpaca_enzh_oasst1_e3.py │ │ │ ├── qwen_72b_qlora_alpaca_zh_e3.py │ │ │ └── qwen_72b_qlora_code_alpaca_e3.py │ │ ├── qwen_7b │ │ │ ├── qwen_7b_qlora_alpaca_e3.py │ │ │ ├── qwen_7b_qlora_alpaca_enzh_e3.py │ │ │ ├── qwen_7b_qlora_alpaca_enzh_oasst1_e3.py │ │ │ ├── qwen_7b_qlora_alpaca_zh_e3.py │ │ │ ├── qwen_7b_qlora_arxiv_gentitle_e3.py │ │ │ ├── qwen_7b_qlora_code_alpaca_e3.py │ │ │ ├── qwen_7b_qlora_colorist_e5.py │ │ │ ├── qwen_7b_qlora_lawyer_e3.py │ │ │ ├── qwen_7b_qlora_medical_e1.py │ │ │ ├── qwen_7b_qlora_moss_sft_all_e1.py │ │ │ ├── qwen_7b_qlora_moss_sft_all_e2_gpu8.py │ │ │ ├── qwen_7b_qlora_moss_sft_plugins_e1.py │ │ │ ├── qwen_7b_qlora_oasst1_512_e3.py │ │ │ ├── qwen_7b_qlora_oasst1_e3.py │ │ │ ├── qwen_7b_qlora_open_platypus_e3.py │ │ │ ├── qwen_7b_qlora_openorca_e1.py │ │ │ ├── qwen_7b_qlora_sql_e3.py │ │ │ └── qwen_7b_qlora_tiny_codes_e1.py │ │ └── qwen_7b_chat │ │ │ ├── qwen_7b_chat_qlora_alpaca_e3.py │ │ │ ├── qwen_7b_chat_qlora_alpaca_enzh_e3.py │ │ │ ├── qwen_7b_chat_qlora_alpaca_enzh_oasst1_e3.py │ │ │ ├── qwen_7b_chat_qlora_alpaca_zh_e3.py │ │ │ ├── qwen_7b_chat_qlora_arxiv_gentitle_e3.py │ │ │ ├── qwen_7b_chat_qlora_code_alpaca_e3.py │ │ │ ├── qwen_7b_chat_qlora_colorist_e5.py │ │ │ ├── qwen_7b_chat_qlora_lawyer_e3.py │ │ │ ├── qwen_7b_chat_qlora_medical_e1.py │ │ │ ├── 
qwen_7b_chat_qlora_oasst1_512_e3.py │ │ │ ├── qwen_7b_chat_qlora_oasst1_e3.py │ │ │ ├── qwen_7b_chat_qlora_open_platypus_e3.py │ │ │ ├── qwen_7b_chat_qlora_openorca_e1.py │ │ │ ├── qwen_7b_chat_qlora_sql_e3.py │ │ │ └── qwen_7b_chat_qlora_tiny_codes_e1.py │ └── qwen1_5 │ │ ├── qwen1_5_0_5b │ │ ├── qwen1_5_0_5b_full_alpaca_e3.py │ │ └── qwen1_5_0_5b_qlora_alpaca_e3.py │ │ ├── qwen1_5_0_5b_chat │ │ ├── qwen1_5_0_5b_chat_full_alpaca_e3.py │ │ └── qwen1_5_0_5b_chat_qlora_alpaca_e3.py │ │ ├── qwen1_5_110b │ │ ├── qwen1_5_110b_full_alpaca_e3.py │ │ └── qwen1_5_110b_qlora_alpaca_e3.py │ │ ├── qwen1_5_110b_chat │ │ ├── README.md │ │ ├── qwen1_5_110b_chat_full_alpaca_e3.py │ │ ├── qwen1_5_110b_chat_qlora_alpaca_e3.py │ │ └── qwen1_5_110b_chat_qlora_alpaca_e3_16k_2gpus.py │ │ ├── qwen1_5_14b │ │ ├── qwen1_5_14b_full_alpaca_e3.py │ │ └── qwen1_5_14b_qlora_alpaca_e3.py │ │ ├── qwen1_5_14b_chat │ │ ├── qwen1_5_14b_chat_full_alpaca_e3.py │ │ └── qwen1_5_14b_chat_qlora_alpaca_e3.py │ │ ├── qwen1_5_1_8b │ │ ├── qwen1_5_1_8b_full_alpaca_e3.py │ │ └── qwen1_5_1_8b_qlora_alpaca_e3.py │ │ ├── qwen1_5_1_8b_chat │ │ ├── qwen1_5_1_8b_chat_full_alpaca_e3.py │ │ └── qwen1_5_1_8b_chat_qlora_alpaca_e3.py │ │ ├── qwen1_5_4b │ │ ├── qwen1_5_4b_full_alpaca_e3.py │ │ ├── qwen1_5_4b_qlora_alpaca_e3.py │ │ └── qwen1_5_4b_qlora_alpaca_e3_openmind.py │ │ ├── qwen1_5_4b_chat │ │ ├── qwen1_5_4b_chat_full_alpaca_e3.py │ │ └── qwen1_5_4b_chat_qlora_alpaca_e3.py │ │ ├── qwen1_5_72b │ │ ├── qwen1_5_72b_full_alpaca_e3.py │ │ └── qwen1_5_72b_qlora_alpaca_e3.py │ │ ├── qwen1_5_72b_chat │ │ ├── qwen1_5_72b_chat_full_alpaca_e3.py │ │ └── qwen1_5_72b_chat_qlora_alpaca_e3.py │ │ ├── qwen1_5_7b │ │ ├── qwen1_5_7b_full_alpaca_e3.py │ │ └── qwen1_5_7b_qlora_alpaca_e3.py │ │ └── qwen1_5_7b_chat │ │ ├── qwen1_5_7b_chat_full_alpaca_e3.py │ │ └── qwen1_5_7b_chat_qlora_alpaca_e3.py ├── qwen_moe │ └── qwen1_5 │ │ └── qwen1_5_moe_a2_7_b_chat │ │ └── qwen1_5_moe_a2_7_b_chat_full_alpaca_e3.py ├── reward_model │ ├── internlm │ │ ├── internlm2_chat_1_8b_reward_full_ultrafeedback.py │ │ ├── internlm2_chat_1_8b_reward_full_varlenattn_jsonl_dataset.py │ │ ├── internlm2_chat_1_8b_reward_full_varlenattn_ultrafeedback.py │ │ └── internlm2_chat_1_8b_reward_qlora_varlenattn_ultrafeedback.py │ └── llama │ │ └── llama3_8b_instruct_reward_full_varlenattn_ultrafeedback.py ├── starcoder │ └── starcoder_qlora_stack_exchange_example.py ├── yi │ ├── yi_34b │ │ └── yi_34b_qlora_alpaca_enzh_e3.py │ └── yi_6b │ │ └── yi_6b_qlora_alpaca_enzh_e3.py └── zephyr │ └── zephyr_7b_beta_qlora_alpaca_e3.py ├── dataset ├── __init__.py ├── collate_fns │ ├── __init__.py │ ├── default_collate_fn.py │ ├── mmlu_collate_fn.py │ └── preference_collate_fn.py ├── concat_dataset.py ├── huggingface.py ├── intern_repo.py ├── internvl_dataset.py ├── json_dataset.py ├── llava.py ├── map_fns │ ├── __init__.py │ ├── dataset_map_fns │ │ ├── __init__.py │ │ ├── alpaca_map_fn.py │ │ ├── alpaca_zh_map_fn.py │ │ ├── arxiv_map_fn.py │ │ ├── code_alpaca_map_fn.py │ │ ├── colors_map_fn.py │ │ ├── crime_kg_assitant_map_fn.py │ │ ├── default_map_fn.py │ │ ├── law_reference_map_fn.py │ │ ├── llava_map_fn.py │ │ ├── medical_map_fn.py │ │ ├── msagent_map_fn.py │ │ ├── oasst1_map_fn.py │ │ ├── openai_map_fn.py │ │ ├── openorca_map_fn.py │ │ ├── pretrain_map_fn.py │ │ ├── sql_map_fn.py │ │ ├── stack_exchange_map_fn.py │ │ ├── tiny_codes_map_fn.py │ │ └── wizardlm_map_fn.py │ └── template_map_fn.py ├── modelscope.py ├── moss_sft.py ├── preference_dataset.py ├── refcoco_json.py ├── samplers │ ├── __init__.py │ 
├── intern_repo.py │ └── length_grouped.py └── utils.py ├── engine ├── __init__.py ├── _strategy │ ├── __init__.py │ └── deepspeed.py ├── hooks │ ├── __init__.py │ ├── dataset_info_hook.py │ ├── evaluate_chat_hook.py │ ├── hf_checkpoint_hook.py │ ├── throughput_hook.py │ └── varlen_attn_args_to_messagehub_hook.py └── runner │ ├── __init__.py │ └── loops.py ├── entry_point.py ├── evaluation ├── __init__.py └── metrics │ ├── __init__.py │ ├── mmlu_metric.py │ └── reward_metric.py ├── model ├── __init__.py ├── dpo.py ├── internvl.py ├── llava.py ├── modules │ ├── __init__.py │ ├── dispatch │ │ ├── __init__.py │ │ ├── attention.py │ │ ├── baichuan.py │ │ ├── cohere.py │ │ ├── deepseek_v2.py │ │ ├── internlm.py │ │ ├── internlm2.py │ │ ├── internlm3.py │ │ ├── llama.py │ │ ├── mistral.py │ │ ├── phi3.py │ │ ├── qwen2.py │ │ ├── triton_kernels │ │ │ ├── __init__.py │ │ │ ├── layer_norm.py │ │ │ ├── rms_norm.py │ │ │ └── rotary.py │ │ ├── utils.py │ │ └── yi.py │ └── projector │ │ ├── __init__.py │ │ ├── configuration_projector.py │ │ └── modeling_projector.py ├── orpo.py ├── reward.py ├── sft.py ├── transformers_models │ ├── __init__.py │ ├── deepseek_v2 │ │ ├── __init__.py │ │ ├── configuration_deepseek.py │ │ ├── modeling_deepseek.py │ │ └── tokenization_deepseek_fast.py │ └── mixtral │ │ ├── __init__.py │ │ ├── configuration_mixtral.py │ │ └── modeling_mixtral.py └── utils.py ├── parallel ├── __init__.py └── sequence │ ├── __init__.py │ ├── attention.py │ ├── comm.py │ ├── data_collate.py │ ├── reduce_loss.py │ ├── sampler.py │ └── setup_distributed.py ├── registry.py ├── tools ├── chat.py ├── check_custom_dataset.py ├── copy_cfg.py ├── data_preprocess │ ├── arxiv.py │ └── convert_refcoco.py ├── eval_refcoco.py ├── get_data_order.py ├── list_cfg.py ├── list_dataset_format.py ├── log_dataset.py ├── mmbench.py ├── model_converters │ ├── merge.py │ ├── modeling_internlm2_reward │ │ ├── __init__.py │ │ ├── configuration_internlm2.py │ │ └── modeling_internlm2.py │ ├── pth_to_hf.py │ └── split.py ├── plugins │ ├── __init__.py │ ├── api.py │ ├── calculate.py │ ├── search.py │ └── solve.py ├── process_untokenized_datasets.py ├── process_untokenized_datasets_legacy.py ├── process_untokenized_llava_data.py ├── test.py ├── tokenize_ftdp_datasets.py ├── train.py └── utils.py ├── utils ├── __init__.py ├── constants.py ├── device.py ├── fileio.py ├── handle_moe_load_and_save.py ├── stop_criteria.py ├── templates.py └── zero_to_any_dtype.py └── version.py /.github/workflows/deploy.yml: -------------------------------------------------------------------------------- 1 | name: deploy 2 | 3 | on: push 4 | 5 | concurrency: 6 | group: ${{ github.workflow }}-${{ github.ref }} 7 | cancel-in-progress: true 8 | 9 | jobs: 10 | build-n-publish: 11 | runs-on: ubuntu-latest 12 | if: startsWith(github.event.ref, 'refs/tags') 13 | steps: 14 | - uses: actions/checkout@v2 15 | - name: Set up Python 3.8 16 | uses: actions/setup-python@v2 17 | with: 18 | python-version: 3.8 19 | - name: Build XTuner 20 | run: | 21 | pip install wheel 22 | python setup.py sdist bdist_wheel 23 | - name: Publish distribution to PyPI 24 | run: | 25 | pip install twine 26 | twine upload dist/* -u __token__ -p ${{ secrets.pypi_password }} 27 | -------------------------------------------------------------------------------- /.github/workflows/lint.yml: -------------------------------------------------------------------------------- 1 | name: lint 2 | 3 | on: [push, pull_request] 4 | 5 | concurrency: 6 | group: ${{ github.workflow }}-${{ github.ref 
}} 7 | cancel-in-progress: true 8 | 9 | jobs: 10 | lint: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v2 14 | - name: Set up Python 3.8 15 | uses: actions/setup-python@v2 16 | with: 17 | python-version: 3.8 18 | - name: Install pre-commit hook 19 | run: | 20 | pip install pre-commit 21 | pre-commit install 22 | - name: Linting 23 | run: pre-commit run --all-files 24 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/*/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | # custom 107 | data/ 108 | data 109 | .vscode 110 | .idea 111 | .DS_Store 112 | *.pkl 113 | *.pkl.json 114 | *.log.json 115 | work_dirs/ 116 | 117 | # Pytorch 118 | *.pth 119 | *.py~ 120 | *.sh~ 121 | 122 | # srun 123 | *.out 124 | batchscript-* 125 | -------------------------------------------------------------------------------- /.owners.yml: -------------------------------------------------------------------------------- 1 | assign: 2 | issues: disabled 3 | pull_requests: disabled 4 | strategy: 5 | random 6 | # daily-shift-based 7 | schedule: 8 | '*/1 * * * *' 9 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | exclude: ^tests/data/|^xtuner/model/transformers_models/|^xtuner/tools/model_converters/modeling_internlm2_reward/|^xtuner/_lite/modelings/|^xtuner/_lite/accelerate/dispatches/huggingface/ 2 | repos: 3 | - repo: https://github.com/PyCQA/flake8 4 | rev: 5.0.4 5 | hooks: 6 | - id: flake8 7 | args: ["--max-line-length=119"] 8 | - repo: https://github.com/PyCQA/isort 9 | rev: 5.12.0 10 | hooks: 11 | - id: isort 12 | - repo: https://github.com/pre-commit/pre-commit-hooks 13 | rev: v5.0.0 14 | hooks: 15 | - 
id: check-yaml 16 | - id: requirements-txt-fixer 17 | - id: check-merge-conflict 18 | - id: fix-encoding-pragma 19 | args: ["--remove"] 20 | - id: mixed-line-ending 21 | args: ["--fix=lf"] 22 | - repo: https://github.com/codespell-project/codespell 23 | rev: v2.2.1 24 | hooks: 25 | - id: codespell 26 | - repo: https://github.com/executablebooks/mdformat 27 | rev: 0.7.9 28 | hooks: 29 | - id: mdformat 30 | args: ["--number"] 31 | additional_dependencies: 32 | - mdformat-openmmlab 33 | - mdformat_frontmatter 34 | - linkify-it-py 35 | exclude: 'docs/zh_cn/user_guides/sequence_parallel.md' 36 | - repo: https://github.com/myint/docformatter 37 | rev: v1.3.1 38 | hooks: 39 | - id: docformatter 40 | args: ["--in-place", "--wrap-descriptions", "119"] 41 | - repo: https://github.com/open-mmlab/pre-commit-hooks 42 | rev: v0.4.0 43 | hooks: 44 | - id: check-copyright 45 | args: ["xtuner", "--excludes", "xtuner/_lite/modelings/", "xtuner/model/transformers_models/"] 46 | - id: remove-improper-eol-in-cn-docs 47 | - repo: https://github.com/asottile/pyupgrade 48 | rev: v3.0.0 49 | hooks: 50 | - id: pyupgrade 51 | args: ["--py36-plus"] 52 | 53 | - repo: https://github.com/psf/black 54 | rev: 23.9.1 55 | hooks: 56 | - id: black 57 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include xtuner/configs *.py *.yml *.json 2 | recursive-include xtuner/tools *.sh *.py 3 | -------------------------------------------------------------------------------- /docs/en/.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | build: 4 | os: ubuntu-22.04 5 | tools: 6 | python: "3.8" 7 | 8 | formats: 9 | - epub 10 | 11 | python: 12 | install: 13 | - requirements: requirements/docs.txt 14 | 15 | sphinx: 16 | configuration: docs/en/conf.py 17 | -------------------------------------------------------------------------------- /docs/en/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/en/_static/css/readthedocs.css: -------------------------------------------------------------------------------- 1 | .header-logo { 2 | background-image: url("../image/logo.png"); 3 | background-size: 177px 40px; 4 | height: 40px; 5 | width: 177px; 6 | } 7 | -------------------------------------------------------------------------------- /docs/en/_static/image/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/xtuner/53f2429d8a4662c04a8a4a2dc5c941672f4d3bdd/docs/en/_static/image/logo.png -------------------------------------------------------------------------------- /docs/en/acceleration/benchmark.rst: -------------------------------------------------------------------------------- 1 | Benchmark 2 | ========= 3 | -------------------------------------------------------------------------------- /docs/en/acceleration/deepspeed.rst: -------------------------------------------------------------------------------- 1 | DeepSpeed 2 | ========= 3 | -------------------------------------------------------------------------------- /docs/en/acceleration/flash_attn.rst: -------------------------------------------------------------------------------- 1 | Flash Attention 2 | =============== 3 | -------------------------------------------------------------------------------- /docs/en/acceleration/hyper_parameters.rst: -------------------------------------------------------------------------------- 1 | HyperParameters 2 | =============== 3 | -------------------------------------------------------------------------------- /docs/en/acceleration/length_grouped_sampler.rst: -------------------------------------------------------------------------------- 1 | Length Grouped Sampler 2 | ====================== 3 | -------------------------------------------------------------------------------- /docs/en/acceleration/pack_to_max_length.rst: -------------------------------------------------------------------------------- 1 | Pack to Max Length 2 | ================== 3 | -------------------------------------------------------------------------------- /docs/en/acceleration/train_extreme_long_sequence.rst: -------------------------------------------------------------------------------- 1 | Train Extreme Long Sequence 2 | =========================== 3 | -------------------------------------------------------------------------------- /docs/en/acceleration/train_large_scale_dataset.rst: -------------------------------------------------------------------------------- 1 | Train Large-scale Dataset 2 | ========================= 3 | -------------------------------------------------------------------------------- /docs/en/acceleration/varlen_flash_attn.rst: -------------------------------------------------------------------------------- 1 | Varlen Flash Attention 2 | ====================== 3 | -------------------------------------------------------------------------------- /docs/en/chat/agent.md: -------------------------------------------------------------------------------- 1 | # Chat with Agent 2 | -------------------------------------------------------------------------------- /docs/en/chat/llm.md: -------------------------------------------------------------------------------- 1 | # Chat with LLM 2 | 
-------------------------------------------------------------------------------- /docs/en/chat/lmdeploy.md: -------------------------------------------------------------------------------- 1 | # Accelerate Chat with LMDeploy 2 | -------------------------------------------------------------------------------- /docs/en/chat/vlm.md: -------------------------------------------------------------------------------- 1 | # Chat with VLM 2 | -------------------------------------------------------------------------------- /docs/en/evaluation/hook.md: -------------------------------------------------------------------------------- 1 | # Evaluation during training 2 | -------------------------------------------------------------------------------- /docs/en/evaluation/mmbench.md: -------------------------------------------------------------------------------- 1 | # MMBench (VLM) 2 | -------------------------------------------------------------------------------- /docs/en/evaluation/mmlu.md: -------------------------------------------------------------------------------- 1 | # MMLU (LLM) 2 | -------------------------------------------------------------------------------- /docs/en/evaluation/opencompass.md: -------------------------------------------------------------------------------- 1 | # Evaluate with OpenCompass 2 | -------------------------------------------------------------------------------- /docs/en/get_started/installation.md: -------------------------------------------------------------------------------- 1 | # Installation 2 | 3 | In this section, we will show you how to install XTuner. 4 | 5 | ## Installation Process 6 | 7 | We recommend following the best practices below to install XTuner. 8 | In particular, we suggest using a conda virtual environment with Python 3.10. 9 | 10 | ### Best Practices 11 | 12 | **Step 0.** Create a Python 3.10 virtual environment using conda. 13 | 14 | ```shell 15 | conda create --name xtuner-env python=3.10 -y 16 | conda activate xtuner-env 17 | ``` 18 | 19 | **Step 1.** Install XTuner. 20 | 21 | Case a: Install XTuner via pip: 22 | 23 | ```shell 24 | pip install -U xtuner 25 | ``` 26 | 27 | Case b: Install XTuner with DeepSpeed integration: 28 | 29 | ```shell 30 | pip install -U 'xtuner[deepspeed]' 31 | ``` 32 | 33 | Case c: Install XTuner from the source code: 34 | 35 | ```shell 36 | git clone https://github.com/InternLM/xtuner.git 37 | cd xtuner 38 | pip install -e '.[all]' 39 | # "-e" installs the project in editable mode, so any local modifications to the code will take effect without reinstalling. 40 | ``` 41 | 42 | ## Verify the Installation 43 | 44 | To verify that XTuner is installed correctly, print the list of available configuration files. 45 | 46 | **Print Configuration Files:** Run `xtuner list-cfg` in the command line to confirm that the configuration files can be listed. 47 | 48 | ```shell 49 | xtuner list-cfg 50 | ``` 51 | 52 | You should see a list of XTuner configuration files, corresponding to the ones in [xtuner/configs](https://github.com/InternLM/xtuner/tree/main/xtuner/configs) in the source code. 53 | -------------------------------------------------------------------------------- /docs/en/get_started/overview.md: -------------------------------------------------------------------------------- 1 | # Overview 2 | 3 | This chapter introduces the framework and workflow of XTuner, and provides links to detailed tutorials.
4 | 5 | ## What is XTuner 6 | -------------------------------------------------------------------------------- /docs/en/internevo_migration/ftdp_dataset/Case1.rst: -------------------------------------------------------------------------------- 1 | Case 1 2 | ====== 3 | -------------------------------------------------------------------------------- /docs/en/internevo_migration/ftdp_dataset/Case2.rst: -------------------------------------------------------------------------------- 1 | Case 2 2 | ====== 3 | -------------------------------------------------------------------------------- /docs/en/internevo_migration/ftdp_dataset/Case3.rst: -------------------------------------------------------------------------------- 1 | Case 3 2 | ====== 3 | -------------------------------------------------------------------------------- /docs/en/internevo_migration/ftdp_dataset/Case4.rst: -------------------------------------------------------------------------------- 1 | Case 4 2 | ====== 3 | -------------------------------------------------------------------------------- /docs/en/internevo_migration/ftdp_dataset/ftdp.rst: -------------------------------------------------------------------------------- 1 | ftdp 2 | ==== 3 | -------------------------------------------------------------------------------- /docs/en/internevo_migration/internevo_migration.rst: -------------------------------------------------------------------------------- 1 | InternEVO Migration 2 | =================== 3 | -------------------------------------------------------------------------------- /docs/en/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo. 21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/en/models/supported.md: -------------------------------------------------------------------------------- 1 | # Supported Models 2 | -------------------------------------------------------------------------------- /docs/en/notes/changelog.md: -------------------------------------------------------------------------------- 1 | 14 | 15 | # Changelog 16 | 17 | ## v0.1.0 (2023.08.30) 18 | 19 | XTuner is released! 🔥🔥🔥 20 | 21 | ### Highlights 22 | 23 | - XTuner supports LLM fine-tuning on consumer-grade GPUs. The minimum GPU memory required for 7B LLM fine-tuning is only **8GB**. 24 | - XTuner supports various LLMs, datasets, algorithms and training pipelines. 25 | - Several fine-tuned adapters are released simultaneously, including various gameplays such as the colorist LLM, plugins-based LLM, and many more. 
For further details, please visit [XTuner on HuggingFace](https://huggingface.co/xtuner)! 26 | -------------------------------------------------------------------------------- /docs/en/preparation/pretrained_model.rst: -------------------------------------------------------------------------------- 1 | Pretrained Model 2 | ================ 3 | -------------------------------------------------------------------------------- /docs/en/preparation/prompt_template.rst: -------------------------------------------------------------------------------- 1 | Prompt Template 2 | =============== 3 | -------------------------------------------------------------------------------- /docs/en/switch_language.md: -------------------------------------------------------------------------------- 1 | ## English 2 | 3 | ## 简体中文 4 | -------------------------------------------------------------------------------- /docs/en/training/custom_agent_dataset.rst: -------------------------------------------------------------------------------- 1 | Custom Agent Dataset 2 | ==================== 3 | -------------------------------------------------------------------------------- /docs/en/training/custom_pretrain_dataset.rst: -------------------------------------------------------------------------------- 1 | Custom Pretrain Dataset 2 | ======================= 3 | -------------------------------------------------------------------------------- /docs/en/training/custom_sft_dataset.rst: -------------------------------------------------------------------------------- 1 | Custom SFT Dataset 2 | ================== 3 | -------------------------------------------------------------------------------- /docs/en/training/modify_settings.rst: -------------------------------------------------------------------------------- 1 | Modify Settings 2 | =============== 3 | -------------------------------------------------------------------------------- /docs/en/training/multi_modal_dataset.rst: -------------------------------------------------------------------------------- 1 | Multi-modal Dataset 2 | =================== 3 | -------------------------------------------------------------------------------- /docs/en/training/open_source_dataset.rst: -------------------------------------------------------------------------------- 1 | Open Source Datasets 2 | ==================== 3 | -------------------------------------------------------------------------------- /docs/en/training/visualization.rst: -------------------------------------------------------------------------------- 1 | Visualization 2 | ============= 3 | -------------------------------------------------------------------------------- /docs/zh_cn/.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | build: 4 | os: ubuntu-22.04 5 | tools: 6 | python: "3.8" 7 | 8 | formats: 9 | - epub 10 | 11 | python: 12 | install: 13 | - requirements: requirements/docs.txt 14 | 15 | sphinx: 16 | configuration: docs/zh_cn/conf.py 17 | -------------------------------------------------------------------------------- /docs/zh_cn/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 
9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/zh_cn/_static/image/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/xtuner/53f2429d8a4662c04a8a4a2dc5c941672f4d3bdd/docs/zh_cn/_static/image/logo.png -------------------------------------------------------------------------------- /docs/zh_cn/acceleration/flash_attn.rst: -------------------------------------------------------------------------------- 1 | .. _flash_attn: 2 | 3 | Flash Attention 4 | ================================================== 5 | 6 | Flash Attention (Flash Attention 2) 是一种用于加速 Transformer 模型中 Attention 计算,并减少其显存消耗的算法。XTuner 中 Flash Attention (Flash Attention 2) 的支持情况如下表所示: 7 | 8 | .. list-table:: 9 | :widths: 25 50 10 | :header-rows: 1 11 | 12 | * - 模型 13 | - Flash Attention 支持情况 14 | * - baichuan 1/2 15 | - ❌ 16 | * - chatglm 2/3 17 | - ❌ 18 | * - deepseek 19 | - ✅ 20 | * - gemma 21 | - ❌ 22 | * - internlm 1/2 23 | - ✅ 24 | * - llama 2 25 | - ✅ 26 | * - mistral 27 | - ✅ 28 | * - qwen 1/1.5 29 | - ✅ 30 | * - starcoder 31 | - ✅ 32 | * - yi 33 | - ✅ 34 | * - zephyr 35 | - ✅ 36 | 37 | .. note:: 38 | XTuner 会根据运行环境自动控制 Flash Attention 的使用情况 (见 `dispatch_modules `_): 39 | 40 | .. list-table:: 41 | :widths: 50 50 42 | :header-rows: 1 43 | 44 | * - 环境 45 | - Flash Attention 使用情况 46 | * - 安装 `flash attn `_ 47 | - Flash Attention 2 48 | * - 未安装 `flash attn `_ 且 PyTorch Version <= 1.13 49 | - No Flash Attention 50 | * - 未安装 `flash attn `_ 且 2.0 <= PyTorch Version <= 2.1 51 | - Flash Attention 1 52 | * - 未安装 `flash attn `_ 且 PyTorch Version >= 2.2 53 | - Flash Attention 2 54 | 55 | .. 
note:: 56 | 使用 XTuner 训练 QWen1/1.5 时若想使用 Flash Attention 加速,需要先安装 `flash attn `_ (参考 `flash attn 安装 `_,需要 cuda ) 57 | -------------------------------------------------------------------------------- /docs/zh_cn/acceleration/hyper_parameters.rst: -------------------------------------------------------------------------------- 1 | ===================== 2 | 调整加速策略 3 | ===================== 4 | 5 | 本节将会列举 XTuner 中会影响训练速度的配置项。 6 | 7 | 8 | max_length 9 | ------------------- 10 | 11 | ``max_length`` 表示在数据预处理过程中,单条数据长度超过 ``max_length`` 的部分会被截断,基本所有实验都会设置该项。 12 | 13 | pack_to_max_length 14 | --------------------------- 15 | 16 | ``pack_to_max_length`` 用于配置是否进行\ :ref:`数据集拼接 ` \ 。 17 | 18 | ``pack_to_max_length = True`` 表示在数据预处理过程中将多条短数据拼接为一条长度为 ``max_length`` 的长数据,该配置可以大幅提升训练速度。 19 | 20 | 若 ``pack_to_max_length = False``,则推荐将 ``batch_size`` 适度调大以保证训练的稳定性。 21 | 22 | use_varlen_attn 23 | --------------------------- 24 | 25 | ``use_varlen_attn`` 用于配置是否在训练过程中使用\ :ref:`Varlen Flash Attention ` \ 。 26 | 27 | 当 ``use_varlen_attn = True`` 时,要求 ``pack_to_max_length`` 也要设置为 True。在此情况下,每个 token 在注意力计算阶段仅会关注其所在短数据中的所有 tokens (而非整个序列)。 28 | 29 | 当 ``use_varlen_attn = False`` 时,每个 token 在注意力计算阶段会关注整个序列。 30 | 31 | max_position_embeddings 32 | --------------------------------- 33 | 34 | 当需要扩展模型上下文窗口的大小时,需要将 ``max_position_embeddings`` 设置为期望的上下文长度。 **需要保证 max_position_embeddings 不大于 max_length。**\ 35 | 36 | 假设需要将 Llama2-7B 模型支持的上下文长度自 4k 拓展为 32k: 37 | 38 | 1. 若训练数据集中存在较多长度接近 32k 的数据,则推荐 ``max_length = 32k, pack_to_max_length = False, use_varlen_attn = False, max_position_embeddings = 32k`` 这一配置 39 | 2. 若训练数据集中长度接近 32k 的数据量较少甚至没有时,则推荐 ``max_length = 32k, pack_to_max_length = True, use_varlen_attn = False, max_position_embeddings = 32k`` 这一配置 40 | 41 | sequence_parallel_size 42 | ------------------------------------------- 43 | 44 | 在使用序列并行策略训练超长序列时, ``sequence_parallel_size`` 个 GPUs 会共同计算一条长序列。而 ``accumulative_counts`` 则用于控制模型参数更新的频率。 45 | 46 | 47 | accumulative_counts 48 | ---------------------------------------------- 49 | 用于控制模型参数更新的频率;假设需要在 N 块 GPUs 上执行 ``batch_size_per_device = 1, max_length = 128k`` 的训练策略。当设置序列并行维度为 ``sequence_parallel_size`` 后,为了保证训练的等价性, ``accumulative_counts`` 需要设置为原来的 ``sequence_parallel_size`` 倍,因为 128k 长度的序列会被切分为 ``sequence_parallel_size`` 份后分发给 ``sequence_parallel_size`` 个 GPUs 进行训练, ``data_parallel_world_size`` 会变为原来的 :math:`\frac{1}{sequence\_parallel\_size}`。 50 | -------------------------------------------------------------------------------- /docs/zh_cn/acceleration/length_grouped_sampler.rst: -------------------------------------------------------------------------------- 1 | .. _length_grouped_sampler: 2 | 3 | 数据分组 4 | ======================== 5 | 6 | .. raw:: html 7 | 8 |
9 | 10 |
11 | 12 | 生成式大模型(例如LLM)的训练数据往往是不定长的,这就导致同一批次(batch)内的数据长短不一。为实现并行化训练,一种常见的做法是将同一批次的数据填充到最长长度。然而,这一填充(Pad)操作会导致训练的低效。如上图,假设数据内各样本的长度分别为 13 | 2、3、7、9,期望分为2个批次进行训练,那么如果使用默认的随机采样器(左侧),数据处理阶段会引入过多的填充数据,实际效率只有65.6%。 14 | 15 | 现阶段有两种技术方案可以解决 / 缓解这一问题(两者选其一即可,优先考虑 16 | **数据拼接技术**\ ): 17 | 18 | 1. 利用 19 | **数据拼接技术**\ ,将多条数据拼接至训练支持的最大长度。这一做法可以确保同一批次内的数据长度完全一致,进而避免了填充数据所导致的训练效率降低。具体可参考 20 | \ :ref:`数据拼接文档 ` \ 。 21 | 22 | :优点: 可以合并多个数据样本,显著降低训练 iter 数,加速效果好。 23 | 24 | :缺点: 随机合并的多个数据样本间会互相影响,进而影响训练效果(实际影响程度未知);数据进行了合并,丢失了一定数据随机性。 25 | 26 | 2. (本文)利用 27 | **基于数据长度分组的采样器**\ ,在构建批次数据时,基于实际长度进行排序,确保同一批次内的数据长度尽可能相近,进而尽可能减少填充的长度。如上图右侧,利用该采样器后,同样的数据效率将提升至87.5%。 28 | 29 | :优点: 每条数据依然独立存在(独立计算 30 | attention),避免数据拼接技术导致的数据样本间的互相影响;数据进行了分组,丢失了一定数据随机性。 31 | 32 | :缺点: 在数据样本长度比较一致的情况下,加速效果一般。 33 | 34 | 使用 ``LengthGroupedSampler`` 35 | ----------------------------------------- 36 | 37 | XTuner 中基于数据长度分组的采样器 的实现在 38 | `这里 `__\ 。用户可以通过在配置文件中修改 39 | ``train_dataloader`` 的 ``sampler`` 参数进行配置。以 40 | `internlm2_chat_7b_qlora_oasst1_512_e3 `__ 41 | 配置文件为例,其默认是使用随机的采样器,我们可以通过下列修改使其使用 42 | 基于数据长度分组的采样器: 43 | 44 | .. code:: diff 45 | 46 | - from mmengine.dataset import DefaultSampler 47 | + from xtuner.dataset.samplers import LengthGroupedSampler 48 | 49 | batch_size = 16 # per_device 50 | accumulative_counts = 1 51 | 52 | train_dataloader = dict( 53 | batch_size=batch_size, 54 | num_workers=dataloader_num_workers, 55 | dataset=train_dataset, 56 | - sampler=dict(type=DefaultSampler, shuffle=True), 57 | + sampler=dict( 58 | + type=LengthGroupedSampler, 59 | + length_property='length', 60 | + per_device_batch_size=batch_size * accumulative_counts), 61 | collate_fn=dict(type=default_collate_fn, use_varlen_attn=use_varlen_attn)) 62 | 63 | .. note:: 64 | 其中,\ ``length_property`` 65 | 需要传入获取数据集长度的“属性”,这一数值在通过 ``process_hf_dataset`` 66 | 构建数据集时会自动设置为 67 | ``'length'``\ (因此,如果使用自定义的数据类,请确保这一属性的正确设置)。 68 | -------------------------------------------------------------------------------- /docs/zh_cn/acceleration/pack_to_max_length.rst: -------------------------------------------------------------------------------- 1 | .. _pack_to_max_length: 2 | 3 | 数据拼接 4 | ========================= 5 | 6 | 简介 7 | --------- 8 | 9 | 对于大型语言模型(LLM)的输入而言,“数据集拼接” 这一概念指的是将多个 token 序列拼接成一个单独的输入。大量的数据集都存在一个特点,即其长度分布严重偏向较短的序列,而 Transformers 模型接收固定长度的输入。因此,在模型训练过程中,通常需要将每条数据 "Pad" 至当前 batch 最长序列的长度,而 "Pad Token" 往往是某个特定的无意义的 token。 10 | 11 | 将多条数据打包在一起可以不再需要使用 "Pad Token" 进行无意义的填充,减少计算资源的浪费,同时还可以保持模型作为具有固定大小输入的静态图表示的优点。 12 | 13 | 下表展示了 InternLM2 7B 模型在 Alpaca 数据集上使用不同数据集拼接策略进行训练的速度对比,如表所示,“数据集拼接”会大幅度提升训练效率: 14 | 15 | .. list-table:: 16 | :widths: 25 25 15 17 | :header-rows: 1 18 | 19 | * - 拼接策略 20 | - 每秒处理 token 数 21 | - 加速比 22 | * - 不使用 23 | - 362.9 24 | - 25 | * - 拼接至 2k 26 | - 2677.1 27 | - 7.38x 28 | * - 拼接至 4k 29 | - 3124.3 30 | - 8.61x 31 | * - 拼接至 8k 32 | - 3173.9 33 | - 8.76x 34 | * - 拼接至 16k 35 | - 2864.4 36 | - 7.89x 37 | * - 拼接至 32k 38 | - 2965.4 39 | - 8.17x 40 | 41 | 使用数据拼接 42 | --------------------------- 43 | 44 | XTuner 中提供的 config 文件中默认使用了“数据集拼接”这一功能,可以通过设置 ``max_length`` 字段来调整数据拼接长度。例如可通过以下方式将拼接长度调整为 32k : 45 | 46 | .. 
code-block:: diff 47 | 48 | ####################################################################### 49 | # PART 1 Settings # 50 | ####################################################################### 51 | - max_length = 2048 52 | + max_length = 32768 53 | pack_to_max_length = True 54 | 55 | ####################################################################### 56 | # PART 3 Dataset & Dataloader # 57 | ####################################################################### 58 | train_dataset = dict( 59 | max_length=max_length, 60 | pack_to_max_length=pack_to_max_length, 61 | ...) 62 | 63 | .. tip:: 64 | 若不想使用数据拼接,在 config 中将 ``pack_to_max_length`` 设为 False 即可, 65 | 此时 config 中的 ``max_length`` 字段表示单条数据最长的 token 数,整个 batch 会被 pad 成当前 batch 内最长的一条数据的长度。 66 | 67 | .. tip:: 68 | 在不使用数据拼接策略时,XTuner 还提供了一种数据集采样策略 (``LengthGroupedSampler``),可以保证在一个 batch 中的数据长度尽可能接近, 69 | 以减少 Pad 对计算资源的浪费。详细用法请参考 70 | \ :ref:`LengthGroupedSampler 文档 ` \ 。 71 | -------------------------------------------------------------------------------- /docs/zh_cn/chat/agent.md: -------------------------------------------------------------------------------- 1 | # 智能体模型对话 2 | -------------------------------------------------------------------------------- /docs/zh_cn/chat/llm.md: -------------------------------------------------------------------------------- 1 | # 语言模型对话 2 | -------------------------------------------------------------------------------- /docs/zh_cn/chat/lmdeploy.md: -------------------------------------------------------------------------------- 1 | # 使用 LMDeploy 优化推理速度 2 | -------------------------------------------------------------------------------- /docs/zh_cn/chat/vlm.md: -------------------------------------------------------------------------------- 1 | # 视觉-语言模型对话 2 | -------------------------------------------------------------------------------- /docs/zh_cn/dpo/overview.md: -------------------------------------------------------------------------------- 1 | ## DPO 介绍 2 | 3 | ### 简介 4 | 5 | DPO(Direct Preference Optimization,直接偏好优化)是一种在大语言模型训练中用于直接优化模型偏好的方法。与传统的强化学习方法不同,DPO 直接使用人类偏好数据进行模型优化,从而提高生成内容的质量,使其更符合人类偏好。DPO 利用人类偏好数据,直接对模型进行优化,省略了训练 Reward Model 的训练过程,与 PPO 相比进一步省去了 Critic Model,不但避免了复杂的强化学习算法,减少了训练开销,同时还提高了训练效率。 6 | 7 | DPO 拥有大量的衍生算法,它们对 DPO 的损失函数进行了一定程度上的改进,我们在 XTuner 中除了 DPO 还实现了[Identity Preference Optimisation (IPO)](https://huggingface.co/papers/2310.12036),[Kahneman-Tversky Optimisation (KTO)](https://github.com/ContextualAI/HALOs)等论文中的损失函数,如需使用这些算法,请参考[修改 DPO 配置](./modify_settings.md)章节。我们也提供了一些[示例配置](https://github.com/InternLM/xtuner/tree/main/xtuner/configs/dpo)用于参考。 8 | 9 | 除了 DPO 之外,还出现了如 [ORPO](https://arxiv.org/abs/2403.07691) 等无需参考模型的对齐算法。ORPO 采用了对数比值(odds ratio)的概念来优化模型,通过在模型训练过程中惩罚那些被拒绝的样本,从而更有效地适应被选择的样本。ORPO 消除了对参考模型的依赖,使得训练过程更加简化且高效。XTuner 中 ORPO 的训练方式与 DPO 非常类似,我们提供了一些 ORPO 的[示例配置](https://github.com/InternLM/xtuner/tree/main/xtuner/configs/orpo),用户可以参考 DPO 的教程对配置进行修改。 10 | 11 | ### XTuner 中 DPO 训练的优势 12 | 13 | XTuner 中的 DPO 训练具备以下显著优势: 14 | 15 | 1. **支持最新的算法**:XTuner除了支持标准的 DPO 之外,还支持了大量的衍生算法,同时也支持ORPO等不依赖参考模型的高效算法。 16 | 17 | 2. **减少显存浪费**:由于偏好数据中的 chosen 和 rejected 数据通常存在长度上的差异,因此在训练数据的拼接时会存在填充(padding token),造成显存浪费。在 XTuner 中,基于 Flash Attention2 中的[变长注意力](https://xtuner.readthedocs.io/zh-cn/latest/acceleration/varlen_flash_attn.html)功能,我们在训练过程中通过将偏好数据打包到同一个序列中,显著减少了由于 padding token 带来的显存浪费。这不仅提高了显存的利用效率,还使得在相同硬件条件下可以训练更大的模型或处理更多的数据。 18 | 19 | ![img](../reward_model/images/var_len_atten.png) 20 | 21 | 3. 
**高效训练**:借助 XTuner 的 QLoRA 训练功能,参考模型能够被转化为移除LoRA适配器的语言模型,从而省去了参考模型权重的显存占用,大幅降低了 DPO 的训练开销。 22 | 23 | 4. **长文本训练**: 借助 XTuner 的序列并行功能,能够对长文本数据进行训练。 24 | 25 | ### 开始训练 26 | 27 | 请参阅[快速上手](./quick_start.md)来了解最基本的概念,若希望了解更多训练参数配置相关的内容,请参考[修改DPO配置](./modify_settings.md)章节。 28 | -------------------------------------------------------------------------------- /docs/zh_cn/dpo/quick_start.md: -------------------------------------------------------------------------------- 1 | ## DPO 快速上手 2 | 3 | 在本章节中,我们将介绍如何使用 XTuner 训练 1.8B 的 DPO(Direct Preference Optimization)模型,以帮助您快速上手。 4 | 5 | ### 准备预训练模型权重 6 | 7 | 我们使用经过 SFT 的语言模型[InternLM2-chat-1.8b-sft](https://huggingface.co/internlm/internlm2-chat-1_8b-sft)作为 DPO 模型的初始化模型来进行偏好对齐。 8 | 9 | 在训练配置文件中设置`pretrained_model_name_or_path = 'internlm/internlm2-chat-1_8b-sft'`,则会在启动训练时自动下载模型文件。若您需要手动下载模型权重,那么请参考[准备预训练模型权重](https://xtuner.readthedocs.io/zh-cn/latest/preparation/pretrained_model.html)章节,其中详细说明了如何从 Huggingface 或者是 Modelscope 下载模型权重的方法。这里我们附上模型的 HuggingFace 链接与 ModelScope 链接: 10 | 11 | - HuggingFace 链接位于:https://huggingface.co/internlm/internlm2-chat-1_8b-sft 12 | - ModelScope 链接位于:https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-1_8b-sft/summary 13 | 14 | ### 准备训练数据 15 | 16 | 在本教程中使用 Huggingface 上的[mlabonne/orpo-dpo-mix-40k](https://huggingface.co/datasets/mlabonne/orpo-dpo-mix-40k)数据集作为演示, 17 | 18 | ```python 19 | train_dataset = dict( 20 | type=build_preference_dataset, 21 | dataset=dict( 22 | type=load_dataset, 23 | path='mlabonne/orpo-dpo-mix-40k'), 24 | dataset_map_fn=orpo_dpo_mix_40k_map_fn, 25 | is_dpo=True, 26 | is_reward=False, 27 | ) 28 | ``` 29 | 30 | 在配置文件中使用以上配置,即可自动下载并处理该数据集。如果您希望使用其他 Huggingface 上的开源数据集或是使用自定义的数据集,请参阅[偏好数据集](../reward_model/preference_data.md)章节。 31 | 32 | ### 准备配置文件 33 | 34 | XTuner 提供了多个开箱即用的配置文件,可以通过 `xtuner list-cfg` 查看。我们执行如下指令,以复制一个配置文件到当前目录。 35 | 36 | ```bash 37 | xtuner copy-cfg internlm2_chat_1_8b_dpo_full . 
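# 提示:也可先执行 `xtuner list-cfg -p dpo`,按关键字筛选查看 DPO 相关的内置配置(示例用法,非必需步骤)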
38 | ``` 39 | 40 | 打开复制后的配置文件,如果您选择自动下载模型和数据集,则无需修改配置。若您希望填入您预先下载的模型路径和数据集路径,请修改配置中的`pretrained_model_name_or_path`以及`train_dataset`中`dataset`的`path`参数。 41 | 42 | 更多的训练参数配置,请参阅[修改DPO训练配置](./modify_settings.md)章节。 43 | 44 | ### 启动训练 45 | 46 | 在完成上述操作后,便可以使用下面的指令启动训练任务了。 47 | 48 | ```bash 49 | # 单机单卡 50 | xtuner train ./internlm2_chat_1_8b_dpo_full_copy.py 51 | # 单机多卡 52 | NPROC_PER_NODE=${GPU_NUM} xtuner train ./internlm2_chat_1_8b_dpo_full_copy.py 53 | # slurm 集群 54 | srun ${SRUN_ARGS} xtuner train ./internlm2_chat_1_8b_dpo_full_copy.py --launcher slurm 55 | ``` 56 | 57 | ### 模型转换 58 | 59 | XTuner 已经集成好了将模型转换为 HuggingFace 格式的工具,我们只需要执行 60 | 61 | ```bash 62 | # 创建存放 hf 格式参数的目录 63 | mkdir work_dirs/internlm2_chat_1_8b_dpo_full_copy/iter_15230_hf 64 | 65 | # 转换格式 66 | xtuner convert pth_to_hf internlm2_chat_1_8b_dpo_full_copy.py \ 67 | work_dirs/internlm2_chat_1_8b_dpo_full_copy/iter_15230.pth \ 68 | work_dirs/internlm2_chat_1_8b_dpo_full_copy/iter_15230_hf 69 | ``` 70 | 71 | 便能够将 XTuner 的 ckpt 转换为 HuggingFace 格式的模型。 72 | -------------------------------------------------------------------------------- /docs/zh_cn/evaluation/hook.md: -------------------------------------------------------------------------------- 1 | # 训练过程中评测 2 | -------------------------------------------------------------------------------- /docs/zh_cn/evaluation/mmbench.md: -------------------------------------------------------------------------------- 1 | # MMBench (VLM) 2 | -------------------------------------------------------------------------------- /docs/zh_cn/evaluation/mmlu.md: -------------------------------------------------------------------------------- 1 | # MMLU (LLM) 2 | -------------------------------------------------------------------------------- /docs/zh_cn/evaluation/opencompass.md: -------------------------------------------------------------------------------- 1 | # 使用 OpenCompass 评测 2 | -------------------------------------------------------------------------------- /docs/zh_cn/get_started/installation.rst: -------------------------------------------------------------------------------- 1 | ================================== 2 | 安装 3 | ================================== 4 | 5 | 本节中,我们将演示如何安装 XTuner。 6 | 7 | 最佳实践 8 | ======== 9 | 10 | 我们推荐用户参照我们的最佳实践安装 XTuner。 11 | 推荐使用 Python-3.10 的 conda 虚拟环境安装 XTuner。 12 | 13 | **步骤 0.** 使用 conda 先构建一个 Python-3.10 的虚拟环境 14 | 15 | .. code-block:: console 16 | 17 | $ conda create --name xtuner-env python=3.10 -y 18 | $ conda activate xtuner-env 19 | 20 | **步骤 1.** 安装 XTuner 21 | 22 | 方案a: 通过 pip 直接安装 23 | 24 | .. code-block:: console 25 | 26 | $ pip install -U 'xtuner[deepspeed]' 27 | 28 | 方案b: 从源码安装 29 | 30 | .. code-block:: console 31 | 32 | $ git clone https://github.com/InternLM/xtuner.git 33 | $ cd xtuner 34 | $ pip install -e '.[deepspeed]' 35 | 36 | .. note:: 37 | 38 | "-e" 表示在可编辑模式下安装项目,因此对代码所做的任何本地修改都会生效 39 | 40 | 验证 41 | ======== 42 | 43 | 为了验证 XTuner 是否安装正确,我们将使用命令打印配置文件。 44 | 45 | **打印配置文件:** 在命令行中使用 ``xtuner list-cfg`` 验证是否能打印配置文件列表。 46 | 47 | .. code-block:: console 48 | 49 | $ xtuner list-cfg 50 | -------------------------------------------------------------------------------- /docs/zh_cn/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=.
11 | set BUILDDIR=_build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo. 21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/zh_cn/models/supported.md: -------------------------------------------------------------------------------- 1 | # 已支持的模型 2 | -------------------------------------------------------------------------------- /docs/zh_cn/notes/changelog.md: -------------------------------------------------------------------------------- 1 | 14 | 15 | # 变更日志 16 | 17 | ## v0.1.0 (2023.08.30) 18 | 19 | XTuner 正式发布!🔥🔥🔥 20 | 21 | ### 亮点 22 | 23 | - XTuner 支持使用消费级显卡微调大语言模型。微调 7B 大语言模型的最低显存开销仅为 **8GB**。 24 | - XTuner 支持多种大语言模型、数据集、微调算法和训练流程。 25 | - 众多微调好的 adapter 也同步发布,包括调色师、插件对话等多种玩法。更多信息,请访问 [HuggingFace 仓库](https://huggingface.co/xtuner)。 26 | -------------------------------------------------------------------------------- /docs/zh_cn/reward_model/images/preference_data.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/xtuner/53f2429d8a4662c04a8a4a2dc5c941672f4d3bdd/docs/zh_cn/reward_model/images/preference_data.png -------------------------------------------------------------------------------- /docs/zh_cn/reward_model/images/sequence_parallel.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/xtuner/53f2429d8a4662c04a8a4a2dc5c941672f4d3bdd/docs/zh_cn/reward_model/images/sequence_parallel.png -------------------------------------------------------------------------------- /docs/zh_cn/reward_model/images/var_len_atten.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/xtuner/53f2429d8a4662c04a8a4a2dc5c941672f4d3bdd/docs/zh_cn/reward_model/images/var_len_atten.png -------------------------------------------------------------------------------- /docs/zh_cn/switch_language.md: -------------------------------------------------------------------------------- 1 | ## English 2 | 3 | ## 简体中文 4 | -------------------------------------------------------------------------------- /docs/zh_cn/training/visualization.rst: -------------------------------------------------------------------------------- 1 | ============== 2 | 可视化训练过程 3 | ============== 4 | 5 | XTuner 支持通过 `MMEngine `__ 6 | 使用 `TensorBoard `__ 7 | 和 `Weights & Biases (WandB) `__ 8 | 实验管理工具,只需在 config 中添加一行代码,就可以跟踪和可视化损失、显存占用等指标。 9 | 10 | TensorBoard 11 | ============ 12 | 13 | 1. 设置 config 中的 ``visualizer`` 字段,并将 ``vis_backends`` 设置为 `TensorboardVisBackend `__\ : 14 | 15 | .. code:: diff 16 | 17 | # set visualizer 18 | - visualizer = None 19 | + from mmengine.visualization import Visualizer, TensorboardVisBackend 20 | + visualizer = dict(type=Visualizer, vis_backends=[dict(type=TensorboardVisBackend)]) 21 | 22 | 2. 
启动实验后,tensorboard 产生的相关文件会存在 ``vis_data`` 中,通过 tensorboard 命令可以启动进行实时可视化: 23 | 24 | |image1| 25 | 26 | .. code:: 27 | 28 | tensorboard --logdir=$PATH_TO_VIS_DATA 29 | 30 | WandB 31 | ====== 32 | 33 | 1. 使用 WandB 前需安装依赖库 ``wandb`` 并登录至 wandb。 34 | 35 | .. code:: console 36 | 37 | $ pip install wandb 38 | $ wandb login 39 | 40 | 2. 设置 config 中的 ``visualizer`` 字段,并将 ``vis_backends`` 设置为 `WandbVisBackend `__\ : 41 | 42 | .. code:: diff 43 | 44 | # set visualizer 45 | + from mmengine.visualization import Visualizer, WandbVisBackend 46 | - visualizer = None 47 | + visualizer = dict(type=Visualizer, vis_backends=[dict(type=WandbVisBackend)]) 48 | 49 | .. tip:: 50 | 可以点击 `WandbVisBackend 51 | API `__ 52 | 查看 ``WandbVisBackend`` 可配置的参数。例如 53 | ``init_kwargs``\ ,该参数会传给 54 | `wandb.init `__ 方法。 55 | 56 | .. code:: diff 57 | 58 | # set visualizer 59 | - visualizer = None 60 | + from mmengine.visualization import Visualizer, WandbVisBackend 61 | + visualizer = dict( 62 | + type=Visualizer, 63 | + vis_backends=[ 64 | + dict(type=WandbVisBackend, init_kwargs=dict(project='toy-example'))]) 65 | 66 | 67 | 3. 启动实验后,可在 wandb 网页端 ``https://wandb.ai`` 上查看可视化结果: 68 | 69 | |image2| 70 | 71 | 72 | .. |image1| image:: https://github.com/InternLM/xtuner/assets/67539920/abacb28f-5afd-46d0-91b2-acdd20887969 73 | .. |image2| image:: https://github.com/InternLM/xtuner/assets/41630003/fc16387a-3c83-4015-9235-8ec811077953 74 | -------------------------------------------------------------------------------- /docs/zh_cn/user_guides/ceph.md: -------------------------------------------------------------------------------- 1 | ## 功能说明 2 | 3 | ### 已支持的功能 4 | 5 | - 保存 DeepSpeed Checkpoint 至 CEPH 6 | - 从 Ceph 上的 DeepSpeed Checkpoint 续训 7 | - `pth_to_hf` 支持 Ceph 上的 DeepSpeed Checkpoint 8 | 9 | ### 暂不支持的功能 10 | 11 | - 训练时从 Ceph 加载 Huggingface 模型, 与 `zero3` 加载权重冲突 12 | - HuggingFace `save_pretrained` 保存至 Ceph, 逻辑过于复杂,没办法 patch 13 | 14 | ## 使用说明 15 | 16 | #### 1. 验证 ceph 环境 17 | 18 | 使用前需确保 `petrel sdk` 可用,并且要使用的 Ceph bucket 存在且可用 19 | 20 | 验证 `aws` 命令行工具 21 | 22 | ```bash 23 | # 验证 aws 命令行工具 24 | aws s3 ls $YOUR_BUCKET 25 | ``` 26 | 27 | 验证 `petrel sdk` 28 | 29 | ```python 30 | bucket = 's3://xxx' 31 | 32 | from mmengine import get_file_backend 33 | backend = get_file_backend(bucket) 34 | 35 | for f in backend.list_dir_or_file(bucket): 36 | print(f) 37 | ``` 38 | 39 | #### 2. 训练时保存 Checkpoint 至 Ceph 40 | 41 | `XTuner` 根据环境变量 `DS_CEPH_DIR` 来判断是否将 checkpoint 保存至 ceph 42 | 43 | ```bash 44 | DS_CEPH_DIR=s3://xxxx srun ${SRUN_ARGS} xtuner train $CONFIG --launcher slurm 45 | ``` 46 | 47 | #### 3. 从 Ceph 上的 Checkpoint 续训 48 | 49 | Resume 时,要填写 checkpoint 在 ceph 上的完整路径 50 | 51 | ```bash 52 | DS_CEPH_DIR=s3://xxxx srun ${SRUN_ARGS} xtuner train $CONFIG --launcher slurm --resume s3://xxx/yyy/epoch_x.pth 53 | ``` 54 | 55 | #### 4. 
将 Ceph 上的 Checkpoint 转换为 HF 模型 56 | 57 | 不支持 `$HF_DIR` 为 ceph 路径 58 | 59 | 由于 Checkpoint 中存储了优化器状态,加载比较耗时,对于 ZeRO 1&2 可以直接加载 checkpoint 中的 `model_states.pt` 文件加速转换过程;ZeRO 3 必须先加载整个 checkpoint 60 | 61 | ```bash 62 | srun ${SRUN_ARGS} xtuner convert pth_to_hf $CONFIG s3://xxx/yyy/epoch_x.pth $HF_DIR 63 | 64 | ``` 65 | -------------------------------------------------------------------------------- /docs/zh_cn/user_guides/ftdp_dataset/README.md: -------------------------------------------------------------------------------- 1 | ftdp 是一个闭源的处理数据工具,开源社区用户可以忽略此文档。 2 | 3 | 本节介绍了常见的 4 种使用 ftdp 数据集训练的使用场景: 4 | 5 | - [Case 1: 使用 Processed 数据集训练 InternLM2](Case1.md) 6 | - [Case 2: 使用 Processed 数据集训练非 InternLM2 模型](Case2.md) 7 | - [Case 3: 使用 Processed 普通对话数据集训任意模型](Case3.md) 8 | - [Case 4: 使用 Tokenized 数据集训练 InternLM2](Case4.md) 9 | 10 | 请先参考下方流程图,选择自己的使用场景。 11 | 12 | ```mermaid 13 | graph TD; 14 | A{ftdp 数据} 15 | A -->|是| B{数据 tokenized} 16 | B -->|否| C{使用 Internlm2 对话模板} 17 | C -->|是| D{训练 Internlm2 } 18 | D -->|是| E[Case 1] 19 | D -->|否| F[Case 2] 20 | C -->|否| G{离线处理数据集} 21 | G -->|是| H[尚不支持] 22 | G -->|否| I[Case 3] 23 | B -->|是| J[Case 4] 24 | ``` 25 | -------------------------------------------------------------------------------- /docs/zh_cn/user_guides/llava_offline.md: -------------------------------------------------------------------------------- 1 | # 离线处理 Llava 训练数据集 2 | 3 | 当训练数据量非常大时,每次训练的时候都先在线处理数据可能会极为耗时。我们可以先对原始数据进行离线处理并保存至本地,随后的多次训练可以读入本地离线处理好的数据后直接开始训练。 4 | 5 | ## Step 1, 导出模板 config 文件 6 | 7 | 可使用以下命令查看 XTuner 中提供的 Llava 训练相关的 config: 8 | 9 | ``` 10 | xtuner list-cfg -p llava 11 | ``` 12 | 13 | 找到需要使用的 config 文件并导出至当前目录下: 14 | 15 | ``` 16 | xtuner copy-cfg ${CONFIG_NAME} . 17 | ``` 18 | 19 | ## Step 2, 离线处理数据集 20 | 21 | 使用以下命令可离线处理训练数据集中的文本数据: 22 | 23 | ``` 24 | python xtuner/tools/process_untokenized_llava_data.py \ 25 | ${CONFIG_PATH} \ 26 | --save-folder /folder/to/save/processed/dataset 27 | ``` 28 | 29 | 其中,${CONFIG_PATH} 为第一步中导出的 config 文件路径,`/folder/to/save/processed/dataset` 则需要指定为离线处理数据的保存路径。 30 | 31 | ## Step 3, 修改 config 文件 32 | 33 | 对 Step 1 中导出的 config 文件做如下修改: 34 | 35 | ```diff 36 | ####################################################################### 37 | # PART 3 Dataset & Dataloader # 38 | ####################################################################### 39 | llava_dataset = dict( 40 | - data_path=data_path, 41 | - tokenizer=tokenizer, 42 | + offline_processed_text_folder=/folder/to/save/processed/dataset 43 | ...) 44 | ``` 45 | 46 | 其中,`/folder/to/save/processed/dataset` 为 Step 2 保存的离线处理数据路径。 47 | 48 | ## Step 4,开始训练 49 | 50 | 使用 Step 3 修改得到的 config 训练即可。 51 | -------------------------------------------------------------------------------- /examples/demo_data/multi_turn_1/data.json: -------------------------------------------------------------------------------- 1 | [{ 2 | "messages":[ 3 | { 4 | "toy_system": "You are a helpful AI assistant.", 5 | "toy_input": "Give three tips for staying healthy.", 6 | "toy_output": "1.Eat a balanced diet. 2. Exercise regularly. 3. Get enough sleep." 7 | }, 8 | { 9 | "toy_input": "How to study English?", 10 | "toy_output": "1. Set clear goals. 2. Create a study plan. 3. Build vocabulary. 4. Practice speaking." 11 | } 12 | ] 13 | }, 14 | { 15 | "messages":[ 16 | { 17 | "toy_system": "You are a helpful AI assistant.", 18 | "toy_input": "How to study English?", 19 | "toy_output": "1. Set clear goals. 2. Create a study plan. 3. Build vocabulary. 4. Practice speaking." 
20 | }, 21 | { 22 | "toy_input": "Give three tips for staying healthy.", 23 | "toy_output": "1.Eat a balanced diet. 2. Exercise regularly. 3. Get enough sleep." 24 | } 25 | ] 26 | }] 27 | -------------------------------------------------------------------------------- /examples/demo_data/multi_turn_1/map_fn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | def multi_turn_1_map_fn(example): 3 | messages = example["messages"] 4 | conversation = [] 5 | for msg in messages: 6 | conversation.append( 7 | { 8 | "system": msg["toy_system"], 9 | "input": msg["toy_input"], 10 | "output": msg["toy_output"], 11 | } 12 | ) 13 | return {"conversation": conversation} 14 | -------------------------------------------------------------------------------- /examples/demo_data/multi_turn_2/data.json: -------------------------------------------------------------------------------- 1 | [{ 2 | "messages":[ 3 | { 4 | "role": "system", 5 | "content": "You are a helpful AI assistant." 6 | }, 7 | { 8 | "role": "user", 9 | "content": "Give three tips for staying healthy." 10 | }, 11 | { 12 | "role": "assistant", 13 | "content": "1.Eat a balanced diet. 2. Exercise regularly. 3. Get enough sleep." 14 | }, 15 | { 16 | "role": "user", 17 | "content": "How to study English?" 18 | }, 19 | { 20 | "role": "assistant", 21 | "content": "1. Set clear goals. 2. Create a study plan. 3. Build vocabulary. 4. Practice speaking." 22 | } 23 | ] 24 | }, 25 | { 26 | "messages":[ 27 | { 28 | "role": "system", 29 | "content": "You are a helpful AI assistant." 30 | }, 31 | { 32 | "role": "user", 33 | "content": "How to study English?" 34 | }, 35 | { 36 | "role": "assistant", 37 | "content": "1. Set clear goals. 2. Create a study plan. 3. Build vocabulary. 4. Practice speaking." 38 | }, 39 | { 40 | "role": "user", 41 | "content": "Give three tips for staying healthy." 42 | }, 43 | { 44 | "role": "assistant", 45 | "content": "1.Eat a balanced diet. 2. Exercise regularly. 3. Get enough sleep." 46 | } 47 | ] 48 | }] 49 | -------------------------------------------------------------------------------- /examples/demo_data/multi_turn_2/map_fn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | def multi_turn_2_map_fn(example): 3 | messages = example["messages"] 4 | system = "" 5 | input = "" 6 | conversation = [] 7 | while messages and messages[0]["role"] == "assistant": 8 | # Skip the first one if it is from assistant 9 | messages = messages[1:] 10 | for msg in messages: 11 | if msg["role"] == "system": 12 | system = msg["content"] 13 | elif msg["role"] == "user": 14 | input += msg["content"] 15 | elif msg["role"] == "assistant": 16 | conversation.append( 17 | {"system": system, "input": input, "output": msg["content"]} 18 | ) 19 | system = "" 20 | input = "" 21 | else: 22 | raise NotImplementedError 23 | return {"conversation": conversation} 24 | -------------------------------------------------------------------------------- /examples/demo_data/pretrain/data.json: -------------------------------------------------------------------------------- 1 | [{ 2 | "toy_text": "I am an artificial intelligence (AI) assistant named InternLM. I was created by the Shanghai AI Laboratory and my purpose is to assist users with various tasks through natural language processing technology." 
3 | }, 4 | { 5 | "toy_text": "I am an artificial intelligence programmed to assist with various types of tasks, including answering questions, providing information, and performing automated processes." 6 | }] 7 | -------------------------------------------------------------------------------- /examples/demo_data/pretrain/map_fn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | def pretrain_map_fn(example): 3 | return {"conversation": [{"input": "", "output": example["toy_text"].strip()}]} 4 | -------------------------------------------------------------------------------- /examples/demo_data/single_turn/data.json: -------------------------------------------------------------------------------- 1 | [{ 2 | "toy_system": "You are a helpful AI assistant.", 3 | "toy_input": "Give three tips for staying healthy.", 4 | "toy_output": "1.Eat a balanced diet. 2. Exercise regularly. 3. Get enough sleep." 5 | }, 6 | { 7 | "toy_system": "You are a helpful AI assistant.", 8 | "toy_input": "How to study English?", 9 | "toy_output": "1. Set clear goals. 2. Create a study plan. 3. Build vocabulary. 4. Practice speaking." 10 | }] 11 | -------------------------------------------------------------------------------- /examples/demo_data/single_turn/map_fn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | def single_turn_map_fn(example): 3 | return { 4 | "conversation": [ 5 | { 6 | "system": example["toy_system"], 7 | "input": example["toy_input"], 8 | "output": example["toy_output"], 9 | } 10 | ] 11 | } 12 | -------------------------------------------------------------------------------- /examples/huggingface_trainer/README.md: -------------------------------------------------------------------------------- 1 | # How to use XTuner in a HuggingFace training pipeline 2 | 3 | ## Quick run 4 | 5 | 1. Step into the `examples` directory 6 | 7 | ```shell 8 | cd ./examples 9 | ``` 10 | 11 | 2. Run a training script 12 | 13 | ```shell 14 | # QLoRA training of internlm-7b with the Alpaca dataset 15 | python train_qlora_hf.py --model_name_or_path internlm/internlm-7b --dataset_name_or_path tatsu-lab/alpaca 16 | ``` 17 | 18 | `--model_name_or_path`: specify the model name or path to train. 19 | 20 | `--dataset_name_or_path`: specify the dataset name or path to use. 21 | 22 | ## How to customize your experiment 23 | 24 | XTuner's APIs are compatible with HuggingFace's `transformers`. 25 | If you want to customize your experiment, simply pass in your hyperparameters as you would to a HuggingFace `Trainer`. 26 | 27 | ```shell 28 | # training example 29 | # (--model_name_or_path and --dataset_name_or_path are custom arguments; 30 | # the remaining flags are standard HuggingFace TrainingArguments) 31 | python train_qlora_hf.py \ 32 | --model_name_or_path internlm/internlm-7b \ 33 | --dataset_name_or_path tatsu-lab/alpaca \ 34 | --do_train True \ 35 | --per_device_train_batch_size 1 \ 36 | --learning_rate 2e-5 \ 37 | --save_strategy epoch \ 38 | --lr_scheduler_type cosine \ 39 | --logging_steps 1 40 | ``` 41 | -------------------------------------------------------------------------------- /examples/huggingface_trainer/train_hf.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved.
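# Plain (no adapter) fine-tuning entry point built on HuggingFace's Trainer.
# Hypothetical example invocation (paths and output directory are placeholders,
# adjust them to your setup):
#   python train_hf.py --model_name_or_path internlm/internlm-7b \
#       --dataset_name_or_path tatsu-lab/alpaca --output_dir ./work_dirs/hf_demo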
2 | import transformers 3 | from transformers import Trainer 4 | 5 | from xtuner.apis import DefaultTrainingArguments, build_model 6 | from xtuner.apis.datasets import alpaca_data_collator, alpaca_dataset 7 | 8 | 9 | def train(): 10 | # get DefaultTrainingArguments and to be updated with passed args 11 | parser = transformers.HfArgumentParser(DefaultTrainingArguments) 12 | training_args = parser.parse_args_into_dataclasses()[0] 13 | 14 | # init model and dataset 15 | model, tokenizer = build_model( 16 | model_name_or_path=training_args.model_name_or_path, return_tokenizer=True 17 | ) 18 | train_dataset = alpaca_dataset( 19 | tokenizer=tokenizer, path=training_args.dataset_name_or_path 20 | ) 21 | data_collator = alpaca_data_collator(return_hf_format=True) 22 | 23 | # build trainer 24 | trainer = Trainer( 25 | model=model, 26 | args=training_args, 27 | train_dataset=train_dataset, 28 | data_collator=data_collator, 29 | ) 30 | 31 | # training 32 | trainer.train() 33 | 34 | trainer.save_state() 35 | trainer.save_model(output_dir=training_args.output_dir) 36 | 37 | 38 | if __name__ == "__main__": 39 | train() 40 | -------------------------------------------------------------------------------- /examples/huggingface_trainer/train_lora_hf.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import transformers 3 | from transformers import Trainer 4 | 5 | from xtuner.apis import DefaultTrainingArguments, build_lora_model 6 | from xtuner.apis.datasets import alpaca_data_collator, alpaca_dataset 7 | 8 | 9 | def train(): 10 | # get DefaultTrainingArguments and to be updated with passed args 11 | parser = transformers.HfArgumentParser(DefaultTrainingArguments) 12 | training_args = parser.parse_args_into_dataclasses()[0] 13 | 14 | # init model and dataset 15 | model, tokenizer = build_lora_model( 16 | model_name_or_path=training_args.model_name_or_path, return_tokenizer=True 17 | ) 18 | train_dataset = alpaca_dataset( 19 | tokenizer=tokenizer, path=training_args.dataset_name_or_path 20 | ) 21 | data_collator = alpaca_data_collator(return_hf_format=True) 22 | 23 | # build trainer 24 | trainer = Trainer( 25 | model=model, 26 | args=training_args, 27 | train_dataset=train_dataset, 28 | data_collator=data_collator, 29 | ) 30 | 31 | # training 32 | trainer.train() 33 | 34 | trainer.save_state() 35 | trainer.save_model(output_dir=training_args.output_dir) 36 | 37 | 38 | if __name__ == "__main__": 39 | train() 40 | -------------------------------------------------------------------------------- /examples/huggingface_trainer/train_qlora_hf.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
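# QLoRA variant of train_hf.py: build_qlora_model is expected to load the base
# model with 4-bit quantization and attach LoRA adapters, so only the adapter
# weights are updated during training (assumption based on the API name; the
# Trainer pipeline below is otherwise identical to train_hf.py / train_lora_hf.py).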
2 | import transformers 3 | from transformers import Trainer 4 | 5 | from xtuner.apis import DefaultTrainingArguments, build_qlora_model 6 | from xtuner.apis.datasets import alpaca_data_collator, alpaca_dataset 7 | 8 | 9 | def train(): 10 | # get DefaultTrainingArguments and to be updated with passed args 11 | parser = transformers.HfArgumentParser(DefaultTrainingArguments) 12 | training_args = parser.parse_args_into_dataclasses()[0] 13 | 14 | # init model and dataset 15 | model, tokenizer = build_qlora_model( 16 | model_name_or_path=training_args.model_name_or_path, return_tokenizer=True 17 | ) 18 | train_dataset = alpaca_dataset( 19 | tokenizer=tokenizer, path=training_args.dataset_name_or_path 20 | ) 21 | data_collator = alpaca_data_collator(return_hf_format=True) 22 | 23 | # build trainer 24 | trainer = Trainer( 25 | model=model, 26 | args=training_args, 27 | train_dataset=train_dataset, 28 | data_collator=data_collator, 29 | ) 30 | 31 | # training 32 | trainer.train() 33 | 34 | trainer.save_state() 35 | trainer.save_model(output_dir=training_args.output_dir) 36 | 37 | 38 | if __name__ == "__main__": 39 | train() 40 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | -r requirements/runtime.txt 2 | -r requirements/deepspeed.txt 3 | -r requirements/modelscope.txt 4 | -------------------------------------------------------------------------------- /requirements/deepspeed.txt: -------------------------------------------------------------------------------- 1 | deepspeed==0.16.2 2 | mpi4py-mpich 3 | -------------------------------------------------------------------------------- /requirements/docs.txt: -------------------------------------------------------------------------------- 1 | docutils 2 | myst-parser==2.0.0 3 | sphinx==6.2.1 4 | sphinx-argparse 5 | sphinx-book-theme==1.0.1 6 | sphinx-copybutton==0.5.2 7 | sphinx_markdown_tables 8 | -------------------------------------------------------------------------------- /requirements/lmdeploy.txt: -------------------------------------------------------------------------------- 1 | lmdeploy>=0.6.2 --no-deps 2 | -------------------------------------------------------------------------------- /requirements/modelscope.txt: -------------------------------------------------------------------------------- 1 | modelscope 2 | -------------------------------------------------------------------------------- /requirements/runtime.txt: -------------------------------------------------------------------------------- 1 | bitsandbytes==0.45.0 2 | datasets>=3.2.0 3 | einops 4 | loguru 5 | mmengine==0.10.6 6 | openpyxl 7 | peft>=0.14.0 8 | scikit-image 9 | scipy 10 | SentencePiece 11 | tiktoken 12 | torch 13 | torchvision 14 | transformers==4.48.0 15 | transformers_stream_generator 16 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [codespell] 2 | ignore-words-list = nd, ba, warmup, ans 3 | 4 | [flake8] 5 | max-line-length = 119 6 | ignore = D107,D202,D203,D401,E203,W503 7 | inline-quotes = double 8 | 9 | [black] 10 | line-length = 119 11 | 12 | [isort] 13 | profile = black 14 | 15 | 16 | -------------------------------------------------------------------------------- /xtuner/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 
OpenMMLab. All rights reserved. 2 | import os 3 | 4 | from mmengine.utils import digit_version 5 | 6 | from .entry_point import cli 7 | from .version import __version__, version_info 8 | 9 | HF_CEPH_HUB = os.getenv("HF_CEPH_HUB", "") 10 | HF_USE_CEPH = os.getenv("HF_USE_CEPH", 0) or HF_CEPH_HUB != "" 11 | DS_CEPH_DIR = os.getenv("DS_CEPH_DIR", None) 12 | if HF_USE_CEPH: 13 | from .utils.fileio import patch_hf_auto_from_pretrained, patch_hf_save_pretrained 14 | 15 | patch_hf_auto_from_pretrained(HF_CEPH_HUB) 16 | patch_hf_save_pretrained() 17 | 18 | if DS_CEPH_DIR: 19 | from .utils.fileio import patch_deepspeed_engine 20 | 21 | patch_deepspeed_engine() 22 | 23 | __all__ = [ 24 | "__version__", 25 | "version_info", 26 | "digit_version", 27 | "cli", 28 | "HF_USE_CEPH", 29 | "DS_CEPH_DIR", 30 | ] 31 | -------------------------------------------------------------------------------- /xtuner/_lite/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import os 3 | import subprocess 4 | import sys 5 | 6 | from loguru import logger 7 | 8 | from .device import get_device, get_torch_device_module 9 | 10 | _LOGGER = None 11 | 12 | 13 | def log_format(debug=False): 14 | formatter = "[XTuner][{time:YYYY-MM-DD HH:mm:ss}][{level}]" 15 | 16 | if debug: 17 | formatter += "[{name}:" 18 | formatter += "{function}:" 19 | formatter += "{line}]" 20 | 21 | formatter += " {message}" 22 | return formatter 23 | 24 | 25 | def get_logger(level="INFO"): 26 | global _LOGGER 27 | if _LOGGER is None: 28 | # Remove the original logger in Python to prevent duplicate printing. 29 | logger.remove() 30 | logger.add(sys.stderr, level=level, format=log_format(debug=level == "DEBUG")) 31 | _LOGGER = logger 32 | return _LOGGER 33 | 34 | 35 | def get_repo_git_info(repo_path): 36 | original_directory = os.getcwd() 37 | os.chdir(repo_path) 38 | 39 | try: 40 | branch = ( 41 | subprocess.check_output( 42 | ["git", "rev-parse", "--abbrev-ref", "HEAD"], stderr=subprocess.STDOUT 43 | ) 44 | .strip() 45 | .decode("utf-8") 46 | ) 47 | 48 | commit_id = ( 49 | subprocess.check_output( 50 | ["git", "rev-parse", "HEAD"], stderr=subprocess.STDOUT 51 | ) 52 | .strip() 53 | .decode("utf-8") 54 | ) 55 | 56 | remote_url = ( 57 | subprocess.check_output( 58 | ["git", "remote", "get-url", "origin"], stderr=subprocess.STDOUT 59 | ) 60 | .strip() 61 | .decode("utf-8") 62 | ) 63 | 64 | return branch, commit_id, remote_url 65 | except subprocess.CalledProcessError: 66 | return None, None, None 67 | finally: 68 | os.chdir(original_directory) 69 | 70 | 71 | __all__ = [ 72 | "AutoConfig", 73 | "AutoModelForCausalLM", 74 | "AutoTokenizer", 75 | "get_device", 76 | "get_torch_device_module", 77 | ] 78 | -------------------------------------------------------------------------------- /xtuner/_lite/accelerate/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from .lora import LORA_TARGET_MAP 3 | from .packed import pack_sequence, unpack_sequence 4 | from .utils import ( 5 | liger_kernel_is_available, 6 | lmdeploy_is_available, 7 | mlu_is_available, 8 | npu_is_available, 9 | profile_time_and_memory, 10 | varlen_attn_is_available, 11 | ) 12 | 13 | __all__ = [ 14 | "LORA_TARGET_MAP", 15 | "pack_sequence", 16 | "packed_sequence", 17 | "unpack_sequence", 18 | "liger_kernel_is_available", 19 | "varlen_attn_is_available", 20 | "lmdeploy_is_available", 21 | "npu_is_available", 22 | "mlu_is_available", 23 | "profile_time_and_memory", 24 | ] 25 | -------------------------------------------------------------------------------- /xtuner/_lite/accelerate/lora.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | LORA_TARGET_MAP = { 3 | "InternLM2ForCausalLM": ["wqkv", "wo", "w1", "w2", "w3"], 4 | "CLIPVisionModel": ["q_proj", "k_proj", "v_proj", "out_proj", "fc1", "fc2"], 5 | } 6 | -------------------------------------------------------------------------------- /xtuner/_lite/accelerate/ops/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .moe_permute import GROUPED_GEMM_INSTALLED, permute_func, unpermute_func 3 | 4 | __all__ = ["GROUPED_GEMM_INSTALLED", "permute_func", "unpermute_func"] 5 | -------------------------------------------------------------------------------- /xtuner/_lite/accelerate/packed.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from typing import List, Union 3 | 4 | import torch 5 | 6 | 7 | def unpack_sequence(packed: torch.Tensor, num_tokens: Union[torch.Tensor, List], dim=1): 8 | if isinstance(num_tokens, torch.Tensor): 9 | num_tokens = num_tokens.tolist() 10 | sequences = torch.split(packed, num_tokens, dim=dim) 11 | return sequences 12 | 13 | 14 | def pack_sequence(sequences, dim=1): 15 | num_tokens = torch.IntTensor([seq.size(dim) for seq in sequences]) 16 | packed = torch.cat(sequences, dim=dim) 17 | return packed, num_tokens.to(packed.device) 18 | 19 | 20 | def packed_cumulative_length(num_tokens: torch.Tensor): 21 | device = num_tokens.device 22 | _zero_pad = torch.zeros(1, device=device) 23 | _pad_length = torch.cat([_zero_pad, num_tokens]).int() 24 | return torch.cumsum(_pad_length, 0).int() 25 | -------------------------------------------------------------------------------- /xtuner/_lite/accelerate/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
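# Runtime capability probes (NPU / MLU / variable-length flash attention /
# lmdeploy / liger-kernel) plus a context manager that logs elapsed time and
# peak device memory for the wrapped block.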
2 | import time 3 | from contextlib import contextmanager 4 | 5 | from transformers.utils.import_utils import is_flash_attn_2_available 6 | 7 | from xtuner._lite import get_device, get_logger, get_torch_device_module 8 | 9 | logger = get_logger() 10 | 11 | 12 | def npu_is_available(): 13 | return get_device() == "npu" 14 | 15 | 16 | def mlu_is_available(): 17 | return get_device() == "mlu" 18 | 19 | 20 | def varlen_attn_is_available(): 21 | return is_flash_attn_2_available() or npu_is_available() 22 | 23 | 24 | def lmdeploy_is_available(): 25 | available = False 26 | try: 27 | import lmdeploy # noqa: F401 28 | 29 | available = True 30 | except ImportError: 31 | available = False 32 | 33 | return available 34 | 35 | 36 | def liger_kernel_is_available(): 37 | available = False 38 | try: 39 | import liger_kernel # noqa: F401 40 | 41 | available = True 42 | except ImportError: 43 | available = False 44 | 45 | return available 46 | 47 | 48 | @contextmanager 49 | def profile_time_and_memory(desc): 50 | torch_device = get_torch_device_module() 51 | start_t = time.time() 52 | torch_device.reset_peak_memory_stats() 53 | 54 | yield 55 | 56 | max_memory = torch_device.max_memory_allocated() 57 | cost_time = time.time() - start_t 58 | 59 | logger.success( 60 | f"{desc} Elapsed time {cost_time:.2f} seconds, " 61 | f"peak gpu memory {max_memory/1024**3:.1f}G" 62 | ) 63 | -------------------------------------------------------------------------------- /xtuner/_lite/algorithms/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | -------------------------------------------------------------------------------- /xtuner/_lite/algorithms/ppo/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .dataset import ( 3 | InferDataset, 4 | PPOTokenizeFunction, 5 | RewardBuffer, 6 | RewardBufferCollator, 7 | ) 8 | from .loss import ( 9 | CriticLoss, 10 | PPOPolicyLoss, 11 | compute_advantages_and_returns, 12 | compute_kl_rewards, 13 | gather_logprobs, 14 | ) 15 | from .model import build_actor_model, build_reward_model 16 | 17 | __all__ = [ 18 | "InferDataset", 19 | "RewardBuffer", 20 | "RewardBufferCollator", 21 | "PPOCollator", 22 | "PPODataset", 23 | "PPOTokenizeFunction", 24 | "CriticLoss", 25 | "PPOPolicyLoss", 26 | "compute_advantages_and_returns", 27 | "compute_kl_rewards", 28 | "compute_rewards", 29 | "gather_logprobs", 30 | "build_actor_model", 31 | "build_reward_model", 32 | ] 33 | -------------------------------------------------------------------------------- /xtuner/_lite/algorithms/ppo/model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
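# Builders for the PPO actor (policy) and reward models: both load HuggingFace
# checkpoints with the flash_attention_2 implementation, and the reward model
# is created with its KV cache disabled for training.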
2 | import torch 3 | from transformers import AutoConfig, AutoModel, AutoModelForCausalLM 4 | from transformers.utils.import_utils import ( 5 | is_flash_attn_2_available, 6 | is_torch_sdpa_available, 7 | ) 8 | 9 | from xtuner._lite.accelerate import LoadWoInit 10 | 11 | 12 | def build_actor_model(model_path, dtype=torch.float32, trust_remote_code=True): 13 | config = AutoConfig.from_pretrained(model_path, trust_remote_code=True) 14 | if is_flash_attn_2_available(): 15 | config.attn_implementation = "flash_attention_2" 16 | elif is_torch_sdpa_available(): 17 | config.attn_implementation = "sdpa" 18 | 19 | with LoadWoInit(): 20 | policy = AutoModelForCausalLM.from_pretrained( 21 | model_path, 22 | attn_implementation="flash_attention_2", 23 | torch_dtype=dtype, 24 | trust_remote_code=trust_remote_code, 25 | ) 26 | 27 | return policy 28 | 29 | 30 | def build_reward_model(model_path, dtype=torch.float32, trust_remote_code=True): 31 | config = AutoConfig.from_pretrained(model_path, trust_remote_code=True) 32 | if is_flash_attn_2_available(): 33 | config.attn_implementation = "flash_attention_2" 34 | elif is_torch_sdpa_available(): 35 | config.attn_implementation = "sdpa" 36 | 37 | config.use_cache = False 38 | config.torch_dtype = dtype 39 | with LoadWoInit(): 40 | reward = AutoModel.from_pretrained( 41 | model_path, 42 | attn_implementation="flash_attention_2", 43 | torch_dtype=dtype, 44 | trust_remote_code=trust_remote_code, 45 | ) 46 | 47 | reward.model.use_cache = False 48 | 49 | return reward 50 | -------------------------------------------------------------------------------- /xtuner/_lite/algorithms/sft/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .dataset import SftCollator, SftTokenizeFunction 3 | 4 | __all__ = ["SftCollator", "SftTokenizeFunction"] 5 | -------------------------------------------------------------------------------- /xtuner/_lite/chat/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .messages import ChatMessages 3 | from .templates import CHAT_TEMPLATE_MAP, ChatTemplate, HybridChatTemplate 4 | 5 | __all__ = ["ChatMessages", "CHAT_TEMPLATE_MAP", "ChatTemplate", "HybridChatTemplate"] 6 | -------------------------------------------------------------------------------- /xtuner/_lite/chat/backends/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | -------------------------------------------------------------------------------- /xtuner/_lite/chat/messages/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .base import BaseMessages 3 | from .chat import ChatMessages 4 | 5 | __all__ = ["BaseMessages", "ChatMessages"] 6 | -------------------------------------------------------------------------------- /xtuner/_lite/chat/messages/base.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
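# Abstract base class for chat message containers: concrete subclasses must
# implement add/pop, prompt rendering against a ChatTemplate, tokenization into
# model inputs, and construction from a plain dict.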
2 | from abc import abstractclassmethod, abstractmethod 3 | from typing import Dict 4 | 5 | from pydantic import BaseModel 6 | from transformers import PreTrainedTokenizer 7 | 8 | from ..templates import ChatTemplate 9 | 10 | 11 | class BaseMessages(BaseModel): 12 | @abstractmethod 13 | def add(self, role: str, content): 14 | pass 15 | 16 | @abstractmethod 17 | def pop(self): 18 | pass 19 | 20 | @abstractmethod 21 | def get_prompt(self, chat_template: ChatTemplate) -> str: 22 | pass 23 | 24 | @abstractmethod 25 | def tokenize( 26 | self, tokenizer: PreTrainedTokenizer, chat_template: ChatTemplate 27 | ) -> Dict: 28 | pass 29 | 30 | @abstractclassmethod 31 | def from_dict(cls, item: Dict) -> "BaseMessages": 32 | pass 33 | -------------------------------------------------------------------------------- /xtuner/_lite/chat/templates/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .chat import ChatTemplate 3 | from .hybrid import HybridChatTemplate 4 | 5 | CHAT_TEMPLATE_MAP = { 6 | "internlm2": HybridChatTemplate( 7 | system="<|im_start|>system\n{system}<|im_end|>\n", 8 | user="<|im_start|>user\n{user}<|im_end|>\n<|im_start|>assistant\n", 9 | assistant="{assistant}<|im_end|>", 10 | stop_words=["<|im_end|>"], 11 | ), 12 | "qwen2": HybridChatTemplate( 13 | system="<|im_start|>system\n{system}<|im_end|>\n", 14 | user="<|im_start|>user\n{user}<|im_end|>\n<|im_start|>assistant\n", 15 | assistant="{assistant}<|im_end|>", 16 | stop_words=["<|im_end|>", "<|endoftext|>"], 17 | ), 18 | "llama3": HybridChatTemplate( 19 | system=("<|start_header_id|>system<|end_header_id|>\n\n{system}" "<|eot_id|>"), 20 | user=( 21 | "<|start_header_id|>user<|end_header_id|>\n\n{user}<|eot_id|>" 22 | "<|start_header_id|>assistant<|end_header_id|>\n\n" 23 | ), 24 | assistant="{assistant}<|eot_id|>", 25 | sep="", 26 | stop_words=["<|eot_id|>"], 27 | ), 28 | } 29 | 30 | __all__ = ["ChatTemplate", "HybridChatTemplate"] 31 | -------------------------------------------------------------------------------- /xtuner/_lite/chat/templates/chat.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from typing import List 3 | 4 | from pydantic import BaseModel, field_validator 5 | 6 | 7 | class ChatTemplate(BaseModel): 8 | """Define a Pydantic data model for a hybrid chat with attributes for 9 | system, user and assistant chat as well as function and interpreter calls 10 | and results.""" 11 | 12 | # Normal Chat 13 | system: str # System message format 14 | user: str # User message format 15 | assistant: str # Assistant message format 16 | stop_words: List[str] # List of stop words 17 | sep: str = "\n" 18 | 19 | def decorate_system(self, text: str) -> str: 20 | """Decorate text with the `system` template.""" 21 | return self.system.format(system=text) 22 | 23 | def decorate_assistant(self, text: str) -> str: 24 | """Decorate text with the `assistant` template.""" 25 | return self.assistant.format(assistant=text) 26 | 27 | def decorate_user(self, text: str) -> str: 28 | """Decorate text with the `user` template.""" 29 | return self.user.format(user=text) 30 | 31 | @field_validator("system") 32 | def check_system(cls, v: str) -> str: 33 | """Validate that `system` contains '{system}'. 34 | 35 | If not, raises a ValueError. 
36 | """ 37 | if v is not None and "{system}" not in v: 38 | raise ValueError("system must contain the keyword '{system}'") 39 | return v 40 | 41 | @field_validator("user") 42 | def check_user(cls, v: str) -> str: 43 | """Validate that `user` contains '{user}'. 44 | 45 | If not, raises a ValueError. 46 | """ 47 | if v is not None and "{user}" not in v: 48 | raise ValueError("user must contain the keyword '{user}'") 49 | return v 50 | 51 | @field_validator("assistant") 52 | def check_assistant(cls, v: str) -> str: 53 | """Validate that `assistant` contains '{assistant}'. 54 | 55 | If not, raises a ValueError. 56 | """ 57 | if v is not None and "{assistant}" not in v: 58 | raise ValueError("assistant must contain the keyword '{assistant}'") 59 | return v 60 | -------------------------------------------------------------------------------- /xtuner/_lite/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .json import JsonDataset 3 | from .jsonl import JsonlDataset 4 | from .pack import SoftPackDataset 5 | from .utils import DATASET_CLS_MAP, OPENAI_CONVERT_MAP, load_datasets 6 | 7 | __all__ = [ 8 | "JsonDataset", 9 | "JsonlDataset", 10 | "SoftPackDataset", 11 | "DATASET_CLS_MAP", 12 | "OPENAI_CONVERT_MAP", 13 | "load_datasets", 14 | ] 15 | -------------------------------------------------------------------------------- /xtuner/_lite/datasets/streaming.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | 3 | 4 | class Streaming: 5 | def __init__(self, file, max_epoch=1): 6 | self.file = file 7 | self.offset = 0 8 | self.epoch = 1 9 | self.max_epoch = max_epoch 10 | 11 | def __iter__(self): 12 | return self 13 | 14 | def __next__(self): 15 | with open(self.file) as f: 16 | f.seek(self.offset) 17 | line = f.readline() 18 | 19 | if not line and self.epoch < self.max_epoch: 20 | self.offset = 0 21 | self.epoch += 1 22 | return next(self) 23 | 24 | elif not line and self.epoch == self.max_epoch: 25 | raise StopIteration 26 | 27 | self.offset = f.tell() 28 | return line 29 | -------------------------------------------------------------------------------- /xtuner/_lite/datasets/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .convert import OPENAI_CONVERT_MAP 3 | from .load import DATASET_CLS_MAP, load_datasets 4 | from .utils import apply_exif_orientation, move_data_to_device 5 | 6 | __all__ = [ 7 | "OPENAI_CONVERT_MAP", 8 | "DATASET_CLS_MAP", 9 | "load_datasets", 10 | "apply_exif_orientation", 11 | "move_data_to_device", 12 | ] 13 | -------------------------------------------------------------------------------- /xtuner/_lite/datasets/utils/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from collections.abc import Mapping 3 | 4 | import torch 5 | from PIL import Image 6 | 7 | _EXIF_ORIENT = 274 # exif 'Orientation' tag 8 | 9 | 10 | def apply_exif_orientation(image): 11 | """Applies the exif orientation correctly. 12 | 13 | This code exists per the bug: 14 | https://github.com/python-pillow/Pillow/issues/3973 15 | with the function `ImageOps.exif_transpose`. 
The Pillow source raises errors with 16 | various methods, especially `tobytes` 17 | 18 | Function based on: 19 | https://github.com/wkentaro/labelme/blob/v4.5.4/labelme/utils/image.py#L59 20 | https://github.com/python-pillow/Pillow/blob/7.1.2/src/PIL/ImageOps.py#L527 21 | 22 | Args: 23 | image (PIL.Image): a PIL image 24 | 25 | Returns: 26 | (PIL.Image): the PIL image with exif orientation applied, if applicable 27 | """ 28 | if not hasattr(image, "getexif"): 29 | return image 30 | 31 | try: 32 | exif = image.getexif() 33 | except Exception: # https://github.com/facebookresearch/detectron2/issues/1885 34 | exif = None 35 | 36 | if exif is None: 37 | return image 38 | 39 | orientation = exif.get(_EXIF_ORIENT) 40 | 41 | method = { 42 | 2: Image.FLIP_LEFT_RIGHT, 43 | 3: Image.ROTATE_180, 44 | 4: Image.FLIP_TOP_BOTTOM, 45 | 5: Image.TRANSPOSE, 46 | 6: Image.ROTATE_270, 47 | 7: Image.TRANSVERSE, 48 | 8: Image.ROTATE_90, 49 | }.get(orientation) 50 | 51 | if method is not None: 52 | return image.transpose(method) 53 | return image 54 | 55 | 56 | def move_data_to_device(data, device="cuda"): 57 | """Prepares one `data` before feeding it to the model, be it a tensor or a 58 | nested list/dictionary of tensors.""" 59 | if isinstance(data, Mapping): 60 | return type(data)({k: move_data_to_device(v, device) for k, v in data.items()}) 61 | elif isinstance(data, (tuple, list)): 62 | return type(data)(move_data_to_device(v, device) for v in data) 63 | elif isinstance(data, torch.Tensor): 64 | kwargs = {"device": device} 65 | return data.to(non_blocking=True, **kwargs) 66 | return data 67 | -------------------------------------------------------------------------------- /xtuner/_lite/device.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | 4 | 5 | def get_device(): 6 | device = None 7 | if torch.cuda.is_available(): 8 | device = "cuda" 9 | else: 10 | try: 11 | import torch_npu # noqa: F401 12 | 13 | device = "npu" 14 | except ImportError: 15 | pass 16 | try: 17 | import torch_mlu # noqa: F401 18 | 19 | device = "mlu" 20 | except ImportError: 21 | pass 22 | 23 | if device is None: 24 | raise NotImplementedError( 25 | "Supports only CUDA, NPU or MLU. If your device is CUDA, NPU " 26 | "or MLU, please make sure that your environmental settings are " 27 | "configured correctly."
28 | ) 29 | 30 | return device 31 | 32 | 33 | def get_torch_device_module(): 34 | device = get_device() 35 | if device == "cuda": 36 | return torch.cuda 37 | elif device == "npu": 38 | return torch.npu 39 | elif device == "mlu": 40 | return torch.mlu 41 | else: 42 | raise NotImplementedError 43 | -------------------------------------------------------------------------------- /xtuner/_lite/modelings/__init__.py: -------------------------------------------------------------------------------- 1 | from .internlm2 import InternLM2Config, InternLM2ForCausalLM 2 | from .internlm3 import InternLM3Config, InternLM3ForCausalLM, InternLM3Tokenizer 3 | from .llava.modeling_llava import LlavaForConditionalGeneration 4 | from .llava.configuration_llava import EnhancedLlavaConfig 5 | from .llava.processing_llava import LlavaProcessor 6 | 7 | def register_remote_code(): 8 | from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer 9 | AutoConfig.register('internlm2', InternLM2Config, exist_ok=True) 10 | AutoModelForCausalLM.register( 11 | InternLM2Config, InternLM2ForCausalLM, exist_ok=True) 12 | 13 | AutoConfig.register('internlm3', InternLM3Config, exist_ok=True) 14 | AutoModelForCausalLM.register( 15 | InternLM3Config, InternLM3ForCausalLM, exist_ok=True) 16 | AutoTokenizer.register( 17 | InternLM3Config, InternLM3Tokenizer, exist_ok=True) 18 | -------------------------------------------------------------------------------- /xtuner/_lite/modelings/internlm2/__init__.py: -------------------------------------------------------------------------------- 1 | from .configuration_internlm2 import InternLM2Config 2 | from .modeling_internlm2 import InternLM2ForCausalLM 3 | -------------------------------------------------------------------------------- /xtuner/_lite/modelings/internlm3/__init__.py: -------------------------------------------------------------------------------- 1 | from .configuration_internlm3 import InternLM3Config 2 | from .modeling_internlm3 import InternLM3ForCausalLM 3 | from .tokenization_internlm3 import InternLM3Tokenizer 4 | -------------------------------------------------------------------------------- /xtuner/_lite/modelings/internvl2/__init__.py: -------------------------------------------------------------------------------- 1 | from .modeling_intern_vit import InternVisionModel 2 | 3 | __all__ = ['InternVisionModel'] 4 | -------------------------------------------------------------------------------- /xtuner/_lite/modelings/llava/__init__.py: -------------------------------------------------------------------------------- 1 | from .configuration_llava import EnhancedLlavaConfig 2 | from .modeling_llava import LlavaForConditionalGeneration 3 | from .processing_llava import LlavaProcessor 4 | -------------------------------------------------------------------------------- /xtuner/_lite/parallel/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
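A short illustration of the device helpers defined above; on a host with none of CUDA, NPU or MLU, `get_device()` raises `NotImplementedError`, and it is assumed here that the NPU/MLU backend modules mirror `torch.cuda`'s `device_count()`.

```python
from xtuner._lite import get_device
from xtuner._lite.device import get_torch_device_module

device = get_device()                      # "cuda", "npu" or "mlu"
device_module = get_torch_device_module()  # torch.cuda, torch.npu or torch.mlu
print(device, device_module.device_count())
```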
2 | from .comm import all_to_all, all_to_all_list, barrier 3 | from .sampler import LengthGroupedSampler, ParallelSampler, VLMLengthGroupedSampler 4 | from .sequence import * # noqa: F401, F403 5 | from .setup import setup_parallel 6 | 7 | __all__ = [ 8 | "ParallelSampler", 9 | "LengthGroupedSampler", 10 | "VLMLengthGroupedSampler", 11 | "all_to_all", 12 | "all_to_all_list", 13 | "setup_parallel", 14 | "barrier", 15 | ] 16 | -------------------------------------------------------------------------------- /xtuner/_lite/parallel/sequence/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmengine.dist import init_dist 3 | 4 | from .attention import ( 5 | post_process_for_sequence_parallel_attn, 6 | pre_process_for_sequence_parallel_attn, 7 | ) 8 | from .ops import ( 9 | gather_for_sequence_parallel, 10 | gather_forward_split_backward, 11 | split_for_sequence_parallel, 12 | split_forward_gather_backward, 13 | ) 14 | 15 | __all__ = [ 16 | "pre_process_for_sequence_parallel_attn", 17 | "post_process_for_sequence_parallel_attn", 18 | "split_for_sequence_parallel", 19 | "init_dist", 20 | "gather_for_sequence_parallel", 21 | "split_forward_gather_backward", 22 | "gather_forward_split_backward", 23 | ] 24 | -------------------------------------------------------------------------------- /xtuner/_lite/parallel/sequence/attention.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | from torch.distributed.device_mesh import DeviceMesh 4 | 5 | from ..comm import all_to_all 6 | 7 | 8 | def pre_process_for_sequence_parallel_attn( 9 | query_states: torch.Tensor, 10 | key_states: torch.Tensor, 11 | value_states: torch.Tensor, 12 | sp_mesh: DeviceMesh, 13 | scatter_dim: int = 2, 14 | gather_dim: int = 1, 15 | ): 16 | sp_size = sp_mesh.size() 17 | n_head = query_states.shape[2] 18 | assert n_head % sp_size == 0, ( 19 | "The number of attention heads should be divisible by " 20 | f"sequence_parallel_world_size. But got n_head = {n_head} and " 21 | f"sequence_parallel_world_size = {sp_size}." 22 | ) 23 | 24 | # (b, s // sp_world_size, nd, dim) -> (b, s, nd // sp_world_size, dim) 25 | sp_group = sp_mesh.get_group() 26 | query_states = all_to_all( 27 | query_states, sp_group, scatter_dim=scatter_dim, gather_dim=gather_dim 28 | ) 29 | key_states = all_to_all( 30 | key_states, sp_group, scatter_dim=scatter_dim, gather_dim=gather_dim 31 | ) 32 | value_states = all_to_all( 33 | value_states, sp_group, scatter_dim=scatter_dim, gather_dim=gather_dim 34 | ) 35 | 36 | return query_states, key_states, value_states 37 | 38 | 39 | def post_process_for_sequence_parallel_attn( 40 | attn_output: torch.Tensor, sp_mesh: DeviceMesh, scatter_dim=1, gather_dim=2 41 | ): 42 | # (b, s, nd // sp_world_size, dim) -> (b, s // sp_world_size, nd, dim) 43 | sp_group = sp_mesh.get_group() 44 | output = all_to_all( 45 | attn_output, sp_group, scatter_dim=scatter_dim, gather_dim=gather_dim 46 | ) 47 | return output 48 | -------------------------------------------------------------------------------- /xtuner/_lite/parallel/setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
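A shape-level sketch of how the two helpers above bracket an attention call in sequence parallelism; it assumes an already-initialised sequence-parallel `DeviceMesh` and any flash-style attention callable supplied by the caller.

```python
from xtuner._lite.parallel.sequence import (
    post_process_for_sequence_parallel_attn,
    pre_process_for_sequence_parallel_attn,
)


def sp_attention(query, key, value, sp_mesh, local_attn):
    # Each rank holds (b, s // sp_size, n_head, head_dim); the all-to-all trades
    # sequence shards for head shards: (b, s, n_head // sp_size, head_dim).
    query, key, value = pre_process_for_sequence_parallel_attn(query, key, value, sp_mesh)
    out = local_attn(query, key, value)
    # Reverse the exchange so every rank gets back its own sequence shard.
    return post_process_for_sequence_parallel_attn(out, sp_mesh)
```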
2 | import torch 3 | import torch.distributed as dist 4 | from mmengine.dist import infer_launcher, init_dist 5 | from torch._C._distributed_c10d import ReduceOp 6 | from torch.distributed.c10d_logger import _exception_logger 7 | 8 | from xtuner._lite import get_device 9 | 10 | origin_reduce_scatter_tensor = torch.distributed.reduce_scatter_tensor 11 | 12 | 13 | # mlu's reduce_scatter_tensor do not support ReduceOp.AVG, use ReduceOp.SUM / group_world_size instead. 14 | @_exception_logger 15 | def mlu_reduce_scatter_tensor( 16 | output, input, op=ReduceOp.SUM, group=None, async_op=False 17 | ): 18 | if op == ReduceOp.AVG: 19 | result = origin_reduce_scatter_tensor( 20 | output, input, ReduceOp.SUM, group, async_op 21 | ) 22 | output.div_(torch.distributed.get_world_size(group)) 23 | return result 24 | else: 25 | return origin_reduce_scatter_tensor(output, input, op, group, async_op) 26 | 27 | 28 | def setup_parallel(): 29 | if not dist.is_initialized(): 30 | dist_launcher = infer_launcher() 31 | init_dist(dist_launcher) 32 | 33 | device = get_device() 34 | 35 | if device == "mlu": 36 | torch.distributed.reduce_scatter_tensor = mlu_reduce_scatter_tensor 37 | -------------------------------------------------------------------------------- /xtuner/_lite/patches/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .auto import AutoPatch 3 | from .base import FSDPConfig 4 | from .utils import pad_to_max_length, pad_to_multiple_of 5 | 6 | __all__ = ["AutoPatch", "FSDPConfig", "pad_to_max_length", "pad_to_multiple_of"] 7 | -------------------------------------------------------------------------------- /xtuner/_lite/patches/auto.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
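A tiny self-contained illustration (toy values, no process group) of the arithmetic behind the MLU fallback above: the average is recovered from a SUM reduce-scatter by dividing by the group size.

```python
import torch

world_size = 4
per_rank = [torch.full((2,), float(i)) for i in range(world_size)]  # ranks 0..3
summed = torch.stack(per_rank).sum(dim=0)   # what ReduceOp.SUM would produce
averaged = summed / world_size              # equivalent to ReduceOp.AVG
print(averaged)                             # tensor([1.5000, 1.5000])
```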
2 | from transformers.models.llama import LlamaForCausalLM 3 | from transformers.models.qwen2 import Qwen2ForCausalLM 4 | 5 | from xtuner._lite.modelings.internlm3 import InternLM3ForCausalLM 6 | 7 | from .base import FSDPConfig, PatchedCausalLM 8 | from .internlm3 import ( 9 | CUDAPatchedInternLM3ForCausalLM, 10 | MLUPatchedInternLM3ForCausalLM, 11 | MuxiPatchedInternLM3ForCausalLM, 12 | ) 13 | from .llama import ( 14 | CUDAPatchedLlamaForCausalLM, 15 | MLUPatchedLlamaForCausalLM, 16 | MuxiPatchedLlamaForCausalLM, 17 | ) 18 | from .qwen2 import CUDAPatchedQwen2ForCausalLM 19 | 20 | CUDA_PATCH_MAP = { 21 | LlamaForCausalLM: CUDAPatchedLlamaForCausalLM, 22 | InternLM3ForCausalLM: CUDAPatchedInternLM3ForCausalLM, 23 | Qwen2ForCausalLM: CUDAPatchedQwen2ForCausalLM, 24 | } 25 | 26 | MLU_PATCH_MAP = { 27 | LlamaForCausalLM: MLUPatchedLlamaForCausalLM, 28 | InternLM3ForCausalLM: MLUPatchedInternLM3ForCausalLM, 29 | } 30 | 31 | MUXI_PATCH_MAP = { 32 | LlamaForCausalLM: MuxiPatchedLlamaForCausalLM, 33 | InternLM3ForCausalLM: MuxiPatchedInternLM3ForCausalLM, 34 | } 35 | 36 | 37 | class AutoPatch: 38 | @classmethod 39 | def from_causal_lm( 40 | cls, model, fsdp_config: FSDPConfig, device_type="cuda" 41 | ) -> PatchedCausalLM: 42 | if device_type == "cuda": 43 | patch_cls = CUDA_PATCH_MAP[type(model)] 44 | elif device_type == "mlu": 45 | patch_cls = MLU_PATCH_MAP[type(model)] 46 | elif device_type == "muxi": 47 | patch_cls = MUXI_PATCH_MAP[type(model)] 48 | else: 49 | raise NotImplementedError 50 | 51 | patched_model = patch_cls(model, fsdp_config) 52 | 53 | return patched_model 54 | -------------------------------------------------------------------------------- /xtuner/_lite/patches/internlm3.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
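A hedged sketch of dispatching through `AutoPatch` above; the checkpoint path is a placeholder, and it assumes `FSDPConfig` accepts `max_length` as a constructor field (only the attribute access is visible in this listing).

```python
from transformers import AutoModelForCausalLM

from xtuner._lite.patches import AutoPatch, FSDPConfig


def patch_for_fsdp(model_path: str, max_length: int = 8192):
    model = AutoModelForCausalLM.from_pretrained(model_path)
    fsdp_config = FSDPConfig(max_length=max_length)  # assumed constructor field
    # Dispatches on type(model): Llama, Qwen2 and InternLM3 are covered on CUDA;
    # an unsupported model class raises KeyError from the lookup table.
    return AutoPatch.from_causal_lm(model, fsdp_config, device_type="cuda")
```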
2 | from xtuner._lite.chat import HybridChatTemplate 3 | from xtuner._lite.modelings.internlm3.modeling_internlm3 import ( 4 | InternLM3Attention, 5 | InternLM3DecoderLayer, 6 | InternLM3ForCausalLM, 7 | InternLM3RotaryEmbedding, 8 | ) 9 | 10 | from .llama import CUDAPatchedLlamaForCausalLM 11 | 12 | 13 | class CUDAPatchedInternLM3ForCausalLM(CUDAPatchedLlamaForCausalLM): 14 | rotary_emb_cls = InternLM3RotaryEmbedding 15 | attn_cls = InternLM3Attention 16 | layer_cls = InternLM3DecoderLayer 17 | causal_cls = InternLM3ForCausalLM 18 | 19 | chat_template = HybridChatTemplate( 20 | system="<|im_start|>system\n{system}<|im_end|>\n", 21 | user="<|im_start|>user\n{user}<|im_end|>\n<|im_start|>assistant\n", 22 | assistant="{assistant}<|im_end|>", 23 | stop_words=["<|im_end|>"], 24 | ) 25 | 26 | def __init__(self, model, fsdp_config=None): 27 | super().__init__(model, fsdp_config) 28 | 29 | if fsdp_config.max_length is not None: 30 | self.patched_model.config.rope_scaling = {"rope_type": "default"} 31 | ori_max_len = self.patched_model.config.max_position_embeddings 32 | self.patched_model.config.max_position_embeddings = max( 33 | fsdp_config.max_length, ori_max_len 34 | ) 35 | self.patched_model.model.rotary_emb = InternLM3RotaryEmbedding( 36 | self.patched_model.config 37 | ).to(self.device_type) 38 | 39 | 40 | class MLUPatchedInternLM3ForCausalLM(CUDAPatchedInternLM3ForCausalLM): 41 | device_type = "mlu" 42 | 43 | 44 | class MuxiPatchedInternLM3ForCausalLM(CUDAPatchedInternLM3ForCausalLM): 45 | device_type = "muxi" 46 | -------------------------------------------------------------------------------- /xtuner/_lite/patches/mixins/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .generate import GenerateMixin 3 | 4 | __all__ = ["GenerateMixin"] 5 | -------------------------------------------------------------------------------- /xtuner/_lite/patches/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
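For reference, a runnable example of the plain `ChatTemplate` class from `xtuner._lite.chat`, which defines the same system/user/assistant fields used by the patched model's `chat_template` above.

```python
from xtuner._lite.chat import ChatTemplate

template = ChatTemplate(
    system="<|im_start|>system\n{system}<|im_end|>\n",
    user="<|im_start|>user\n{user}<|im_end|>\n<|im_start|>assistant\n",
    assistant="{assistant}<|im_end|>",
    stop_words=["<|im_end|>"],
)
# Pydantic validators check that each field keeps its {system}/{user}/{assistant} slot.
prompt = template.decorate_system("You are a helpful assistant.") + template.decorate_user("Hi!")
print(prompt)
```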
2 | from typing import List, Union 3 | 4 | import torch 5 | 6 | 7 | def pad_to_multiple_of(sequence, padding_value, multiple_of, dim=-1): 8 | length = sequence.shape[dim] 9 | if length % multiple_of == 0: 10 | return sequence 11 | 12 | pad_num = multiple_of - (length % multiple_of) 13 | pad_shape = ( 14 | (*sequence.shape[:dim], pad_num, *sequence.shape[dim + 1 :]) 15 | if dim != -1 16 | else (*sequence.shape[:dim], pad_num) 17 | ) 18 | pad = torch.full( 19 | pad_shape, padding_value, dtype=sequence.dtype, device=sequence.device 20 | ) 21 | sequence = torch.cat([sequence, pad], dim=dim) 22 | return sequence 23 | 24 | 25 | def pad_to_max_length(sequence, padding_value, max_length, dim=-1): 26 | length = sequence.shape[dim] 27 | assert length <= max_length 28 | pad_num = max_length - length 29 | pad_shape = ( 30 | (*sequence.shape[:dim], pad_num, *sequence.shape[dim + 1 :]) 31 | if dim != -1 32 | else (*sequence.shape[:dim], pad_num) 33 | ) 34 | pad = torch.full( 35 | pad_shape, padding_value, dtype=sequence.dtype, device=sequence.device 36 | ) 37 | sequence = torch.cat([sequence, pad], dim=dim) 38 | return sequence 39 | 40 | 41 | def unpack_sequence(packed: torch.Tensor, num_tokens: Union[torch.Tensor, List], dim=1): 42 | if isinstance(num_tokens, torch.Tensor): 43 | num_tokens = num_tokens.tolist() 44 | sequences = torch.split(packed, num_tokens, dim=dim) 45 | return sequences 46 | 47 | 48 | def pack_sequence(sequences, dim=1): 49 | num_tokens = torch.IntTensor([seq.size(dim) for seq in sequences]) 50 | packed = torch.cat(sequences, dim=dim) 51 | return packed, num_tokens.to(packed.device) 52 | 53 | 54 | def packed_cumulative_length(num_tokens: torch.Tensor): 55 | device = num_tokens.device 56 | _zero_pad = torch.zeros(1, device=device) 57 | _pad_length = torch.cat([_zero_pad, num_tokens]).int() 58 | return torch.cumsum(_pad_length, 0).int() 59 | -------------------------------------------------------------------------------- /xtuner/apis/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .datasets import * # noqa: F401, F403 3 | from .model import * # noqa: F401, F403 4 | from .training_args import * # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /xtuner/apis/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
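A runnable toy demonstration of the padding and packing helpers defined in `patches/utils.py` above.

```python
import torch

from xtuner._lite.patches.utils import (
    pack_sequence,
    pad_to_max_length,
    pad_to_multiple_of,
    unpack_sequence,
)

a = torch.ones(1, 5, dtype=torch.long)
b = torch.full((1, 3), 2, dtype=torch.long)

print(pad_to_multiple_of(a, padding_value=0, multiple_of=4).shape)  # (1, 8)
print(pad_to_max_length(b, padding_value=0, max_length=10).shape)   # (1, 10)

packed, num_tokens = pack_sequence([a, b], dim=1)  # packed: (1, 8), num_tokens: [5, 3]
print([t.shape for t in unpack_sequence(packed, num_tokens, dim=1)])  # (1, 5) and (1, 3)
```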
2 | from .alpaca import ( 3 | alpaca_data_collator, 4 | alpaca_dataset, 5 | alpaca_enzh_data_collator, 6 | alpaca_enzh_dataset, 7 | alpaca_zh_data_collator, 8 | alpaca_zh_dataset, 9 | ) 10 | from .arxiv import arxiv_data_collator, arxiv_dataset 11 | from .code_alpaca import code_alpaca_data_collator, code_alpaca_dataset 12 | from .colorist import colorist_data_collator, colorist_dataset 13 | from .lawyer import ( 14 | lawyer_crime_data_collator, 15 | lawyer_crime_dataset, 16 | lawyer_data_collator, 17 | lawyer_dataset, 18 | lawyer_reference_data_collator, 19 | lawyer_reference_dataset, 20 | ) 21 | from .medical import medical_data_collator, medical_dataset 22 | from .moss_003_sft import ( 23 | moss_003_sft_data_collator, 24 | moss_003_sft_dataset, 25 | moss_003_sft_no_plugins_data_collator, 26 | moss_003_sft_no_plugins_dataset, 27 | moss_003_sft_plugins_data_collator, 28 | moss_003_sft_plugins_dataset, 29 | ) 30 | from .oasst1 import oasst1_data_collator, oasst1_dataset 31 | from .open_orca import openorca_data_collator, openorca_dataset 32 | from .sql import sql_data_collator, sql_dataset 33 | from .tiny_codes import tiny_codes_data_collator, tiny_codes_dataset 34 | from .wizardlm import wizardlm_data_collator, wizardlm_dataset 35 | 36 | __all__ = [ 37 | "alpaca_data_collator", 38 | "alpaca_dataset", 39 | "alpaca_enzh_data_collator", 40 | "alpaca_enzh_dataset", 41 | "alpaca_zh_data_collator", 42 | "alpaca_zh_dataset", 43 | "arxiv_data_collator", 44 | "arxiv_dataset", 45 | "medical_data_collator", 46 | "medical_dataset", 47 | "moss_003_sft_data_collator", 48 | "moss_003_sft_dataset", 49 | "moss_003_sft_no_plugins_data_collator", 50 | "moss_003_sft_no_plugins_dataset", 51 | "moss_003_sft_plugins_data_collator", 52 | "moss_003_sft_plugins_dataset", 53 | "oasst1_data_collator", 54 | "oasst1_dataset", 55 | "openorca_data_collator", 56 | "openorca_dataset", 57 | "lawyer_crime_dataset", 58 | "lawyer_crime_data_collator", 59 | "lawyer_reference_dataset", 60 | "lawyer_reference_data_collator", 61 | "lawyer_dataset", 62 | "lawyer_data_collator", 63 | "colorist_dataset", 64 | "colorist_data_collator", 65 | "sql_dataset", 66 | "sql_data_collator", 67 | "code_alpaca_dataset", 68 | "code_alpaca_data_collator", 69 | "tiny_codes_dataset", 70 | "tiny_codes_data_collator", 71 | "wizardlm_data_collator", 72 | "wizardlm_dataset", 73 | ] 74 | -------------------------------------------------------------------------------- /xtuner/apis/datasets/arxiv.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from functools import partial 3 | 4 | from datasets import load_dataset 5 | 6 | from xtuner.dataset import process_hf_dataset 7 | from xtuner.dataset.collate_fns import default_collate_fn 8 | from xtuner.dataset.map_fns import arxiv_map_fn, template_map_fn_factory 9 | from xtuner.utils import PROMPT_TEMPLATE 10 | 11 | 12 | def arxiv_dataset( 13 | tokenizer, 14 | data_file=None, 15 | max_length=2048, 16 | prompt_template=PROMPT_TEMPLATE.default, 17 | remove_unused_columns=True, 18 | pack_to_max_length=True, 19 | ): 20 | template_map_fn = template_map_fn_factory(template=prompt_template) 21 | # 1. Download data from https://kaggle.com/datasets/Cornell-University/arxiv # noqa: E501 22 | # 2. 
Process data with `./tools/data_preprocess/arxiv.py` 23 | if data_file is None: 24 | data_file = "./data/arxiv_postprocess_csAIcsCLcsCV_20200101.json" 25 | dataset_org = load_dataset(path="json", data_files=dict(train=data_file)) 26 | dataset = process_hf_dataset( 27 | dataset=dataset_org, 28 | tokenizer=tokenizer, 29 | max_length=max_length, 30 | dataset_map_fn=arxiv_map_fn, 31 | template_map_fn=template_map_fn, 32 | remove_unused_columns=remove_unused_columns, 33 | shuffle_before_pack=True, 34 | pack_to_max_length=pack_to_max_length, 35 | ) 36 | 37 | return dataset 38 | 39 | 40 | def arxiv_data_collator(return_hf_format=False): 41 | return partial(default_collate_fn, return_hf_format=return_hf_format) 42 | -------------------------------------------------------------------------------- /xtuner/apis/datasets/code_alpaca.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from functools import partial 3 | 4 | from datasets import load_dataset 5 | 6 | from xtuner.dataset import process_hf_dataset 7 | from xtuner.dataset.collate_fns import default_collate_fn 8 | from xtuner.dataset.map_fns import code_alpaca_map_fn, template_map_fn_factory 9 | from xtuner.utils import PROMPT_TEMPLATE 10 | 11 | 12 | def code_alpaca_dataset( 13 | tokenizer, 14 | path="HuggingFaceH4/CodeAlpaca_20K", 15 | max_length=2048, 16 | prompt_template=PROMPT_TEMPLATE.default, 17 | remove_unused_columns=True, 18 | pack_to_max_length=True, 19 | ): 20 | template_map_fn = template_map_fn_factory(template=prompt_template) 21 | dataset_org = load_dataset(path) 22 | dataset = process_hf_dataset( 23 | dataset=dataset_org, 24 | tokenizer=tokenizer, 25 | max_length=max_length, 26 | dataset_map_fn=code_alpaca_map_fn, 27 | template_map_fn=template_map_fn, 28 | remove_unused_columns=remove_unused_columns, 29 | shuffle_before_pack=True, 30 | pack_to_max_length=pack_to_max_length, 31 | ) 32 | 33 | return dataset 34 | 35 | 36 | def code_alpaca_data_collator(return_hf_format=False): 37 | return partial(default_collate_fn, return_hf_format=return_hf_format) 38 | -------------------------------------------------------------------------------- /xtuner/apis/datasets/colorist.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
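A hedged sketch of pairing one of the dataset factories above with its collator; the tokenizer checkpoint is a placeholder, and fetching the dataset requires network access.

```python
from transformers import AutoTokenizer

from xtuner.apis.datasets import code_alpaca_data_collator, code_alpaca_dataset


def build_code_alpaca(tokenizer_path: str, max_length: int = 2048):
    tokenizer = AutoTokenizer.from_pretrained(tokenizer_path, trust_remote_code=True)
    train_dataset = code_alpaca_dataset(tokenizer, max_length=max_length)
    collate_fn = code_alpaca_data_collator(return_hf_format=False)
    return train_dataset, collate_fn
```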
2 | from functools import partial 3 | 4 | from datasets import load_dataset 5 | 6 | from xtuner.dataset import process_hf_dataset 7 | from xtuner.dataset.collate_fns import default_collate_fn 8 | from xtuner.dataset.map_fns import colors_map_fn, template_map_fn_factory 9 | from xtuner.utils import PROMPT_TEMPLATE 10 | 11 | 12 | def colorist_dataset( 13 | tokenizer, 14 | path="burkelibbey/colors", 15 | max_length=2048, 16 | prompt_template=PROMPT_TEMPLATE.default, 17 | remove_unused_columns=True, 18 | pack_to_max_length=True, 19 | ): 20 | template_map_fn = template_map_fn_factory(template=prompt_template) 21 | dataset_org = load_dataset(path) 22 | dataset = process_hf_dataset( 23 | dataset=dataset_org, 24 | tokenizer=tokenizer, 25 | max_length=max_length, 26 | dataset_map_fn=colors_map_fn, 27 | template_map_fn=template_map_fn, 28 | remove_unused_columns=remove_unused_columns, 29 | shuffle_before_pack=True, 30 | pack_to_max_length=pack_to_max_length, 31 | ) 32 | 33 | return dataset 34 | 35 | 36 | def colorist_data_collator(return_hf_format=False): 37 | return partial(default_collate_fn, return_hf_format=return_hf_format) 38 | -------------------------------------------------------------------------------- /xtuner/apis/datasets/medical.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from functools import partial 3 | 4 | from datasets import load_dataset 5 | 6 | from xtuner.dataset import process_hf_dataset 7 | from xtuner.dataset.collate_fns import default_collate_fn 8 | from xtuner.dataset.map_fns import medical_map_fn, template_map_fn_factory 9 | from xtuner.utils import PROMPT_TEMPLATE 10 | 11 | 12 | def medical_dataset( 13 | tokenizer, 14 | path="shibing624/medical", 15 | max_length=2048, 16 | prompt_template=PROMPT_TEMPLATE.default, 17 | remove_unused_columns=False, 18 | pack_to_max_length=True, 19 | ): 20 | template_map_fn = template_map_fn_factory(template=prompt_template) 21 | dataset_org = load_dataset(path) 22 | dataset = process_hf_dataset( 23 | dataset=dataset_org, 24 | tokenizer=tokenizer, 25 | max_length=max_length, 26 | dataset_map_fn=medical_map_fn, 27 | template_map_fn=template_map_fn, 28 | remove_unused_columns=remove_unused_columns, 29 | shuffle_before_pack=True, 30 | pack_to_max_length=pack_to_max_length, 31 | ) 32 | 33 | return dataset 34 | 35 | 36 | def medical_data_collator(return_hf_format=False): 37 | return partial(default_collate_fn, return_hf_format=return_hf_format) 38 | -------------------------------------------------------------------------------- /xtuner/apis/datasets/moss_003_sft.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from functools import partial 3 | 4 | from torch.utils.data import ConcatDataset 5 | 6 | from xtuner.dataset import MOSSSFTDataset 7 | from xtuner.dataset.collate_fns import default_collate_fn 8 | 9 | 10 | def moss_003_sft_dataset( 11 | tokenizer, 12 | plugins_data_file=None, 13 | no_plugins_data_file=None, 14 | bot_name=None, 15 | max_length=2048, 16 | ): 17 | plugins = moss_003_sft_plugins_dataset( 18 | tokenizer, data_file=plugins_data_file, bot_name=bot_name, max_length=max_length 19 | ) 20 | no_plugins = moss_003_sft_no_plugins_dataset( 21 | tokenizer, 22 | data_file=no_plugins_data_file, 23 | bot_name=bot_name, 24 | max_length=max_length, 25 | ) 26 | dataset = ConcatDataset([plugins, no_plugins]) 27 | return dataset 28 | 29 | 30 | def moss_003_sft_data_collator(return_hf_format=False): 31 | return partial(default_collate_fn, return_hf_format=return_hf_format) 32 | 33 | 34 | def moss_003_sft_no_plugins_dataset( 35 | tokenizer, data_file=None, bot_name=None, max_length=2048 36 | ): 37 | # Download data from https://huggingface.co/datasets/fnlp/moss-003-sft-data 38 | if data_file is None: 39 | data_file = "./data/moss-003-sft-no-tools.jsonl" 40 | dataset = MOSSSFTDataset( 41 | data_file=data_file, 42 | bot_name=bot_name, 43 | tokenizer=tokenizer, 44 | max_length=max_length, 45 | ) 46 | 47 | return dataset 48 | 49 | 50 | def moss_003_sft_no_plugins_data_collator(return_hf_format=False): 51 | return partial(default_collate_fn, return_hf_format=return_hf_format) 52 | 53 | 54 | def moss_003_sft_plugins_dataset( 55 | tokenizer, data_file=None, bot_name=None, max_length=2048 56 | ): 57 | # Download data from https://huggingface.co/datasets/fnlp/moss-003-sft-data 58 | if data_file is None: 59 | data_file = "./data/conversations_with_tools_with_inner_instruction_no_text2image_train_all_random_meta0.5_0.1_0.01_moss_0709.jsonl" # noqa: E501 60 | dataset = MOSSSFTDataset( 61 | data_file=data_file, 62 | bot_name=bot_name, 63 | tokenizer=tokenizer, 64 | max_length=max_length, 65 | ) 66 | 67 | return dataset 68 | 69 | 70 | def moss_003_sft_plugins_data_collator(return_hf_format=False): 71 | return partial(default_collate_fn, return_hf_format=return_hf_format) 72 | -------------------------------------------------------------------------------- /xtuner/apis/datasets/oasst1.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from functools import partial 3 | 4 | from datasets import load_dataset 5 | 6 | from xtuner.dataset import process_hf_dataset 7 | from xtuner.dataset.collate_fns import default_collate_fn 8 | from xtuner.dataset.map_fns import oasst1_map_fn, template_map_fn_factory 9 | from xtuner.utils import PROMPT_TEMPLATE 10 | 11 | 12 | def oasst1_dataset( 13 | tokenizer, 14 | path="timdettmers/openassistant-guanaco", 15 | max_length=2048, 16 | prompt_template=PROMPT_TEMPLATE.default, 17 | remove_unused_columns=False, 18 | pack_to_max_length=True, 19 | ): 20 | template_map_fn = template_map_fn_factory(template=prompt_template) 21 | dataset_org = load_dataset(path) 22 | dataset = process_hf_dataset( 23 | dataset=dataset_org, 24 | tokenizer=tokenizer, 25 | max_length=max_length, 26 | dataset_map_fn=oasst1_map_fn, 27 | template_map_fn=template_map_fn, 28 | remove_unused_columns=remove_unused_columns, 29 | shuffle_before_pack=True, 30 | pack_to_max_length=pack_to_max_length, 31 | ) 32 | 33 | return dataset 34 | 35 | 36 | def oasst1_data_collator(return_hf_format=False): 37 | return partial(default_collate_fn, return_hf_format=return_hf_format) 38 | -------------------------------------------------------------------------------- /xtuner/apis/datasets/open_orca.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from functools import partial 3 | 4 | from datasets import load_dataset 5 | 6 | from xtuner.dataset import process_hf_dataset 7 | from xtuner.dataset.collate_fns import default_collate_fn 8 | from xtuner.dataset.map_fns import openorca_map_fn, template_map_fn_factory 9 | from xtuner.utils import PROMPT_TEMPLATE 10 | 11 | 12 | def openorca_dataset( 13 | tokenizer, 14 | path="Open-Orca/OpenOrca", 15 | max_length=2048, 16 | prompt_template=PROMPT_TEMPLATE.default, 17 | remove_unused_columns=True, 18 | pack_to_max_length=True, 19 | ): 20 | template_map_fn = template_map_fn_factory(template=prompt_template) 21 | dataset_org = load_dataset(path) 22 | dataset = process_hf_dataset( 23 | dataset=dataset_org, 24 | tokenizer=tokenizer, 25 | max_length=max_length, 26 | dataset_map_fn=openorca_map_fn, 27 | template_map_fn=template_map_fn, 28 | remove_unused_columns=remove_unused_columns, 29 | shuffle_before_pack=True, 30 | pack_to_max_length=pack_to_max_length, 31 | ) 32 | 33 | return dataset 34 | 35 | 36 | def openorca_data_collator(return_hf_format=False): 37 | return partial(default_collate_fn, return_hf_format=return_hf_format) 38 | -------------------------------------------------------------------------------- /xtuner/apis/datasets/sql.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from functools import partial 3 | 4 | from datasets import load_dataset 5 | 6 | from xtuner.dataset import process_hf_dataset 7 | from xtuner.dataset.collate_fns import default_collate_fn 8 | from xtuner.dataset.map_fns import sql_map_fn, template_map_fn_factory 9 | from xtuner.utils import PROMPT_TEMPLATE 10 | 11 | 12 | def sql_dataset( 13 | tokenizer, 14 | path="b-mc2/sql-create-context", 15 | max_length=2048, 16 | prompt_template=PROMPT_TEMPLATE.default, 17 | remove_unused_columns=True, 18 | pack_to_max_length=True, 19 | ): 20 | template_map_fn = template_map_fn_factory(template=prompt_template) 21 | dataset_org = load_dataset(path) 22 | dataset = process_hf_dataset( 23 | dataset=dataset_org, 24 | tokenizer=tokenizer, 25 | max_length=max_length, 26 | dataset_map_fn=sql_map_fn, 27 | template_map_fn=template_map_fn, 28 | remove_unused_columns=remove_unused_columns, 29 | shuffle_before_pack=True, 30 | pack_to_max_length=pack_to_max_length, 31 | ) 32 | 33 | return dataset 34 | 35 | 36 | def sql_data_collator(return_hf_format=False): 37 | return partial(default_collate_fn, return_hf_format=return_hf_format) 38 | -------------------------------------------------------------------------------- /xtuner/apis/datasets/tiny_codes.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from functools import partial 3 | 4 | from datasets import load_dataset 5 | 6 | from xtuner.dataset import process_hf_dataset 7 | from xtuner.dataset.collate_fns import default_collate_fn 8 | from xtuner.dataset.map_fns import template_map_fn_factory, tiny_codes_map_fn 9 | from xtuner.utils import PROMPT_TEMPLATE 10 | 11 | 12 | def tiny_codes_dataset( 13 | tokenizer, 14 | path="nampdn-ai/tiny-codes", 15 | max_length=2048, 16 | prompt_template=PROMPT_TEMPLATE.default, 17 | remove_unused_columns=True, 18 | pack_to_max_length=True, 19 | ): 20 | template_map_fn = template_map_fn_factory(template=prompt_template) 21 | dataset_org = load_dataset(path) 22 | dataset = process_hf_dataset( 23 | dataset=dataset_org, 24 | tokenizer=tokenizer, 25 | max_length=max_length, 26 | dataset_map_fn=tiny_codes_map_fn, 27 | template_map_fn=template_map_fn, 28 | remove_unused_columns=remove_unused_columns, 29 | shuffle_before_pack=True, 30 | pack_to_max_length=pack_to_max_length, 31 | ) 32 | 33 | return dataset 34 | 35 | 36 | def tiny_codes_data_collator(return_hf_format=False): 37 | return partial(default_collate_fn, return_hf_format=return_hf_format) 38 | -------------------------------------------------------------------------------- /xtuner/apis/datasets/wizardlm.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from functools import partial 3 | 4 | from datasets import load_dataset 5 | 6 | from xtuner.dataset import process_hf_dataset 7 | from xtuner.dataset.collate_fns import default_collate_fn 8 | from xtuner.dataset.map_fns import template_map_fn_factory, wizardlm_map_fn 9 | from xtuner.utils import PROMPT_TEMPLATE 10 | 11 | 12 | def wizardlm_dataset( 13 | tokenizer, 14 | path="WizardLM/WizardLM_evol_instruct_V2_196k", 15 | max_length=2048, 16 | prompt_template=PROMPT_TEMPLATE.default, 17 | remove_unused_columns=False, 18 | pack_to_max_length=True, 19 | ): 20 | template_map_fn = template_map_fn_factory(template=prompt_template) 21 | dataset_org = load_dataset(path) 22 | dataset = process_hf_dataset( 23 | dataset=dataset_org, 24 | tokenizer=tokenizer, 25 | max_length=max_length, 26 | dataset_map_fn=wizardlm_map_fn, 27 | template_map_fn=template_map_fn, 28 | remove_unused_columns=remove_unused_columns, 29 | shuffle_before_pack=True, 30 | pack_to_max_length=pack_to_max_length, 31 | ) 32 | 33 | return dataset 34 | 35 | 36 | def wizardlm_data_collator(return_hf_format=False): 37 | return partial(default_collate_fn, return_hf_format=return_hf_format) 38 | -------------------------------------------------------------------------------- /xtuner/apis/model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | from peft import LoraConfig 4 | from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig 5 | 6 | from xtuner.model import SupervisedFinetune 7 | 8 | __all__ = ["build_model", "build_lora_model", "build_qlora_model"] 9 | 10 | 11 | def build_qlora_model( 12 | model_name_or_path, 13 | quantization_config=None, 14 | lora_config=None, 15 | return_tokenizer=True, 16 | ): 17 | if quantization_config is None: 18 | quantization_config = BitsAndBytesConfig( 19 | load_in_4bit=True, 20 | load_in_8bit=False, 21 | llm_int8_threshold=6.0, 22 | llm_int8_has_fp16_weight=False, 23 | bnb_4bit_compute_dtype=torch.float16, 24 | bnb_4bit_use_double_quant=True, 25 | bnb_4bit_quant_type="nf4", 26 | ) 27 | if lora_config is None: 28 | lora_config = LoraConfig( 29 | r=64, lora_alpha=16, lora_dropout=0.1, bias="none", task_type="CAUSAL_LM" 30 | ) 31 | 32 | llm = AutoModelForCausalLM.from_pretrained( 33 | model_name_or_path, 34 | torch_dtype=torch.float16, 35 | trust_remote_code=True, 36 | quantization_config=quantization_config, 37 | ) 38 | 39 | model = SupervisedFinetune(llm, lora=lora_config) 40 | 41 | if return_tokenizer: 42 | tokenizer = AutoTokenizer.from_pretrained( 43 | model_name_or_path, trust_remote_code=True, encode_special_tokens=True 44 | ) 45 | return model.llm, tokenizer 46 | else: 47 | return model.llm 48 | 49 | 50 | def build_lora_model(model_name_or_path, lora_config=None, return_tokenizer=True): 51 | if lora_config is None: 52 | lora_config = LoraConfig( 53 | r=64, lora_alpha=16, lora_dropout=0.1, bias="none", task_type="CAUSAL_LM" 54 | ) 55 | 56 | llm = AutoModelForCausalLM.from_pretrained( 57 | model_name_or_path, torch_dtype=torch.float16, trust_remote_code=True 58 | ) 59 | 60 | model = SupervisedFinetune(llm, lora=lora_config) 61 | 62 | if return_tokenizer: 63 | tokenizer = AutoTokenizer.from_pretrained( 64 | model_name_or_path, trust_remote_code=True, encode_special_tokens=True 65 | ) 66 | return model.llm, tokenizer 67 | else: 68 | return model.llm 69 | 70 | 71 | def build_model(model_name_or_path, return_tokenizer=True): 72 | model = 
AutoModelForCausalLM.from_pretrained( 73 | model_name_or_path, torch_dtype=torch.float16, trust_remote_code=True 74 | ) 75 | 76 | if return_tokenizer: 77 | tokenizer = AutoTokenizer.from_pretrained( 78 | model_name_or_path, trust_remote_code=True, encode_special_tokens=True 79 | ) 80 | return model, tokenizer 81 | else: 82 | return model 83 | -------------------------------------------------------------------------------- /xtuner/apis/training_args.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from dataclasses import dataclass, field 3 | from typing import Union 4 | 5 | from transformers import TrainingArguments 6 | from transformers.trainer_utils import IntervalStrategy, SchedulerType 7 | 8 | __all__ = ["DefaultTrainingArguments"] 9 | 10 | 11 | @dataclass 12 | class DefaultTrainingArguments(TrainingArguments): 13 | # custom 14 | model_name_or_path: str = field( 15 | default=None, 16 | metadata={"help": "model name or path."}, 17 | ) 18 | dataset_name_or_path: str = field( 19 | default=None, 20 | metadata={"help": "dataset name or path."}, 21 | ) 22 | 23 | # huggingface 24 | default_output_dir = "./work_dirs" 25 | default_do_train = True 26 | default_per_device_train_batch_size = 1 27 | default_learning_rate = 2e-5 28 | default_save_strategy = "epoch" 29 | default_lr_scheduler_type = "cosine" 30 | default_logging_steps = 5 31 | 32 | output_dir: str = field( 33 | default=default_output_dir, 34 | metadata={ 35 | "help": ( 36 | "The output directory where the model predictions and " 37 | "checkpoints will be written." 38 | ) 39 | }, 40 | ) 41 | do_train: bool = field( 42 | default=default_do_train, metadata={"help": "Whether to run training."} 43 | ) 44 | per_device_train_batch_size: int = field( 45 | default=default_per_device_train_batch_size, 46 | metadata={"help": "Batch size per GPU/TPU core/CPU for training."}, 47 | ) 48 | learning_rate: float = field( 49 | default=default_learning_rate, 50 | metadata={"help": "The initial learning rate for AdamW."}, 51 | ) 52 | save_strategy: Union[IntervalStrategy, str] = field( 53 | default=default_save_strategy, 54 | metadata={"help": "The checkpoint save strategy to use."}, 55 | ) 56 | lr_scheduler_type: Union[SchedulerType, str] = field( 57 | default=default_lr_scheduler_type, 58 | metadata={"help": "The scheduler type to use."}, 59 | ) 60 | logging_steps: float = field( 61 | default=default_logging_steps, 62 | metadata={ 63 | "help": ( 64 | "Log every X updates steps. Should be an integer or a " 65 | "float in range `[0,1)`. If smaller than 1, will be " 66 | "interpreted as ratio of total training steps." 67 | ) 68 | }, 69 | ) 70 | -------------------------------------------------------------------------------- /xtuner/configs/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
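A hedged end-to-end sketch connecting the model builders and `DefaultTrainingArguments` above to a plain Hugging Face `Trainer`, in the spirit of `examples/huggingface_trainer`; the model name is a placeholder.

```python
from transformers import Trainer

from xtuner.apis import DefaultTrainingArguments, build_qlora_model
from xtuner.apis.datasets import oasst1_data_collator, oasst1_dataset


def build_trainer(model_name_or_path: str):
    model, tokenizer = build_qlora_model(model_name_or_path, return_tokenizer=True)
    args = DefaultTrainingArguments(model_name_or_path=model_name_or_path)
    train_dataset = oasst1_dataset(tokenizer, max_length=2048)
    return Trainer(
        model=model,
        args=args,
        train_dataset=train_dataset,
        data_collator=oasst1_data_collator(),
    )
```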
2 | import os 3 | 4 | 5 | def get_cfgs_name_path(): 6 | path = os.path.dirname(__file__) 7 | mapping = {} 8 | for root, dirs, files in os.walk(path): 9 | for file_ in files: 10 | if ( 11 | file_.endswith((".py", ".json")) 12 | and not file_.startswith(".") 13 | and not file_.startswith("_") 14 | ): 15 | mapping[os.path.splitext(file_)[0]] = os.path.join(root, file_) 16 | return mapping 17 | 18 | 19 | cfgs_name_path = get_cfgs_name_path() 20 | 21 | __all__ = ["cfgs_name_path"] 22 | -------------------------------------------------------------------------------- /xtuner/configs/cohere/README.md: -------------------------------------------------------------------------------- 1 | # Cohere 100B 2 | 3 | ## Install 4 | 5 | ```bash 6 | # Install the latest xtuner 7 | pip install -U 'xtuner[deepspeed]' 8 | 9 | # Cohere requires the latest version of transformers. 10 | pip install git+https://github.com/huggingface/transformers.git 11 | 12 | # Sequence parallel requires flash-attn 13 | pip install flash-attn 14 | ``` 15 | 16 | ## Full Parameter Fine-tune 17 | 18 | Full parameter fine-tuning needs 64 A100-80G GPUs 19 | 20 | ### slurm 21 | 22 | Note: `$PARTITION` means the virtual partition of slurm. 23 | 24 | ```bash 25 | srun -p $PARTITION --job-name=Cohere --nodes=8 --gres=gpu:8 --ntasks-per-node=8 xtuner train cohere_100b_128k_sp32 --deepspeed deepspeed_zero3 --launcher slurm 26 | ``` 27 | 28 | ### torchrun 29 | 30 | Note: `$NODE_0_ADDR` means the ip address of the node_0 machine. 31 | 32 | ```bash 33 | # execute on node 0 34 | NPROC_PER_NODE=8 NNODES=8 PORT=29600 ADDR=$NODE_0_ADDR NODE_RANK=0 xtuner train cohere_100b_128k_sp32 --deepspeed deepspeed_zero3 35 | 36 | # execute on node 1 37 | NPROC_PER_NODE=8 NNODES=8 PORT=29600 ADDR=$NODE_0_ADDR NODE_RANK=1 xtuner train cohere_100b_128k_sp32 --deepspeed deepspeed_zero3 38 | ``` 39 | 40 | ### Speed 41 | 42 | 16 * A100 80G: 43 | 44 | | Model | Sequence Length | GPUs Number | Sequence Parallel World Size | Tokens per Second | TFLOPs | 45 | | :---------: | :-------------: | :---------: | :--------------------------: | :---------------: | :----: | 46 | | Cohere_100b | 128k | 64 | 32 | 97.3 | 173.4 | 47 | | Cohere_100b | 128k | 128 | 16 | 102.1 | 182.7 | 48 | | Cohere_100b | 128k | 256 | 16 | 101.3 | 181.3 | 49 | -------------------------------------------------------------------------------- /xtuner/configs/deepseek/README.md: -------------------------------------------------------------------------------- 1 | # DeepSeek V2 2 | 3 | ## Install 4 | 5 | ```bash 6 | # Git clone the latest xtuner 7 | git clone https://github.com/InternLM/xtuner.git 8 | 9 | # Install the latest xtuner 10 | cd xtuner 11 | pip install -e '.[all]' 12 | 13 | # DeepSeek V2 requires flash-attn 14 | pip install flash-attn 15 | 16 | # Install the latest transformers 17 | pip install -U transformers 18 | ``` 19 | 20 | ## Full Parameter Fine-tune 21 | 22 | Full parameter fine-tuning of DeepSeek V2 236B needs at least 64 A100-80G GPUs. The full-tuned model will be saved to `${WORK_DIRS}/hf_model` by `HFCheckpointHook`. 23 | 24 | ### slurm 25 | 26 | Note: `$PARTITION` means the virtual partition of slurm. 27 | 28 | ```bash 29 | srun -p $PARTITION --job-name=deepseek --nodes=8 --gres=gpu:8 --ntasks-per-node=8 xtuner train deepseek_v2_chat_full_alpaca_e3 --deepspeed deepspeed_zero3 --launcher slurm 30 | ``` 31 | 32 | ### torchrun 33 | 34 | Note: `$NODE_0_ADDR` means the ip address of the node_0 machine.
35 | 36 | ```bash 37 | # execute on node 0 38 | NPROC_PER_NODE=8 NNODES=8 PORT=29600 ADDR=$NODE_0_ADDR NODE_RANK=0 xtuner train deepseek_v2_chat_full_alpaca_e3 --deepspeed deepspeed_zero3 --launcher pytorch 39 | 40 | # execute on node 1 41 | NPROC_PER_NODE=8 NNODES=8 PORT=29600 ADDR=$NODE_0_ADDR NODE_RANK=1 xtuner train deepseek_v2_chat_full_alpaca_e3 --deepspeed deepspeed_zero3 --launcher pytorch 42 | 43 | # execute on node 2, 3, ..., 7 44 | ``` 45 | 46 | ### Speed 47 | 48 | 128 * A100 80G: 49 | 50 | | Model | Sequence Length | Use Varlen Attn | Sequence Parallel World Size | Tokens per Second | 51 | | :--------------------: | :-------------: | :-------------: | :--------------------------: | :---------------: | 52 | | deepseek v2 hf | 8k | False | 1 | 60 | 53 | | **deepseek v2 XTuner** | **8k** | **False** | **1** | **120 (2x)** | 54 | | deepseek v2 hf | 8k | True | 1 | 60 | 55 | | **deepseek v2 XTuner** | **8k** | **True** | **1** | **130 (2.2x)** | 56 | | deepseek v2 hf | 16k | False | 1 | OOM | 57 | | **deepseek v2 XTuner** | **16k** | **False** | **1** | **148** | 58 | | deepseek v2 hf | 16k | True | 1 | 95 | 59 | | **deepseek v2 XTuner** | **16k** | **True** | **1** | **180 (1.9x)** | 60 | -------------------------------------------------------------------------------- /xtuner/configs/deepspeed/deepspeed_zero1.json: -------------------------------------------------------------------------------- 1 | { 2 | "gradient_accumulation_steps": "auto", 3 | "train_micro_batch_size_per_gpu": "auto", 4 | "gradient_clipping": "auto", 5 | "zero_allow_untested_optimizer": true, 6 | "zero_force_ds_cpu_optimizer": false, 7 | "zero_optimization": { 8 | "stage": 1, 9 | "overlap_comm": true 10 | }, 11 | "fp16": { 12 | "enabled": "auto", 13 | "initial_scale_power": 16 14 | }, 15 | "bf16": { 16 | "enabled": "auto" 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /xtuner/configs/deepspeed/deepspeed_zero2.json: -------------------------------------------------------------------------------- 1 | { 2 | "gradient_accumulation_steps": "auto", 3 | "train_micro_batch_size_per_gpu": "auto", 4 | "gradient_clipping": "auto", 5 | "zero_allow_untested_optimizer": true, 6 | "zero_force_ds_cpu_optimizer": false, 7 | "zero_optimization": { 8 | "stage": 2, 9 | "overlap_comm": true 10 | }, 11 | "fp16": { 12 | "enabled": "auto", 13 | "initial_scale_power": 16 14 | }, 15 | "bf16": { 16 | "enabled": "auto" 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /xtuner/configs/deepspeed/deepspeed_zero2_offload.json: -------------------------------------------------------------------------------- 1 | { 2 | "gradient_accumulation_steps": "auto", 3 | "train_micro_batch_size_per_gpu": "auto", 4 | "gradient_clipping": "auto", 5 | "zero_allow_untested_optimizer": true, 6 | "zero_force_ds_cpu_optimizer": false, 7 | "zero_optimization": { 8 | "stage": 2, 9 | "overlap_comm": true, 10 | "offload_optimizer": { 11 | "device": "cpu", 12 | "pin_memory": true 13 | } 14 | }, 15 | "fp16": { 16 | "enabled": "auto", 17 | "initial_scale_power": 16 18 | }, 19 | "bf16": { 20 | "enabled": "auto" 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /xtuner/configs/deepspeed/deepspeed_zero3.json: -------------------------------------------------------------------------------- 1 | { 2 | "gradient_accumulation_steps": "auto", 3 | "train_micro_batch_size_per_gpu": "auto", 4 | "gradient_clipping": "auto", 5 |
"zero_allow_untested_optimizer": true, 6 | "zero_force_ds_cpu_optimizer": false, 7 | "zero_optimization": { 8 | "stage": 3, 9 | "overlap_comm": true, 10 | "stage3_gather_16bit_weights_on_model_save": true 11 | }, 12 | "fp16": { 13 | "enabled": "auto", 14 | "initial_scale_power": 16 15 | }, 16 | "bf16": { 17 | "enabled": "auto" 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /xtuner/configs/deepspeed/deepspeed_zero3_offload.json: -------------------------------------------------------------------------------- 1 | { 2 | "gradient_accumulation_steps": "auto", 3 | "train_micro_batch_size_per_gpu": "auto", 4 | "gradient_clipping": "auto", 5 | "zero_allow_untested_optimizer": true, 6 | "zero_force_ds_cpu_optimizer": false, 7 | "zero_optimization": { 8 | "stage": 3, 9 | "overlap_comm": true, 10 | "offload_optimizer": { 11 | "device": "cpu", 12 | "pin_memory": true 13 | }, 14 | "offload_param": { 15 | "device": "cpu", 16 | "pin_memory": true 17 | }, 18 | "stage3_gather_16bit_weights_on_model_save": true 19 | }, 20 | "fp16": { 21 | "enabled": "auto", 22 | "initial_scale_power": 16 23 | }, 24 | "bf16": { 25 | "enabled": "auto" 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /xtuner/configs/internlm/internlm_7b/internlm_7b_qlora_oasst1_e3_hf.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | from datasets import load_dataset 4 | from peft import LoraConfig 5 | from transformers import ( 6 | AutoModelForCausalLM, 7 | AutoTokenizer, 8 | BitsAndBytesConfig, 9 | Trainer, 10 | TrainingArguments, 11 | ) 12 | 13 | from xtuner.dataset import process_hf_dataset 14 | from xtuner.dataset.map_fns import oasst1_map_fn, template_map_fn_factory 15 | from xtuner.utils import PROMPT_TEMPLATE 16 | 17 | framework = "huggingface" 18 | pretrained_model_name_or_path = "internlm/internlm-7b" 19 | dataset_name_or_path = "timdettmers/openassistant-guanaco" 20 | max_length = 2048 21 | pack_to_max_length = True 22 | prompt_template = PROMPT_TEMPLATE.default 23 | 24 | trainer = Trainer 25 | 26 | training_args = dict( 27 | type=TrainingArguments, 28 | do_train=True, 29 | learning_rate=2e-4, 30 | weight_decay=0, 31 | lr_scheduler_type="cosine", 32 | warmup_steps=100, 33 | per_device_train_batch_size=1, 34 | gradient_accumulation_steps=16, 35 | num_train_epochs=3, 36 | fp16=True, 37 | logging_steps=1, 38 | optim="paged_adamw_32bit", 39 | save_strategy="steps", 40 | save_steps=1000, 41 | save_total_limit=2, 42 | ddp_find_unused_parameters=False, 43 | ) 44 | 45 | tokenizer = dict( 46 | type=AutoTokenizer.from_pretrained, 47 | pretrained_model_name_or_path=pretrained_model_name_or_path, 48 | trust_remote_code=True, 49 | padding_side="right", 50 | ) 51 | 52 | model = dict( 53 | type=AutoModelForCausalLM.from_pretrained, 54 | pretrained_model_name_or_path=pretrained_model_name_or_path, 55 | trust_remote_code=True, 56 | torch_dtype=torch.float16, 57 | quantization_config=dict( 58 | type=BitsAndBytesConfig, 59 | load_in_4bit=True, 60 | load_in_8bit=False, 61 | llm_int8_threshold=6.0, 62 | llm_int8_has_fp16_weight=False, 63 | bnb_4bit_compute_dtype=torch.float16, 64 | bnb_4bit_use_double_quant=True, 65 | bnb_4bit_quant_type="nf4", 66 | ), 67 | ) 68 | 69 | lora = dict( 70 | type=LoraConfig, 71 | r=64, 72 | lora_alpha=16, 73 | lora_dropout=0.1, 74 | bias="none", 75 | task_type="CAUSAL_LM", 76 | ) 77 | 78 | train_dataset = dict( 79 | 
type=process_hf_dataset, 80 | dataset=dict(type=load_dataset, path=dataset_name_or_path), 81 | tokenizer=tokenizer, 82 | max_length=max_length, 83 | dataset_map_fn=oasst1_map_fn, 84 | template_map_fn=dict(type=template_map_fn_factory, template=prompt_template), 85 | remove_unused_columns=True, 86 | shuffle_before_pack=True, 87 | pack_to_max_length=pack_to_max_length, 88 | ) 89 | -------------------------------------------------------------------------------- /xtuner/configs/internvl/v1_5/convert_to_official.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import os.path as osp 4 | 5 | import torch 6 | from mmengine.config import Config 7 | from transformers import AutoTokenizer 8 | 9 | from xtuner.model.utils import LoadWoInit 10 | from xtuner.registry import BUILDER 11 | 12 | 13 | def convert_to_official(config, trained_path, save_path): 14 | cfg = Config.fromfile(config) 15 | cfg.model.pretrained_pth = trained_path 16 | cfg.model.quantization_vit = False 17 | cfg.model.quantization_llm = False 18 | 19 | with LoadWoInit(): 20 | model = BUILDER.build(cfg.model) 21 | model.to(torch.bfloat16) 22 | 23 | if model.use_visual_encoder_lora: 24 | vision_model = model.model.vision_model.merge_and_unload() 25 | model.model.vision_model = vision_model 26 | 27 | if model.use_llm_lora: 28 | language_model = model.model.language_model.merge_and_unload() 29 | model.model.language_model = language_model 30 | 31 | model.model.save_pretrained(save_path) 32 | 33 | tokenizer = AutoTokenizer.from_pretrained( 34 | cfg.model.model_path, trust_remote_code=True 35 | ) 36 | tokenizer.save_pretrained(save_path) 37 | 38 | print(model) 39 | 40 | 41 | def main(): 42 | parser = argparse.ArgumentParser( 43 | description="Convert the pth model to HuggingFace model" 44 | ) 45 | parser.add_argument("config", help="config file name or path.") 46 | parser.add_argument("trained_model_pth", help="The trained model path.") 47 | parser.add_argument("save_path", help="The path to save the converted model.") 48 | args = parser.parse_args() 49 | 50 | if osp.realpath(args.trained_model_pth) == osp.realpath(args.save_path): 51 | raise ValueError("The trained path and save path should not be the same.") 52 | 53 | convert_to_official(args.config, args.trained_model_pth, args.save_path) 54 | 55 | 56 | if __name__ == "__main__": 57 | main() 58 | -------------------------------------------------------------------------------- /xtuner/configs/llama/llama2_70b/llama2_70b_int8_lora_open_platypus_e1_hf.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | import torch 3 | from datasets import load_dataset 4 | from peft import LoraConfig 5 | from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments 6 | 7 | from xtuner.dataset import process_hf_dataset 8 | from xtuner.dataset.map_fns import alpaca_map_fn, template_map_fn_factory 9 | from xtuner.utils import PROMPT_TEMPLATE 10 | 11 | framework = "huggingface" 12 | pretrained_model_name_or_path = "meta-llama/Llama-2-70b-hf" 13 | dataset_name_or_path = "garage-bAInd/Open-Platypus" 14 | max_length = 2048 15 | pack_to_max_length = True 16 | prompt_template = PROMPT_TEMPLATE.llama2_chat 17 | 18 | trainer = Trainer 19 | 20 | training_args = dict( 21 | type=TrainingArguments, 22 | do_train=True, 23 | learning_rate=3e-4, 24 | weight_decay=0, 25 | lr_scheduler_type="cosine", 26 | warmup_steps=100, 27 | per_device_train_batch_size=1, 28 | gradient_accumulation_steps=16, 29 | num_train_epochs=1, 30 | fp16=True, 31 | logging_steps=1, 32 | optim="adamw_torch", 33 | save_strategy="steps", 34 | save_steps=1000, 35 | save_total_limit=2, 36 | ddp_find_unused_parameters=False, 37 | ) 38 | 39 | tokenizer = dict( 40 | type=AutoTokenizer.from_pretrained, 41 | pretrained_model_name_or_path=pretrained_model_name_or_path, 42 | trust_remote_code=True, 43 | padding_side="right", 44 | ) 45 | 46 | model = dict( 47 | type=AutoModelForCausalLM.from_pretrained, 48 | pretrained_model_name_or_path=pretrained_model_name_or_path, 49 | trust_remote_code=True, 50 | torch_dtype=torch.float16, 51 | load_in_8bit=True, 52 | ) 53 | 54 | lora = dict( 55 | type=LoraConfig, 56 | r=16, 57 | lora_alpha=16, 58 | lora_dropout=0.05, 59 | target_modules=["gate_proj", "down_proj", "up_proj"], 60 | bias="none", 61 | task_type="CAUSAL_LM", 62 | ) 63 | 64 | train_dataset = dict( 65 | type=process_hf_dataset, 66 | dataset=dict(type=load_dataset, path=dataset_name_or_path), 67 | tokenizer=tokenizer, 68 | max_length=max_length, 69 | dataset_map_fn=alpaca_map_fn, 70 | template_map_fn=dict(type=template_map_fn_factory, template=prompt_template), 71 | remove_unused_columns=True, 72 | shuffle_before_pack=True, 73 | pack_to_max_length=pack_to_max_length, 74 | ) 75 | -------------------------------------------------------------------------------- /xtuner/configs/llama/llama2_70b/llama2_70b_qlora_open_platypus_e1_hf.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | import torch 3 | from datasets import load_dataset 4 | from peft import LoraConfig 5 | from transformers import ( 6 | AutoModelForCausalLM, 7 | AutoTokenizer, 8 | BitsAndBytesConfig, 9 | Trainer, 10 | TrainingArguments, 11 | ) 12 | 13 | from xtuner.dataset import process_hf_dataset 14 | from xtuner.dataset.map_fns import alpaca_map_fn, template_map_fn_factory 15 | from xtuner.utils import PROMPT_TEMPLATE 16 | 17 | framework = "huggingface" 18 | pretrained_model_name_or_path = "meta-llama/Llama-2-70b-hf" 19 | dataset_name_or_path = "garage-bAInd/Open-Platypus" 20 | max_length = 2048 21 | pack_to_max_length = True 22 | prompt_template = PROMPT_TEMPLATE.llama2_chat 23 | 24 | trainer = Trainer 25 | 26 | training_args = dict( 27 | type=TrainingArguments, 28 | do_train=True, 29 | learning_rate=3e-4, 30 | weight_decay=0, 31 | lr_scheduler_type="cosine", 32 | warmup_steps=100, 33 | per_device_train_batch_size=1, 34 | gradient_accumulation_steps=16, 35 | num_train_epochs=1, 36 | fp16=True, 37 | logging_steps=1, 38 | optim="adamw_torch", 39 | save_strategy="steps", 40 | save_steps=1000, 41 | save_total_limit=2, 42 | ddp_find_unused_parameters=False, 43 | ) 44 | 45 | tokenizer = dict( 46 | type=AutoTokenizer.from_pretrained, 47 | pretrained_model_name_or_path=pretrained_model_name_or_path, 48 | trust_remote_code=True, 49 | padding_side="right", 50 | ) 51 | 52 | model = dict( 53 | type=AutoModelForCausalLM.from_pretrained, 54 | pretrained_model_name_or_path=pretrained_model_name_or_path, 55 | trust_remote_code=True, 56 | torch_dtype=torch.float16, 57 | quantization_config=dict( 58 | type=BitsAndBytesConfig, 59 | load_in_4bit=True, 60 | load_in_8bit=False, 61 | llm_int8_threshold=6.0, 62 | llm_int8_has_fp16_weight=False, 63 | bnb_4bit_compute_dtype=torch.float16, 64 | bnb_4bit_use_double_quant=True, 65 | bnb_4bit_quant_type="nf4", 66 | ), 67 | ) 68 | 69 | lora = dict( 70 | type=LoraConfig, 71 | r=64, 72 | lora_alpha=16, 73 | lora_dropout=0.1, 74 | target_modules=["gate_proj", "down_proj", "up_proj"], 75 | bias="none", 76 | task_type="CAUSAL_LM", 77 | ) 78 | 79 | train_dataset = dict( 80 | type=process_hf_dataset, 81 | dataset=dict(type=load_dataset, path=dataset_name_or_path), 82 | tokenizer=tokenizer, 83 | max_length=max_length, 84 | dataset_map_fn=alpaca_map_fn, 85 | template_map_fn=dict(type=template_map_fn_factory, template=prompt_template), 86 | remove_unused_columns=True, 87 | shuffle_before_pack=True, 88 | pack_to_max_length=pack_to_max_length, 89 | ) 90 | -------------------------------------------------------------------------------- /xtuner/configs/llama/llama3_8b/README.md: -------------------------------------------------------------------------------- 1 | # Llama3 8B 2 | 3 | ## Install 4 | 5 | ```bash 6 | # Install the latest xtuner 7 | pip install -U 'xtuner[deepspeed]' 8 | 9 | # install the latest transformers 10 | pip install -U transformers 11 | ``` 12 | 13 | ## QLoRA Fine-tune 14 | 15 | QLoRA only need a single A100-80G 16 | 17 | ```bash 18 | xtuner train llama3_8b_instruct_qlora_alpaca_e3 19 | ``` 20 | 21 | ## Full Parameter Fine-tune 22 | 23 | Full parameter fine-tune Llama3 8B in 8k context only requires 2 * A100-80G 24 | 25 | ### torchrun 26 | 27 | ```bash 28 | NPROC_PER_NODE=${GPU_NUM} xtuner train llama3_8b_instruct_full_alpaca_e3 --deepspeed deepspeed_zero2 29 | ``` 30 | 31 | ### slurm 32 | 33 | ```bash 34 | srun ${SRUN_ARGS} xtuner train llama3_8b_instruct_full_alpaca_e3 --launcher slurm --deepspeed deepspeed_zero3 35 | ``` 36 | 37 | ### Speed 38 | 39 | | Model | Sequence 
Length | GPU Number | ZeRO | Sequence Parallel | Tokens per Second | TFLOPs | 40 | | :-------: | :-------------: | :--------: | :----: | :---------------: | :---------------: | :----: | 41 | | Llama3 8B | 8k | 2 | ZeRO-3 | 2 | 1037.0 | 76.8 | 42 | | Llama3 8B | 8k | 4 | ZeRO-3 | 1 | 2331.3 | 172.6 | 43 | | Llama3 8B | 8k | 8 | ZeRO-3 | 1 | 2771.2 | 205.1 | 44 | 45 | | Model | Sequence Length | GPU Number | ZeRO | Sequence Parallel | Tokens per Second | TFLOPs | 46 | | :-------: | :-------------: | :--------: | :----: | :---------------: | :---------------: | :----: | 47 | | Llama3 8B | 8k | 8 | ZeRO-3 | 1 | 2771.2 | 205.1 | 48 | | Llama3 8B | 16k | 8 | ZeRO-3 | 2 | 2320.7 | 191.7 | 49 | | Llama3 8B | 32k | 8 | ZeRO-3 | 4 | 1870.2 | 186.6 | 50 | | Llama3 8B | 64k | 8 | ZeRO-3 | 8 | 1356.4 | 182.0 | 51 | | Llama3 8B | 128k | 8 | ZeRO-3 | 8 | 875.7 | 177.7 | 52 | -------------------------------------------------------------------------------- /xtuner/configs/mixtral/README.md: -------------------------------------------------------------------------------- 1 | # Mixtral 8x7B 2 | 3 | ## Install 4 | 5 | ```bash 6 | # Install the latest xtuner 7 | pip install -U 'xtuner[deepspeed]' 8 | 9 | # Mixtral requires flash-attn 10 | pip install flash-attn 11 | 12 | # install the latest transformers 13 | pip install -U transformers 14 | ``` 15 | 16 | ## QLoRA Fine-tune 17 | 18 | QLoRA only needs a single A100-80G 19 | 20 | ```bash 21 | xtuner train mixtral_8x7b_instruct_qlora_oasst1_e3 --deepspeed deepspeed_zero2 22 | ``` 23 | 24 | ## Full Parameter Fine-tune 25 | 26 | Full parameter fine-tuning needs 16 A100-80G GPUs 27 | 28 | ### slurm 29 | 30 | Note: `$PARTITION` is the Slurm partition to submit the job to. 31 | 32 | ```bash 33 | srun -p $PARTITION --job-name=mixtral --nodes=2 --gres=gpu:8 --ntasks-per-node=8 xtuner train mixtral_8x7b_instruct_full_oasst1_e3 --deepspeed deepspeed_zero3 --launcher slurm 34 | ``` 35 | 36 | ### torchrun 37 | 38 | Note: `$NODE_0_ADDR` is the IP address of the node 0 machine. 39 | 40 | ```bash 41 | # execute on node 0 42 | NPROC_PER_NODE=8 NNODES=2 PORT=29600 ADDR=$NODE_0_ADDR NODE_RANK=0 xtuner train mixtral_8x7b_instruct_full_oasst1_e3 --deepspeed deepspeed_zero3 43 | 44 | # execute on node 1 45 | NPROC_PER_NODE=8 NNODES=2 PORT=29600 ADDR=$NODE_0_ADDR NODE_RANK=1 xtuner train mixtral_8x7b_instruct_full_oasst1_e3 --deepspeed deepspeed_zero3 46 | ``` 47 | 48 | ### Speed 49 | 50 | 16 * A100 80G: 51 | 52 | | Model | Sequence Length | Use Varlen Attn | Sequence Parallel World Size | Tokens per Second | 53 | | :----------: | :-------------: | :-------------: | :--------------------------: | :---------------: | 54 | | mixtral_8x7b | 32k | False | 1 | 853.7 | 55 | | mixtral_8x7b | 32k | True | 1 | 910.1 | 56 | | mixtral_8x7b | 32k | False | 2 | 635.2 | 57 | | mixtral_8x7b | 32k | True | 2 | 650.9 | 58 | -------------------------------------------------------------------------------- /xtuner/configs/qwen/qwen1_5/qwen1_5_110b_chat/README.md: -------------------------------------------------------------------------------- 1 | # Qwen 110B 2 | 3 | ## Install 4 | 5 | ```bash 6 | # Install the latest xtuner 7 | pip install -U 'xtuner[deepspeed]' 8 | 9 | # We recommend installing flash_attn 10 | # pip install flash-attn 11 | 12 | # install the latest transformers 13 | pip install -U transformers 14 | ``` 15 | 16 | ## QLoRA Fine-tune 17 | 18 | Training Qwen 110B with 32k context capability requires only 2 * A100 80G.
19 | 20 | ```bash 21 | xtuner train xtuner/configs/qwen/qwen1_5/qwen1_5_110b_chat/qwen1_5_110b_chat_qlora_alpaca_e3_16k_2gpus.py --deepspeed deepspeed_zero3 22 | ``` 23 | 24 |
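The `.pth` checkpoint saved by the command above can be converted into a HuggingFace-format adapter with xtuner's generic `pth_to_hf` converter. The work directory and iteration number below are placeholders for whatever your run actually produces:

```bash
# Convert the saved .pth checkpoint to a HuggingFace adapter (paths are placeholders)
xtuner convert pth_to_hf \
    xtuner/configs/qwen/qwen1_5/qwen1_5_110b_chat/qwen1_5_110b_chat_qlora_alpaca_e3_16k_2gpus.py \
    ./work_dirs/qwen1_5_110b_chat_qlora_alpaca_e3_16k_2gpus/iter_xxx.pth \
    ./qwen1_5_110b_chat_adapter
```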
25 | 26 |
27 | -------------------------------------------------------------------------------- /xtuner/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import warnings 3 | 4 | from .concat_dataset import ConcatDataset 5 | from .huggingface import process_hf_dataset 6 | from .intern_repo import ( 7 | build_packed_dataset, 8 | load_intern_repo_tokenized_dataset, 9 | load_intern_repo_untokenized_dataset, 10 | ) 11 | from .internvl_dataset import InternVL_V1_5_Dataset 12 | from .json_dataset import load_json_file 13 | from .llava import LLaVADataset 14 | from .modelscope import process_ms_dataset 15 | from .moss_sft import MOSSSFTDataset 16 | from .refcoco_json import ( 17 | InvRefCOCOJsonDataset, 18 | RefCOCOJsonDataset, 19 | RefCOCOJsonEvalDataset, 20 | ) 21 | from .utils import decode_base64_to_image, expand2square, load_image 22 | 23 | # ignore FutureWarning in hf datasets 24 | warnings.simplefilter(action="ignore", category=FutureWarning) 25 | 26 | __all__ = [ 27 | "process_hf_dataset", 28 | "ConcatDataset", 29 | "MOSSSFTDataset", 30 | "process_ms_dataset", 31 | "LLaVADataset", 32 | "expand2square", 33 | "decode_base64_to_image", 34 | "load_image", 35 | "load_intern_repo_tokenized_dataset", 36 | "load_intern_repo_untokenized_dataset", 37 | "build_packed_dataset", 38 | "RefCOCOJsonDataset", 39 | "RefCOCOJsonEvalDataset", 40 | "InvRefCOCOJsonDataset", 41 | "load_json_file", 42 | "InternVL_V1_5_Dataset", 43 | ] 44 | -------------------------------------------------------------------------------- /xtuner/dataset/collate_fns/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .default_collate_fn import default_collate_fn 3 | from .mmlu_collate_fn import mmlu_collate_fn 4 | 5 | __all__ = ["default_collate_fn", "mmlu_collate_fn"] 6 | -------------------------------------------------------------------------------- /xtuner/dataset/collate_fns/mmlu_collate_fn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
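# A minimal usage sketch for the collate function below (the dataset object and batch
# size are placeholders): it pads `input_ids` / `labels` to the batch max length and,
# unless `return_hf_format=True`, wraps the batch as {"data": ..., "data_samples": ...}.
#
#   from torch.utils.data import DataLoader
#   loader = DataLoader(mmlu_dataset, batch_size=4, collate_fn=mmlu_collate_fn)
#   batch = next(iter(loader))
#   input_ids = batch["data"]["input_ids"]          # padded token ids
#   subjects = batch["data_samples"]["subjects"]    # one MMLU subject per sample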
2 | from typing import Dict, Sequence 3 | 4 | import torch 5 | from torch.nn.utils.rnn import pad_sequence 6 | 7 | from xtuner.utils import DEFAULT_PAD_TOKEN_INDEX, IGNORE_INDEX 8 | 9 | 10 | def mmlu_collate_fn( 11 | instances: Sequence[Dict], 12 | pad_index: int = DEFAULT_PAD_TOKEN_INDEX, 13 | return_hf_format: bool = False, 14 | ) -> Dict[str, torch.Tensor]: 15 | input_ids = [] 16 | labels = [] 17 | data_samples = {"labels": [], "subjects": []} 18 | for example in instances: 19 | input_ids.append(torch.tensor(example["input_ids"])) 20 | labels.append(torch.tensor(example["labels"])) 21 | data_samples["labels"].append(example["output"]) 22 | data_samples["subjects"].append(example["subject"]) 23 | if len(instances) > 1: 24 | input_ids = pad_sequence(input_ids, batch_first=True, padding_value=pad_index) 25 | labels = pad_sequence(labels, batch_first=True, padding_value=IGNORE_INDEX) 26 | else: 27 | input_ids = torch.stack(input_ids) 28 | labels = torch.stack(labels) 29 | 30 | data_dict = { 31 | "input_ids": input_ids, 32 | "attention_mask": input_ids.ne(pad_index), 33 | "labels": labels, 34 | } 35 | 36 | if return_hf_format: 37 | return data_dict 38 | else: 39 | return {"data": data_dict, "data_samples": data_samples} 40 | -------------------------------------------------------------------------------- /xtuner/dataset/concat_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from torch.utils.data import ConcatDataset as TorchConcatDataset 3 | 4 | from xtuner.registry import BUILDER 5 | 6 | 7 | class ConcatDataset(TorchConcatDataset): 8 | def __init__(self, datasets): 9 | datasets_instance = [] 10 | for cfg in datasets: 11 | datasets_instance.append(BUILDER.build(cfg)) 12 | super().__init__(datasets=datasets_instance) 13 | 14 | def __repr__(self): 15 | main_str = "Dataset as a concatenation of multiple datasets. \n" 16 | main_str += ",\n".join([f"{repr(dataset)}" for dataset in self.datasets]) 17 | return main_str 18 | -------------------------------------------------------------------------------- /xtuner/dataset/json_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import json 3 | import os 4 | 5 | from datasets import Dataset, concatenate_datasets 6 | 7 | 8 | def load_json_file(data_files=None, data_dir=None, suffix=None): 9 | assert (data_files is not None) != (data_dir is not None) 10 | if data_dir is not None: 11 | data_files = os.listdir(data_dir) 12 | data_files = [os.path.join(data_dir, fn) for fn in data_files] 13 | if suffix is not None: 14 | data_files = [fp for fp in data_files if fp.endswith(suffix)] 15 | elif isinstance(data_files, str): 16 | data_files = [data_files] 17 | 18 | dataset_list = [] 19 | for fp in data_files: 20 | with open(fp, encoding="utf-8") as file: 21 | data = json.load(file) 22 | ds = Dataset.from_list(data) 23 | dataset_list.append(ds) 24 | dataset = concatenate_datasets(dataset_list) 25 | return dataset 26 | -------------------------------------------------------------------------------- /xtuner/dataset/map_fns/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from .dataset_map_fns import * # noqa: F401, F403 3 | from .template_map_fn import template_map_fn # noqa: F401 4 | from .template_map_fn import template_map_fn_factory # noqa: F401 5 | -------------------------------------------------------------------------------- /xtuner/dataset/map_fns/dataset_map_fns/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .alpaca_map_fn import alpaca_map_fn 3 | from .alpaca_zh_map_fn import alpaca_zh_map_fn 4 | from .arxiv_map_fn import arxiv_map_fn 5 | from .code_alpaca_map_fn import code_alpaca_map_fn 6 | from .colors_map_fn import colors_map_fn 7 | from .crime_kg_assitant_map_fn import crime_kg_assitant_map_fn 8 | from .default_map_fn import default_map_fn 9 | from .law_reference_map_fn import law_reference_map_fn 10 | from .llava_map_fn import llava_image_only_map_fn, llava_map_fn 11 | from .medical_map_fn import medical_map_fn 12 | from .msagent_map_fn import msagent_react_map_fn 13 | from .oasst1_map_fn import oasst1_map_fn 14 | from .openai_map_fn import openai_map_fn 15 | from .openorca_map_fn import openorca_map_fn 16 | from .pretrain_map_fn import pretrain_map_fn 17 | from .sql_map_fn import sql_map_fn 18 | from .stack_exchange_map_fn import stack_exchange_map_fn 19 | from .tiny_codes_map_fn import tiny_codes_map_fn 20 | from .wizardlm_map_fn import wizardlm_map_fn 21 | 22 | DATASET_FORMAT_MAPPING = dict( 23 | alpaca=alpaca_map_fn, 24 | alpaca_zh=alpaca_zh_map_fn, 25 | arxiv=arxiv_map_fn, 26 | code_alpaca=code_alpaca_map_fn, 27 | colors=colors_map_fn, 28 | crime_kg_assitan=crime_kg_assitant_map_fn, 29 | default=default_map_fn, 30 | law_reference=law_reference_map_fn, 31 | llava_image_only=llava_image_only_map_fn, 32 | llava=llava_map_fn, 33 | medical=medical_map_fn, 34 | msagent_react=msagent_react_map_fn, 35 | oasst1=oasst1_map_fn, 36 | openai=openai_map_fn, 37 | openorca=openorca_map_fn, 38 | pretrain=pretrain_map_fn, 39 | sql=sql_map_fn, 40 | stack_exchange=stack_exchange_map_fn, 41 | tiny_codes=tiny_codes_map_fn, 42 | wizardlm=wizardlm_map_fn, 43 | ) 44 | 45 | __all__ = [ 46 | "alpaca_map_fn", 47 | "alpaca_zh_map_fn", 48 | "oasst1_map_fn", 49 | "arxiv_map_fn", 50 | "medical_map_fn", 51 | "openorca_map_fn", 52 | "code_alpaca_map_fn", 53 | "tiny_codes_map_fn", 54 | "colors_map_fn", 55 | "law_reference_map_fn", 56 | "crime_kg_assitant_map_fn", 57 | "sql_map_fn", 58 | "openai_map_fn", 59 | "wizardlm_map_fn", 60 | "stack_exchange_map_fn", 61 | "msagent_react_map_fn", 62 | "pretrain_map_fn", 63 | "default_map_fn", 64 | "llava_image_only_map_fn", 65 | "llava_map_fn", 66 | "DATASET_FORMAT_MAPPING", 67 | ] 68 | -------------------------------------------------------------------------------- /xtuner/dataset/map_fns/dataset_map_fns/alpaca_map_fn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | 3 | 4 | def alpaca_map_fn(example): 5 | if example.get("output") == "": 6 | return {"conversation": []} 7 | else: 8 | return { 9 | "conversation": [ 10 | { 11 | "input": f"{example['instruction']}\n{example['input']}", 12 | "output": example["output"], 13 | } 14 | ] 15 | } 16 | -------------------------------------------------------------------------------- /xtuner/dataset/map_fns/dataset_map_fns/alpaca_zh_map_fn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
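# A worked example of the map function below (field values are made-up placeholders):
# it flattens one Chinese Alpaca record into xtuner's single-turn conversation format.
#
#   example = {
#       "instruction_zh": "Summarize the paragraph.",
#       "input_zh": "<paragraph text>",
#       "output_zh": "<summary text>",
#   }
#   alpaca_zh_map_fn(example)
#   # -> {"conversation": [{"input": "Summarize the paragraph.\n<paragraph text>",
#   #                       "output": "<summary text>"}]}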
2 | 3 | 4 | def alpaca_zh_map_fn(example): 5 | return { 6 | "conversation": [ 7 | { 8 | "input": f"{example['instruction_zh']}\n{example['input_zh']}", 9 | "output": example["output_zh"], 10 | } 11 | ] 12 | } 13 | -------------------------------------------------------------------------------- /xtuner/dataset/map_fns/dataset_map_fns/arxiv_map_fn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from xtuner.utils import SYSTEM_TEMPLATE 3 | 4 | 5 | def arxiv_map_fn(example): 6 | return { 7 | "conversation": [ 8 | { 9 | "system": SYSTEM_TEMPLATE.arxiv_gentile, 10 | "input": example["abstract"], 11 | "output": example["title"], 12 | } 13 | ] 14 | } 15 | -------------------------------------------------------------------------------- /xtuner/dataset/map_fns/dataset_map_fns/code_alpaca_map_fn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from xtuner.utils import SYSTEM_TEMPLATE 3 | 4 | 5 | def code_alpaca_map_fn(example): 6 | return { 7 | "conversation": [ 8 | { 9 | "system": SYSTEM_TEMPLATE.coder, 10 | "input": example["prompt"], 11 | "output": example["completion"], 12 | } 13 | ] 14 | } 15 | -------------------------------------------------------------------------------- /xtuner/dataset/map_fns/dataset_map_fns/colors_map_fn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from xtuner.utils import SYSTEM_TEMPLATE 3 | 4 | 5 | def colors_map_fn(example): 6 | desc = ":".join(example["description"].split(":")[1:]).strip() 7 | return { 8 | "conversation": [ 9 | { 10 | "system": SYSTEM_TEMPLATE.colorist, 11 | "input": desc, 12 | "output": example["color"], 13 | } 14 | ] 15 | } 16 | -------------------------------------------------------------------------------- /xtuner/dataset/map_fns/dataset_map_fns/crime_kg_assitant_map_fn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from xtuner.utils import SYSTEM_TEMPLATE 3 | 4 | 5 | def crime_kg_assitant_map_fn(example): 6 | return { 7 | "conversation": [ 8 | { 9 | "system": SYSTEM_TEMPLATE.lawyer, 10 | "input": example["input"], 11 | "output": example["output"], 12 | } 13 | ] 14 | } 15 | -------------------------------------------------------------------------------- /xtuner/dataset/map_fns/dataset_map_fns/default_map_fn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | def default_map_fn(example): 3 | return {"conversation": [{"input": example["input"], "output": example["output"]}]} 4 | -------------------------------------------------------------------------------- /xtuner/dataset/map_fns/dataset_map_fns/law_reference_map_fn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from xtuner.utils import SYSTEM_TEMPLATE 3 | 4 | 5 | def law_reference_map_fn(example): 6 | return { 7 | "conversation": [ 8 | { 9 | "system": SYSTEM_TEMPLATE.lawyer, 10 | "input": example["question"], 11 | "output": example["answer"], 12 | } 13 | ] 14 | } 15 | -------------------------------------------------------------------------------- /xtuner/dataset/map_fns/dataset_map_fns/llava_map_fn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from xtuner.utils import DEFAULT_IMAGE_TOKEN 3 | 4 | 5 | def llava_image_only_map_fn(example): 6 | # input contains the DEFAULT_IMAGE_TOKEN only 7 | messages = example["conversations"] 8 | input = "" 9 | conversation = [] 10 | while messages and messages[0]["from"] == "gpt": 11 | # Skip the first one if it is from gpt 12 | messages = messages[1:] 13 | for msg in messages: 14 | if msg["from"] == "human": 15 | assert DEFAULT_IMAGE_TOKEN in msg["value"] 16 | input += DEFAULT_IMAGE_TOKEN 17 | elif msg["from"] == "gpt": 18 | conversation.append({"input": input, "output": msg["value"]}) 19 | input = "" 20 | else: 21 | raise NotImplementedError 22 | return {"conversation": conversation} 23 | 24 | 25 | def llava_map_fn(example): 26 | messages = example["conversations"] 27 | input = "" 28 | conversation = [] 29 | while messages and messages[0]["from"] == "gpt": 30 | # Skip the first one if it is from gpt 31 | messages = messages[1:] 32 | for msg in messages: 33 | if msg["from"] == "human": 34 | if DEFAULT_IMAGE_TOKEN in msg["value"]: 35 | msg["value"] = msg["value"].replace(DEFAULT_IMAGE_TOKEN, "").strip() 36 | msg["value"] = DEFAULT_IMAGE_TOKEN + "\n" + msg["value"] 37 | msg["value"] = msg["value"].strip() 38 | input += msg["value"] 39 | 40 | elif msg["from"] == "gpt": 41 | conversation.append({"input": input, "output": msg["value"]}) 42 | input = "" 43 | else: 44 | raise NotImplementedError 45 | return {"conversation": conversation} 46 | -------------------------------------------------------------------------------- /xtuner/dataset/map_fns/dataset_map_fns/medical_map_fn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from xtuner.utils import SYSTEM_TEMPLATE 3 | 4 | 5 | def medical_map_fn(example): 6 | return { 7 | "conversation": [ 8 | { 9 | "system": SYSTEM_TEMPLATE.medical, 10 | "input": "{instruction}\n{input}".format(**example), 11 | "output": example["output"], 12 | } 13 | ] 14 | } 15 | -------------------------------------------------------------------------------- /xtuner/dataset/map_fns/dataset_map_fns/oasst1_map_fn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | def oasst1_map_fn(example): 3 | r"""Example before preprocessing: 4 | example['text'] = '### Human: Can you explain xxx' 5 | '### Assistant: Sure! xxx' 6 | '### Human: I didn't understand how xxx' 7 | '### Assistant: It has to do with a process xxx.' 8 | 9 | Example after preprocessing: 10 | example['conversation'] = [ 11 | { 12 | 'input': 'Can you explain xxx', 13 | 'output': 'Sure! xxx' 14 | }, 15 | { 16 | 'input': 'I didn't understand how xxx', 17 | 'output': 'It has to do with a process xxx.' 
18 | } 19 | ] 20 | """ 21 | data = [] 22 | for sentence in example["text"].strip().split("###"): 23 | sentence = sentence.strip() 24 | if sentence[:6] == "Human:": 25 | data.append(sentence[6:].strip()) 26 | elif sentence[:10] == "Assistant:": 27 | data.append(sentence[10:].strip()) 28 | if len(data) % 2: 29 | # The last round of conversation solely consists of input 30 | # without any output. 31 | # Discard the input part of the last round, as this part is ignored in 32 | # the loss calculation. 33 | data.pop() 34 | conversation = [] 35 | for i in range(0, len(data), 2): 36 | single_turn_conversation = {"input": data[i], "output": data[i + 1]} 37 | conversation.append(single_turn_conversation) 38 | return {"conversation": conversation} 39 | -------------------------------------------------------------------------------- /xtuner/dataset/map_fns/dataset_map_fns/openai_map_fn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | def openai_map_fn(example): 3 | """ 4 | Example before preprocessing: 5 | example["messages"] = [ 6 | { "role": "system", "content": "You are an assistant that 7 | occasionally misspells words." }, 8 | { "role": "user", "content": "Tell me a story." }, 9 | { "role": "assistant", "content": "One day a student 10 | went to schoool." } 11 | ] 12 | Example after preprocessing: 13 | example["conversation"] = [ 14 | { 15 | "system": "You are an assistant that occasionally misspells 16 | words.", 17 | "input": "Tell me a story.", 18 | "output": "One day a student went to schoool." 19 | } 20 | ] 21 | """ 22 | messages = example["messages"] 23 | system = "" 24 | input = "" 25 | conversation = [] 26 | while messages and messages[0]["role"] == "assistant": 27 | # Skip the first one if it is from assistant 28 | messages = messages[1:] 29 | for msg in messages: 30 | if msg["role"] == "system": 31 | system = msg["content"] 32 | elif msg["role"] == "user": 33 | input += msg["content"] 34 | elif msg["role"] == "assistant": 35 | output_with_loss = msg.get("loss", "True") 36 | output_with_loss = str(output_with_loss) 37 | output_with_loss = output_with_loss.lower() == "true" 38 | conversation.append( 39 | { 40 | "system": system, 41 | "input": input, 42 | "output": msg["content"], 43 | "output_with_loss": output_with_loss, 44 | } 45 | ) 46 | system = "" 47 | input = "" 48 | else: 49 | raise NotImplementedError 50 | return {"conversation": conversation} 51 | -------------------------------------------------------------------------------- /xtuner/dataset/map_fns/dataset_map_fns/openorca_map_fn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | def openorca_map_fn(example): 3 | return { 4 | "conversation": [ 5 | { 6 | "system": example["system_prompt"], 7 | "input": example["question"], 8 | "output": example["response"], 9 | } 10 | ] 11 | } 12 | -------------------------------------------------------------------------------- /xtuner/dataset/map_fns/dataset_map_fns/pretrain_map_fn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | def pretrain_map_fn(example): 3 | r"""Example before preprocessing: 4 | example['text'] = 'xxx' 5 | 6 | Example after preprocessing: 7 | example['conversation'] = [ 8 | { 9 | 'input': '', 10 | 'output': 'xxx' 11 | }, 12 | ] 13 | """ 14 | return { 15 | "conversation": [ 16 | {"input": "", "output": example["text"].strip(), "need_eos_token": False} 17 | ] 18 | } 19 | -------------------------------------------------------------------------------- /xtuner/dataset/map_fns/dataset_map_fns/sql_map_fn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from xtuner.utils import SYSTEM_TEMPLATE 3 | 4 | 5 | def sql_map_fn(example): 6 | return { 7 | "conversation": [ 8 | { 9 | "system": SYSTEM_TEMPLATE.sql, 10 | "input": "{context}\n{question}".format(**example), 11 | "output": example["answer"], 12 | } 13 | ] 14 | } 15 | -------------------------------------------------------------------------------- /xtuner/dataset/map_fns/dataset_map_fns/stack_exchange_map_fn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | def stack_exchange_map_fn(example): 3 | return { 4 | "conversation": [{"input": example["question"], "output": example["response"]}] 5 | } 6 | -------------------------------------------------------------------------------- /xtuner/dataset/map_fns/dataset_map_fns/tiny_codes_map_fn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from xtuner.utils import SYSTEM_TEMPLATE 3 | 4 | 5 | def tiny_codes_map_fn(example): 6 | return { 7 | "conversation": [ 8 | { 9 | "system": SYSTEM_TEMPLATE.coder, 10 | "input": example["prompt"], 11 | "output": example["response"], 12 | } 13 | ] 14 | } 15 | -------------------------------------------------------------------------------- /xtuner/dataset/map_fns/dataset_map_fns/wizardlm_map_fn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | def wizardlm_map_fn(example): 3 | messages = example["conversations"] 4 | input = "" 5 | conversation = [] 6 | while messages and messages[0]["from"] == "gpt": 7 | # Skip the first one if it is from gpt 8 | messages = messages[1:] 9 | for msg in messages: 10 | if msg["from"] == "human": 11 | input += msg["value"] 12 | elif msg["from"] == "gpt": 13 | conversation.append({"input": input, "output": msg["value"]}) 14 | input = "" 15 | else: 16 | raise NotImplementedError 17 | return {"conversation": conversation} 18 | -------------------------------------------------------------------------------- /xtuner/dataset/map_fns/template_map_fn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
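# A sketch of what the map function below produces, assuming a template whose SYSTEM is
# "<|System|>:{system}\n" and whose INSTRUCTION is "<|User|>:{input}\n<|Bot|>:" (both
# strings are illustrative, not an actual xtuner template):
#
#   turn = {"system": "Be brief.", "input": "Hi", "output": "Hello"}
#   # becomes
#   {"input": "<|System|>:Be brief.\n<|User|>:Hi\n<|Bot|>:",
#    "output": "Hello",            # SUFFIX, if defined, would be appended here
#    "need_eos_token": True,       # because SUFFIX_AS_EOS is not set in this template
#    "sep": ""}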
2 | from functools import partial 3 | 4 | from mmengine.utils.misc import get_object_from_string 5 | 6 | 7 | def template_map_fn(example, template): 8 | conversation = example.get("conversation", []) 9 | for i, single_turn_conversation in enumerate(conversation): 10 | input = single_turn_conversation.get("input", "") 11 | if input is None: 12 | input = "" 13 | input_text = template.INSTRUCTION.format(input=input, round=i + 1) 14 | system = single_turn_conversation.get("system", "") 15 | if system != "" and system is not None: 16 | system = template.SYSTEM.format(system=system) 17 | input_text = system + input_text 18 | single_turn_conversation["input"] = input_text 19 | 20 | if template.get("SUFFIX", None): 21 | output_text = single_turn_conversation.get("output", "") 22 | output_text += template.SUFFIX 23 | single_turn_conversation["output"] = output_text 24 | 25 | # SUFFIX_AS_EOS is False ==> need_eos_token is True 26 | single_turn_conversation["need_eos_token"] = not template.get( 27 | "SUFFIX_AS_EOS", False 28 | ) 29 | single_turn_conversation["sep"] = template.get("SEP", "") 30 | 31 | return {"conversation": conversation} 32 | 33 | 34 | def template_map_fn_factory(template): 35 | if isinstance(template, str): # for resume 36 | template = get_object_from_string(template) 37 | return partial(template_map_fn, template=template) 38 | -------------------------------------------------------------------------------- /xtuner/dataset/modelscope.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmengine.config import Config, ConfigDict 3 | 4 | from xtuner.registry import BUILDER 5 | 6 | from .huggingface import process_hf_dataset 7 | 8 | 9 | def process_ms_dataset(dataset, split="train", *args, **kwargs): 10 | """Post-process the dataset loaded from the ModelScope Hub.""" 11 | 12 | if isinstance(dataset, (Config, ConfigDict)): 13 | dataset = BUILDER.build(dataset) 14 | if isinstance(dataset, dict): 15 | dataset = dataset[split] 16 | dataset = dataset.to_hf_dataset() 17 | return process_hf_dataset(dataset, *args, **kwargs) 18 | -------------------------------------------------------------------------------- /xtuner/dataset/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .intern_repo import InternlmRepoSampler, InternRepoSampler 3 | from .length_grouped import LengthGroupedSampler 4 | 5 | __all__ = ["LengthGroupedSampler", "InternRepoSampler", "InternlmRepoSampler"] 6 | -------------------------------------------------------------------------------- /xtuner/engine/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from ._strategy import DeepSpeedStrategy 3 | from .hooks import ( 4 | DatasetInfoHook, 5 | EvaluateChatHook, 6 | ThroughputHook, 7 | VarlenAttnArgsToMessageHubHook, 8 | ) 9 | from .runner import TrainLoop 10 | 11 | __all__ = [ 12 | "EvaluateChatHook", 13 | "DatasetInfoHook", 14 | "ThroughputHook", 15 | "VarlenAttnArgsToMessageHubHook", 16 | "DeepSpeedStrategy", 17 | "TrainLoop", 18 | ] 19 | -------------------------------------------------------------------------------- /xtuner/engine/_strategy/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from .deepspeed import DeepSpeedStrategy 3 | 4 | __all__ = ["DeepSpeedStrategy"] 5 | -------------------------------------------------------------------------------- /xtuner/engine/_strategy/deepspeed.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from typing import Optional 3 | 4 | from mmengine._strategy import DeepSpeedStrategy as MMEngineDeepSpeedStrategy 5 | 6 | from xtuner import DS_CEPH_DIR 7 | from xtuner.parallel.sequence import init_sequence_parallel 8 | from xtuner.utils.device import get_device 9 | from xtuner.utils.fileio import patch_fileio 10 | 11 | 12 | class DeepSpeedStrategy(MMEngineDeepSpeedStrategy): 13 | def __init__(self, *args, **kwargs): 14 | sequence_parallel_size = kwargs.pop("sequence_parallel_size", 1) 15 | self.sequence_parallel_size = sequence_parallel_size 16 | 17 | super().__init__(*args, **kwargs) 18 | 19 | from transformers.integrations.deepspeed import HfDeepSpeedConfig 20 | 21 | # hf_deepspeed_config has to be saved as an attribute. 22 | self.hf_deepspeed_config = HfDeepSpeedConfig(self.config) 23 | 24 | def _wrap_model(self, model): 25 | wrapper = super()._wrap_model(model) 26 | # hard code for deepspeed zero3 27 | # When utilizing Zero3, the model isn't allocated to CUDA within the 28 | # `deepspeed.initialize` process. 29 | assert hasattr(wrapper.model, "data_preprocessor") 30 | wrapper.model.data_preprocessor.to(get_device()) 31 | return wrapper 32 | 33 | def save_checkpoint(self, *args, **kwargs) -> None: 34 | if DS_CEPH_DIR: 35 | from os import path as osp 36 | 37 | work_dir_prefix = osp.split(self.work_dir)[0] 38 | 39 | filename = kwargs["filename"].replace(work_dir_prefix, DS_CEPH_DIR) 40 | kwargs["filename"] = filename 41 | with patch_fileio(): 42 | super().save_checkpoint(*args, **kwargs) 43 | else: 44 | super().save_checkpoint(*args, **kwargs) 45 | 46 | def load_checkpoint(self, *args, **kwargs) -> None: 47 | if DS_CEPH_DIR: 48 | with patch_fileio(): 49 | checkpoint = super().load_checkpoint(*args, **kwargs) 50 | else: 51 | checkpoint = super().load_checkpoint(*args, **kwargs) 52 | return checkpoint 53 | 54 | def resume(self, *args, **kwargs) -> None: 55 | if DS_CEPH_DIR: 56 | with patch_fileio(): 57 | checkpoint = super().resume(*args, **kwargs) 58 | else: 59 | checkpoint = super().resume(*args, **kwargs) 60 | return checkpoint 61 | 62 | def _setup_distributed( # type: ignore 63 | self, 64 | launcher: Optional[str] = None, 65 | backend: str = "nccl", 66 | **kwargs, 67 | ): 68 | super()._setup_distributed(launcher, backend, **kwargs) 69 | init_sequence_parallel(self.sequence_parallel_size) 70 | -------------------------------------------------------------------------------- /xtuner/engine/hooks/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
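# The hooks re-exported below are typically registered via `custom_hooks` in an xtuner
# config; a rough sketch (the EvaluateChatHook arguments shown are assumptions, check the
# hook implementations for the exact signatures):
#
#   custom_hooks = [
#       dict(type=DatasetInfoHook, tokenizer=tokenizer),
#       dict(type=EvaluateChatHook, tokenizer=tokenizer, every_n_iters=500,
#            evaluation_inputs=["Please tell me five scenic spots in Shanghai"]),
#   ]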
2 | from .dataset_info_hook import DatasetInfoHook 3 | from .evaluate_chat_hook import EvaluateChatHook 4 | from .hf_checkpoint_hook import HFCheckpointHook 5 | from .throughput_hook import ThroughputHook 6 | from .varlen_attn_args_to_messagehub_hook import VarlenAttnArgsToMessageHubHook 7 | 8 | __all__ = [ 9 | "EvaluateChatHook", 10 | "DatasetInfoHook", 11 | "ThroughputHook", 12 | "VarlenAttnArgsToMessageHubHook", 13 | "HFCheckpointHook", 14 | ] 15 | -------------------------------------------------------------------------------- /xtuner/engine/hooks/dataset_info_hook.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmengine.hooks import Hook 3 | 4 | from xtuner.registry import BUILDER 5 | from xtuner.utils import DEFAULT_IMAGE_TOKEN, IMAGE_TOKEN_INDEX 6 | 7 | 8 | def split_list(lst, value): 9 | res = [] 10 | tmp_res = [] 11 | for i in lst: 12 | if i == value: 13 | res.append(tmp_res) 14 | tmp_res = [] 15 | else: 16 | tmp_res.append(i) 17 | res.append(tmp_res) 18 | return res 19 | 20 | 21 | class DatasetInfoHook(Hook): 22 | def __init__(self, tokenizer, is_intern_repo_dataset=False): 23 | self.tokenizer = BUILDER.build(tokenizer) 24 | self.is_intern_repo_dataset = is_intern_repo_dataset 25 | 26 | def log(self, runner, dataset, mode="train"): 27 | def _log(input_ids, log_prefix=""): 28 | if self.is_intern_repo_dataset: 29 | input_ids = [abs(x) for x in input_ids] 30 | # Try to split list to be compatible with IMAGE token 31 | input_ids = split_list(input_ids, IMAGE_TOKEN_INDEX) 32 | text = log_prefix 33 | for idx, ids in enumerate(input_ids): 34 | text += self.tokenizer.decode(ids) 35 | if idx != len(input_ids) - 1: 36 | text += DEFAULT_IMAGE_TOKEN 37 | runner.logger.info(text) 38 | 39 | runner.logger.info(f"Num {mode} samples {len(dataset)}") 40 | runner.logger.info(f"{mode} example:") 41 | if "chosen_ids" in dataset[0]: 42 | _log(dataset[0]["chosen_ids"], log_prefix="chosen: ") 43 | _log(dataset[0]["rejected_ids"], log_prefix="rejected: ") 44 | else: 45 | _log(dataset[0]["input_ids"]) 46 | 47 | def before_train(self, runner) -> None: 48 | do_train = runner.train_loop is not None 49 | do_eval = runner.val_loop is not None 50 | if do_train: 51 | train_dataset = runner.train_dataloader.dataset 52 | self.log(runner, train_dataset, mode="train") 53 | if do_eval: 54 | eval_dataset = runner.val_dataloader.dataset 55 | self.log(runner, eval_dataset, mode="eval") 56 | 57 | def before_val(self, runner) -> None: 58 | eval_dataset = runner.val_dataloader.dataset 59 | self.log(runner, eval_dataset, mode="eval") 60 | 61 | def before_test(self, runner) -> None: 62 | test_dataset = runner.test_dataloader.dataset 63 | self.log(runner, test_dataset, mode="test") 64 | -------------------------------------------------------------------------------- /xtuner/engine/runner/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .loops import TrainLoop 3 | 4 | __all__ = ["TrainLoop"] 5 | -------------------------------------------------------------------------------- /xtuner/engine/runner/loops.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
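# A sketch of how the loop below is usually selected in an xtuner config (values are
# placeholders); exactly one of `max_iters` / `max_epochs` may be given, and `max_epochs`
# is converted to iterations via `max_epochs * len(dataloader)`:
#
#   train_cfg = dict(type=TrainLoop, max_epochs=3)
#   # or: train_cfg = dict(type=TrainLoop, max_iters=10000)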
2 | from typing import Dict, Optional, Union 3 | 4 | from mmengine.runner import IterBasedTrainLoop 5 | from torch.utils.data import DataLoader 6 | 7 | 8 | class TrainLoop(IterBasedTrainLoop): 9 | def __init__( 10 | self, 11 | runner, 12 | dataloader: Union[DataLoader, Dict], 13 | max_iters: Optional[int] = None, 14 | max_epochs: Union[int, float] = None, 15 | **kwargs, 16 | ) -> None: 17 | if max_iters is None and max_epochs is None: 18 | raise RuntimeError( 19 | "Please specify the `max_iters` or " "`max_epochs` in `train_cfg`." 20 | ) 21 | elif max_iters is not None and max_epochs is not None: 22 | raise RuntimeError( 23 | "Only one of `max_iters` or `max_epochs` can " "exist in `train_cfg`." 24 | ) 25 | else: 26 | if max_iters is not None: 27 | iters = int(max_iters) 28 | assert iters == max_iters, ( 29 | "`max_iters` should be a integer " f"number, but get {max_iters}" 30 | ) 31 | elif max_epochs is not None: 32 | if isinstance(dataloader, dict): 33 | diff_rank_seed = runner._randomness_cfg.get("diff_rank_seed", False) 34 | dataloader = runner.build_dataloader( 35 | dataloader, seed=runner.seed, diff_rank_seed=diff_rank_seed 36 | ) 37 | iters = max_epochs * len(dataloader) 38 | else: 39 | raise NotImplementedError 40 | super().__init__( 41 | runner=runner, dataloader=dataloader, max_iters=iters, **kwargs 42 | ) 43 | -------------------------------------------------------------------------------- /xtuner/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .metrics import MMLUMetric 3 | 4 | __all__ = ["MMLUMetric"] 5 | -------------------------------------------------------------------------------- /xtuner/evaluation/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .mmlu_metric import MMLUMetric 3 | 4 | __all__ = ["MMLUMetric"] 5 | -------------------------------------------------------------------------------- /xtuner/model/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .internvl import InternVL_V1_5 3 | from .llava import LLaVAModel 4 | from .sft import SupervisedFinetune 5 | 6 | __all__ = ["SupervisedFinetune", "LLaVAModel", "InternVL_V1_5"] 7 | -------------------------------------------------------------------------------- /xtuner/model/modules/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .dispatch import dispatch_modules 3 | from .projector import ProjectorConfig, ProjectorModel 4 | 5 | __all__ = ["dispatch_modules", "ProjectorConfig", "ProjectorModel"] 6 | -------------------------------------------------------------------------------- /xtuner/model/modules/dispatch/triton_kernels/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from .layer_norm import layer_norm_forward 3 | from .rms_norm import rms_norm_forward 4 | from .rotary import apply_rotary_emb 5 | 6 | __all__ = ["rms_norm_forward", "layer_norm_forward", "apply_rotary_emb"] 7 | -------------------------------------------------------------------------------- /xtuner/model/modules/dispatch/triton_kernels/layer_norm.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | import torch.nn.functional as F 4 | 5 | 6 | def layer_norm_forward(self, hidden_states): 7 | input_dtype = hidden_states.dtype 8 | hidden_states = hidden_states.to(torch.float32) 9 | hidden_states = F.layer_norm( 10 | hidden_states, (hidden_states.shape[-1],), eps=self.variance_epsilon 11 | ) 12 | hidden_states = self.weight.to(torch.float32) * hidden_states 13 | return hidden_states.to(input_dtype) 14 | -------------------------------------------------------------------------------- /xtuner/model/modules/dispatch/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | import torch.nn.functional as F 4 | 5 | try: 6 | from flash_attn.bert_padding import index_first_axis, unpad_input 7 | except ImportError: 8 | pass 9 | 10 | 11 | def _get_unpad_data(attention_mask): 12 | seqlens_in_batch = attention_mask.sum(dim=-1, dtype=torch.int32) 13 | indices = torch.nonzero(attention_mask.flatten(), as_tuple=False).flatten() 14 | max_seqlen_in_batch = seqlens_in_batch.max().item() 15 | cu_seqlens = F.pad( 16 | torch.cumsum(seqlens_in_batch, dim=0, dtype=torch.torch.int32), (1, 0) 17 | ) 18 | return ( 19 | indices, 20 | cu_seqlens, 21 | max_seqlen_in_batch, 22 | ) 23 | 24 | 25 | def upad_qkv(query_layer, key_layer, value_layer, attention_mask, query_length): 26 | indices_k, cu_seqlens_k, max_seqlen_in_batch_k = _get_unpad_data(attention_mask) 27 | batch_size, kv_seq_len, num_key_value_heads, head_dim = key_layer.shape 28 | 29 | key_layer = index_first_axis( 30 | key_layer.reshape(batch_size * kv_seq_len, num_key_value_heads, head_dim), 31 | indices_k, 32 | ) 33 | value_layer = index_first_axis( 34 | value_layer.reshape(batch_size * kv_seq_len, num_key_value_heads, head_dim), 35 | indices_k, 36 | ) 37 | if query_length == kv_seq_len: 38 | # Different from the origin version as sequence parallel change 39 | # the number of attention heads. 40 | query_layer = index_first_axis( 41 | query_layer.reshape(batch_size * kv_seq_len, -1, head_dim), indices_k 42 | ) 43 | cu_seqlens_q = cu_seqlens_k 44 | max_seqlen_in_batch_q = max_seqlen_in_batch_k 45 | indices_q = indices_k 46 | elif query_length == 1: 47 | max_seqlen_in_batch_q = 1 48 | cu_seqlens_q = torch.arange( 49 | batch_size + 1, dtype=torch.int32, device=query_layer.device 50 | ) # There is a memcpy here, that is very bad. 51 | indices_q = cu_seqlens_q[:-1] 52 | query_layer = query_layer.squeeze(1) 53 | else: 54 | # The -q_len: slice assumes left padding. 
55 | attention_mask = attention_mask[:, -query_length:] 56 | query_layer, indices_q, cu_seqlens_q, max_seqlen_in_batch_q = unpad_input( 57 | query_layer, attention_mask 58 | ) 59 | 60 | return ( 61 | query_layer, 62 | key_layer, 63 | value_layer, 64 | indices_q, 65 | (cu_seqlens_q, cu_seqlens_k), 66 | (max_seqlen_in_batch_q, max_seqlen_in_batch_k), 67 | ) 68 | -------------------------------------------------------------------------------- /xtuner/model/modules/projector/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from transformers import AutoConfig, AutoModel 3 | 4 | from .configuration_projector import ProjectorConfig 5 | from .modeling_projector import ProjectorModel 6 | 7 | AutoConfig.register("projector", ProjectorConfig) 8 | AutoModel.register(ProjectorConfig, ProjectorModel) 9 | 10 | __all__ = ["ProjectorConfig", "ProjectorModel"] 11 | -------------------------------------------------------------------------------- /xtuner/model/modules/projector/configuration_projector.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from transformers import PretrainedConfig 3 | 4 | 5 | class ProjectorConfig(PretrainedConfig): 6 | model_type = "projector" 7 | _auto_class = "AutoConfig" 8 | 9 | def __init__( 10 | self, 11 | visual_hidden_size=4096, 12 | llm_hidden_size=4096, 13 | depth=2, 14 | hidden_act="gelu", 15 | bias=True, 16 | **kwargs, 17 | ): 18 | self.visual_hidden_size = visual_hidden_size 19 | self.llm_hidden_size = llm_hidden_size 20 | self.depth = depth 21 | self.hidden_act = hidden_act 22 | self.bias = bias 23 | super().__init__(**kwargs) 24 | -------------------------------------------------------------------------------- /xtuner/model/modules/projector/modeling_projector.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
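# A minimal usage sketch for the projector below (sizes and batch shape are placeholders):
# it maps visual features of width `visual_hidden_size` to the LLM hidden size with a
# small MLP.
#
#   import torch
#   config = ProjectorConfig(visual_hidden_size=1024, llm_hidden_size=4096, depth=2)
#   projector = ProjectorModel(config)
#   feats = torch.randn(1, 576, 1024)   # e.g. ViT patch features
#   out = projector(feats)              # -> shape (1, 576, 4096)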
2 | import torch 3 | import torch.nn as nn 4 | from transformers import PreTrainedModel 5 | from transformers.activations import ACT2FN 6 | 7 | from .configuration_projector import ProjectorConfig 8 | 9 | 10 | class ProjectorModel(PreTrainedModel): 11 | _auto_class = "AutoModel" 12 | config_class = ProjectorConfig 13 | base_model_prefix = "model" 14 | supports_gradient_checkpointing = True 15 | 16 | def __init__(self, config: ProjectorConfig) -> None: 17 | super().__init__(config) 18 | self.gradient_checkpointing = False 19 | 20 | modules = [ 21 | nn.Linear( 22 | config.visual_hidden_size, config.llm_hidden_size, bias=config.bias 23 | ) 24 | ] 25 | for _ in range(1, config.depth): 26 | modules.append(ACT2FN[config.hidden_act]) 27 | modules.append( 28 | nn.Linear( 29 | config.llm_hidden_size, config.llm_hidden_size, bias=config.bias 30 | ) 31 | ) 32 | self.model = nn.Sequential(*modules) 33 | 34 | def enable_input_require_grads(self): 35 | def make_inputs_require_grad(module, input, output): 36 | output.requires_grad_(True) 37 | 38 | self.model.register_forward_hook(make_inputs_require_grad) 39 | 40 | def _set_gradient_checkpointing(self, module, value=False): 41 | if isinstance(module, ProjectorModel): 42 | module.gradient_checkpointing = value 43 | 44 | def forward(self, x): 45 | if self.gradient_checkpointing and self.training: 46 | layer_outputs = torch.utils.checkpoint.checkpoint(self.model, x) 47 | else: 48 | layer_outputs = self.model(x) 49 | return layer_outputs 50 | -------------------------------------------------------------------------------- /xtuner/model/transformers_models/__init__.py: -------------------------------------------------------------------------------- 1 | from .deepseek_v2 import DeepseekTokenizerFast, DeepseekV2Config, DeepseekV2ForCausalLM, DeepseekV2Model 2 | from .mixtral import MixtralConfig, MixtralForCausalLM, MixtralModel 3 | 4 | __all__ = [ 5 | 'DeepseekTokenizerFast', 'DeepseekV2Config', 'DeepseekV2ForCausalLM', 'DeepseekV2Model', 'MixtralConfig', 6 | 'MixtralForCausalLM', 'MixtralModel' 7 | ] 8 | -------------------------------------------------------------------------------- /xtuner/model/transformers_models/deepseek_v2/__init__.py: -------------------------------------------------------------------------------- 1 | from .configuration_deepseek import DeepseekV2Config 2 | from .modeling_deepseek import DeepseekV2ForCausalLM, DeepseekV2Model 3 | from .tokenization_deepseek_fast import DeepseekTokenizerFast 4 | 5 | __all__ = ['DeepseekV2ForCausalLM', 'DeepseekV2Model', 'DeepseekV2Config', 'DeepseekTokenizerFast'] 6 | -------------------------------------------------------------------------------- /xtuner/model/transformers_models/deepseek_v2/tokenization_deepseek_fast.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional, Union 2 | 3 | from transformers.models.llama import LlamaTokenizerFast 4 | 5 | 6 | class DeepseekTokenizerFast(LlamaTokenizerFast): 7 | 8 | def convert_ids_to_tokens(self, 9 | ids: Union[int, List[int]], 10 | skip_special_tokens: bool = False) -> Union[str, List[str]]: 11 | """Converts a single index or a sequence of indices in a token or a 12 | sequence of tokens, using the vocabulary and added tokens. 13 | 14 | Args: 15 | ids (`int` or `List[int]`): 16 | The token id (or token ids) to convert to tokens. 17 | skip_special_tokens (`bool`, *optional*, defaults to `False`): 18 | Whether or not to remove special tokens in the decoding. 
19 | 20 | Returns: 21 | `str` or `List[str]`: The decoded token(s). 22 | """ 23 | if isinstance(ids, int): 24 | return self._convert_id_to_token(ids) 25 | tokens = [] 26 | for index in ids: 27 | index = int(index) 28 | if skip_special_tokens and index in self.all_special_ids: 29 | continue 30 | token = self._tokenizer.id_to_token(index) 31 | tokens.append(token if token is not None else '') 32 | return tokens 33 | 34 | def _convert_id_to_token(self, index: int) -> Optional[str]: 35 | token = self._tokenizer.id_to_token(int(index)) 36 | return token if token is not None else '' 37 | -------------------------------------------------------------------------------- /xtuner/model/transformers_models/mixtral/__init__.py: -------------------------------------------------------------------------------- 1 | from .configuration_mixtral import MixtralConfig 2 | from .modeling_mixtral import MixtralForCausalLM, MixtralModel 3 | 4 | __all__ = ['MixtralForCausalLM', 'MixtralModel', 'MixtralConfig'] 5 | -------------------------------------------------------------------------------- /xtuner/parallel/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .sequence import * # noqa: F401, F403 3 | -------------------------------------------------------------------------------- /xtuner/parallel/sequence/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmengine.dist import init_dist 3 | 4 | from .attention import ( 5 | post_process_for_sequence_parallel_attn, 6 | pre_process_for_sequence_parallel_attn, 7 | sequence_parallel_wrapper, 8 | ) 9 | from .comm import ( 10 | all_to_all, 11 | gather_for_sequence_parallel, 12 | gather_forward_split_backward, 13 | split_for_sequence_parallel, 14 | split_forward_gather_backward, 15 | ) 16 | from .data_collate import ( 17 | pad_cumulative_len_for_sequence_parallel, 18 | pad_for_sequence_parallel, 19 | ) 20 | from .reduce_loss import reduce_sequence_parallel_loss 21 | from .sampler import SequenceParallelSampler 22 | from .setup_distributed import ( 23 | get_data_parallel_group, 24 | get_data_parallel_rank, 25 | get_data_parallel_world_size, 26 | get_inner_sequence_parallel_group, 27 | get_inner_sequence_parallel_rank, 28 | get_inner_sequence_parallel_world_size, 29 | get_sequence_parallel_group, 30 | get_sequence_parallel_rank, 31 | get_sequence_parallel_world_size, 32 | init_inner_sequence_parallel, 33 | init_sequence_parallel, 34 | is_inner_sequence_parallel_initialized, 35 | ) 36 | 37 | __all__ = [ 38 | "sequence_parallel_wrapper", 39 | "pre_process_for_sequence_parallel_attn", 40 | "post_process_for_sequence_parallel_attn", 41 | "pad_for_sequence_parallel", 42 | "split_for_sequence_parallel", 43 | "SequenceParallelSampler", 44 | "init_sequence_parallel", 45 | "get_sequence_parallel_group", 46 | "get_sequence_parallel_world_size", 47 | "get_sequence_parallel_rank", 48 | "get_data_parallel_group", 49 | "get_data_parallel_world_size", 50 | "get_data_parallel_rank", 51 | "reduce_sequence_parallel_loss", 52 | "init_dist", 53 | "all_to_all", 54 | "gather_for_sequence_parallel", 55 | "split_forward_gather_backward", 56 | "gather_forward_split_backward", 57 | "get_inner_sequence_parallel_group", 58 | "get_inner_sequence_parallel_rank", 59 | "get_inner_sequence_parallel_world_size", 60 | "init_inner_sequence_parallel", 61 | "is_inner_sequence_parallel_initialized", 62 | 
"pad_cumulative_len_for_sequence_parallel", 63 | ] 64 | -------------------------------------------------------------------------------- /xtuner/parallel/sequence/data_collate.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | 4 | from .setup_distributed import get_sequence_parallel_world_size 5 | 6 | 7 | def pad_for_sequence_parallel(tensor, padding_value, dim=-1): 8 | length = tensor.shape[dim] 9 | seq_parallel_world_size = get_sequence_parallel_world_size() 10 | if length % seq_parallel_world_size == 0: 11 | return tensor 12 | 13 | pad_num = seq_parallel_world_size - (length % seq_parallel_world_size) 14 | pad_shape = ( 15 | (*tensor.shape[:dim], pad_num, *tensor.shape[dim + 1 :]) 16 | if dim != -1 17 | else (*tensor.shape[:dim], pad_num) 18 | ) 19 | pad = torch.full(pad_shape, padding_value, dtype=tensor.dtype, device=tensor.device) 20 | tensor = torch.cat([tensor, pad], dim=dim) 21 | return tensor 22 | 23 | 24 | # This function only meets the following two conditions: 25 | # 1. use_varlen_attn = True 26 | # 2. pack_to_max_length = True and the lengths of each sequence are different 27 | def pad_cumulative_len_for_sequence_parallel(cumulative_len): 28 | assert len(cumulative_len) == 1 29 | seqlen = cumulative_len[0][-1] 30 | seq_parallel_world_size = get_sequence_parallel_world_size() 31 | if seqlen % seq_parallel_world_size == 0: 32 | return cumulative_len, None 33 | 34 | bs = len(cumulative_len) 35 | pad_len = seq_parallel_world_size - (seqlen % seq_parallel_world_size) 36 | seqlen_new = seqlen + pad_len 37 | attention_mask = torch.zeros( 38 | bs, seqlen_new, dtype=torch.bool, device=cumulative_len[0].device 39 | ) 40 | attention_mask[:, :seqlen] = True 41 | 42 | for i, cu_len in enumerate(cumulative_len): 43 | pad = torch.tensor([seqlen_new], device=cu_len.device, dtype=cu_len.dtype) 44 | cumulative_len[i] = torch.cat([cu_len, pad], dim=0) 45 | 46 | return cumulative_len, attention_mask 47 | -------------------------------------------------------------------------------- /xtuner/parallel/sequence/reduce_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | import torch 3 | import torch.distributed as dist 4 | 5 | from .setup_distributed import get_sequence_parallel_group 6 | 7 | 8 | class _ReduceLoss(torch.autograd.Function): 9 | @staticmethod 10 | def forward(ctx, mean_loss, loss_scale, process_group): 11 | ctx.mode = process_group 12 | if loss_scale == 0: 13 | # convert nan to 0 just for logging 14 | mean_loss = torch.nan_to_num(mean_loss) 15 | loss_sum = mean_loss * loss_scale 16 | dist.all_reduce(loss_sum, group=process_group) 17 | dist.all_reduce(loss_scale, group=process_group) 18 | loss = loss_sum / loss_scale 19 | return loss 20 | 21 | @staticmethod 22 | def backward(ctx, grad_output): 23 | return grad_output, None, None 24 | 25 | 26 | def reduce_sequence_parallel_loss( 27 | mean_loss, loss_scale, sp_group: dist.ProcessGroup = None 28 | ): 29 | if dist.get_world_size(sp_group) == 1: 30 | return mean_loss 31 | if sp_group is None: 32 | # avoid bc breaking 33 | sp_group = get_sequence_parallel_group() 34 | return _ReduceLoss.apply(mean_loss, loss_scale, sp_group) 35 | -------------------------------------------------------------------------------- /xtuner/parallel/sequence/sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import math 3 | from typing import Optional, Sized 4 | 5 | from mmengine.dataset import DefaultSampler 6 | from mmengine.dist import sync_random_seed 7 | 8 | from .setup_distributed import get_data_parallel_rank, get_data_parallel_world_size 9 | 10 | 11 | class SequenceParallelSampler(DefaultSampler): 12 | def __init__( 13 | self, 14 | dataset: Sized, 15 | shuffle: bool = True, 16 | seed: Optional[int] = None, 17 | round_up: bool = True, 18 | ) -> None: 19 | rank = get_data_parallel_rank() 20 | world_size = get_data_parallel_world_size() 21 | self.rank = rank 22 | self.world_size = world_size 23 | 24 | self.dataset = dataset 25 | self.shuffle = shuffle 26 | if seed is None: 27 | seed = sync_random_seed() 28 | self.seed = seed 29 | self.epoch = 0 30 | self.round_up = round_up 31 | 32 | if self.round_up: 33 | self.num_samples = math.ceil(len(self.dataset) / world_size) 34 | self.total_size = self.num_samples * self.world_size 35 | else: 36 | self.num_samples = math.ceil((len(self.dataset) - rank) / world_size) 37 | self.total_size = len(self.dataset) 38 | -------------------------------------------------------------------------------- /xtuner/registry.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmengine.registry import Registry 3 | 4 | __all__ = ["BUILDER", "MAP_FUNC"] 5 | 6 | BUILDER = Registry("builder") 7 | MAP_FUNC = Registry("map_fn") 8 | -------------------------------------------------------------------------------- /xtuner/tools/copy_cfg.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
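# A usage sketch for the tool below (the config name is a placeholder); the same logic is
# also exposed through the `xtuner copy-cfg` CLI entry point:
#
#   python xtuner/tools/copy_cfg.py internlm2_chat_7b_qlora_oasst1_e3 ./my_configs
#   # -> ./my_configs/internlm2_chat_7b_qlora_oasst1_e3_copy.py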
2 | import argparse 3 | import os.path as osp 4 | import shutil 5 | 6 | from mmengine.utils import mkdir_or_exist 7 | 8 | from xtuner.configs import cfgs_name_path 9 | 10 | 11 | def parse_args(): 12 | parser = argparse.ArgumentParser() 13 | parser.add_argument("config_name", help="config name") 14 | parser.add_argument("save_dir", help="save directory for copied config") 15 | args = parser.parse_args() 16 | return args 17 | 18 | 19 | def add_copy_suffix(string): 20 | file_name, ext = osp.splitext(string) 21 | return f"{file_name}_copy{ext}" 22 | 23 | 24 | def main(): 25 | args = parse_args() 26 | mkdir_or_exist(args.save_dir) 27 | config_path = cfgs_name_path[args.config_name] 28 | save_path = osp.join(args.save_dir, add_copy_suffix(osp.basename(config_path))) 29 | shutil.copyfile(config_path, save_path) 30 | print(f"Copy to {save_path}") 31 | 32 | 33 | if __name__ == "__main__": 34 | main() 35 | -------------------------------------------------------------------------------- /xtuner/tools/data_preprocess/arxiv.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import json 4 | from datetime import datetime 5 | 6 | 7 | def parse_args(): 8 | parser = argparse.ArgumentParser() 9 | parser.add_argument("src_file", help="source file path") 10 | parser.add_argument("dst_file", help="destination file path") 11 | parser.add_argument( 12 | "--categories", 13 | nargs="+", 14 | default=["cs.AI", "cs.CL", "cs.CV"], 15 | help="target categories", 16 | ) 17 | parser.add_argument( 18 | "--start-date", default="2020-01-01", help="start date (format: YYYY-MM-DD)" 19 | ) 20 | 21 | args = parser.parse_args() 22 | return args 23 | 24 | 25 | def has_intersection(list1, list2): 26 | set1 = set(list1) 27 | set2 = set(list2) 28 | return len(set1.intersection(set2)) > 0 29 | 30 | 31 | def read_json_file(file_path): 32 | data = [] 33 | with open(file_path) as file: 34 | for line in file: 35 | try: 36 | json_data = json.loads(line) 37 | data.append(json_data) 38 | except json.JSONDecodeError: 39 | print(f"Failed to parse line: {line}") 40 | return data 41 | 42 | 43 | def main(): 44 | args = parse_args() 45 | json_data = read_json_file(args.src_file) 46 | from_time = datetime.strptime(args.start_date, "%Y-%m-%d") 47 | filtered_data = [ 48 | item 49 | for item in json_data 50 | if has_intersection(args.categories, item["categories"].split()) 51 | and datetime.strptime(item["update_date"], "%Y-%m-%d") >= from_time 52 | ] 53 | 54 | with open(args.dst_file, "w") as file: 55 | json.dump(filtered_data, file) 56 | 57 | print(f"Save to {args.dst_file}\n{len(filtered_data)} items") 58 | 59 | 60 | if __name__ == "__main__": 61 | main() 62 | -------------------------------------------------------------------------------- /xtuner/tools/data_preprocess/convert_refcoco.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
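# Usage sketch (the paths are the argparse defaults below and are assumptions about
# the local data layout):
#
#     python xtuner/tools/data_preprocess/convert_refcoco.py \
#         --ann-path data/refcoco_annotations \
#         --image-path data/llava_data/llava_images/coco/train2017 \
#         --save-path ./
#
# The refcoco, refcoco+ and refcocog splits are merged and written to
# <save-path>/train.json.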
2 | import argparse 3 | import json 4 | 5 | from xtuner.dataset.refcoco_json import RefCOCOJsonDataset 6 | 7 | 8 | def parse_args(): 9 | parser = argparse.ArgumentParser() 10 | parser.add_argument( 11 | "--ann-path", 12 | default="data/refcoco_annotations", 13 | help="Refcoco annotation path", 14 | ) 15 | parser.add_argument( 16 | "--image-path", 17 | default="data/llava_data/llava_images/coco/train2017", 18 | help="COCO image path", 19 | ) 20 | parser.add_argument( 21 | "--save-path", default="./", help="The folder to save converted data" 22 | ) 23 | args = parser.parse_args() 24 | return args 25 | 26 | 27 | if __name__ == "__main__": 28 | args = parse_args() 29 | 30 | data_info = [ 31 | ("refcoco", "unc"), 32 | ("refcoco+", "unc"), 33 | ("refcocog", "umd"), 34 | ] 35 | all_data = [] 36 | for dataset, split in data_info: 37 | data = RefCOCOJsonDataset.get_data_json( 38 | ann_path=args.ann_path, 39 | image_path=args.image_path, 40 | dataset=dataset, 41 | splitBy=split, 42 | )[0] 43 | all_data.extend(data) 44 | save_path = args.save_path + "/train.json" 45 | with open(save_path, "w") as f: 46 | print(f"save to {save_path} with {len(all_data)} items.") 47 | print(all_data[0]) 48 | json.dump(all_data, f, indent=4) 49 | -------------------------------------------------------------------------------- /xtuner/tools/get_data_order.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import os 4 | 5 | 6 | def parse_args(): 7 | parser = argparse.ArgumentParser() 8 | parser.add_argument("--data-folder", help="Data folder") 9 | parser.add_argument("--save-folder", help="The folder to save data order.") 10 | parser.add_argument( 11 | "--file-type", 12 | default=".bin", 13 | help="We want to get the order of the file in this type.", 14 | ) 15 | args = parser.parse_args() 16 | return args 17 | 18 | 19 | def save_data_order(data_folder, save_folder, file_type=".bin"): 20 | assert os.path.exists(data_folder), f"{data_folder} does not exist." 21 | triples = list(os.walk(data_folder, followlinks=True)) 22 | data_order = [] 23 | for root, dirs, files in triples: 24 | dirs.sort() 25 | print(f"Reading {root}...") 26 | for fn in sorted(files): 27 | if fn.endswith(file_type): 28 | fp = os.path.join(root, fn) 29 | # Using relative paths so that you can get the same result 30 | # on different clusters 31 | fp = fp.replace(data_folder, "")[1:] 32 | data_order.append(fp) 33 | 34 | save_path = os.path.join(save_folder, "data_order.txt") 35 | with open(save_path, "w") as f: 36 | for fp in data_order: 37 | f.write(fp + "\n") 38 | 39 | 40 | if __name__ == "__main__": 41 | args = parse_args() 42 | save_data_order(args.data_folder, args.save_folder, args.file_type) 43 | -------------------------------------------------------------------------------- /xtuner/tools/list_cfg.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
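# Usage sketch (the pattern is illustrative):
#
#     python xtuner/tools/list_cfg.py -p llava
#
# Without `-p` every built-in config name is printed; with it, only names containing
# the pattern (case-insensitive) are listed.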
2 | import argparse 3 | 4 | from xtuner.configs import cfgs_name_path 5 | 6 | 7 | def parse_args(): 8 | parser = argparse.ArgumentParser() 9 | parser.add_argument( 10 | "-p", "--pattern", default=None, help="Pattern for fuzzy matching" 11 | ) 12 | args = parser.parse_args() 13 | return args 14 | 15 | 16 | def main(pattern=None): 17 | args = parse_args() 18 | configs_names = sorted(list(cfgs_name_path.keys())) 19 | print("==========================CONFIGS===========================") 20 | if args.pattern is not None: 21 | print(f"PATTERN: {args.pattern}") 22 | print("-------------------------------") 23 | for name in configs_names: 24 | if args.pattern is None or args.pattern.lower() in name.lower(): 25 | print(name) 26 | print("=============================================================") 27 | 28 | 29 | if __name__ == "__main__": 30 | main() 31 | -------------------------------------------------------------------------------- /xtuner/tools/list_dataset_format.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from xtuner.dataset.map_fns import DATASET_FORMAT_MAPPING 3 | 4 | 5 | def main(): 6 | dataset_format = DATASET_FORMAT_MAPPING.keys() 7 | print("======================DATASET_FORMAT======================") 8 | for format in dataset_format: 9 | print(format) 10 | print("==========================================================") 11 | 12 | 13 | if __name__ == "__main__": 14 | main() 15 | -------------------------------------------------------------------------------- /xtuner/tools/log_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | 4 | from mmengine.config import Config 5 | 6 | from xtuner.registry import BUILDER 7 | 8 | 9 | def parse_args(): 10 | parser = argparse.ArgumentParser(description="Log processed dataset.") 11 | parser.add_argument("config", help="config file name or path.") 12 | # chose which kind of dataset style to show 13 | parser.add_argument( 14 | "--show", 15 | default="text", 16 | choices=["text", "masked_text", "input_ids", "labels", "all"], 17 | help="which kind of dataset style to show", 18 | ) 19 | args = parser.parse_args() 20 | return args 21 | 22 | 23 | def main(): 24 | args = parse_args() 25 | 26 | cfg = Config.fromfile(args.config) 27 | 28 | tokenizer = BUILDER.build(cfg.tokenizer) 29 | if cfg.get("framework", "mmengine").lower() == "huggingface": 30 | train_dataset = BUILDER.build(cfg.train_dataset) 31 | else: 32 | train_dataset = BUILDER.build(cfg.train_dataloader.dataset) 33 | 34 | if args.show == "text" or args.show == "all": 35 | print("#" * 20 + " text " + "#" * 20) 36 | print(tokenizer.decode(train_dataset[0]["input_ids"])) 37 | if args.show == "masked_text" or args.show == "all": 38 | print("#" * 20 + " text(masked) " + "#" * 20) 39 | masked_text = " ".join( 40 | ["[-100]" for i in train_dataset[0]["labels"] if i == -100] 41 | ) 42 | unmasked_text = tokenizer.decode( 43 | [i for i in train_dataset[0]["labels"] if i != -100] 44 | ) 45 | print(masked_text + " " + unmasked_text) 46 | if args.show == "input_ids" or args.show == "all": 47 | print("#" * 20 + " input_ids " + "#" * 20) 48 | print(train_dataset[0]["input_ids"]) 49 | if args.show == "labels" or args.show == "all": 50 | print("#" * 20 + " labels " + "#" * 20) 51 | print(train_dataset[0]["labels"]) 52 | 53 | 54 | if __name__ == "__main__": 55 | main() 56 | 
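# Usage sketch for the tool above (CONFIG is a placeholder for any training config):
#
#     python xtuner/tools/log_dataset.py CONFIG --show all
#
# `--show text` decodes the first sample's input_ids, while `--show masked_text`
# prints a "[-100]" marker for each ignored label followed by the decoded targets.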
-------------------------------------------------------------------------------- /xtuner/tools/model_converters/modeling_internlm2_reward/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | -------------------------------------------------------------------------------- /xtuner/tools/model_converters/split.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import copy 4 | import json 5 | import os 6 | import os.path as osp 7 | import shutil 8 | 9 | import torch 10 | from mmengine.utils import mkdir_or_exist 11 | 12 | from xtuner.utils.device import get_device_name, get_torch_device 13 | 14 | 15 | def parse_args(): 16 | parser = argparse.ArgumentParser( 17 | description="Split a HuggingFace model to the smallest sharded one" 18 | ) 19 | parser.add_argument("src_dir", help="the directory of the model") 20 | parser.add_argument("dst_dir", help="the directory to save the new model") 21 | args = parser.parse_args() 22 | return args 23 | 24 | 25 | def main(): 26 | args = parse_args() 27 | mkdir_or_exist(args.dst_dir) 28 | 29 | all_files = os.listdir(args.src_dir) 30 | for name in all_files: 31 | if not name.startswith(("pytorch_model", ".")): 32 | src_path = osp.join(args.src_dir, name) 33 | dst_path = osp.join(args.dst_dir, name) 34 | shutil.copy(src_path, dst_path) 35 | 36 | with open(osp.join(args.src_dir, "pytorch_model.bin.index.json")) as f: 37 | index = json.load(f) 38 | 39 | n_shard = len(index["weight_map"]) 40 | new_index = copy.deepcopy(index) 41 | new_index["weight_map"] = {} 42 | cnt = 1 43 | 44 | checkpoints = set(index["weight_map"].values()) 45 | for ckpt in checkpoints: 46 | state_dict = torch.load( 47 | osp.join(args.src_dir, ckpt), map_location=get_device_name() 48 | ) 49 | keys = sorted(list(state_dict.keys())) 50 | for k in keys: 51 | new_state_dict_name = f"pytorch_model-{cnt:05d}-of-{n_shard:05d}.bin" 52 | new_index["weight_map"][k] = new_state_dict_name 53 | new_state_dict = {k: state_dict[k]} 54 | torch.save(new_state_dict, osp.join(args.dst_dir, new_state_dict_name)) 55 | cnt += 1 56 | del state_dict 57 | get_torch_device().empty_cache() 58 | with open(osp.join(args.dst_dir, "pytorch_model.bin.index.json"), "w") as f: 59 | json.dump(new_index, f) 60 | assert ( 61 | new_index["weight_map"].keys() == index["weight_map"].keys() 62 | ), "Mismatch on `weight_map`!" 63 | 64 | 65 | if __name__ == "__main__": 66 | main() 67 | -------------------------------------------------------------------------------- /xtuner/tools/plugins/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .api import plugins_api 3 | 4 | __all__ = ["plugins_api"] 5 | -------------------------------------------------------------------------------- /xtuner/tools/plugins/api.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
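# Usage sketch (the plugin call string is illustrative; the exact whitespace of the
# result depends on the branches below):
#
#     plugins_api('Calculate("1+1")')
#
# should return roughly '<|Results|>:\nCalculate("1+1") => 2.00\n', i.e. every
# matched plugin call is executed and its result is appended under a
# "<|Results|>:" header.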
2 | import re 3 | 4 | 5 | def plugins_api(input_str, calculate_open=True, solve_open=True, search_open=True): 6 | pattern = r'(Solve|solve|Solver|solver|Calculate|calculate|Calculator|calculator|Search)\("([^"]*)"\)' # noqa: E501 7 | 8 | matches = re.findall(pattern, input_str) 9 | 10 | converted_str = "<|Results|>:\n" 11 | 12 | for i in range(len(matches)): 13 | if matches[i][0] in ["Calculate", "calculate", "Calculator", "calculator"]: 14 | if calculate_open: 15 | from .calculate import Calculate 16 | 17 | result = Calculate(matches[i][1]) 18 | else: 19 | result = None 20 | converted_str += f'Calculate("{matches[i][1]}") => {result}\n' 21 | elif matches[i][0] in ["Solve", "solve", "Solver", "solver"]: 22 | if solve_open: 23 | from .solve import Solve 24 | 25 | result = Solve(matches[i][1]) 26 | else: 27 | result = None 28 | converted_str += f'Solve("{matches[i][1]}") =>\n{result}\n' 29 | elif matches[i][0] == "Search": 30 | if search_open: 31 | from .search import Search 32 | 33 | result = Search(matches[i][1]) 34 | else: 35 | result = None 36 | converted_str += f'Search("{matches[i][1]}") =>\n{result}' 37 | 38 | converted_str += "\n" 39 | return converted_str 40 | -------------------------------------------------------------------------------- /xtuner/tools/plugins/calculate.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from math import * # noqa: F401, F403 3 | 4 | 5 | def Calculate(expression): 6 | res = "" 7 | for exp in expression.split(";"): 8 | try: 9 | res += "{:.2f};".format(eval(exp.replace("^", "**"))) 10 | except Exception: 11 | res += "No result." 12 | if res[-1] == ";": 13 | res = res[:-1] 14 | return res 15 | -------------------------------------------------------------------------------- /xtuner/tools/plugins/search.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import os 3 | import sys 4 | 5 | import requests 6 | 7 | try: 8 | SERPER_API_KEY = os.environ["SERPER_API_KEY"] 9 | except Exception: 10 | print( 11 | "Please obtain the `SERPER_API_KEY` from https://serper.dev and " 12 | "set it using `export SERPER_API_KEY=xxx`."
13 | ) 14 | sys.exit(1) 15 | 16 | 17 | def parse_results(results, k=10): 18 | snippets = [] 19 | 20 | for result in results["organic"][:k]: 21 | if "snippet" in result: 22 | snippets.append(result["snippet"]) 23 | for attribute, value in result.get("attributes", {}).items(): 24 | snippets.append(f"{attribute}: {value}.") 25 | return snippets 26 | 27 | 28 | def search(api_key, search_term, **kwargs): 29 | headers = { 30 | "X-API-KEY": api_key, 31 | "Content-Type": "application/json", 32 | } 33 | params = { 34 | "q": search_term, 35 | **{key: value for key, value in kwargs.items() if value is not None}, 36 | } 37 | try: 38 | response = requests.post( 39 | "https://google.serper.dev/search", 40 | headers=headers, 41 | params=params, 42 | timeout=5, 43 | ) 44 | except Exception as e: 45 | return -1, str(e) 46 | return response.status_code, response.json() 47 | 48 | 49 | def Search(q, k=10): 50 | status_code, response = search(SERPER_API_KEY, q) 51 | if status_code != 200: 52 | ret = "None\n" 53 | else: 54 | text = parse_results(response, k=k) 55 | ret = "" 56 | for idx, res in enumerate(text): 57 | ret += f"<|{idx+1}|>: '{res}'\n" 58 | return ret 59 | -------------------------------------------------------------------------------- /xtuner/tools/plugins/solve.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import math 3 | import re 4 | from math import * # noqa: F401, F403 5 | 6 | from sympy import Eq, solve, symbols 7 | 8 | from .calculate import Calculate 9 | 10 | 11 | def Solve(equations_str): 12 | try: 13 | equations_str = equations_str.replace(" ", "") 14 | equations_ori = re.split(r"[,;]+", equations_str) 15 | equations_str = equations_str.replace("^", "**") 16 | equations_str = re.sub(r"(\(.*\))([a-zA-Z])", r"\1 * \2", equations_str) 17 | equations_str = re.sub(r"(\d+)([a-zA-Z])", r"\1 * \2", equations_str) 18 | equations_str = equations_str.replace("pi", str(math.pi)) 19 | equations = re.split(r"[,;]+", equations_str) 20 | vars_list = list(set(re.findall(r"[a-zA-Z]+", equations_str))) 21 | vars = {var: symbols(var) for var in vars_list} 22 | 23 | output = "" 24 | eqs = [] 25 | for eq in equations: 26 | if "=" in eq: 27 | left, right = eq.split("=") 28 | eqs.append( 29 | Eq(eval(left.strip(), {}, vars), eval(right.strip(), {}, vars)) 30 | ) 31 | solutions = solve(eqs, vars, dict=True) 32 | 33 | vars_values = {var: [] for var in vars_list} 34 | if isinstance(solutions, list): 35 | for idx, solution in enumerate(solutions): 36 | for var, sol in solution.items(): 37 | output += f"{var}_{idx} = {sol}\n" 38 | vars_values[str(var)].append(sol) 39 | else: 40 | for var, sol in solutions.items(): 41 | output += f"{var} = {sol}\n" 42 | vars_values[str(var)].append(sol) 43 | for eq, eq_o in zip(equations, equations_ori): 44 | if "=" not in eq: 45 | for var in vars_list: 46 | need_note = True if len(vars_values[var]) > 1 else False 47 | for idx, value in enumerate(vars_values[var]): 48 | eq_to_calc = eq.replace(var, str(value)) 49 | calc_result = Calculate(eq_to_calc) 50 | if need_note: 51 | eq_name = eq_o.replace(var, f"{var}_{idx}") 52 | else: 53 | eq_name = eq_o 54 | if calc_result != "No result.": 55 | output += f"{eq_name} = {calc_result}\n" 56 | 57 | return output.strip() 58 | except Exception: 59 | return "No result."
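# Usage sketch for Solve above (the input string is illustrative):
#
#     Solve("2x+3=7; x^2")
#
# is intended to solve the equation part with sympy (x = 2) and then pass the
# remaining expression to Calculate with the solution substituted, yielding
# something like "x_0 = 2\nx^2 = 4.00". Any failure falls back to "No result.".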
60 | -------------------------------------------------------------------------------- /xtuner/tools/process_untokenized_llava_data.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import warnings 4 | 5 | from mmengine import Config 6 | 7 | from xtuner.registry import BUILDER 8 | 9 | # ignore FutureWarning in hf datasets 10 | warnings.simplefilter(action="ignore", category=FutureWarning) 11 | 12 | 13 | def parse_args(): 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument("config", help="config file name or path.") 16 | parser.add_argument("--save-folder", help="The folder to save data order.") 17 | args = parser.parse_args() 18 | return args 19 | 20 | 21 | def build_llava_dataset(config): 22 | dataset = BUILDER.build(config.train_dataloader.dataset) 23 | return dataset 24 | 25 | 26 | if __name__ == "__main__": 27 | args = parse_args() 28 | cfg = Config.fromfile(args.config) 29 | 30 | llava_dataset = build_llava_dataset(cfg) 31 | text_data = llava_dataset.text_data 32 | 33 | text_data.save_to_disk(args.save_folder) 34 | -------------------------------------------------------------------------------- /xtuner/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .constants import ( 3 | DEFAULT_IMAGE_TOKEN, 4 | DEFAULT_PAD_TOKEN_INDEX, 5 | IGNORE_INDEX, 6 | IMAGE_TOKEN_INDEX, 7 | ) 8 | from .handle_moe_load_and_save import ( 9 | SUPPORT_MODELS, 10 | get_origin_state_dict, 11 | load_state_dict_into_model, 12 | ) 13 | from .stop_criteria import StopWordStoppingCriteria 14 | from .templates import PROMPT_TEMPLATE, SYSTEM_TEMPLATE 15 | 16 | __all__ = [ 17 | "IGNORE_INDEX", 18 | "DEFAULT_PAD_TOKEN_INDEX", 19 | "PROMPT_TEMPLATE", 20 | "DEFAULT_IMAGE_TOKEN", 21 | "SYSTEM_TEMPLATE", 22 | "StopWordStoppingCriteria", 23 | "IMAGE_TOKEN_INDEX", 24 | "load_state_dict_into_model", 25 | "get_origin_state_dict", 26 | "SUPPORT_MODELS", 27 | ] 28 | -------------------------------------------------------------------------------- /xtuner/utils/constants.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | IGNORE_INDEX = -100 3 | DEFAULT_PAD_TOKEN_INDEX = 0 4 | IMAGE_TOKEN_INDEX = -200 5 | DEFAULT_IMAGE_TOKEN = "" 6 | -------------------------------------------------------------------------------- /xtuner/utils/device.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | # This code is inspired by the torchtune. 3 | # https://github.com/pytorch/torchtune/blob/main/torchtune/utils/_device.py 4 | 5 | import logging 6 | from typing import Optional 7 | 8 | import torch 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | 13 | def is_torch_npu_available() -> bool: 14 | """Check the availability of NPU.""" 15 | try: 16 | import torch_npu # noqa: F401 17 | 18 | return torch.npu.is_available() 19 | except ImportError: 20 | return False 21 | 22 | 23 | is_cuda_available = torch.cuda.is_available() 24 | is_npu_available = is_torch_npu_available() 25 | 26 | 27 | def get_device_name() -> str: 28 | """Function that gets the torch.device based on the current machine. 29 | 30 | This currently only supports CPU, CUDA, NPU. 
31 | 32 | Returns: 33 | device 34 | """ 35 | if is_cuda_available: 36 | device = "cuda" 37 | elif is_npu_available: 38 | device = "npu" 39 | else: 40 | device = "cpu" 41 | return device 42 | 43 | 44 | def get_device(device_name: Optional[str] = None) -> torch.device: 45 | """Function that takes an optional device string, verifies it's correct and 46 | available given the machine and distributed settings, and returns a 47 | :func:`~torch.device`. If device string is not provided, this function will 48 | infer the device based on the environment. 49 | 50 | If CUDA-like is available and being used, this function also sets the CUDA-like device. 51 | 52 | Args: 53 | device (Optional[str]): The name of the device to use, e.g. "cuda" or "cpu" or "npu". 54 | 55 | Example: 56 | >>> device = get_device("cuda") 57 | >>> device 58 | device(type='cuda', index=0) 59 | 60 | Returns: 61 | torch.device: Device 62 | """ 63 | if device_name is None: 64 | device_name = get_device_name() 65 | device = torch.device(device_name) 66 | return device 67 | 68 | 69 | def get_torch_device() -> any: 70 | """Return the corresponding torch attribute based on the device type 71 | string. 72 | 73 | Returns: 74 | module: The corresponding torch device namespace, or torch.cuda if not found. 75 | """ 76 | device_name = get_device_name() 77 | try: 78 | return getattr(torch, device_name) 79 | except AttributeError: 80 | logger.warning( 81 | f"Device namespace '{device_name}' not found in torch, try to load torch.cuda." 82 | ) 83 | return torch.cuda 84 | -------------------------------------------------------------------------------- /xtuner/utils/stop_criteria.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from transformers import StoppingCriteria 3 | 4 | 5 | class StopWordStoppingCriteria(StoppingCriteria): 6 | """StopWord stopping criteria.""" 7 | 8 | def __init__(self, tokenizer, stop_word): 9 | self.tokenizer = tokenizer 10 | self.stop_word = stop_word 11 | self.length = len(self.stop_word) 12 | 13 | def __call__(self, input_ids, *args, **kwargs) -> bool: 14 | cur_text = self.tokenizer.decode(input_ids[0]) 15 | cur_text = cur_text.replace("\r", "").replace("\n", "") 16 | return cur_text[-self.length :] == self.stop_word 17 | -------------------------------------------------------------------------------- /xtuner/version.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | __version__ = "0.2.0rc0" 3 | short_version = __version__ 4 | 5 | 6 | def parse_version_info(version_str): 7 | """Parse a version string into a tuple. 8 | 9 | Args: 10 | version_str (str): The version string. 11 | Returns: 12 | tuple[int or str]: The version info, e.g., "1.3.0" is parsed into 13 | (1, 3, 0), and "2.0.0rc1" is parsed into (2, 0, 0, 'rc1'). 14 | """ 15 | version_info = [] 16 | for x in version_str.split("."): 17 | if x.isdigit(): 18 | version_info.append(int(x)) 19 | elif x.find("rc") != -1: 20 | patch_version = x.split("rc") 21 | version_info.append(int(patch_version[0])) 22 | version_info.append(f"rc{patch_version[1]}") 23 | return tuple(version_info) 24 | 25 | 26 | version_info = parse_version_info(__version__) 27 | --------------------------------------------------------------------------------
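# Worked example for parse_version_info above, consistent with its docstring:
#
#     parse_version_info("0.2.0rc0")  # -> (0, 2, 0, 'rc0')
#
# so `version_info` for this release evaluates to (0, 2, 0, 'rc0').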