├── .github
├── CONTRIBUTING.md
└── workflows
│ ├── deploy.yml
│ └── lint.yml
├── .gitignore
├── .owners.yml
├── .pre-commit-config.yaml
├── LICENSE
├── MANIFEST.in
├── README.md
├── README_zh-CN.md
├── docs
├── en
│ ├── .readthedocs.yaml
│ ├── Makefile
│ ├── _static
│ │ ├── css
│ │ │ └── readthedocs.css
│ │ └── image
│ │ │ └── logo.png
│ ├── acceleration
│ │ ├── benchmark.rst
│ │ ├── deepspeed.rst
│ │ ├── flash_attn.rst
│ │ ├── hyper_parameters.rst
│ │ ├── length_grouped_sampler.rst
│ │ ├── pack_to_max_length.rst
│ │ ├── train_extreme_long_sequence.rst
│ │ ├── train_large_scale_dataset.rst
│ │ └── varlen_flash_attn.rst
│ ├── chat
│ │ ├── agent.md
│ │ ├── llm.md
│ │ ├── lmdeploy.md
│ │ └── vlm.md
│ ├── conf.py
│ ├── dpo
│ │ ├── modify_settings.md
│ │ ├── overview.md
│ │ └── quick_start.md
│ ├── evaluation
│ │ ├── hook.md
│ │ ├── mmbench.md
│ │ ├── mmlu.md
│ │ └── opencompass.md
│ ├── get_started
│ │ ├── installation.md
│ │ ├── overview.md
│ │ └── quickstart.md
│ ├── index.rst
│ ├── internevo_migration
│ │ ├── ftdp_dataset
│ │ │ ├── Case1.rst
│ │ │ ├── Case2.rst
│ │ │ ├── Case3.rst
│ │ │ ├── Case4.rst
│ │ │ └── ftdp.rst
│ │ └── internevo_migration.rst
│ ├── make.bat
│ ├── models
│ │ └── supported.md
│ ├── notes
│ │ └── changelog.md
│ ├── preparation
│ │ ├── pretrained_model.rst
│ │ └── prompt_template.rst
│ ├── reward_model
│ │ ├── modify_settings.md
│ │ ├── overview.md
│ │ ├── preference_data.md
│ │ └── quick_start.md
│ ├── switch_language.md
│ ├── training
│ │ ├── custom_agent_dataset.rst
│ │ ├── custom_pretrain_dataset.rst
│ │ ├── custom_sft_dataset.rst
│ │ ├── modify_settings.rst
│ │ ├── multi_modal_dataset.rst
│ │ ├── open_source_dataset.rst
│ │ └── visualization.rst
│ └── user_guides
│ │ ├── chat.md
│ │ ├── dataset_format.md
│ │ ├── dataset_prepare.md
│ │ ├── finetune.md
│ │ ├── incremental_pretraining.md
│ │ ├── intern_repo_dataset.md
│ │ ├── multi_turn_conversation.md
│ │ ├── prompt_template.md
│ │ └── single_turn_conversation.md
└── zh_cn
│ ├── .readthedocs.yaml
│ ├── Makefile
│ ├── _static
│ └── image
│ │ └── logo.png
│ ├── acceleration
│ ├── benchmark.rst
│ ├── deepspeed.rst
│ ├── flash_attn.rst
│ ├── hyper_parameters.rst
│ ├── length_grouped_sampler.rst
│ ├── pack_to_max_length.rst
│ ├── train_extreme_long_sequence.rst
│ ├── train_large_scale_dataset.rst
│ └── varlen_flash_attn.rst
│ ├── chat
│ ├── agent.md
│ ├── llm.md
│ ├── lmdeploy.md
│ └── vlm.md
│ ├── conf.py
│ ├── dpo
│ ├── modify_settings.md
│ ├── overview.md
│ └── quick_start.md
│ ├── evaluation
│ ├── hook.md
│ ├── mmbench.md
│ ├── mmlu.md
│ └── opencompass.md
│ ├── get_started
│ ├── installation.rst
│ └── quickstart.rst
│ ├── index.rst
│ ├── internevo_migration
│ ├── differences.rst
│ └── ftdp_dataset
│ │ ├── processed_and_internlm2.rst
│ │ ├── processed_and_others.rst
│ │ ├── processed_normal_chat.rst
│ │ └── tokenized_and_internlm2.rst
│ ├── make.bat
│ ├── models
│ └── supported.md
│ ├── notes
│ └── changelog.md
│ ├── preparation
│ ├── pretrained_model.rst
│ └── prompt_template.rst
│ ├── reward_model
│ ├── images
│ │ ├── preference_data.png
│ │ ├── sequence_parallel.png
│ │ └── var_len_atten.png
│ ├── modify_settings.md
│ ├── overview.md
│ ├── preference_data.md
│ └── quick_start.md
│ ├── switch_language.md
│ ├── training
│ ├── custom_pretrain_dataset.rst
│ ├── custom_sft_dataset.rst
│ ├── modify_settings.rst
│ ├── multi_modal_dataset.rst
│ ├── open_source_dataset.rst
│ └── visualization.rst
│ └── user_guides
│ ├── ceph.md
│ ├── chat.md
│ ├── config.md
│ ├── custom_dataset
│ ├── Offline.md
│ └── Online.md
│ ├── dataset_format.md
│ ├── dataset_prepare.md
│ ├── finetune.md
│ ├── ftdp_dataset
│ ├── Case1.md
│ ├── Case2.md
│ ├── Case3.md
│ ├── Case4.md
│ └── README.md
│ ├── incremental_pretraining.md
│ ├── intern_repo_dataset.md
│ ├── llava_offline.md
│ ├── multi_turn_conversation.md
│ ├── prompt_template.md
│ ├── sequence_parallel.md
│ ├── single_turn_conversation.md
│ └── varlen_attention.md
├── examples
├── demo_data
│ ├── multi_turn_1
│ │ ├── README.md
│ │ ├── config.py
│ │ ├── data.json
│ │ └── map_fn.py
│ ├── multi_turn_2
│ │ ├── README.md
│ │ ├── config.py
│ │ ├── data.json
│ │ └── map_fn.py
│ ├── pretrain
│ │ ├── README.md
│ │ ├── config.py
│ │ ├── data.json
│ │ └── map_fn.py
│ └── single_turn
│ │ ├── README.md
│ │ ├── config.py
│ │ ├── data.json
│ │ └── map_fn.py
└── huggingface_trainer
│ ├── README.md
│ ├── train_hf.py
│ ├── train_lora_hf.py
│ └── train_qlora_hf.py
├── requirements.txt
├── requirements
├── deepspeed.txt
├── docs.txt
├── lmdeploy.txt
├── modelscope.txt
└── runtime.txt
├── setup.cfg
├── setup.py
└── xtuner
├── __init__.py
├── _lite
├── __init__.py
├── accelerate
│ ├── __init__.py
│ ├── lora.py
│ ├── ops
│ │ ├── __init__.py
│ │ └── moe_permute.py
│ ├── packed.py
│ └── utils.py
├── algorithms
│ ├── __init__.py
│ ├── ppo
│ │ ├── __init__.py
│ │ ├── dataset.py
│ │ ├── loss.py
│ │ └── model.py
│ └── sft
│ │ ├── __init__.py
│ │ └── dataset.py
├── chat
│ ├── __init__.py
│ ├── backends
│ │ └── __init__.py
│ ├── messages
│ │ ├── __init__.py
│ │ ├── base.py
│ │ └── chat.py
│ └── templates
│ │ ├── __init__.py
│ │ ├── chat.py
│ │ └── hybrid.py
├── datasets
│ ├── __init__.py
│ ├── json.py
│ ├── jsonl.py
│ ├── pack.py
│ ├── streaming.py
│ └── utils
│ │ ├── __init__.py
│ │ ├── convert.py
│ │ ├── load.py
│ │ └── utils.py
├── device.py
├── modelings
│ ├── __init__.py
│ ├── internlm2
│ │ ├── __init__.py
│ │ ├── configuration_internlm2.py
│ │ └── modeling_internlm2.py
│ ├── internlm3
│ │ ├── __init__.py
│ │ ├── configuration_internlm3.py
│ │ ├── modeling_internlm3.py
│ │ └── tokenization_internlm3.py
│ ├── internvl2
│ │ ├── __init__.py
│ │ ├── configuration_intern_vit.py
│ │ └── modeling_intern_vit.py
│ └── llava
│ │ ├── __init__.py
│ │ ├── configuration_internlm2.py
│ │ ├── configuration_llava.py
│ │ ├── modeling_internlm2.py
│ │ ├── modeling_llava.py
│ │ └── processing_llava.py
├── parallel
│ ├── __init__.py
│ ├── comm.py
│ ├── sampler.py
│ ├── sequence
│ │ ├── __init__.py
│ │ ├── attention.py
│ │ └── ops.py
│ └── setup.py
└── patches
│ ├── __init__.py
│ ├── auto.py
│ ├── base.py
│ ├── internlm3.py
│ ├── llama.py
│ ├── mixins
│ ├── __init__.py
│ └── generate.py
│ ├── qwen2.py
│ └── utils.py
├── apis
├── __init__.py
├── datasets
│ ├── __init__.py
│ ├── alpaca.py
│ ├── arxiv.py
│ ├── code_alpaca.py
│ ├── colorist.py
│ ├── lawyer.py
│ ├── medical.py
│ ├── moss_003_sft.py
│ ├── oasst1.py
│ ├── open_orca.py
│ ├── sql.py
│ ├── tiny_codes.py
│ └── wizardlm.py
├── model.py
└── training_args.py
├── configs
├── __init__.py
├── baichuan
│ ├── baichuan2_13b_base
│ │ ├── baichuan2_13b_base_qlora_alpaca_e3.py
│ │ ├── baichuan2_13b_base_qlora_alpaca_enzh_e3.py
│ │ ├── baichuan2_13b_base_qlora_alpaca_enzh_oasst1_e3.py
│ │ ├── baichuan2_13b_base_qlora_alpaca_zh_e3.py
│ │ ├── baichuan2_13b_base_qlora_arxiv_gentitle_e3.py
│ │ ├── baichuan2_13b_base_qlora_code_alpaca_e3.py
│ │ ├── baichuan2_13b_base_qlora_colorist_e5.py
│ │ ├── baichuan2_13b_base_qlora_lawyer_e3.py
│ │ ├── baichuan2_13b_base_qlora_oasst1_512_e3.py
│ │ ├── baichuan2_13b_base_qlora_oasst1_e3.py
│ │ ├── baichuan2_13b_base_qlora_open_platypus_e3.py
│ │ └── baichuan2_13b_base_qlora_sql_e3.py
│ ├── baichuan2_13b_chat
│ │ ├── baichuan2_13b_chat_qlora_alpaca_e3.py
│ │ ├── baichuan2_13b_chat_qlora_alpaca_enzh_e3.py
│ │ ├── baichuan2_13b_chat_qlora_alpaca_enzh_oasst1_e3.py
│ │ ├── baichuan2_13b_chat_qlora_alpaca_zh_e3.py
│ │ ├── baichuan2_13b_chat_qlora_code_alpaca_e3.py
│ │ ├── baichuan2_13b_chat_qlora_lawyer_e3.py
│ │ ├── baichuan2_13b_chat_qlora_oasst1_512_e3.py
│ │ ├── baichuan2_13b_chat_qlora_oasst1_e3.py
│ │ └── baichuan2_13b_chat_qlora_open_platypus_e3.py
│ ├── baichuan2_7b_base
│ │ ├── baichuan2_7b_base_qlora_alpaca_e3.py
│ │ ├── baichuan2_7b_base_qlora_alpaca_enzh_e3.py
│ │ ├── baichuan2_7b_base_qlora_alpaca_enzh_oasst1_e3.py
│ │ ├── baichuan2_7b_base_qlora_alpaca_zh_e3.py
│ │ ├── baichuan2_7b_base_qlora_arxiv_gentitle_e3.py
│ │ ├── baichuan2_7b_base_qlora_code_alpaca_e3.py
│ │ ├── baichuan2_7b_base_qlora_colorist_e5.py
│ │ ├── baichuan2_7b_base_qlora_lawyer_e3.py
│ │ ├── baichuan2_7b_base_qlora_oasst1_512_e3.py
│ │ ├── baichuan2_7b_base_qlora_oasst1_e3.py
│ │ ├── baichuan2_7b_base_qlora_open_platypus_e3.py
│ │ └── baichuan2_7b_base_qlora_sql_e3.py
│ ├── baichuan2_7b_chat
│ │ ├── baichuan2_7b_chat_qlora_alpaca_e3.py
│ │ ├── baichuan2_7b_chat_qlora_alpaca_enzh_e3.py
│ │ ├── baichuan2_7b_chat_qlora_alpaca_enzh_oasst1_e3.py
│ │ ├── baichuan2_7b_chat_qlora_alpaca_zh_e3.py
│ │ ├── baichuan2_7b_chat_qlora_code_alpaca_e3.py
│ │ ├── baichuan2_7b_chat_qlora_lawyer_e3.py
│ │ ├── baichuan2_7b_chat_qlora_oasst1_512_e3.py
│ │ ├── baichuan2_7b_chat_qlora_oasst1_e3.py
│ │ └── baichuan2_7b_chat_qlora_open_platypus_e3.py
│ ├── baichuan_13b_base
│ │ ├── baichuan_13b_base_qlora_alpaca_e3.py
│ │ ├── baichuan_13b_base_qlora_alpaca_enzh_e3.py
│ │ ├── baichuan_13b_base_qlora_alpaca_enzh_oasst1_e3.py
│ │ ├── baichuan_13b_base_qlora_alpaca_zh_e3.py
│ │ ├── baichuan_13b_base_qlora_arxiv_gentitle_e3.py
│ │ ├── baichuan_13b_base_qlora_code_alpaca_e3.py
│ │ ├── baichuan_13b_base_qlora_colorist_e5.py
│ │ ├── baichuan_13b_base_qlora_lawyer_e3.py
│ │ ├── baichuan_13b_base_qlora_medical_e1.py
│ │ ├── baichuan_13b_base_qlora_moss_sft_all_e1.py
│ │ ├── baichuan_13b_base_qlora_moss_sft_all_e2_gpu8.py
│ │ ├── baichuan_13b_base_qlora_moss_sft_plugins_e1.py
│ │ ├── baichuan_13b_base_qlora_oasst1_512_e3.py
│ │ ├── baichuan_13b_base_qlora_oasst1_e3.py
│ │ ├── baichuan_13b_base_qlora_open_platypus_e3.py
│ │ ├── baichuan_13b_base_qlora_openorca_e1.py
│ │ ├── baichuan_13b_base_qlora_sql_e3.py
│ │ └── baichuan_13b_base_qlora_tiny_codes_e1.py
│ ├── baichuan_13b_chat
│ │ ├── baichuan_13b_chat_qlora_alpaca_e3.py
│ │ ├── baichuan_13b_chat_qlora_alpaca_enzh_e3.py
│ │ ├── baichuan_13b_chat_qlora_alpaca_enzh_oasst1_e3.py
│ │ ├── baichuan_13b_chat_qlora_alpaca_zh_e3.py
│ │ ├── baichuan_13b_chat_qlora_arxiv_gentitle_e3.py
│ │ ├── baichuan_13b_chat_qlora_code_alpaca_e3.py
│ │ ├── baichuan_13b_chat_qlora_colorist_e5.py
│ │ ├── baichuan_13b_chat_qlora_lawyer_e3.py
│ │ ├── baichuan_13b_chat_qlora_medical_e1.py
│ │ ├── baichuan_13b_chat_qlora_oasst1_512_e3.py
│ │ ├── baichuan_13b_chat_qlora_oasst1_e3.py
│ │ ├── baichuan_13b_chat_qlora_open_platypus_e3.py
│ │ ├── baichuan_13b_chat_qlora_openorca_e1.py
│ │ ├── baichuan_13b_chat_qlora_sql_e3.py
│ │ └── baichuan_13b_chat_qlora_tiny_codes_e1.py
│ └── baichuan_7b
│ │ ├── baichuan_7b_qlora_alpaca_e3.py
│ │ ├── baichuan_7b_qlora_alpaca_enzh_e3.py
│ │ ├── baichuan_7b_qlora_alpaca_enzh_oasst1_e3.py
│ │ ├── baichuan_7b_qlora_alpaca_zh_e3.py
│ │ ├── baichuan_7b_qlora_arxiv_gentitle_e3.py
│ │ ├── baichuan_7b_qlora_code_alpaca_e3.py
│ │ ├── baichuan_7b_qlora_colorist_e5.py
│ │ ├── baichuan_7b_qlora_lawyer_e3.py
│ │ ├── baichuan_7b_qlora_medical_e1.py
│ │ ├── baichuan_7b_qlora_moss_sft_all_e1.py
│ │ ├── baichuan_7b_qlora_moss_sft_all_e2_gpu8.py
│ │ ├── baichuan_7b_qlora_moss_sft_plugins_e1.py
│ │ ├── baichuan_7b_qlora_oasst1_512_e3.py
│ │ ├── baichuan_7b_qlora_oasst1_e3.py
│ │ ├── baichuan_7b_qlora_open_platypus_e3.py
│ │ ├── baichuan_7b_qlora_openorca_e1.py
│ │ ├── baichuan_7b_qlora_sql_e3.py
│ │ └── baichuan_7b_qlora_tiny_codes_e1.py
├── chatglm
│ ├── chatglm2_6b
│ │ ├── chatglm2_6b_qlora_alpaca_e3.py
│ │ ├── chatglm2_6b_qlora_alpaca_enzh_e3.py
│ │ ├── chatglm2_6b_qlora_alpaca_enzh_oasst1_e3.py
│ │ ├── chatglm2_6b_qlora_alpaca_zh_e3.py
│ │ ├── chatglm2_6b_qlora_arxiv_gentitle_e3.py
│ │ ├── chatglm2_6b_qlora_code_alpaca_e3.py
│ │ ├── chatglm2_6b_qlora_colorist_e5.py
│ │ ├── chatglm2_6b_qlora_lawyer_e3.py
│ │ ├── chatglm2_6b_qlora_medical_e1.py
│ │ ├── chatglm2_6b_qlora_oasst1_512_e3.py
│ │ ├── chatglm2_6b_qlora_oasst1_e3.py
│ │ ├── chatglm2_6b_qlora_open_platypus_e3.py
│ │ ├── chatglm2_6b_qlora_openorca_e1.py
│ │ ├── chatglm2_6b_qlora_sql_e3.py
│ │ └── chatglm2_6b_qlora_tiny_codes_e1.py
│ ├── chatglm3_6b
│ │ ├── chatglm3_6b_qlora_alpaca_e3.py
│ │ ├── chatglm3_6b_qlora_alpaca_enzh_e3.py
│ │ ├── chatglm3_6b_qlora_alpaca_enzh_oasst1_e3.py
│ │ ├── chatglm3_6b_qlora_alpaca_zh_e3.py
│ │ ├── chatglm3_6b_qlora_arxiv_gentitle_e3.py
│ │ ├── chatglm3_6b_qlora_code_alpaca_e3.py
│ │ ├── chatglm3_6b_qlora_colorist_e5.py
│ │ ├── chatglm3_6b_qlora_lawyer_e3.py
│ │ ├── chatglm3_6b_qlora_medical_e1.py
│ │ ├── chatglm3_6b_qlora_oasst1_512_e3.py
│ │ ├── chatglm3_6b_qlora_oasst1_e3.py
│ │ ├── chatglm3_6b_qlora_open_platypus_e3.py
│ │ ├── chatglm3_6b_qlora_openorca_e1.py
│ │ ├── chatglm3_6b_qlora_sql_e3.py
│ │ └── chatglm3_6b_qlora_tiny_codes_e1.py
│ └── chatglm3_6b_base
│ │ ├── chatglm3_6b_base_qlora_alpaca_e3.py
│ │ ├── chatglm3_6b_base_qlora_alpaca_enzh_e3.py
│ │ ├── chatglm3_6b_base_qlora_alpaca_enzh_oasst1_e3.py
│ │ ├── chatglm3_6b_base_qlora_alpaca_zh_e3.py
│ │ ├── chatglm3_6b_base_qlora_arxiv_gentitle_e3.py
│ │ ├── chatglm3_6b_base_qlora_code_alpaca_e3.py
│ │ ├── chatglm3_6b_base_qlora_colorist_e5.py
│ │ ├── chatglm3_6b_base_qlora_lawyer_e3.py
│ │ ├── chatglm3_6b_base_qlora_medical_e1.py
│ │ ├── chatglm3_6b_base_qlora_oasst1_512_e3.py
│ │ ├── chatglm3_6b_base_qlora_oasst1_e3.py
│ │ ├── chatglm3_6b_base_qlora_open_platypus_e3.py
│ │ ├── chatglm3_6b_base_qlora_openorca_e1.py
│ │ ├── chatglm3_6b_base_qlora_sql_e3.py
│ │ └── chatglm3_6b_base_qlora_tiny_codes_e1.py
├── cohere
│ ├── README.md
│ └── cohere_104b
│ │ └── cohere_100b_128k_sp32.py
├── custom_dataset
│ ├── pretrain
│ │ ├── baichuan
│ │ │ ├── baichuan2_13b_base_full_custom_pretrain_e1.py
│ │ │ └── baichuan2_7b_base_full_custom_pretrain_e1.py
│ │ ├── chatglm
│ │ │ ├── chatglm2_6b_full_custom_pretrain_e1.py
│ │ │ └── chatglm3_6b_full_custom_pretrain_e1.py
│ │ ├── deepseek
│ │ │ └── deepseek_moe_16b_base_full_custom_pretrain_e1.py
│ │ ├── gemma
│ │ │ ├── gemma_2b_full_custom_pretrain_e1.py
│ │ │ └── gemma_7b_full_custom_pretrain_e1.py
│ │ ├── internlm
│ │ │ ├── internlm2_1_8b_full_custom_pretrain_e1.py
│ │ │ ├── internlm2_20b_full_custom_pretrain_e1.py
│ │ │ └── internlm2_7b_full_custom_pretrain_e1.py
│ │ ├── llama
│ │ │ ├── llama2_70b_full_custom_pretrain_e1.py
│ │ │ └── llama2_7b_full_custom_pretrain_e1.py
│ │ ├── minicpm
│ │ │ ├── minicpm3_4b_full_custom_pretrain_e1.py
│ │ │ ├── minicpm_1b_full_custom_pretrain_e1.py
│ │ │ └── minicpm_2b_full_custom_pretrain_e1.py
│ │ ├── mistral
│ │ │ └── mistral_7b_full_custom_pretrain_e1.py
│ │ ├── mixtral
│ │ │ └── mixtral_8x7b_full_custom_pretrain_e1.py
│ │ ├── qwen
│ │ │ ├── qwen1_5_0_5b_full_custom_pretrain_e1.py
│ │ │ ├── qwen1_5_14b_full_custom_pretrain_e1.py
│ │ │ ├── qwen1_5_1_8b_full_custom_pretrain_e1.py
│ │ │ ├── qwen1_5_4b_full_custom_pretrain_e1.py
│ │ │ ├── qwen1_5_72b_full_custom_pretrain_e1.py
│ │ │ ├── qwen1_5_7b_full_custom_pretrain_e1.py
│ │ │ ├── qwen_1_8b_full_custom_pretrain_e1.py
│ │ │ ├── qwen_72b_full_custom_pretrain_e1.py
│ │ │ └── qwen_7b_full_custom_pretrain_e1.py
│ │ ├── starcoder
│ │ │ └── starcoder_full_custom_pretrain_e1.py
│ │ ├── yi
│ │ │ ├── yi_34b_full_custom_pretrain_e1.py
│ │ │ └── yi_6b_full_custom_pretrain_e1.py
│ │ └── zephyr
│ │ │ └── zephyr_7b_beta_full_custom_pretrain_e1.py
│ └── sft
│ │ ├── baichuan
│ │ ├── baichuan2_13b_chat_qlora_custom_sft_e1.py
│ │ ├── baichuan2_7b_chat_qlora_custom_sft_e1.py
│ │ ├── baichuan_13b_chat_qlora_custom_sft_e1.py
│ │ └── baichuan_7b_qlora_custom_sft_e1.py
│ │ ├── chatglm
│ │ ├── chatglm2_6b_qlora_custom_sft_e1.py
│ │ └── chatglm3_6b_qlora_custom_sft_e1.py
│ │ ├── deepseek
│ │ ├── deepseek_moe_16b_chat_qlora_custom_sft_e1.py
│ │ └── deepseekcoder_6_7b_instruct_qlora_custom_sft_e1.py
│ │ ├── gemma
│ │ ├── gemma_2b_it_qlora_custom_sft_e1.py
│ │ ├── gemma_2b_qlora_custom_sft_e1.py
│ │ ├── gemma_7b_it_qlora_custom_sft_e1.py
│ │ └── gemma_7b_qlora_custom_sft_e1.py
│ │ ├── internlm
│ │ ├── internlm2_chat_1_8b_qlora_custom_sft_e1.py
│ │ ├── internlm2_chat_20b_qlora_custom_sft_e1.py
│ │ └── internlm2_chat_7b_qlora_custom_sft_e1.py
│ │ ├── llama
│ │ ├── llama2_70b_qlora_custom_sft_e1.py
│ │ └── llama2_7b_chat_qlora_custom_sft_e1.py
│ │ ├── minicpm
│ │ ├── minicpm3_4b_chat_qlora_custom_sft_e1.py
│ │ ├── minicpm_1b_full_custom_pretrain_e1.py
│ │ └── minicpm_2b_full_custom_pretrain_e1.py
│ │ ├── mistral
│ │ └── mistral_7b_full_finetune_custom_sft_e1.py
│ │ ├── mixtral
│ │ └── mixtral_8x7b_instruct_qlora_custom_sft_e1.py
│ │ ├── qwen
│ │ ├── qwen1_5_0_5b_chat_qlora_custom_sft_e1.py
│ │ ├── qwen1_5_14b_chat_qlora_custom_sft_e1.py
│ │ ├── qwen1_5_1_8b_chat_qlora_custom_sft_e1.py
│ │ ├── qwen1_5_4b_chat_qlora_custom_sft_e1.py
│ │ ├── qwen1_5_72b_chat_qlora_custom_sft_e1.py
│ │ ├── qwen1_5_7b_chat_qlora_custom_sft_e1.py
│ │ ├── qwen_1_8b_chat_qlora_custom_sft_e1.py
│ │ ├── qwen_72b_qlora_custom_sft_e1.py
│ │ └── qwen_7b_chat_qlora_custom_sft_e1.py
│ │ ├── starcoder
│ │ └── starcoder_qlora_custom_sft_e1.py
│ │ ├── yi
│ │ ├── yi_34b_qlora_custom_sft_e1.py
│ │ └── yi_6b_qlora_custom_sft_e1.py
│ │ └── zephyr
│ │ └── zephyr_7b_beta_qlora_custom_sft_e1.py
├── deepseek
│ ├── README.md
│ ├── deepseek_coder_6_7b_base
│ │ └── deepseek_coder_6_7b_base_qlora_code_alpaca_e3.py
│ ├── deepseek_coder_6_7b_instruct
│ │ └── deepseekcoder_6_7b_instruct_qlora_code_alpaca_e3.py
│ ├── deepseek_moe_16b_base
│ │ ├── deepseek_moe_16b_base_full_oasst1_e3.py
│ │ └── deepseek_moe_16b_base_qlora_oasst1_e3.py
│ ├── deepseek_moe_16b_chat
│ │ ├── deepseek_moe_16b_chat_full_oasst1_e3.py
│ │ └── deepseek_moe_16b_chat_qlora_oasst1_e3.py
│ ├── deepseek_v2_chat
│ │ └── deepseek_v2_chat_full_alpaca_e3.py
│ └── deepseek_v2_lite_chat
│ │ ├── deepseek_v2_lite_chat_full_alpaca_e3.py
│ │ └── deepseek_v2_lite_chat_full_alpaca_e3_32k_varlen.py
├── deepspeed
│ ├── deepspeed_zero1.json
│ ├── deepspeed_zero2.json
│ ├── deepspeed_zero2_offload.json
│ ├── deepspeed_zero3.json
│ └── deepspeed_zero3_offload.json
├── dpo
│ ├── internlm
│ │ ├── internlm2_chat_1_8b_dpo_full.py
│ │ ├── internlm2_chat_1_8b_dpo_full_varlenattn.py
│ │ ├── internlm2_chat_1_8b_dpo_full_varlenattn_jsonl_dataset.py
│ │ └── internlm2_chat_7b_dpo_qlora_varlenattn.py
│ └── llama
│ │ └── llama3_8b_instruct_dpo_qlora_varlenattn.py
├── gemma
│ ├── gemma_2b
│ │ ├── gemma_2b_full_alpaca_e3.py
│ │ └── gemma_2b_qlora_alpaca_e3.py
│ ├── gemma_2b_it
│ │ ├── gemma_2b_it_full_alpaca_e3.py
│ │ └── gemma_2b_it_qlora_alpaca_e3.py
│ ├── gemma_7b
│ │ ├── gemma_7b_full_alpaca_e3.py
│ │ └── gemma_7b_qlora_alpaca_e3.py
│ └── gemma_7b_it
│ │ ├── gemma_7b_it_full_alpaca_e3.py
│ │ └── gemma_7b_it_qlora_alpaca_e3.py
├── internlm
│ ├── internlm2_1_8b
│ │ ├── internlm2_1_8b_full_alpaca_e3.py
│ │ └── internlm2_1_8b_qlora_alpaca_e3.py
│ ├── internlm2_20b
│ │ ├── internlm2_20b_full_finetune_custom_dataset_e1.py
│ │ ├── internlm2_20b_qlora_alpaca_e3.py
│ │ ├── internlm2_20b_qlora_arxiv_gentitle_e3.py
│ │ ├── internlm2_20b_qlora_code_alpaca_e3.py
│ │ ├── internlm2_20b_qlora_colorist_e5.py
│ │ ├── internlm2_20b_qlora_lawyer_e3.py
│ │ ├── internlm2_20b_qlora_msagent_react_e3_gpu8.py
│ │ ├── internlm2_20b_qlora_oasst1_512_e3.py
│ │ ├── internlm2_20b_qlora_oasst1_e3.py
│ │ └── internlm2_20b_qlora_sql_e3.py
│ ├── internlm2_5_chat_20b
│ │ ├── internlm2_5_chat_20b_alpaca_e3.py
│ │ └── internlm2_5_chat_20b_qlora_alpaca_e3.py
│ ├── internlm2_5_chat_7b
│ │ ├── internlm2_5_chat_7b_full_finetune_custom_dataset_e1.py
│ │ ├── internlm2_5_chat_7b_qlora_alpaca_e3.py
│ │ └── internlm2_5_chat_7b_qlora_oasst1_e3.py
│ ├── internlm2_7b
│ │ ├── internlm2_7b_full_finetune_custom_dataset_e1.py
│ │ ├── internlm2_7b_full_finetune_custom_dataset_e1_sequence_parallel_4.py
│ │ ├── internlm2_7b_qlora_alpaca_e3.py
│ │ ├── internlm2_7b_qlora_arxiv_gentitle_e3.py
│ │ ├── internlm2_7b_qlora_code_alpaca_e3.py
│ │ ├── internlm2_7b_qlora_colorist_e5.py
│ │ ├── internlm2_7b_qlora_json_e3.py
│ │ ├── internlm2_7b_qlora_lawyer_e3.py
│ │ ├── internlm2_7b_qlora_msagent_react_e3_gpu8.py
│ │ ├── internlm2_7b_qlora_oasst1_512_e3.py
│ │ ├── internlm2_7b_qlora_oasst1_e3.py
│ │ ├── internlm2_7b_qlora_sql_e3.py
│ │ ├── internlm2_7b_w_internevo_dataset.py
│ │ ├── internlm2_7b_w_tokenized_dataset.py
│ │ └── internlm2_7b_w_untokenized_dataset.py
│ ├── internlm2_chat_1_8b
│ │ ├── internlm2_chat_1_8b_full_alpaca_e3.py
│ │ └── internlm2_chat_1_8b_qlora_alpaca_e3.py
│ ├── internlm2_chat_20b
│ │ ├── internlm2_chat_20b_full_finetune_custom_dataset_e1.py
│ │ ├── internlm2_chat_20b_qlora_alpaca_e3.py
│ │ ├── internlm2_chat_20b_qlora_code_alpaca_e3.py
│ │ ├── internlm2_chat_20b_qlora_lawyer_e3.py
│ │ ├── internlm2_chat_20b_qlora_oasst1_512_e3.py
│ │ └── internlm2_chat_20b_qlora_oasst1_e3.py
│ ├── internlm2_chat_7b
│ │ ├── internlm2_chat_7b_full_finetune_custom_dataset_e1.py
│ │ ├── internlm2_chat_7b_qlora_alpaca_e3.py
│ │ ├── internlm2_chat_7b_qlora_code_alpaca_e3.py
│ │ ├── internlm2_chat_7b_qlora_lawyer_e3.py
│ │ ├── internlm2_chat_7b_qlora_oasst1_512_e3.py
│ │ └── internlm2_chat_7b_qlora_oasst1_e3.py
│ ├── internlm_20b
│ │ ├── internlm_20b_qlora_alpaca_e3.py
│ │ ├── internlm_20b_qlora_alpaca_enzh_e3.py
│ │ ├── internlm_20b_qlora_alpaca_enzh_oasst1_e3.py
│ │ ├── internlm_20b_qlora_alpaca_zh_e3.py
│ │ ├── internlm_20b_qlora_arxiv_gentitle_e3.py
│ │ ├── internlm_20b_qlora_code_alpaca_e3.py
│ │ ├── internlm_20b_qlora_colorist_e5.py
│ │ ├── internlm_20b_qlora_lawyer_e3.py
│ │ ├── internlm_20b_qlora_msagent_react_e3_gpu8.py
│ │ ├── internlm_20b_qlora_oasst1_512_e3.py
│ │ ├── internlm_20b_qlora_oasst1_e3.py
│ │ ├── internlm_20b_qlora_open_platypus_e3.py
│ │ └── internlm_20b_qlora_sql_e3.py
│ ├── internlm_7b
│ │ ├── internlm_7b_full_alpaca_e3.py
│ │ ├── internlm_7b_full_alpaca_enzh_e3.py
│ │ ├── internlm_7b_full_alpaca_enzh_oasst1_e3.py
│ │ ├── internlm_7b_full_alpaca_zh_e3.py
│ │ ├── internlm_7b_full_intern_repo_dataset_template.py
│ │ ├── internlm_7b_full_oasst1_e3.py
│ │ ├── internlm_7b_qlora_alpaca_e3.py
│ │ ├── internlm_7b_qlora_alpaca_enzh_e3.py
│ │ ├── internlm_7b_qlora_alpaca_enzh_oasst1_e3.py
│ │ ├── internlm_7b_qlora_alpaca_zh_e3.py
│ │ ├── internlm_7b_qlora_arxiv_gentitle_e3.py
│ │ ├── internlm_7b_qlora_code_alpaca_e3.py
│ │ ├── internlm_7b_qlora_colorist_e5.py
│ │ ├── internlm_7b_qlora_json_e3.py
│ │ ├── internlm_7b_qlora_lawyer_e3.py
│ │ ├── internlm_7b_qlora_medical_e1.py
│ │ ├── internlm_7b_qlora_moss_sft_all_e1.py
│ │ ├── internlm_7b_qlora_moss_sft_all_e2_gpu8.py
│ │ ├── internlm_7b_qlora_moss_sft_plugins_e1.py
│ │ ├── internlm_7b_qlora_msagent_react_e3_gpu8.py
│ │ ├── internlm_7b_qlora_oasst1_512_e3.py
│ │ ├── internlm_7b_qlora_oasst1_e3.py
│ │ ├── internlm_7b_qlora_oasst1_e3_hf.py
│ │ ├── internlm_7b_qlora_oasst1_mmlu_e3.py
│ │ ├── internlm_7b_qlora_open_platypus_e3.py
│ │ ├── internlm_7b_qlora_openorca_e1.py
│ │ ├── internlm_7b_qlora_sql_e3.py
│ │ └── internlm_7b_qlora_tiny_codes_e1.py
│ ├── internlm_chat_20b
│ │ ├── internlm_chat_20b_qlora_alpaca_e3.py
│ │ ├── internlm_chat_20b_qlora_alpaca_enzh_e3.py
│ │ ├── internlm_chat_20b_qlora_alpaca_enzh_oasst1_e3.py
│ │ ├── internlm_chat_20b_qlora_alpaca_zh_e3.py
│ │ ├── internlm_chat_20b_qlora_code_alpaca_e3.py
│ │ ├── internlm_chat_20b_qlora_lawyer_e3.py
│ │ ├── internlm_chat_20b_qlora_oasst1_512_e3.py
│ │ ├── internlm_chat_20b_qlora_oasst1_e3.py
│ │ └── internlm_chat_20b_qlora_open_platypus_e3.py
│ └── internlm_chat_7b
│ │ ├── internlm_chat_7b_qlora_alpaca_e3.py
│ │ ├── internlm_chat_7b_qlora_alpaca_enzh_e3.py
│ │ ├── internlm_chat_7b_qlora_alpaca_enzh_oasst1_e3.py
│ │ ├── internlm_chat_7b_qlora_alpaca_zh_e3.py
│ │ ├── internlm_chat_7b_qlora_arxiv_gentitle_e3.py
│ │ ├── internlm_chat_7b_qlora_code_alpaca_e3.py
│ │ ├── internlm_chat_7b_qlora_colorist_e5.py
│ │ ├── internlm_chat_7b_qlora_lawyer_e3.py
│ │ ├── internlm_chat_7b_qlora_medical_e1.py
│ │ ├── internlm_chat_7b_qlora_oasst1_512_e3.py
│ │ ├── internlm_chat_7b_qlora_oasst1_e3.py
│ │ ├── internlm_chat_7b_qlora_open_platypus_e3.py
│ │ ├── internlm_chat_7b_qlora_openorca_e1.py
│ │ ├── internlm_chat_7b_qlora_sql_e3.py
│ │ └── internlm_chat_7b_qlora_tiny_codes_e1.py
├── internvl
│ ├── README.md
│ ├── README_zh-CN.md
│ ├── v1_5
│ │ ├── convert_to_official.py
│ │ ├── internvl_v1_5_internlm2_26b_finetune.py
│ │ ├── internvl_v1_5_internlm2_26b_lora_finetune.py
│ │ ├── internvl_v1_5_internlm2_26b_qlora_finetune.py
│ │ ├── internvl_v1_5_internlm2_2b_finetune.py
│ │ ├── internvl_v1_5_internlm2_2b_lora_finetune.py
│ │ ├── internvl_v1_5_internlm2_2b_qlora_finetune.py
│ │ ├── internvl_v1_5_phi3_4b_finetune.py
│ │ ├── internvl_v1_5_phi3_4b_lora_finetune.py
│ │ └── internvl_v1_5_phi3_4b_qlora_finetune.py
│ └── v2
│ │ ├── internvl_v2_internlm2_26b_finetune.py
│ │ ├── internvl_v2_internlm2_26b_lora_finetune.py
│ │ ├── internvl_v2_internlm2_26b_qlora_finetune.py
│ │ ├── internvl_v2_internlm2_2b_finetune.py
│ │ ├── internvl_v2_internlm2_2b_lora_finetune.py
│ │ ├── internvl_v2_internlm2_2b_qlora_finetune.py
│ │ ├── internvl_v2_internlm2_5_8b_finetune.py
│ │ ├── internvl_v2_internlm2_5_8b_lora_finetune.py
│ │ ├── internvl_v2_internlm2_5_8b_qlora_finetune.py
│ │ ├── internvl_v2_phi3_4b_finetune.py
│ │ ├── internvl_v2_phi3_4b_lora_finetune.py
│ │ └── internvl_v2_phi3_4b_qlora_finetune.py
├── llama
│ ├── llama2_70b
│ │ ├── llama2_70b_full_wizardlm_e1.py
│ │ ├── llama2_70b_int8_lora_open_platypus_e1.py
│ │ ├── llama2_70b_int8_lora_open_platypus_e1_hf.py
│ │ ├── llama2_70b_qlora_open_platypus_e1.py
│ │ └── llama2_70b_qlora_open_platypus_e1_hf.py
│ ├── llama2_7b
│ │ ├── llama2_7b_full_pgbooks_400iters_sp1.py
│ │ ├── llama2_7b_full_pgbooks_400iters_sp4.py
│ │ ├── llama2_7b_full_wizardlm_e1.py
│ │ ├── llama2_7b_qlora_alpaca_e3.py
│ │ ├── llama2_7b_qlora_alpaca_enzh_e3.py
│ │ ├── llama2_7b_qlora_alpaca_enzh_oasst1_e3.py
│ │ ├── llama2_7b_qlora_alpaca_zh_e3.py
│ │ ├── llama2_7b_qlora_arxiv_gentitle_e3.py
│ │ ├── llama2_7b_qlora_code_alpaca_e3.py
│ │ ├── llama2_7b_qlora_colorist_e5.py
│ │ ├── llama2_7b_qlora_lawyer_e3.py
│ │ ├── llama2_7b_qlora_medical_e1.py
│ │ ├── llama2_7b_qlora_moss_sft_all_e1.py
│ │ ├── llama2_7b_qlora_moss_sft_all_e2_gpu8.py
│ │ ├── llama2_7b_qlora_moss_sft_plugins_e1.py
│ │ ├── llama2_7b_qlora_msagent_react_e3_gpu8.py
│ │ ├── llama2_7b_qlora_oasst1_512_e3.py
│ │ ├── llama2_7b_qlora_oasst1_e3.py
│ │ ├── llama2_7b_qlora_open_platypus_e3.py
│ │ ├── llama2_7b_qlora_openorca_e1.py
│ │ ├── llama2_7b_qlora_sql_e3.py
│ │ └── llama2_7b_qlora_tiny_codes_e1.py
│ ├── llama2_7b_chat
│ │ ├── llama2_7b_chat_qlora_alpaca_e3.py
│ │ ├── llama2_7b_chat_qlora_alpaca_enzh_e3.py
│ │ ├── llama2_7b_chat_qlora_alpaca_enzh_oasst1_e3.py
│ │ ├── llama2_7b_chat_qlora_alpaca_zh_e3.py
│ │ ├── llama2_7b_chat_qlora_arxiv_gentitle_e3.py
│ │ ├── llama2_7b_chat_qlora_code_alpaca_e3.py
│ │ ├── llama2_7b_chat_qlora_colorist_e5.py
│ │ ├── llama2_7b_chat_qlora_lawyer_e3.py
│ │ ├── llama2_7b_chat_qlora_medical_e1.py
│ │ ├── llama2_7b_chat_qlora_oasst1_512_e3.py
│ │ ├── llama2_7b_chat_qlora_oasst1_e3.py
│ │ ├── llama2_7b_chat_qlora_open_platypus_e3.py
│ │ ├── llama2_7b_chat_qlora_openorca_e1.py
│ │ ├── llama2_7b_chat_qlora_sql_e3.py
│ │ └── llama2_7b_chat_qlora_tiny_codes_e1.py
│ ├── llama3_70b_instruct
│ │ └── llama3_70b_instruct_qlora_alpaca_e3_2k_gpu8.py
│ ├── llama3_8b
│ │ ├── README.md
│ │ └── llama3_8b_full_alpaca_e3.py
│ ├── llama3_8b_instruct
│ │ ├── llama3_8b_instruct_full_alpaca_e3.py
│ │ └── llama3_8b_instruct_qlora_alpaca_e3.py
│ └── llama_7b
│ │ ├── llama_7b_qlora_alpaca_e3.py
│ │ ├── llama_7b_qlora_alpaca_enzh_e3.py
│ │ ├── llama_7b_qlora_alpaca_enzh_oasst1_e3.py
│ │ ├── llama_7b_qlora_alpaca_zh_e3.py
│ │ ├── llama_7b_qlora_arxiv_gentitle_e3.py
│ │ ├── llama_7b_qlora_code_alpaca_e3.py
│ │ ├── llama_7b_qlora_colorist_e5.py
│ │ ├── llama_7b_qlora_lawyer_e3.py
│ │ ├── llama_7b_qlora_medical_e1.py
│ │ ├── llama_7b_qlora_moss_sft_all_e1.py
│ │ ├── llama_7b_qlora_moss_sft_all_e2_gpu8.py
│ │ ├── llama_7b_qlora_moss_sft_plugins_e1.py
│ │ ├── llama_7b_qlora_oasst1_512_e3.py
│ │ ├── llama_7b_qlora_oasst1_e3.py
│ │ ├── llama_7b_qlora_open_platypus_e3.py
│ │ ├── llama_7b_qlora_openorca_e1.py
│ │ ├── llama_7b_qlora_sql_e3.py
│ │ └── llama_7b_qlora_tiny_codes_e1.py
├── llama_speed_benchmark
│ ├── llama2_70b
│ │ ├── llama2_70b_full_alpaca_enzh_128k_sp8.py
│ │ ├── llama2_70b_full_alpaca_enzh_256k_sp16.py
│ │ ├── llama2_70b_full_alpaca_enzh_32k_sp4.py
│ │ └── llama2_70b_full_alpaca_enzh_8k_sp1.py
│ ├── llama2_7b
│ │ ├── llama2_7b_full_alpaca_enzh_128k_sp8.py
│ │ ├── llama2_7b_full_alpaca_enzh_1M_sp16.py
│ │ ├── llama2_7b_full_alpaca_enzh_256k_sp8.py
│ │ ├── llama2_7b_full_alpaca_enzh_32k_sp1.py
│ │ └── llama2_7b_full_alpaca_enzh_8k_sp1.py
│ └── yi_34b
│ │ ├── yi_34b_200k_full_alpaca_enzh_128k_sp8.py
│ │ ├── yi_34b_200k_full_alpaca_enzh_256k_sp8.py
│ │ ├── yi_34b_200k_full_alpaca_enzh_32k_sp2.py
│ │ └── yi_34b_200k_full_alpaca_enzh_8k_sp1.py
├── llava
│ ├── README.md
│ ├── README_zh-CN.md
│ ├── internlm2_chat_1_8b_clip_vit_large_p14_336
│ │ ├── finetune
│ │ │ └── llava_internlm2_chat_1_8b_qlora_clip_vit_large_p14_336_lora_e1_gpu8_finetune.py
│ │ └── pretrain
│ │ │ └── llava_internlm2_chat_1_8b_clip_vit_large_p14_336_e1_gpu8_pretrain.py
│ ├── internlm2_chat_20b_clip_vit_large_p14_336
│ │ ├── finetune
│ │ │ ├── llava_internlm2_chat_20b_clip_vit_large_p14_336_e1_gpu8_finetune.py
│ │ │ └── llava_internlm2_chat_20b_qlora_clip_vit_large_p14_336_lora_e1_gpu8_finetune.py
│ │ └── pretrain
│ │ │ └── llava_internlm2_chat_20b_clip_vit_large_p14_336_e1_gpu8_pretrain.py
│ ├── internlm2_chat_7b_clip_vit_large_p14_336
│ │ ├── finetune
│ │ │ ├── llava_internlm2_chat_7b_clip_vit_large_p14_336_e1_gpu8_finetune.py
│ │ │ └── llava_internlm2_chat_7b_qlora_clip_vit_large_p14_336_lora_e1_gpu8_finetune.py
│ │ └── pretrain
│ │ │ └── llava_internlm2_chat_7b_clip_vit_large_p14_336_e1_gpu8_pretrain.py
│ ├── internlm_chat_7b_clip_vit_large_p14_336
│ │ ├── finetune
│ │ │ └── llava_internlm_chat_7b_qlora_clip_vit_large_p14_336_lora_e1_gpu8_finetune.py
│ │ └── pretrain
│ │ │ └── llava_internlm_chat_7b_clip_vit_large_p14_336_e1_gpu8_pretrain.py
│ ├── llama3_70b_instruct_clip_vit_large_p14_336
│ │ └── pretrain
│ │ │ └── llava_llama3_70b_instruct_quant_clip_vit_large_p14_336_e1_gpu8_pretrain.py
│ ├── llama3_8b_instruct_clip_vit_large_p14_336
│ │ ├── README.md
│ │ ├── convert_xtuner_weights_to_hf.py
│ │ ├── convert_xtuner_weights_to_llava.py
│ │ ├── finetune
│ │ │ ├── llava_llama3_8b_instruct_full_clip_vit_large_p14_336_e1_gpu8_finetune.py
│ │ │ ├── llava_llama3_8b_instruct_full_clip_vit_large_p14_336_lora_e1_gpu8_finetune.py
│ │ │ ├── llava_llama3_8b_instruct_full_clip_vit_large_p14_336_lora_e1_gpu8_internvl_finetune.py
│ │ │ └── llava_llama3_8b_instruct_qlora_clip_vit_large_p14_336_e1_gpu1_finetune.py
│ │ └── pretrain
│ │ │ ├── llava_llama3_8b_instruct_clip_vit_large_p14_336_e1_gpu8_pretrain.py
│ │ │ ├── llava_llama3_8b_instruct_clip_vit_large_p14_336_e1_gpu8_sharegpt4v_pretrain.py
│ │ │ └── llava_llama3_8b_instruct_quant_clip_vit_large_p14_336_e1_gpu1_pretrain.py
│ ├── official
│ │ ├── llava_v15_13b
│ │ │ ├── llava_v15_13b_finetune.py
│ │ │ ├── llava_v15_13b_finetune_lora.py
│ │ │ └── llava_v15_13b_pretrain.py
│ │ └── llava_v15_7b
│ │ │ ├── llava_v15_7b_finetune.py
│ │ │ ├── llava_v15_7b_finetune_lora.py
│ │ │ └── llava_v15_7b_pretrain.py
│ ├── phi3_mini_4k_instruct_clip_vit_large_p14_336
│ │ ├── README.md
│ │ ├── convert_phi_to_llama.py
│ │ ├── convert_xtuner_weights_to_hf.py
│ │ ├── convert_xtuner_weights_to_llava.py
│ │ ├── finetune
│ │ │ ├── llava_phi3_mini_4k_instruct_full_clip_vit_large_p14_336_e1_gpu8_finetune.py
│ │ │ └── llava_phi3_mini_4k_instruct_full_clip_vit_large_p14_336_full_e2_gpu8_internvl_finetune.py
│ │ └── pretrain
│ │ │ ├── llava_phi3_mini_4k_instruct_clip_vit_large_p14_336_e1_gpu8_pretrain.py
│ │ │ └── llava_phi3_mini_4k_instruct_clip_vit_large_p14_336_e1_gpu8_sharegpt4v_pretrain.py
│ ├── vicuna_13b_v15_clip_vit_large_p14_336
│ │ ├── finetune
│ │ │ └── llava_vicuna_13b_v15_qlora_clip_vit_large_p14_336_lora_e1_gpu8_finetune.py
│ │ └── pretrain
│ │ │ └── llava_vicuna_13b_v15_clip_vit_large_p14_336_e1_gpu8_pretrain.py
│ └── vicuna_7b_v15_clip_vit_large_p14_336
│ │ ├── finetune
│ │ ├── llava_vicuna_7b_v15_qlora_clip_vit_large_p14_336_lora_e1_gpu8_finetune.py
│ │ └── llava_vicuna_7b_v15_qlora_clip_vit_large_p14_336_lora_e1_gpu8_finetune_refcoco.py
│ │ └── pretrain
│ │ └── llava_vicuna_7b_v15_clip_vit_large_p14_336_e1_gpu8_pretrain.py
├── minicpm
│ ├── 1_2b
│ │ ├── minicpm_1b_dpo_qlora.py
│ │ ├── minicpm_1b_full_alpaca_zh_e3.py
│ │ ├── minicpm_1b_lora_alpaca_zh_e3.py
│ │ ├── minicpm_1b_qlora_alpaca_enzh_e3.py
│ │ └── minicpm_1b_qlora_alpaca_zh_e3.py
│ ├── 2b
│ │ ├── minicpm_2b_dpo_qlora.py
│ │ ├── minicpm_2b_full_alpaca_zh_e3.py
│ │ ├── minicpm_2b_lora_alpaca_zh_e3.py
│ │ ├── minicpm_2b_qlora_alpaca_enzh_e3.py
│ │ └── minicpm_2b_qlora_alpaca_zh_e3.py
│ └── minicpm3_4b
│ │ ├── minicpm3_4b_dpo_qlora.py
│ │ └── minicpm3_4b_full_alpaca_zh_e3.py
├── mistral
│ ├── mistral_7b_full_finetune_custom_dataset_e1.py
│ ├── mistral_7b_qlora_skypile_pretrain_e1.py
│ ├── mistral_7b_w_tokenized_dataset.py
│ └── mistral_7b_w_untokenized_dataset.py
├── mixtral
│ ├── README.md
│ ├── mixtral_8x7b
│ │ ├── mixtral_8x7b_full_oasst1_e3.py
│ │ └── mixtral_8x7b_qlora_oasst1_e3.py
│ └── mixtral_8x7b_instruct
│ │ ├── mixtral_8x7b_instruct_full_oasst1_e3.py
│ │ └── mixtral_8x7b_instruct_qlora_oasst1_e3.py
├── orpo
│ ├── internlm
│ │ ├── internlm2_chat_1_8b_orpo_full.py
│ │ ├── internlm2_chat_1_8b_orpo_full_varlenattn.py
│ │ ├── internlm2_chat_1_8b_orpo_full_varlenattn_jsonl_dataset.py
│ │ └── internlm2_chat_7b_orpo_qlora_varlenattn_ultrafeedback_e5.py
│ └── llama
│ │ └── llama3_8b_instruct_orpo_qlora_varlenattn_ultrafeedback_e5.py
├── phi
│ └── phi3
│ │ ├── phi3_mini_128k_instruct_full_alpaca_e3.py
│ │ ├── phi3_mini_128k_instruct_qlora_alpaca_e3.py
│ │ ├── phi3_mini_4k_instruct_full_alpaca_e3.py
│ │ └── phi3_mini_4k_instruct_qlora_alpaca_e3.py
├── qwen
│ ├── qwen1
│ │ ├── qwen_1_8b
│ │ │ ├── qwen_1_8b_qlora_alpaca_e3.py
│ │ │ ├── qwen_1_8b_qlora_alpaca_enzh_e3.py
│ │ │ ├── qwen_1_8b_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ ├── qwen_1_8b_qlora_alpaca_zh_e3.py
│ │ │ └── qwen_1_8b_qlora_code_alpaca_e3.py
│ │ ├── qwen_1_8b_chat
│ │ │ ├── qwen_1_8b_chat_qlora_alpaca_e3.py
│ │ │ ├── qwen_1_8b_chat_qlora_alpaca_enzh_e3.py
│ │ │ ├── qwen_1_8b_chat_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ ├── qwen_1_8b_chat_qlora_alpaca_zh_e3.py
│ │ │ └── qwen_1_8b_chat_qlora_code_alpaca_e3.py
│ │ ├── qwen_72b
│ │ │ ├── qwen_72b_qlora_alpaca_e3.py
│ │ │ ├── qwen_72b_qlora_alpaca_enzh_e3.py
│ │ │ ├── qwen_72b_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ ├── qwen_72b_qlora_alpaca_zh_e3.py
│ │ │ └── qwen_72b_qlora_code_alpaca_e3.py
│ │ ├── qwen_7b
│ │ │ ├── qwen_7b_qlora_alpaca_e3.py
│ │ │ ├── qwen_7b_qlora_alpaca_enzh_e3.py
│ │ │ ├── qwen_7b_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ ├── qwen_7b_qlora_alpaca_zh_e3.py
│ │ │ ├── qwen_7b_qlora_arxiv_gentitle_e3.py
│ │ │ ├── qwen_7b_qlora_code_alpaca_e3.py
│ │ │ ├── qwen_7b_qlora_colorist_e5.py
│ │ │ ├── qwen_7b_qlora_lawyer_e3.py
│ │ │ ├── qwen_7b_qlora_medical_e1.py
│ │ │ ├── qwen_7b_qlora_moss_sft_all_e1.py
│ │ │ ├── qwen_7b_qlora_moss_sft_all_e2_gpu8.py
│ │ │ ├── qwen_7b_qlora_moss_sft_plugins_e1.py
│ │ │ ├── qwen_7b_qlora_oasst1_512_e3.py
│ │ │ ├── qwen_7b_qlora_oasst1_e3.py
│ │ │ ├── qwen_7b_qlora_open_platypus_e3.py
│ │ │ ├── qwen_7b_qlora_openorca_e1.py
│ │ │ ├── qwen_7b_qlora_sql_e3.py
│ │ │ └── qwen_7b_qlora_tiny_codes_e1.py
│ │ └── qwen_7b_chat
│ │ │ ├── qwen_7b_chat_qlora_alpaca_e3.py
│ │ │ ├── qwen_7b_chat_qlora_alpaca_enzh_e3.py
│ │ │ ├── qwen_7b_chat_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ ├── qwen_7b_chat_qlora_alpaca_zh_e3.py
│ │ │ ├── qwen_7b_chat_qlora_arxiv_gentitle_e3.py
│ │ │ ├── qwen_7b_chat_qlora_code_alpaca_e3.py
│ │ │ ├── qwen_7b_chat_qlora_colorist_e5.py
│ │ │ ├── qwen_7b_chat_qlora_lawyer_e3.py
│ │ │ ├── qwen_7b_chat_qlora_medical_e1.py
│ │ │ ├── qwen_7b_chat_qlora_oasst1_512_e3.py
│ │ │ ├── qwen_7b_chat_qlora_oasst1_e3.py
│ │ │ ├── qwen_7b_chat_qlora_open_platypus_e3.py
│ │ │ ├── qwen_7b_chat_qlora_openorca_e1.py
│ │ │ ├── qwen_7b_chat_qlora_sql_e3.py
│ │ │ └── qwen_7b_chat_qlora_tiny_codes_e1.py
│ └── qwen1_5
│ │ ├── qwen1_5_0_5b
│ │ ├── qwen1_5_0_5b_full_alpaca_e3.py
│ │ └── qwen1_5_0_5b_qlora_alpaca_e3.py
│ │ ├── qwen1_5_0_5b_chat
│ │ ├── qwen1_5_0_5b_chat_full_alpaca_e3.py
│ │ └── qwen1_5_0_5b_chat_qlora_alpaca_e3.py
│ │ ├── qwen1_5_110b
│ │ ├── qwen1_5_110b_full_alpaca_e3.py
│ │ └── qwen1_5_110b_qlora_alpaca_e3.py
│ │ ├── qwen1_5_110b_chat
│ │ ├── README.md
│ │ ├── qwen1_5_110b_chat_full_alpaca_e3.py
│ │ ├── qwen1_5_110b_chat_qlora_alpaca_e3.py
│ │ └── qwen1_5_110b_chat_qlora_alpaca_e3_16k_2gpus.py
│ │ ├── qwen1_5_14b
│ │ ├── qwen1_5_14b_full_alpaca_e3.py
│ │ └── qwen1_5_14b_qlora_alpaca_e3.py
│ │ ├── qwen1_5_14b_chat
│ │ ├── qwen1_5_14b_chat_full_alpaca_e3.py
│ │ └── qwen1_5_14b_chat_qlora_alpaca_e3.py
│ │ ├── qwen1_5_1_8b
│ │ ├── qwen1_5_1_8b_full_alpaca_e3.py
│ │ └── qwen1_5_1_8b_qlora_alpaca_e3.py
│ │ ├── qwen1_5_1_8b_chat
│ │ ├── qwen1_5_1_8b_chat_full_alpaca_e3.py
│ │ └── qwen1_5_1_8b_chat_qlora_alpaca_e3.py
│ │ ├── qwen1_5_4b
│ │ ├── qwen1_5_4b_full_alpaca_e3.py
│ │ ├── qwen1_5_4b_qlora_alpaca_e3.py
│ │ └── qwen1_5_4b_qlora_alpaca_e3_openmind.py
│ │ ├── qwen1_5_4b_chat
│ │ ├── qwen1_5_4b_chat_full_alpaca_e3.py
│ │ └── qwen1_5_4b_chat_qlora_alpaca_e3.py
│ │ ├── qwen1_5_72b
│ │ ├── qwen1_5_72b_full_alpaca_e3.py
│ │ └── qwen1_5_72b_qlora_alpaca_e3.py
│ │ ├── qwen1_5_72b_chat
│ │ ├── qwen1_5_72b_chat_full_alpaca_e3.py
│ │ └── qwen1_5_72b_chat_qlora_alpaca_e3.py
│ │ ├── qwen1_5_7b
│ │ ├── qwen1_5_7b_full_alpaca_e3.py
│ │ └── qwen1_5_7b_qlora_alpaca_e3.py
│ │ └── qwen1_5_7b_chat
│ │ ├── qwen1_5_7b_chat_full_alpaca_e3.py
│ │ └── qwen1_5_7b_chat_qlora_alpaca_e3.py
├── qwen_moe
│ └── qwen1_5
│ │ └── qwen1_5_moe_a2_7_b_chat
│ │ └── qwen1_5_moe_a2_7_b_chat_full_alpaca_e3.py
├── reward_model
│ ├── internlm
│ │ ├── internlm2_chat_1_8b_reward_full_ultrafeedback.py
│ │ ├── internlm2_chat_1_8b_reward_full_varlenattn_jsonl_dataset.py
│ │ ├── internlm2_chat_1_8b_reward_full_varlenattn_ultrafeedback.py
│ │ └── internlm2_chat_1_8b_reward_qlora_varlenattn_ultrafeedback.py
│ └── llama
│ │ └── llama3_8b_instruct_reward_full_varlenattn_ultrafeedback.py
├── starcoder
│ └── starcoder_qlora_stack_exchange_example.py
├── yi
│ ├── yi_34b
│ │ └── yi_34b_qlora_alpaca_enzh_e3.py
│ └── yi_6b
│ │ └── yi_6b_qlora_alpaca_enzh_e3.py
└── zephyr
│ └── zephyr_7b_beta_qlora_alpaca_e3.py
├── dataset
├── __init__.py
├── collate_fns
│ ├── __init__.py
│ ├── default_collate_fn.py
│ ├── mmlu_collate_fn.py
│ └── preference_collate_fn.py
├── concat_dataset.py
├── huggingface.py
├── intern_repo.py
├── internvl_dataset.py
├── json_dataset.py
├── llava.py
├── map_fns
│ ├── __init__.py
│ ├── dataset_map_fns
│ │ ├── __init__.py
│ │ ├── alpaca_map_fn.py
│ │ ├── alpaca_zh_map_fn.py
│ │ ├── arxiv_map_fn.py
│ │ ├── code_alpaca_map_fn.py
│ │ ├── colors_map_fn.py
│ │ ├── crime_kg_assitant_map_fn.py
│ │ ├── default_map_fn.py
│ │ ├── law_reference_map_fn.py
│ │ ├── llava_map_fn.py
│ │ ├── medical_map_fn.py
│ │ ├── msagent_map_fn.py
│ │ ├── oasst1_map_fn.py
│ │ ├── openai_map_fn.py
│ │ ├── openorca_map_fn.py
│ │ ├── pretrain_map_fn.py
│ │ ├── sql_map_fn.py
│ │ ├── stack_exchange_map_fn.py
│ │ ├── tiny_codes_map_fn.py
│ │ └── wizardlm_map_fn.py
│ └── template_map_fn.py
├── modelscope.py
├── moss_sft.py
├── preference_dataset.py
├── refcoco_json.py
├── samplers
│ ├── __init__.py
│ ├── intern_repo.py
│ └── length_grouped.py
└── utils.py
├── engine
├── __init__.py
├── _strategy
│ ├── __init__.py
│ └── deepspeed.py
├── hooks
│ ├── __init__.py
│ ├── dataset_info_hook.py
│ ├── evaluate_chat_hook.py
│ ├── hf_checkpoint_hook.py
│ ├── throughput_hook.py
│ └── varlen_attn_args_to_messagehub_hook.py
└── runner
│ ├── __init__.py
│ └── loops.py
├── entry_point.py
├── evaluation
├── __init__.py
└── metrics
│ ├── __init__.py
│ ├── mmlu_metric.py
│ └── reward_metric.py
├── model
├── __init__.py
├── dpo.py
├── internvl.py
├── llava.py
├── modules
│ ├── __init__.py
│ ├── dispatch
│ │ ├── __init__.py
│ │ ├── attention.py
│ │ ├── baichuan.py
│ │ ├── cohere.py
│ │ ├── deepseek_v2.py
│ │ ├── internlm.py
│ │ ├── internlm2.py
│ │ ├── internlm3.py
│ │ ├── llama.py
│ │ ├── mistral.py
│ │ ├── phi3.py
│ │ ├── qwen2.py
│ │ ├── triton_kernels
│ │ │ ├── __init__.py
│ │ │ ├── layer_norm.py
│ │ │ ├── rms_norm.py
│ │ │ └── rotary.py
│ │ ├── utils.py
│ │ └── yi.py
│ └── projector
│ │ ├── __init__.py
│ │ ├── configuration_projector.py
│ │ └── modeling_projector.py
├── orpo.py
├── reward.py
├── sft.py
├── transformers_models
│ ├── __init__.py
│ ├── deepseek_v2
│ │ ├── __init__.py
│ │ ├── configuration_deepseek.py
│ │ ├── modeling_deepseek.py
│ │ └── tokenization_deepseek_fast.py
│ └── mixtral
│ │ ├── __init__.py
│ │ ├── configuration_mixtral.py
│ │ └── modeling_mixtral.py
└── utils.py
├── parallel
├── __init__.py
└── sequence
│ ├── __init__.py
│ ├── attention.py
│ ├── comm.py
│ ├── data_collate.py
│ ├── reduce_loss.py
│ ├── sampler.py
│ └── setup_distributed.py
├── registry.py
├── tools
├── chat.py
├── check_custom_dataset.py
├── copy_cfg.py
├── data_preprocess
│ ├── arxiv.py
│ └── convert_refcoco.py
├── eval_refcoco.py
├── get_data_order.py
├── list_cfg.py
├── list_dataset_format.py
├── log_dataset.py
├── mmbench.py
├── model_converters
│ ├── merge.py
│ ├── modeling_internlm2_reward
│ │ ├── __init__.py
│ │ ├── configuration_internlm2.py
│ │ └── modeling_internlm2.py
│ ├── pth_to_hf.py
│ └── split.py
├── plugins
│ ├── __init__.py
│ ├── api.py
│ ├── calculate.py
│ ├── search.py
│ └── solve.py
├── process_untokenized_datasets.py
├── process_untokenized_datasets_legacy.py
├── process_untokenized_llava_data.py
├── test.py
├── tokenize_ftdp_datasets.py
├── train.py
└── utils.py
├── utils
├── __init__.py
├── constants.py
├── device.py
├── fileio.py
├── handle_moe_load_and_save.py
├── stop_criteria.py
├── templates.py
└── zero_to_any_dtype.py
└── version.py
/.github/workflows/deploy.yml:
--------------------------------------------------------------------------------
1 | name: deploy
2 |
3 | on: push
4 |
5 | concurrency:
6 | group: ${{ github.workflow }}-${{ github.ref }}
7 | cancel-in-progress: true
8 |
9 | jobs:
10 | build-n-publish:
11 | runs-on: ubuntu-latest
12 | if: startsWith(github.event.ref, 'refs/tags')
13 | steps:
14 | - uses: actions/checkout@v2
15 | - name: Set up Python 3.8
16 | uses: actions/setup-python@v2
17 | with:
18 | python-version: 3.8
19 | - name: Build XTuner
20 | run: |
21 | pip install wheel
22 | python setup.py sdist bdist_wheel
23 | - name: Publish distribution to PyPI
24 | run: |
25 | pip install twine
26 | twine upload dist/* -u __token__ -p ${{ secrets.pypi_password }}
27 |
--------------------------------------------------------------------------------
/.github/workflows/lint.yml:
--------------------------------------------------------------------------------
1 | name: lint
2 |
3 | on: [push, pull_request]
4 |
5 | concurrency:
6 | group: ${{ github.workflow }}-${{ github.ref }}
7 | cancel-in-progress: true
8 |
9 | jobs:
10 | lint:
11 | runs-on: ubuntu-latest
12 | steps:
13 | - uses: actions/checkout@v2
14 | - name: Set up Python 3.8
15 | uses: actions/setup-python@v2
16 | with:
17 | python-version: 3.8
18 | - name: Install pre-commit hook
19 | run: |
20 | pip install pre-commit
21 | pre-commit install
22 | - name: Linting
23 | run: pre-commit run --all-files
24 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | MANIFEST
27 |
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 |
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 |
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *.cover
47 | .hypothesis/
48 | .pytest_cache/
49 |
50 | # Translations
51 | *.mo
52 | *.pot
53 |
54 | # Django stuff:
55 | *.log
56 | local_settings.py
57 | db.sqlite3
58 |
59 | # Flask stuff:
60 | instance/
61 | .webassets-cache
62 |
63 | # Scrapy stuff:
64 | .scrapy
65 |
66 | # Sphinx documentation
67 | docs/*/_build/
68 |
69 | # PyBuilder
70 | target/
71 |
72 | # Jupyter Notebook
73 | .ipynb_checkpoints
74 |
75 | # pyenv
76 | .python-version
77 |
78 | # celery beat schedule file
79 | celerybeat-schedule
80 |
81 | # SageMath parsed files
82 | *.sage.py
83 |
84 | # Environments
85 | .env
86 | .venv
87 | env/
88 | venv/
89 | ENV/
90 | env.bak/
91 | venv.bak/
92 |
93 | # Spyder project settings
94 | .spyderproject
95 | .spyproject
96 |
97 | # Rope project settings
98 | .ropeproject
99 |
100 | # mkdocs documentation
101 | /site
102 |
103 | # mypy
104 | .mypy_cache/
105 |
106 | # custom
107 | data/
108 | data
109 | .vscode
110 | .idea
111 | .DS_Store
112 | *.pkl
113 | *.pkl.json
114 | *.log.json
115 | work_dirs/
116 |
117 | # Pytorch
118 | *.pth
119 | *.py~
120 | *.sh~
121 |
122 | # srun
123 | *.out
124 | batchscript-*
125 |
--------------------------------------------------------------------------------
/.owners.yml:
--------------------------------------------------------------------------------
1 | assign:
2 | issues: disabled
3 | pull_requests: disabled
4 | strategy:
5 | random
6 | # daily-shift-based
7 | schedule:
8 | '*/1 * * * *'
9 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | exclude: ^tests/data/|^xtuner/model/transformers_models/|^xtuner/tools/model_converters/modeling_internlm2_reward/|^xtuner/_lite/modelings/|^xtuner/_lite/accelerate/dispatches/huggingface/
2 | repos:
3 | - repo: https://github.com/PyCQA/flake8
4 | rev: 5.0.4
5 | hooks:
6 | - id: flake8
7 | args: ["--max-line-length=119"]
8 | - repo: https://github.com/PyCQA/isort
9 | rev: 5.12.0
10 | hooks:
11 | - id: isort
12 | - repo: https://github.com/pre-commit/pre-commit-hooks
13 | rev: v5.0.0
14 | hooks:
15 | - id: check-yaml
16 | - id: requirements-txt-fixer
17 | - id: check-merge-conflict
18 | - id: fix-encoding-pragma
19 | args: ["--remove"]
20 | - id: mixed-line-ending
21 | args: ["--fix=lf"]
22 | - repo: https://github.com/codespell-project/codespell
23 | rev: v2.2.1
24 | hooks:
25 | - id: codespell
26 | - repo: https://github.com/executablebooks/mdformat
27 | rev: 0.7.9
28 | hooks:
29 | - id: mdformat
30 | args: ["--number"]
31 | additional_dependencies:
32 | - mdformat-openmmlab
33 | - mdformat_frontmatter
34 | - linkify-it-py
35 | exclude: 'docs/zh_cn/user_guides/sequence_parallel.md'
36 | - repo: https://github.com/myint/docformatter
37 | rev: v1.3.1
38 | hooks:
39 | - id: docformatter
40 | args: ["--in-place", "--wrap-descriptions", "119"]
41 | - repo: https://github.com/open-mmlab/pre-commit-hooks
42 | rev: v0.4.0
43 | hooks:
44 | - id: check-copyright
45 | args: ["xtuner", "--excludes", "xtuner/_lite/modelings/", "xtuner/model/transformers_models/"]
46 | - id: remove-improper-eol-in-cn-docs
47 | - repo: https://github.com/asottile/pyupgrade
48 | rev: v3.0.0
49 | hooks:
50 | - id: pyupgrade
51 | args: ["--py36-plus"]
52 |
53 | - repo: https://github.com/psf/black
54 | rev: 23.9.1
55 | hooks:
56 | - id: black
57 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | recursive-include xtuner/configs *.py *.yml *.json
2 | recursive-include xtuner/tools *.sh *.py
3 |
--------------------------------------------------------------------------------
/docs/en/.readthedocs.yaml:
--------------------------------------------------------------------------------
1 | version: 2
2 |
3 | build:
4 | os: ubuntu-22.04
5 | tools:
6 | python: "3.8"
7 |
8 | formats:
9 | - epub
10 |
11 | python:
12 | install:
13 | - requirements: requirements/docs.txt
14 |
15 | sphinx:
16 | configuration: docs/en/conf.py
17 |
--------------------------------------------------------------------------------
/docs/en/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line, and also
5 | # from the environment for the first two.
6 | SPHINXOPTS ?=
7 | SPHINXBUILD ?= sphinx-build
8 | SOURCEDIR = .
9 | BUILDDIR = _build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 |
--------------------------------------------------------------------------------
/docs/en/_static/css/readthedocs.css:
--------------------------------------------------------------------------------
1 | .header-logo {
2 | background-image: url("../image/logo.png");
3 | background-size: 177px 40px;
4 | height: 40px;
5 | width: 177px;
6 | }
7 |
--------------------------------------------------------------------------------
/docs/en/_static/image/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InternLM/xtuner/53f2429d8a4662c04a8a4a2dc5c941672f4d3bdd/docs/en/_static/image/logo.png
--------------------------------------------------------------------------------
/docs/en/acceleration/benchmark.rst:
--------------------------------------------------------------------------------
1 | Benchmark
2 | =========
3 |
--------------------------------------------------------------------------------
/docs/en/acceleration/deepspeed.rst:
--------------------------------------------------------------------------------
1 | DeepSpeed
2 | =========
3 |
--------------------------------------------------------------------------------
/docs/en/acceleration/flash_attn.rst:
--------------------------------------------------------------------------------
1 | Flash Attention
2 | ===============
3 |
--------------------------------------------------------------------------------
/docs/en/acceleration/hyper_parameters.rst:
--------------------------------------------------------------------------------
1 | HyperParameters
2 | ===============
3 |
--------------------------------------------------------------------------------
/docs/en/acceleration/length_grouped_sampler.rst:
--------------------------------------------------------------------------------
1 | Length Grouped Sampler
2 | ======================
3 |
--------------------------------------------------------------------------------
/docs/en/acceleration/pack_to_max_length.rst:
--------------------------------------------------------------------------------
1 | Pack to Max Length
2 | ==================
3 |
--------------------------------------------------------------------------------
/docs/en/acceleration/train_extreme_long_sequence.rst:
--------------------------------------------------------------------------------
1 | Train Extreme Long Sequence
2 | ===========================
3 |
--------------------------------------------------------------------------------
/docs/en/acceleration/train_large_scale_dataset.rst:
--------------------------------------------------------------------------------
1 | Train Large-scale Dataset
2 | =========================
3 |
--------------------------------------------------------------------------------
/docs/en/acceleration/varlen_flash_attn.rst:
--------------------------------------------------------------------------------
1 | Varlen Flash Attention
2 | ======================
3 |
--------------------------------------------------------------------------------
/docs/en/chat/agent.md:
--------------------------------------------------------------------------------
1 | # Chat with Agent
2 |
--------------------------------------------------------------------------------
/docs/en/chat/llm.md:
--------------------------------------------------------------------------------
1 | # Chat with LLM
2 |
--------------------------------------------------------------------------------
/docs/en/chat/lmdeploy.md:
--------------------------------------------------------------------------------
1 | # Accelerate chat by LMDeploy
2 |
--------------------------------------------------------------------------------
/docs/en/chat/vlm.md:
--------------------------------------------------------------------------------
1 | # Chat with VLM
2 |
--------------------------------------------------------------------------------
/docs/en/evaluation/hook.md:
--------------------------------------------------------------------------------
1 | # Evaluation during training
2 |
--------------------------------------------------------------------------------
/docs/en/evaluation/mmbench.md:
--------------------------------------------------------------------------------
1 | # MMBench (VLM)
2 |
--------------------------------------------------------------------------------
/docs/en/evaluation/mmlu.md:
--------------------------------------------------------------------------------
1 | # MMLU (LLM)
2 |
--------------------------------------------------------------------------------
/docs/en/evaluation/opencompass.md:
--------------------------------------------------------------------------------
1 | # Evaluate with OpenCompass
2 |
--------------------------------------------------------------------------------
/docs/en/get_started/installation.md:
--------------------------------------------------------------------------------
1 | # Installation
2 |
3 | In this section, we will show you how to install XTuner.
4 |
5 | ## Installation Process
6 |
7 | We recommend that users follow our best practices when installing XTuner.
8 | Specifically, we recommend installing XTuner in a conda virtual environment with Python 3.10.
9 |
10 | ### Best Practices
11 |
12 | **Step 0.** Create a Python 3.10 virtual environment using conda.
13 |
14 | ```shell
15 | conda create --name xtuner-env python=3.10 -y
16 | conda activate xtuner-env
17 | ```
18 |
19 | **Step 1.** Install XTuner.
20 |
21 | Case a: Install XTuner via pip:
22 |
23 | ```shell
24 | pip install -U xtuner
25 | ```
26 |
27 | Case b: Install XTuner with DeepSpeed integration:
28 |
29 | ```shell
30 | pip install -U 'xtuner[deepspeed]'
31 | ```
32 |
33 | Case c: Install XTuner from the source code:
34 |
35 | ```shell
36 | git clone https://github.com/InternLM/xtuner.git
37 | cd xtuner
38 | pip install -e '.[all]'
39 | # "-e" indicates installing the project in editable mode, so any local modifications to the code will take effect without reinstalling.
40 | ```
41 |
42 | ## Verify the installation
43 |
44 | To verify that XTuner is installed correctly, we will use a command to list the available configuration files.
45 |
46 | **Print Configuration Files:** Run `xtuner list-cfg` in the command line to check that the configuration files can be listed.
47 |
48 | ```shell
49 | xtuner list-cfg
50 | ```
51 |
52 | You should see a list of XTuner configuration files, corresponding to the ones in [xtuner/configs](https://github.com/InternLM/xtuner/tree/main/xtuner/configs) in the source code.
53 |
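54 | As an additional check, you can confirm that the package imports cleanly from Python. The one-liner below is a minimal sketch: it assumes the installed package exposes a `__version__` attribute, as suggested by `xtuner/version.py` in the source tree.
55 |
56 | ```shell
57 | # Hypothetical sanity check: assumes xtuner exposes __version__ (see xtuner/version.py)
58 | python -c "import xtuner; print(xtuner.__version__)"
59 | ```
60 |
61 | If this prints a version string without raising an `ImportError`, the package is importable in the active environment.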
--------------------------------------------------------------------------------
/docs/en/get_started/overview.md:
--------------------------------------------------------------------------------
1 | # Overview
2 |
3 | This chapter introduces the framework and workflow of XTuner and provides links to detailed tutorials.
4 |
5 | ## What is XTuner
6 |
--------------------------------------------------------------------------------
/docs/en/internevo_migration/ftdp_dataset/Case1.rst:
--------------------------------------------------------------------------------
1 | Case 1
2 | ======
3 |
--------------------------------------------------------------------------------
/docs/en/internevo_migration/ftdp_dataset/Case2.rst:
--------------------------------------------------------------------------------
1 | Case 2
2 | ======
3 |
--------------------------------------------------------------------------------
/docs/en/internevo_migration/ftdp_dataset/Case3.rst:
--------------------------------------------------------------------------------
1 | Case 3
2 | ======
3 |
--------------------------------------------------------------------------------
/docs/en/internevo_migration/ftdp_dataset/Case4.rst:
--------------------------------------------------------------------------------
1 | Case 4
2 | ======
3 |
--------------------------------------------------------------------------------
/docs/en/internevo_migration/ftdp_dataset/ftdp.rst:
--------------------------------------------------------------------------------
1 | ftdp
2 | ====
3 |
--------------------------------------------------------------------------------
/docs/en/internevo_migration/internevo_migration.rst:
--------------------------------------------------------------------------------
1 | InternEVO Migration
2 | ===================
3 |
--------------------------------------------------------------------------------
/docs/en/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | pushd %~dp0
4 |
5 | REM Command file for Sphinx documentation
6 |
7 | if "%SPHINXBUILD%" == "" (
8 | set SPHINXBUILD=sphinx-build
9 | )
10 | set SOURCEDIR=.
11 | set BUILDDIR=_build
12 |
13 | %SPHINXBUILD% >NUL 2>NUL
14 | if errorlevel 9009 (
15 | echo.
16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
17 | echo.installed, then set the SPHINXBUILD environment variable to point
18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you
19 | echo.may add the Sphinx directory to PATH.
20 | echo.
21 | echo.If you don't have Sphinx installed, grab it from
22 | echo.https://www.sphinx-doc.org/
23 | exit /b 1
24 | )
25 |
26 | if "%1" == "" goto help
27 |
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 |
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 |
34 | :end
35 | popd
36 |
--------------------------------------------------------------------------------
/docs/en/models/supported.md:
--------------------------------------------------------------------------------
1 | # Supported Models
2 |
--------------------------------------------------------------------------------
/docs/en/notes/changelog.md:
--------------------------------------------------------------------------------
1 |
14 |
15 | # Changelog
16 |
17 | ## v0.1.0 (2023.08.30)
18 |
19 | XTuner is released! 🔥🔥🔥
20 |
21 | ### Highlights
22 |
23 | - XTuner supports LLM fine-tuning on consumer-grade GPUs. The minimum GPU memory required for 7B LLM fine-tuning is only **8GB**.
24 | - XTuner supports various LLMs, datasets, algorithms and training pipelines.
25 | - Several fine-tuned adapters are released simultaneously, covering a variety of use cases such as the colorist LLM, the plugins-based LLM, and many more. For further details, please visit [XTuner on HuggingFace](https://huggingface.co/xtuner)!
26 |
--------------------------------------------------------------------------------
/docs/en/preparation/pretrained_model.rst:
--------------------------------------------------------------------------------
1 | Pretrained Model
2 | ================
3 |
--------------------------------------------------------------------------------
/docs/en/preparation/prompt_template.rst:
--------------------------------------------------------------------------------
1 | Prompt Template
2 | ===============
3 |
--------------------------------------------------------------------------------
/docs/en/switch_language.md:
--------------------------------------------------------------------------------
1 | ## English
2 |
3 | ## 简体中文
4 |
--------------------------------------------------------------------------------
/docs/en/training/custom_agent_dataset.rst:
--------------------------------------------------------------------------------
1 | Custom Agent Dataset
2 | ====================
3 |
--------------------------------------------------------------------------------
/docs/en/training/custom_pretrain_dataset.rst:
--------------------------------------------------------------------------------
1 | Custom Pretrain Dataset
2 | =======================
3 |
--------------------------------------------------------------------------------
/docs/en/training/custom_sft_dataset.rst:
--------------------------------------------------------------------------------
1 | Custom SFT Dataset
2 | ==================
3 |
--------------------------------------------------------------------------------
/docs/en/training/modify_settings.rst:
--------------------------------------------------------------------------------
1 | Modify Settings
2 | ===============
3 |
--------------------------------------------------------------------------------
/docs/en/training/multi_modal_dataset.rst:
--------------------------------------------------------------------------------
1 | Multi-modal Dataset
2 | ===================
3 |
--------------------------------------------------------------------------------
/docs/en/training/open_source_dataset.rst:
--------------------------------------------------------------------------------
1 | Open Source Datasets
2 | ====================
3 |
--------------------------------------------------------------------------------
/docs/en/training/visualization.rst:
--------------------------------------------------------------------------------
1 | Visualization
2 | =============
3 |
--------------------------------------------------------------------------------
/docs/zh_cn/.readthedocs.yaml:
--------------------------------------------------------------------------------
1 | version: 2
2 |
3 | build:
4 | os: ubuntu-22.04
5 | tools:
6 | python: "3.8"
7 |
8 | formats:
9 | - epub
10 |
11 | python:
12 | install:
13 | - requirements: requirements/docs.txt
14 |
15 | sphinx:
16 | configuration: docs/zh_cn/conf.py
17 |
--------------------------------------------------------------------------------
/docs/zh_cn/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line, and also
5 | # from the environment for the first two.
6 | SPHINXOPTS ?=
7 | SPHINXBUILD ?= sphinx-build
8 | SOURCEDIR = .
9 | BUILDDIR = _build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 |
--------------------------------------------------------------------------------
/docs/zh_cn/_static/image/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InternLM/xtuner/53f2429d8a4662c04a8a4a2dc5c941672f4d3bdd/docs/zh_cn/_static/image/logo.png
--------------------------------------------------------------------------------
/docs/zh_cn/acceleration/flash_attn.rst:
--------------------------------------------------------------------------------
1 | .. _flash_attn:
2 |
3 | Flash Attention
4 | ==================================================
5 |
6 | Flash Attention (Flash Attention 2) 是一种用于加速 Transformer 模型中 Attention 计算,并减少其显存消耗的算法。XTuner 中 Flash Attention (Flash Attention 2) 的支持情况如下表所示:
7 |
8 | .. list-table::
9 | :widths: 25 50
10 | :header-rows: 1
11 |
12 | * - 模型
13 | - Flash Attention 支持情况
14 | * - baichuan 1/2
15 | - ❌
16 | * - chatglm 2/3
17 | - ❌
18 | * - deepseek
19 | - ✅
20 | * - gemma
21 | - ❌
22 | * - internlm 1/2
23 | - ✅
24 | * - llama 2
25 | - ✅
26 | * - mistral
27 | - ✅
28 | * - qwen 1/1.5
29 | - ✅
30 | * - starcoder
31 | - ✅
32 | * - yi
33 | - ✅
34 | * - zephyr
35 | - ✅
36 |
37 | .. note::
38 | XTuner 会根据运行环境自动控制 Flash Attention 的使用情况 (见 `dispatch_modules `_):
39 |
40 | .. list-table::
41 | :widths: 50 50
42 | :header-rows: 1
43 |
44 | * - 环境
45 | - Flash Attention 使用情况
46 | * - 安装 `flash attn `_
47 | - Flash Attention 2
48 | * - 未安装 `flash attn `_ 且 PyTorch Version <= 1.13
49 | - No Flash Attention
50 | * - 未安装 `flash attn `_ 且 2.0 <= PyTorch Version <= 2.1
51 | - Flash Attention 1
52 | * - 未安装 `flash attn `_ 且 PyTorch Version >= 2.2
53 | - Flash Attention 2
54 |
55 | .. note::
56 | 使用 XTuner 训练 QWen1/1.5 时若想使用 Flash Attention 加速,需要先安装 `flash attn `_ (参考 `flash attn 安装 `_,需要 cuda )
57 |
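58 | .. tip::
59 |    下面是一个用于粗略判断当前环境会落入上表哪一行的示意脚本(仅作演示,并非 XTuner 官方接口):
60 |
61 |    .. code-block:: python
62 |
63 |       import torch
64 |
65 |       try:
66 |           import flash_attn  # noqa: F401
67 |           print("已安装 flash_attn,将使用 Flash Attention 2")
68 |       except ImportError:
69 |           # 未安装 flash_attn 时,实际采用的方案取决于 PyTorch 版本,请对照上表确认
70 |           print(f"未安装 flash_attn,当前 PyTorch 版本为 {torch.__version__}")
71 |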
--------------------------------------------------------------------------------
/docs/zh_cn/acceleration/hyper_parameters.rst:
--------------------------------------------------------------------------------
1 | =====================
2 | 调整加速策略
3 | =====================
4 |
5 | 本节将会列举 XTuner 中会影响训练速度的配置项。
6 |
7 |
8 | max_length
9 | -------------------
10 |
11 | ``max_length`` 表示在数据预处理过程中,单条数据长度超过 ``max_length`` 的部分会被截断,基本所有实验都会设置该项。
12 |
13 | pack_to_max_length
14 | ---------------------------
15 |
16 | ``pack_to_max_length`` 用于配置是否进行\ :ref:`数据集拼接 ` \ 。
17 |
18 | ``pack_to_max_length = True`` 表示在数据预处理过程中将多条短数据拼接为一条长度为 ``max_length`` 的长数据,该配置可以大幅提升训练速度。
19 |
20 | 若 ``pack_to_max_length = False``,则推荐将 ``batch_size`` 适度调大以保证训练的稳定性。
21 |
22 | use_varlen_attn
23 | ---------------------------
24 |
25 | ``use_varlen_attn`` 用于配置是否在训练过程中使用\ :ref:`Varlen Flash Attention ` \ 。
26 |
27 | 当 ``use_varlen_attn = True`` 时,要求 ``pack_to_max_length`` 也要设置为 True。在此情况下,每个 token 在注意力计算阶段仅会关注其所在短数据中的所有 tokens (而非整个序列)。
28 |
29 | 当 ``use_varlen_attn = False`` 时,每个 token 在注意力计算阶段会关注整个序列。
30 |
31 | max_position_embeddings
32 | ---------------------------------
33 |
34 | 当需要扩展模型上下文窗口的大小时,需要将 ``max_position_embeddings`` 设置为期望的上下文长度。 **需要保证 max_position_embeddings 不大于 max_length。**\
35 |
36 | 假设需要将 Llama2-7B 模型支持的上下文长度自 4k 拓展为 32k:
37 |
38 | 1. 若训练数据集中存在较多长度接近 32k 的数据,则推荐 ``max_length = 32k, pack_to_max_length = False, use_varlen_attn = False, max_position_embeddings = 32k`` 这一配置
39 | 2. 若训练数据集中长度接近 32k 的数据量较少甚至没有时,则推荐 ``max_length = 32k, pack_to_max_length = True, use_varlen_attn = False, max_position_embeddings = 32k`` 这一配置
40 |
41 | sequence_parallel_size
42 | -------------------------------------------
43 |
44 | 在使用序列并行策略训练超长序列时, ``sequence_parallel_size`` 个 GPUs 会共同计算一条长序列。而 ``accumulative_counts`` 则用于控制模型参数更新的频率。
45 |
46 |
47 | accumulative_counts
48 | ----------------------------------------------
49 | 用于控制模型参数更新的频率;假设需要在 N 块 GPUs 上执行 ``batch_size_per_device = 1, max_length = 128k`` 的训练策略。当设置序列并行维度为 ``sequence_parallel_size`` 后,为了保证训练的等价性, ``accumulative_counts`` 需要设置为原来的 ``sequence_parallel_size`` 倍,因为 128k 长度的序列会被切分为 ``sequence_parallel_size`` 份后分发给 ``sequence_parallel_size`` 个 GPUs 进行训练, ``data_parallel_world_size`` 会变为原来的 :math:`\frac{1}{sequence\_parallel\_size}`。
50 |
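51 | .. tip::
52 |    下面给出一个(假设性的)配置片段,用于说明上述各字段之间的关系;字段名与本文所述一致,但具体取值仅作演示,需结合实际任务调整:
53 |
54 |    .. code-block:: python
55 |
56 |       max_length = 32768
57 |       pack_to_max_length = True
58 |       use_varlen_attn = True             # 要求 pack_to_max_length = True
59 |       max_position_embeddings = 32768    # 不大于 max_length
60 |       sequence_parallel_size = 4         # 4 块 GPU 共同计算一条长序列
61 |       # 为保证与不开启序列并行时等价,梯度累积次数需乘以 sequence_parallel_size
62 |       accumulative_counts = 1 * sequence_parallel_size
63 |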
--------------------------------------------------------------------------------
/docs/zh_cn/acceleration/length_grouped_sampler.rst:
--------------------------------------------------------------------------------
1 | .. _length_grouped_sampler:
2 |
3 | 数据分组
4 | ========================
5 |
6 | .. raw:: html
7 |
8 |    <!-- 图示:随机采样器(左)与基于长度分组的采样器(右)的填充效率对比 -->
9 |
10 |
11 |
12 | 生成式大模型(例如LLM)的训练数据往往是不定长的,这就导致同一批次(batch)内的数据长短不一。为实现并行化训练,一种常见的做法是将同一批次的数据填充到最长长度。然而,这一填充(Pad)操作会导致训练的低效。如上图,假设数据内各样本的长度分别为
13 | 2、3、7、9,期望分为2个批次进行训练,那么如果使用默认的随机采样器(左侧),数据处理阶段会引入过多的填充数据,实际效率只有65.6%。
14 |
15 | 现阶段有两种技术方案可以解决 / 缓解这一问题(两者选其一即可,优先考虑
16 | **数据拼接技术**\ ):
17 |
18 | 1. 利用
19 | **数据拼接技术**\ ,将多条数据拼接至训练支持的最大长度。这一做法可以确保同一批次内的数据长度完全一致,进而避免了填充数据所导致的训练效率降低。具体可参考
20 | \ :ref:`数据拼接文档 ` \ 。
21 |
22 | :优点: 可以合并多个数据样本,显著降低训练 iter 数,加速效果好。
23 |
24 | :缺点: 随机合并的多个数据样本间会互相影响,进而影响训练效果(实际影响程度未知);数据进行了合并,丢失了一定数据随机性。
25 |
26 | 2. (本文)利用
27 | **基于数据长度分组的采样器**\ ,在构建批次数据时,基于实际长度进行排序,确保同一批次内的数据长度尽可能相近,进而尽可能减少填充的长度。如上图右侧,利用该采样器后,同样的数据效率将提升至87.5%。
28 |
29 | :优点: 每条数据依然独立存在(独立计算
30 |    attention),避免数据拼接技术导致的数据样本间的互相影响。
31 |
32 | :缺点: 在数据样本长度比较一致的情况下,加速效果一般;数据进行了分组,丢失了一定数据随机性。
33 |
34 | 使用 ``LengthGroupedSampler``
35 | -----------------------------------------
36 |
37 | XTuner 中基于数据长度分组的采样器 的实现在
38 | `这里 `__\ 。用户可以通过在配置文件中修改
39 | ``train_dataloader`` 的 ``sampler`` 参数进行配置。以
40 | `internlm2_chat_7b_qlora_oasst1_512_e3 `__
41 | 配置文件为例,其默认是使用随机的采样器,我们可以通过下列修改使其使用
42 | 基于数据长度分组的采样器:
43 |
44 | .. code:: diff
45 |
46 | - from mmengine.dataset import DefaultSampler
47 | + from xtuner.dataset.samplers import LengthGroupedSampler
48 |
49 | batch_size = 16 # per_device
50 | accumulative_counts = 1
51 |
52 | train_dataloader = dict(
53 | batch_size=batch_size,
54 | num_workers=dataloader_num_workers,
55 | dataset=train_dataset,
56 | - sampler=dict(type=DefaultSampler, shuffle=True),
57 | + sampler=dict(
58 | + type=LengthGroupedSampler,
59 | + length_property='length',
60 | + per_device_batch_size=batch_size * accumulative_counts),
61 | collate_fn=dict(type=default_collate_fn, use_varlen_attn=use_varlen_attn))
62 |
63 | .. note::
64 | 其中,\ ``length_property``
65 | 需要传入获取数据集长度的“属性”,这一数值在通过 ``process_hf_dataset``
66 | 构建数据集时会自动设置为
67 | ``'length'``\ (因此,如果使用自定义的数据类,请确保这一属性的正确设置)。
68 |
--------------------------------------------------------------------------------
/docs/zh_cn/acceleration/pack_to_max_length.rst:
--------------------------------------------------------------------------------
1 | .. _pack_to_max_length:
2 |
3 | 数据拼接
4 | =========================
5 |
6 | 简介
7 | ---------
8 |
9 | 对于大型语言模型(LLM)的输入而言,“数据集拼接” 这一概念指的是将多个 token 序列拼接成一个单独的输入。大量的数据集都存在一个特点,即其长度分布严重偏向较短的序列,而 Transformers 模型接收固定长度的输入。因此,在模型训练过程中,通常需要将每条数据 "Pad" 至当前 batch 最长序列的长度,而 "Pad Token" 往往是某个特定的无意义的 token。
10 |
11 | 将多条数据打包在一起可以不再需要使用 "Pad Token" 进行无意义的填充,减少计算资源的浪费,同时还可以保持模型作为具有固定大小输入的静态图表示的优点。
12 |
13 | 下表展示了 InternLM2 7B 模型在 Alpaca 数据集上使用不同数据集拼接策略进行训练的速度对比,如表所示,“数据集拼接”会大幅度提升训练效率:
14 |
15 | .. list-table::
16 | :widths: 25 25 15
17 | :header-rows: 1
18 |
19 | * - 拼接策略
20 | - 每秒处理 token 数
21 | - 加速比
22 | * - 不使用
23 | - 362.9
24 | -
25 | * - 拼接至 2k
26 | - 2677.1
27 | - 7.38x
28 | * - 拼接至 4k
29 | - 3124.3
30 | - 8.61x
31 | * - 拼接至 8k
32 | - 3173.9
33 | - 8.76x
34 | * - 拼接至 16k
35 | - 2864.4
36 | - 7.89x
37 | * - 拼接至 32k
38 | - 2965.4
39 | - 8.17x
40 |
41 | 使用数据拼接
42 | ---------------------------
43 |
44 | XTuner 中提供的 config 文件中默认使用了“数据集拼接”这一功能,可以通过设置 ``max_length`` 字段来调整数据拼接长度。例如可通过以下方式将拼接长度调整为 32k :
45 |
46 | .. code-block:: diff
47 |
48 | #######################################################################
49 | # PART 1 Settings #
50 | #######################################################################
51 | - max_length = 2048
52 | + max_length = 32768
53 | pack_to_max_length = True
54 |
55 | #######################################################################
56 | # PART 3 Dataset & Dataloader #
57 | #######################################################################
58 | train_dataset = dict(
59 | max_length=max_length,
60 | pack_to_max_length=pack_to_max_length,
61 | ...)
62 |
63 | .. tip::
64 | 若不想使用数据拼接,在 config 中将 ``pack_to_max_length`` 设为 False 即可,
65 | 此时 config 中的 ``max_length`` 字段表示单条数据最长的 token 数,整个 batch 会被 pad 成当前 batch 内最长的一条数据的长度。
66 |
67 | .. tip::
68 | 在不使用数据拼接策略时,XTuner 还提供了一种数据集采样策略 (``LengthGroupedSampler``),可以保证在一个 batch 中的数据长度尽可能接近,
69 | 以减少 Pad 对计算资源的浪费。详细用法请参考
70 | \ :ref:`LengthGroupedSampler 文档 ` \ 。
71 |
--------------------------------------------------------------------------------
/docs/zh_cn/chat/agent.md:
--------------------------------------------------------------------------------
1 | # 智能体模型对话
2 |
--------------------------------------------------------------------------------
/docs/zh_cn/chat/llm.md:
--------------------------------------------------------------------------------
1 | # 语言模型对话
2 |
--------------------------------------------------------------------------------
/docs/zh_cn/chat/lmdeploy.md:
--------------------------------------------------------------------------------
1 | # 使用 LMDeploy 优化推理速度
2 |
--------------------------------------------------------------------------------
/docs/zh_cn/chat/vlm.md:
--------------------------------------------------------------------------------
1 | # 视觉-语言模型对话
2 |
--------------------------------------------------------------------------------
/docs/zh_cn/dpo/overview.md:
--------------------------------------------------------------------------------
1 | ## DPO 介绍
2 |
3 | ### 简介
4 |
5 | DPO(Direct Preference Optimization,直接偏好优化)是一种在大语言模型训练中用于直接优化模型偏好的方法。与传统的强化学习方法不同,DPO 直接使用人类偏好数据对模型进行优化,从而提高生成内容的质量,使其更符合人类偏好。由于省略了训练 Reward Model 的过程,并且与 PPO 相比进一步省去了 Critic Model,DPO 不但避免了复杂的强化学习算法、减少了训练开销,同时还提高了训练效率。
6 |
7 | DPO 拥有大量的衍生算法,它们对 DPO 的损失函数进行了一定程度上的改进,我们在 XTuner 中除了 DPO 还实现了[Identity Preference Optimisation (IPO)](https://huggingface.co/papers/2310.12036),[Kahneman-Tversky Optimisation (KTO)](https://github.com/ContextualAI/HALOs)等论文中的损失函数,如需使用这些算法,请参考[修改 DPO 配置](./modify_settings.md)章节。我们也提供了一些[示例配置](https://github.com/InternLM/xtuner/tree/main/xtuner/configs/dpo)用于参考。
8 |
9 | 除了 DPO 之外,还出现了如 [ORPO](https://arxiv.org/abs/2403.07691) 等无需参考模型的对齐算法。ORPO 采用了对数比值(odds ratio)的概念来优化模型,通过在模型训练过程中惩罚那些被拒绝的样本,从而更有效地适应被选择的样本。ORPO 消除了对参考模型的依赖,使得训练过程更加简化且高效。XTuner 中 ORPO 的训练方式与 DPO 非常类似,我们提供了一些 ORPO 的[示例配置](https://github.com/InternLM/xtuner/tree/main/xtuner/configs/orpo),用户可以参考 DPO 的教程对配置进行修改。
10 |
11 | ### XTuner 中 DPO 训练的优势
12 |
13 | XTuner 中的 DPO 训练具备以下显著优势:
14 |
15 | 1. **支持最新的算法**:XTuner除了支持标准的 DPO 之外,还支持了大量的衍生算法,同时也支持ORPO等不依赖参考模型的高效算法。
16 |
17 | 2. **减少显存浪费**:由于偏好数据中的 chosen 和 rejected 数据通常存在长度上的差异,因此在训练数据的拼接时会存在填充(padding token),造成显存浪费。在 XTuner 中,基于 Flash Attention2 中的[变长注意力](https://xtuner.readthedocs.io/zh-cn/latest/acceleration/varlen_flash_attn.html)功能,我们在训练过程中通过将偏好数据打包到同一个序列中,显著减少了由于 padding token 带来的显存浪费。这不仅提高了显存的利用效率,还使得在相同硬件条件下可以训练更大的模型或处理更多的数据。
18 |
19 | 
20 |
21 | 3. **高效训练**:借助 XTuner 的 QLoRA 训练功能,参考模型能够被转化为移除LoRA适配器的语言模型,从而省去了参考模型权重的显存占用,大幅降低了 DPO 的训练开销。
22 |
23 | 4. **长文本训练**: 借助 XTuner 的序列并行功能,能够对长文本数据进行训练。
24 |
25 | ### 开始训练
26 |
27 | 请参阅[快速上手](./quick_start.md)来了解最基本的概念,若希望了解更多训练参数配置相关的内容,请参考[修改DPO配置](./modify_settings.md)章节。
28 |
--------------------------------------------------------------------------------
/docs/zh_cn/dpo/quick_start.md:
--------------------------------------------------------------------------------
1 | ## DPO 快速上手
2 |
3 | 在本章节中,我们将介绍如何使用 XTuner 训练 1.8B 的 DPO(Direct Preference Optimization)模型,以帮助您快速上手。
4 |
5 | ### 准备预训练模型权重
6 |
7 | 我们使用经过 SFT 的语言模型[InternLM2-chat-1.8b-sft](https://huggingface.co/internlm/internlm2-chat-1_8b-sft)作为 DPO 模型的初始化模型来进行偏好对齐。
8 |
9 | 在训练配置文件中设置 `pretrained_model_name_or_path = 'internlm/internlm2-chat-1_8b-sft'`,则会在启动训练时自动下载模型文件。若您需要手动下载模型权重,请参考[准备预训练模型权重](https://xtuner.readthedocs.io/zh-cn/latest/preparation/pretrained_model.html)章节,其中详细说明了如何从 HuggingFace 或 ModelScope 下载模型权重。这里我们附上模型的 HuggingFace 链接与 ModelScope 链接:
10 |
11 | - HuggingFace 链接位于:https://huggingface.co/internlm/internlm2-chat-1_8b-sft
12 | - ModelScope 链接位于:https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-1_8b-sft/summary
13 |
14 | ### 准备训练数据
15 |
16 | 在本教程中使用 HuggingFace 上的 [mlabonne/orpo-dpo-mix-40k](https://huggingface.co/datasets/mlabonne/orpo-dpo-mix-40k) 数据集作为演示,相应的数据集配置如下:
17 |
18 | ```python
19 | train_dataset = dict(
20 | type=build_preference_dataset,
21 | dataset=dict(
22 | type=load_dataset,
23 | path='mlabonne/orpo-dpo-mix-40k'),
24 | dataset_map_fn=orpo_dpo_mix_40k_map_fn,
25 | is_dpo=True,
26 | is_reward=False,
27 | )
28 | ```
29 |
30 | 在配置文件中使用以上配置,即可自动下载并处理该数据集。如果您希望使用其他 Huggingface 上的开源数据集或是使用自定义的数据集,请参阅[偏好数据集](../reward_model/preference_data.md)章节。
31 |
32 | ### 准备配置文件
33 |
34 | XTuner 提供了多个开箱即用的配置文件,可以通过 `xtuner list-cfg` 查看。我们执行如下指令,以复制一个配置文件到当前目录。
35 |
36 | ```bash
37 | xtuner copy-cfg internlm2_chat_1_8b_dpo_full .
38 | ```
39 |
40 | 打开复制后的配置文件,如果您选择自动下载模型和数据集,则无需修改配置。若您希望填入您预先下载的模型路径和数据集路径,请修改配置中的`pretrained_model_name_or_path`以及`train_dataset`中`dataset`的`path`参数。
41 |
42 | 更多的训练参数配置,请参阅[修改DPO训练配置](./modify_settings.md)章节。
43 |
44 | ### 启动训练
45 |
46 | 在完成上述操作后,便可以使用下面的指令启动训练任务了。
47 |
48 | ```bash
49 | # 单机单卡
50 | xtuner train ./internlm2_chat_1_8b_dpo_full_copy.py
51 | # 单机多卡
52 | NPROC_PER_NODE=${GPU_NUM} xtuner train ./internlm2_chat_1_8b_dpo_full_copy.py
53 | # slurm 集群
54 | srun ${SRUN_ARGS} xtuner train ./internlm2_chat_1_8b_dpo_full_copy.py --launcher slurm
55 | ```
56 |
57 | ### 模型转换
58 |
59 | XTuner 已经集成好了将模型转换为 HuggingFace 格式的工具,我们只需要执行
60 |
61 | ```bash
62 | # 创建存放 hf 格式参数的目录
63 | mkdir work_dirs/internlm2_chat_1_8b_dpo_full_copy/iter_15230_hf
64 |
65 | # 转换格式
66 | xtuner convert pth_to_hf internlm2_chat_1_8b_dpo_full_copy.py \
67 |                             work_dirs/internlm2_chat_1_8b_dpo_full_copy/iter_15230.pth \
68 |                             work_dirs/internlm2_chat_1_8b_dpo_full_copy/iter_15230_hf
69 | ```
70 |
71 | 便能够将 XTuner 的 ckpt 转换为 Huggingface 格式的模型。
72 |
--------------------------------------------------------------------------------
/docs/zh_cn/evaluation/hook.md:
--------------------------------------------------------------------------------
1 | # 训练过程中评测
2 |
--------------------------------------------------------------------------------
/docs/zh_cn/evaluation/mmbench.md:
--------------------------------------------------------------------------------
1 | # MMBench (VLM)
2 |
--------------------------------------------------------------------------------
/docs/zh_cn/evaluation/mmlu.md:
--------------------------------------------------------------------------------
1 | # MMLU (LLM)
2 |
--------------------------------------------------------------------------------
/docs/zh_cn/evaluation/opencompass.md:
--------------------------------------------------------------------------------
1 | # 使用 OpenCompass 评测
2 |
--------------------------------------------------------------------------------
/docs/zh_cn/get_started/installation.rst:
--------------------------------------------------------------------------------
1 | ==================================
2 | 安装
3 | ==================================
4 |
5 | 本节中,我们将演示如何安装 XTuner。
6 |
7 | 最佳实践
8 | ========
9 |
10 | 我们推荐用户参照我们的最佳实践安装 XTuner。
11 | 推荐使用 Python-3.10 的 conda 虚拟环境安装 XTuner。
12 |
13 | **步骤 0.** 使用 conda 先构建一个 Python-3.10 的虚拟环境
14 |
15 | .. code-block:: console
16 |
17 | $ conda create --name xtuner-env python=3.10 -y
18 | $ conda activate xtuner-env
19 |
20 | **步骤 1.** 安装 XTuner
21 |
22 | 方案a: 通过 pip 直接安装
23 |
24 | .. code-block:: console
25 |
26 | $ pip install -U 'xtuner[deepspeed]'
27 |
28 | 方案b: 从源码安装
29 |
30 | .. code-block:: console
31 |
32 | $ git clone https://github.com/InternLM/xtuner.git
33 | $ cd xtuner
34 | $ pip install -e '.[deepspeed]'
35 |
36 | .. note::
37 |
38 |    "-e" 表示在可编辑模式下安装项目,因此对代码所做的任何本地修改都会生效,而无需重新安装
39 |
40 | 验证
41 | ========
42 |
43 | 为了验证 XTuner 是否安装正确,我们将使用命令打印配置文件。
44 |
45 | **打印配置文件:** 在命令行中使用 ``xtuner list-cfg`` 验证是否能打印配置文件列表。
46 |
47 | .. code-block:: console
48 |
49 | $ xtuner list-cfg
50 |
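51 | 例如,可以通过 ``-p`` 参数按名称过滤配置文件,并将所需配置复制到当前目录以便修改(以下模式仅作演示):
52 |
53 | .. code-block:: console
54 |
55 |    $ xtuner list-cfg -p internlm2
56 |    $ xtuner copy-cfg ${CONFIG_NAME} .
57 |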
--------------------------------------------------------------------------------
/docs/zh_cn/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | pushd %~dp0
4 |
5 | REM Command file for Sphinx documentation
6 |
7 | if "%SPHINXBUILD%" == "" (
8 | set SPHINXBUILD=sphinx-build
9 | )
10 | set SOURCEDIR=.
11 | set BUILDDIR=_build
12 |
13 | %SPHINXBUILD% >NUL 2>NUL
14 | if errorlevel 9009 (
15 | echo.
16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
17 | echo.installed, then set the SPHINXBUILD environment variable to point
18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you
19 | echo.may add the Sphinx directory to PATH.
20 | echo.
21 | echo.If you don't have Sphinx installed, grab it from
22 | echo.https://www.sphinx-doc.org/
23 | exit /b 1
24 | )
25 |
26 | if "%1" == "" goto help
27 |
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 |
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 |
34 | :end
35 | popd
36 |
--------------------------------------------------------------------------------
/docs/zh_cn/models/supported.md:
--------------------------------------------------------------------------------
1 | # 已支持的模型
2 |
--------------------------------------------------------------------------------
/docs/zh_cn/notes/changelog.md:
--------------------------------------------------------------------------------
1 |
14 |
15 | # 变更日志
16 |
17 | ## v0.1.0 (2023.08.30)
18 |
19 | XTuner 正式发布!🔥🔥🔥
20 |
21 | ### 亮点
22 |
23 | - XTuner 支持使用消费级显卡微调大语言模型。微调 7B 大语言模型的最低显存开销仅为 **8GB**。
24 | - XTuner 支持多种大语言模型、数据集、微调算法和训练流程。
25 | - 众多微调好的 adapter 也同步发布,包括调色师、插件对话等多种玩法。更多信息,请访问 [HuggingFace 仓库](https://huggingface.co/xtuner)。
26 |
--------------------------------------------------------------------------------
/docs/zh_cn/reward_model/images/preference_data.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InternLM/xtuner/53f2429d8a4662c04a8a4a2dc5c941672f4d3bdd/docs/zh_cn/reward_model/images/preference_data.png
--------------------------------------------------------------------------------
/docs/zh_cn/reward_model/images/sequence_parallel.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InternLM/xtuner/53f2429d8a4662c04a8a4a2dc5c941672f4d3bdd/docs/zh_cn/reward_model/images/sequence_parallel.png
--------------------------------------------------------------------------------
/docs/zh_cn/reward_model/images/var_len_atten.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InternLM/xtuner/53f2429d8a4662c04a8a4a2dc5c941672f4d3bdd/docs/zh_cn/reward_model/images/var_len_atten.png
--------------------------------------------------------------------------------
/docs/zh_cn/switch_language.md:
--------------------------------------------------------------------------------
1 | ## English
2 |
3 | ## 简体中文
4 |
--------------------------------------------------------------------------------
/docs/zh_cn/training/visualization.rst:
--------------------------------------------------------------------------------
1 | ==============
2 | 可视化训练过程
3 | ==============
4 |
5 | XTuner 支持通过 `MMEngine `__
6 | 使用 `TensorBoard `__
7 | 和 `Weights & Biases (WandB) `__
8 | 实验管理工具,只需在 config 中添加一行代码,就可以跟踪和可视化损失、显存占用等指标。
9 |
10 | TensorBoard
11 | ============
12 |
13 | 1. 设置 config 中的 ``visualizer`` 字段,并将 ``vis_backends`` 设置为 `TensorboardVisBackend `__\ :
14 |
15 | .. code:: diff
16 |
17 | # set visualizer
18 | - visualizer = None
19 | + from mmengine.visualization import Visualizer, TensorboardVisBackend
20 | + visualizer = dict(type=Visualizer, vis_backends=[dict(type=TensorboardVisBackend)])
21 |
22 | 2. 启动实验后,tensorboard 产生的相关文件会保存在 ``vis_data`` 中,通过 tensorboard 命令即可启动实时可视化:
23 |
24 | |image1|
25 |
26 | .. code::
27 |
28 | tensorboard --logdir=$PATH_TO_VIS_DATA
29 |
30 | WandB
31 | ======
32 |
33 | 1. 使用 WandB 前需安装依赖库 ``wandb`` 并登录至 wandb。
34 |
35 | .. code:: console
36 |
37 | $ pip install wandb
38 | $ wandb login
39 |
40 | 2. 设置 config 中的 ``visualizer`` 字段,并将 ``vis_backends`` 设置为 `WandbVisBackend `__\ :
41 |
42 | .. code:: diff
43 |
44 | # set visualizer
45 | + from mmengine.visualization import Visualizer, WandbVisBackend
46 | - visualizer = None
47 | + visualizer = dict(type=Visualizer, vis_backends=[dict(type=WandbVisBackend)])
48 |
49 | .. tip::
50 | 可以点击 `WandbVisBackend
51 | API `__
52 | 查看 ``WandbVisBackend`` 可配置的参数。例如
53 | ``init_kwargs``\ ,该参数会传给
54 | `wandb.init `__ 方法。
55 |
56 | .. code:: diff
57 |
58 | # set visualizer
59 | - visualizer = None
60 | + from mmengine.visualization import Visualizer, WandbVisBackend
61 | + visualizer = dict(
62 | + type=Visualizer,
63 | + vis_backends=[
64 | + dict(type=WandbVisBackend, init_kwargs=dict(project='toy-example'))])
65 |
66 |
67 | 3. 启动实验后,可在 wandb 网页端 ``https://wandb.ai`` 上查看可视化结果:
68 |
69 | |image2|
70 |
71 |
72 | .. |image1| image:: https://github.com/InternLM/xtuner/assets/67539920/abacb28f-5afd-46d0-91b2-acdd20887969
73 | .. |image2| image:: https://github.com/InternLM/xtuner/assets/41630003/fc16387a-3c83-4015-9235-8ec811077953
74 |
--------------------------------------------------------------------------------
/docs/zh_cn/user_guides/ceph.md:
--------------------------------------------------------------------------------
1 | ## 功能说明
2 |
3 | ### 已支持的功能
4 |
5 | - 保存 DeepSpeed Checkpoint 至 CEPH
6 | - 从 Ceph 上的 DeepSpeed Checkpoint 续训
7 | - `pth_to_hf` 支持 Ceph 上的 DeepSpeed Checkpoint
8 |
9 | ### 暂不支持的功能
10 |
11 | - 训练时从 Ceph 加载 Huggingface 模型, 与 `zero3` 加载权重冲突
12 | - HuggingFace `save_pretrained` 保存至 Ceph, 逻辑过于复杂,没办法 patch
13 |
14 | ## 使用说明
15 |
16 | ### 1. 验证 ceph 环境
17 |
18 | 使用前需确保 `petrel sdk` 可用,并且要使用的 Ceph bucket 存在且可用
19 |
20 | 验证 `aws` 命令行工具
21 |
22 | ```bash
23 | # 验证 aws 命令行工具
24 | aws s3 ls $YOUR_BUCKET
25 | ```
26 |
27 | 验证 `petrel sdk`
28 |
29 | ```python
30 | bucket = 's3://xxx'
31 |
32 | from mmengine import get_file_backend
33 | backend = get_file_backend(bucket)
34 |
35 | for f in backend.list_dir_or_file(bucket):
36 | print(f)
37 | ```
38 |
39 | ### 2. 训练时保存 Checkpoint 至 Ceph
40 |
41 | `XTuner` 根据环境变量 `DS_CEPH_DIR` 来判断是否将 checkpoint 保存至 ceph
42 |
43 | ```bash
44 | DS_CEPH_DIR=s3://xxxx srun ${SRUN_ARGS} xtuner train $CONFIG --launcher slurm
45 | ```
46 |
47 | ### 3. 从 Ceph 上的 Checkpoint 续训
48 |
49 | Resume 时,要填写 checkpoint 在 ceph 上的完整路径
50 |
51 | ```bash
52 | DS_CEPH_DIR=s3://xxxx srun ${SRUN_ARGS} xtuner train $CONFIG --launcher slurm --resume s3://xxx/yyy/epoch_x.pth
53 | ```
54 |
55 | ### 4. 将 Ceph 上的 Checkpoint 转换为 HF 模型
56 |
57 | 不支持 `$HF_DIR` 为 ceph 路径
58 |
59 | 由于 Checkpoint 中存储了优化器状态,加载比较耗时,对于 ZeRO 1&2 可以直接加载 checkpoint 中的 `model_states.pt` 文件加速转换过程;ZeRO 3 必须先加载整个 checkpoint
60 |
61 | ```bash
62 | srun ${SRUN_ARGS} xtuner convert pth_to_hf $CONFIG s3://xxx/yyy/epoch_x.pth $HF_DIR
63 |
64 | ```
65 |
--------------------------------------------------------------------------------
/docs/zh_cn/user_guides/ftdp_dataset/README.md:
--------------------------------------------------------------------------------
1 | ftdp 是一个闭源的处理数据工具,开源社区用户可以忽略此文档。
2 |
3 | 本节介绍了常见的 4 种使用 ftdp 数据集训练的使用场景:
4 |
5 | - [Case 1: 使用 Processed 数据集训练 InternLM2](Case1.md)
6 | - [Case 2: 使用 Processed 数据集训练非 InternLM2 模型](Case2.md)
7 | - [Case 3: 使用 Processed 普通对话数据集训练任意模型](Case3.md)
8 | - [Case 4: 使用 Tokenized 数据集训练 InternLM2](Case4.md)
9 |
10 | 请先参考下方流程图,选择自己的使用场景。
11 |
12 | ```mermaid
13 | graph TD;
14 | A{ftdp 数据}
15 | A -->|是| B{数据 tokenized}
16 | B -->|否| C{使用 Internlm2 对话模板}
17 | C -->|是| D{训练 Internlm2 }
18 | D -->|是| E[Case 1]
19 | D -->|否| F[Case 2]
20 | C -->|否| G{离线处理数据集}
21 | G -->|是| H[尚不支持]
22 | G -->|否| I[Case 3]
23 | B -->|是| J[Case 4]
24 | ```
25 |
--------------------------------------------------------------------------------
/docs/zh_cn/user_guides/llava_offline.md:
--------------------------------------------------------------------------------
1 | # 离线处理 Llava 训练数据集
2 |
3 | 当训练数据量非常大时,每次训练的时候都先在线处理数据可能会极为耗时。我们可以先对原始数据进行离线处理并保存至本地,随后的多次训练可以读入本地离线处理好的数据后直接开始训练。
4 |
5 | ## Step 1, 导出模板 config 文件
6 |
7 | 可使用以下命令查看 XTuner 中提供的 Llava 训练相关的 config:
8 |
9 | ```
10 | xtuner list-cfg -p llava
11 | ```
12 |
13 | 找到需要使用的 config 文件并导出至当前目录下:
14 |
15 | ```
16 | xtuner copy-cfg ${CONFIG_NAME} .
17 | ```
18 |
19 | ## Step 2, 离线处理数据集
20 |
21 | 使用以下命令可离线处理训练数据集中的文本数据:
22 |
23 | ```
24 | python xtuner/tools/process_untokenized_llava_data.py \
25 | ${CONFIG_PATH} \
26 | --save-folder /folder/to/save/processed/dataset
27 | ```
28 |
29 | 其中,`${CONFIG_PATH}` 为第一步中导出的 config 文件路径,`/folder/to/save/processed/dataset` 则需要指定为离线处理数据的保存路径。
30 |
31 | ## Step 3, 修改 config 文件
32 |
33 | 对 Step 1 中导出的 config 文件做如下修改:
34 |
35 | ```diff
36 | #######################################################################
37 | # PART 3 Dataset & Dataloader #
38 | #######################################################################
39 | llava_dataset = dict(
40 | - data_path=data_path,
41 | - tokenizer=tokenizer,
42 | + offline_processed_text_folder=/folder/to/save/processed/dataset
43 | ...)
44 | ```
45 |
46 | 其中,`/folder/to/save/processed/dataset` 为 Step 2 保存的离线处理数据路径。
47 |
48 | ## Step 4,开始训练
49 |
50 | 使用 Step 3 修改得到的 config 训练即可。
51 |
--------------------------------------------------------------------------------
/examples/demo_data/multi_turn_1/data.json:
--------------------------------------------------------------------------------
1 | [{
2 | "messages":[
3 | {
4 | "toy_system": "You are a helpful AI assistant.",
5 | "toy_input": "Give three tips for staying healthy.",
6 | "toy_output": "1.Eat a balanced diet. 2. Exercise regularly. 3. Get enough sleep."
7 | },
8 | {
9 | "toy_input": "How to study English?",
10 | "toy_output": "1. Set clear goals. 2. Create a study plan. 3. Build vocabulary. 4. Practice speaking."
11 | }
12 | ]
13 | },
14 | {
15 | "messages":[
16 | {
17 | "toy_system": "You are a helpful AI assistant.",
18 | "toy_input": "How to study English?",
19 | "toy_output": "1. Set clear goals. 2. Create a study plan. 3. Build vocabulary. 4. Practice speaking."
20 | },
21 | {
22 | "toy_input": "Give three tips for staying healthy.",
23 | "toy_output": "1.Eat a balanced diet. 2. Exercise regularly. 3. Get enough sleep."
24 | }
25 | ]
26 | }]
27 |
--------------------------------------------------------------------------------
/examples/demo_data/multi_turn_1/map_fn.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | def multi_turn_1_map_fn(example):
3 | messages = example["messages"]
4 | conversation = []
5 | for msg in messages:
6 | conversation.append(
7 | {
8 | "system": msg["toy_system"],
9 | "input": msg["toy_input"],
10 | "output": msg["toy_output"],
11 | }
12 | )
13 | return {"conversation": conversation}
14 |
--------------------------------------------------------------------------------
/examples/demo_data/multi_turn_2/data.json:
--------------------------------------------------------------------------------
1 | [{
2 | "messages":[
3 | {
4 | "role": "system",
5 | "content": "You are a helpful AI assistant."
6 | },
7 | {
8 | "role": "user",
9 | "content": "Give three tips for staying healthy."
10 | },
11 | {
12 | "role": "assistant",
13 | "content": "1.Eat a balanced diet. 2. Exercise regularly. 3. Get enough sleep."
14 | },
15 | {
16 | "role": "user",
17 | "content": "How to study English?"
18 | },
19 | {
20 | "role": "assistant",
21 | "content": "1. Set clear goals. 2. Create a study plan. 3. Build vocabulary. 4. Practice speaking."
22 | }
23 | ]
24 | },
25 | {
26 | "messages":[
27 | {
28 | "role": "system",
29 | "content": "You are a helpful AI assistant."
30 | },
31 | {
32 | "role": "user",
33 | "content": "How to study English?"
34 | },
35 | {
36 | "role": "assistant",
37 | "content": "1. Set clear goals. 2. Create a study plan. 3. Build vocabulary. 4. Practice speaking."
38 | },
39 | {
40 | "role": "user",
41 | "content": "Give three tips for staying healthy."
42 | },
43 | {
44 | "role": "assistant",
45 | "content": "1.Eat a balanced diet. 2. Exercise regularly. 3. Get enough sleep."
46 | }
47 | ]
48 | }]
49 |
--------------------------------------------------------------------------------
/examples/demo_data/multi_turn_2/map_fn.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | def multi_turn_2_map_fn(example):
3 |     """Convert role-based messages into XTuner's multi-turn conversation format."""
4 |     messages = example["messages"]
5 |     system = ""
6 |     input = ""
7 |     conversation = []
8 |     while messages and messages[0]["role"] == "assistant":
9 |         # Skip the first one if it is from assistant
10 |         messages = messages[1:]
11 |     for msg in messages:
12 |         if msg["role"] == "system":
13 |             system = msg["content"]
14 |         elif msg["role"] == "user":
15 |             input += msg["content"]
16 |         elif msg["role"] == "assistant":
17 |             # Each assistant reply closes one turn: pair it with the
18 |             # accumulated system prompt and user input, then reset both.
19 |             conversation.append(
20 |                 {"system": system, "input": input, "output": msg["content"]}
21 |             )
22 |             system = ""
23 |             input = ""
24 |         else:
25 |             raise NotImplementedError
26 |     return {"conversation": conversation}
27 |
--------------------------------------------------------------------------------
/examples/demo_data/pretrain/data.json:
--------------------------------------------------------------------------------
1 | [{
2 | "toy_text": "I am an artificial intelligence (AI) assistant named InternLM. I was created by the Shanghai AI Laboratory and my purpose is to assist users with various tasks through natural language processing technology."
3 | },
4 | {
5 | "toy_text": "I am an artificial intelligence programmed to assist with various types of tasks, including answering questions, providing information, and performing automated processes."
6 | }]
7 |
--------------------------------------------------------------------------------
/examples/demo_data/pretrain/map_fn.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | def pretrain_map_fn(example):
3 | return {"conversation": [{"input": "", "output": example["toy_text"].strip()}]}
4 |
--------------------------------------------------------------------------------
/examples/demo_data/single_turn/data.json:
--------------------------------------------------------------------------------
1 | [{
2 | "toy_system": "You are a helpful AI assistant.",
3 | "toy_input": "Give three tips for staying healthy.",
4 | "toy_output": "1.Eat a balanced diet. 2. Exercise regularly. 3. Get enough sleep."
5 | },
6 | {
7 | "toy_system": "You are a helpful AI assistant.",
8 | "toy_input": "How to study English?",
9 | "toy_output": "1. Set clear goals. 2. Create a study plan. 3. Build vocabulary. 4. Practice speaking."
10 | }]
11 |
--------------------------------------------------------------------------------
/examples/demo_data/single_turn/map_fn.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | def single_turn_map_fn(example):
3 | return {
4 | "conversation": [
5 | {
6 | "system": example["toy_system"],
7 | "input": example["toy_input"],
8 | "output": example["toy_output"],
9 | }
10 | ]
11 | }
12 |
--------------------------------------------------------------------------------
/examples/huggingface_trainer/README.md:
--------------------------------------------------------------------------------
1 | # How to use XTuner in HuggingFace training pipeline
2 |
3 | ## Quick run
4 |
5 | 1. Step into the `examples` directory
6 |
7 | ```shell
8 | cd ./examples
9 | ```
10 |
11 | 2. Run the training script
12 |
13 | ```shell
14 | # qlora-training internlm-7b with alpaca dataset
15 | python train_qlora_hf.py --model_name_or_path internlm/internlm-7b --dataset_name_or_path tatsu-lab/alpaca
16 | ```
17 |
18 | `--model_name_or_path`: specify the model name or path to train.
19 |
20 | `--dataset_name_or_path`: specify the dataset name or path to use.
21 |
22 | ## How to customize your experiment
23 |
24 | XTuner's APIs are compatible with HuggingFace's transformers.
25 | If you want to customize your experiment, you only need to pass in your hyperparameters the same way you would with HuggingFace's `Trainer`.
26 |
27 | ```shell
28 | # training example
29 | # custom training args: --model_name_or_path, --dataset_name_or_path
30 | # the remaining flags are HuggingFace's default training args
31 | python train_qlora_hf.py \
32 |     --model_name_or_path internlm/internlm-7b \
33 |     --dataset_name_or_path tatsu-lab/alpaca \
34 |     --do_train True \
35 |     --per_device_train_batch_size 1 \
36 |     --learning_rate 2e-5 \
37 |     --save_strategy epoch \
38 |     --lr_scheduler_type cosine \
39 |     --logging_steps 1
40 | ```
41 |
--------------------------------------------------------------------------------
/examples/huggingface_trainer/train_hf.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import transformers
3 | from transformers import Trainer
4 |
5 | from xtuner.apis import DefaultTrainingArguments, build_model
6 | from xtuner.apis.datasets import alpaca_data_collator, alpaca_dataset
7 |
8 |
9 | def train():
10 |     # parse the command-line args into DefaultTrainingArguments
11 | parser = transformers.HfArgumentParser(DefaultTrainingArguments)
12 | training_args = parser.parse_args_into_dataclasses()[0]
13 |
14 | # init model and dataset
15 | model, tokenizer = build_model(
16 | model_name_or_path=training_args.model_name_or_path, return_tokenizer=True
17 | )
18 | train_dataset = alpaca_dataset(
19 | tokenizer=tokenizer, path=training_args.dataset_name_or_path
20 | )
21 | data_collator = alpaca_data_collator(return_hf_format=True)
22 |
23 | # build trainer
24 | trainer = Trainer(
25 | model=model,
26 | args=training_args,
27 | train_dataset=train_dataset,
28 | data_collator=data_collator,
29 | )
30 |
31 | # training
32 | trainer.train()
33 |
34 | trainer.save_state()
35 | trainer.save_model(output_dir=training_args.output_dir)
36 |
37 |
38 | if __name__ == "__main__":
39 | train()
40 |
--------------------------------------------------------------------------------
/examples/huggingface_trainer/train_lora_hf.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import transformers
3 | from transformers import Trainer
4 |
5 | from xtuner.apis import DefaultTrainingArguments, build_lora_model
6 | from xtuner.apis.datasets import alpaca_data_collator, alpaca_dataset
7 |
8 |
9 | def train():
10 |     # parse the command-line args into DefaultTrainingArguments
11 | parser = transformers.HfArgumentParser(DefaultTrainingArguments)
12 | training_args = parser.parse_args_into_dataclasses()[0]
13 |
14 | # init model and dataset
15 | model, tokenizer = build_lora_model(
16 | model_name_or_path=training_args.model_name_or_path, return_tokenizer=True
17 | )
18 | train_dataset = alpaca_dataset(
19 | tokenizer=tokenizer, path=training_args.dataset_name_or_path
20 | )
21 | data_collator = alpaca_data_collator(return_hf_format=True)
22 |
23 | # build trainer
24 | trainer = Trainer(
25 | model=model,
26 | args=training_args,
27 | train_dataset=train_dataset,
28 | data_collator=data_collator,
29 | )
30 |
31 | # training
32 | trainer.train()
33 |
34 | trainer.save_state()
35 | trainer.save_model(output_dir=training_args.output_dir)
36 |
37 |
38 | if __name__ == "__main__":
39 | train()
40 |
--------------------------------------------------------------------------------
/examples/huggingface_trainer/train_qlora_hf.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import transformers
3 | from transformers import Trainer
4 |
5 | from xtuner.apis import DefaultTrainingArguments, build_qlora_model
6 | from xtuner.apis.datasets import alpaca_data_collator, alpaca_dataset
7 |
8 |
9 | def train():
10 |     # parse the command-line args into DefaultTrainingArguments
11 | parser = transformers.HfArgumentParser(DefaultTrainingArguments)
12 | training_args = parser.parse_args_into_dataclasses()[0]
13 |
14 | # init model and dataset
15 | model, tokenizer = build_qlora_model(
16 | model_name_or_path=training_args.model_name_or_path, return_tokenizer=True
17 | )
18 | train_dataset = alpaca_dataset(
19 | tokenizer=tokenizer, path=training_args.dataset_name_or_path
20 | )
21 | data_collator = alpaca_data_collator(return_hf_format=True)
22 |
23 | # build trainer
24 | trainer = Trainer(
25 | model=model,
26 | args=training_args,
27 | train_dataset=train_dataset,
28 | data_collator=data_collator,
29 | )
30 |
31 | # training
32 | trainer.train()
33 |
34 | trainer.save_state()
35 | trainer.save_model(output_dir=training_args.output_dir)
36 |
37 |
38 | if __name__ == "__main__":
39 | train()
40 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | -r requirements/runtime.txt
2 | -r requirements/deepspeed.txt
3 | -r requirements/modelscope.txt
4 |
--------------------------------------------------------------------------------
/requirements/deepspeed.txt:
--------------------------------------------------------------------------------
1 | deepspeed==0.16.2
2 | mpi4py-mpich
3 |
--------------------------------------------------------------------------------
/requirements/docs.txt:
--------------------------------------------------------------------------------
1 | docutils
2 | myst-parser==2.0.0
3 | sphinx==6.2.1
4 | sphinx-argparse
5 | sphinx-book-theme==1.0.1
6 | sphinx-copybutton==0.5.2
7 | sphinx_markdown_tables
8 |
--------------------------------------------------------------------------------
/requirements/lmdeploy.txt:
--------------------------------------------------------------------------------
1 | lmdeploy>=0.6.2 --no-deps
2 |
--------------------------------------------------------------------------------
/requirements/modelscope.txt:
--------------------------------------------------------------------------------
1 | modelscope
2 |
--------------------------------------------------------------------------------
/requirements/runtime.txt:
--------------------------------------------------------------------------------
1 | bitsandbytes==0.45.0
2 | datasets>=3.2.0
3 | einops
4 | loguru
5 | mmengine==0.10.6
6 | openpyxl
7 | peft>=0.14.0
8 | scikit-image
9 | scipy
10 | SentencePiece
11 | tiktoken
12 | torch
13 | torchvision
14 | transformers==4.48.0
15 | transformers_stream_generator
16 |
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [codespell]
2 | ignore-words-list = nd, ba, warmup, ans
3 |
4 | [flake8]
5 | max-line-length = 119
6 | ignore = D107,D202,D203,D401,E203,W503
7 | inline-quotes = double
8 |
9 | [black]
10 | line-length = 119
11 |
12 | [isort]
13 | profile = black
14 |
15 |
16 |
--------------------------------------------------------------------------------
/xtuner/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import os
3 |
4 | from mmengine.utils import digit_version
5 |
6 | from .entry_point import cli
7 | from .version import __version__, version_info
8 |
9 | HF_CEPH_HUB = os.getenv("HF_CEPH_HUB", "")
10 | HF_USE_CEPH = os.getenv("HF_USE_CEPH", 0) or HF_CEPH_HUB != ""
11 | DS_CEPH_DIR = os.getenv("DS_CEPH_DIR", None)
12 | if HF_USE_CEPH:
13 | from .utils.fileio import patch_hf_auto_from_pretrained, patch_hf_save_pretrained
14 |
15 | patch_hf_auto_from_pretrained(HF_CEPH_HUB)
16 | patch_hf_save_pretrained()
17 |
18 | if DS_CEPH_DIR:
19 | from .utils.fileio import patch_deepspeed_engine
20 |
21 | patch_deepspeed_engine()
22 |
23 | __all__ = [
24 | "__version__",
25 | "version_info",
26 | "digit_version",
27 | "cli",
28 | "HF_USE_CEPH",
29 | "DS_CEPH_DIR",
30 | ]
31 |
--------------------------------------------------------------------------------
/xtuner/_lite/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import os
3 | import subprocess
4 | import sys
5 |
6 | from loguru import logger
7 |
8 | from .device import get_device, get_torch_device_module
9 |
10 | _LOGGER = None
11 |
12 |
13 | def log_format(debug=False):
14 | formatter = "[XTuner][{time:YYYY-MM-DD HH:mm:ss}][{level}]"
15 |
16 | if debug:
17 | formatter += "[{name}:"
18 | formatter += "{function}:"
19 | formatter += "{line}]"
20 |
21 | formatter += " {message}"
22 | return formatter
23 |
24 |
25 | def get_logger(level="INFO"):
26 | global _LOGGER
27 | if _LOGGER is None:
28 | # Remove the original logger in Python to prevent duplicate printing.
29 | logger.remove()
30 | logger.add(sys.stderr, level=level, format=log_format(debug=level == "DEBUG"))
31 | _LOGGER = logger
32 | return _LOGGER
33 |
34 |
35 | def get_repo_git_info(repo_path):
36 | original_directory = os.getcwd()
37 | os.chdir(repo_path)
38 |
39 | try:
40 | branch = (
41 | subprocess.check_output(
42 | ["git", "rev-parse", "--abbrev-ref", "HEAD"], stderr=subprocess.STDOUT
43 | )
44 | .strip()
45 | .decode("utf-8")
46 | )
47 |
48 | commit_id = (
49 | subprocess.check_output(
50 | ["git", "rev-parse", "HEAD"], stderr=subprocess.STDOUT
51 | )
52 | .strip()
53 | .decode("utf-8")
54 | )
55 |
56 | remote_url = (
57 | subprocess.check_output(
58 | ["git", "remote", "get-url", "origin"], stderr=subprocess.STDOUT
59 | )
60 | .strip()
61 | .decode("utf-8")
62 | )
63 |
64 | return branch, commit_id, remote_url
65 | except subprocess.CalledProcessError:
66 | return None, None, None
67 | finally:
68 | os.chdir(original_directory)
69 |
70 |
71 | __all__ = [
72 |     "get_device",
73 |     "get_torch_device_module",
74 |     "get_logger",
75 |     "get_repo_git_info",
76 |     "log_format",
77 | ]
78 |
--------------------------------------------------------------------------------
/xtuner/_lite/accelerate/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .lora import LORA_TARGET_MAP
3 | from .packed import pack_sequence, unpack_sequence
4 | from .utils import (
5 | liger_kernel_is_available,
6 | lmdeploy_is_available,
7 | mlu_is_available,
8 | npu_is_available,
9 | profile_time_and_memory,
10 | varlen_attn_is_available,
11 | )
12 |
13 | __all__ = [
14 |     "LORA_TARGET_MAP",
15 |     "pack_sequence",
16 |     "unpack_sequence",
17 |     "liger_kernel_is_available",
18 |     "varlen_attn_is_available",
19 |     "lmdeploy_is_available",
20 |     "npu_is_available",
21 |     "mlu_is_available",
22 |     "profile_time_and_memory",
23 | ]
24 |
--------------------------------------------------------------------------------
/xtuner/_lite/accelerate/lora.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | LORA_TARGET_MAP = {
3 | "InternLM2ForCausalLM": ["wqkv", "wo", "w1", "w2", "w3"],
4 | "CLIPVisionModel": ["q_proj", "k_proj", "v_proj", "out_proj", "fc1", "fc2"],
5 | }
6 |
--------------------------------------------------------------------------------
/xtuner/_lite/accelerate/ops/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .moe_permute import GROUPED_GEMM_INSTALLED, permute_func, unpermute_func
3 |
4 | __all__ = ["GROUPED_GEMM_INSTALLED", "permute_func", "unpermute_func"]
5 |
--------------------------------------------------------------------------------
/xtuner/_lite/accelerate/packed.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from typing import List, Union
3 |
4 | import torch
5 |
6 |
7 | def unpack_sequence(packed: torch.Tensor, num_tokens: Union[torch.Tensor, List], dim=1):
8 |     """Split a packed tensor back into per-sample sequences along `dim`."""
9 |     if isinstance(num_tokens, torch.Tensor):
10 |         num_tokens = num_tokens.tolist()
11 |     sequences = torch.split(packed, num_tokens, dim=dim)
12 |     return sequences
13 |
14 |
15 | def pack_sequence(sequences, dim=1):
16 |     """Concatenate sequences along `dim` and return the per-sample token counts."""
17 |     num_tokens = torch.IntTensor([seq.size(dim) for seq in sequences])
18 |     packed = torch.cat(sequences, dim=dim)
19 |     return packed, num_tokens.to(packed.device)
20 |
21 |
22 | def packed_cumulative_length(num_tokens: torch.Tensor):
23 |     """Cumulative sequence lengths prefixed with 0, as used by varlen attention."""
24 |     device = num_tokens.device
25 |     _zero_pad = torch.zeros(1, device=device)
26 |     _pad_length = torch.cat([_zero_pad, num_tokens]).int()
27 |     return torch.cumsum(_pad_length, 0).int()
28 |
--------------------------------------------------------------------------------
/xtuner/_lite/accelerate/utils.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import time
3 | from contextlib import contextmanager
4 |
5 | from transformers.utils.import_utils import is_flash_attn_2_available
6 |
7 | from xtuner._lite import get_device, get_logger, get_torch_device_module
8 |
9 | logger = get_logger()
10 |
11 |
12 | def npu_is_available():
13 | return get_device() == "npu"
14 |
15 |
16 | def mlu_is_available():
17 | return get_device() == "mlu"
18 |
19 |
20 | def varlen_attn_is_available():
21 | return is_flash_attn_2_available() or npu_is_available()
22 |
23 |
24 | def lmdeploy_is_available():
25 | available = False
26 | try:
27 | import lmdeploy # noqa: F401
28 |
29 | available = True
30 | except ImportError:
31 | available = False
32 |
33 | return available
34 |
35 |
36 | def liger_kernel_is_available():
37 | available = False
38 | try:
39 | import liger_kernel # noqa: F401
40 |
41 | available = True
42 | except ImportError:
43 | available = False
44 |
45 | return available
46 |
47 |
48 | @contextmanager
49 | def profile_time_and_memory(desc):
50 | torch_device = get_torch_device_module()
51 | start_t = time.time()
52 | torch_device.reset_peak_memory_stats()
53 |
54 | yield
55 |
56 | max_memory = torch_device.max_memory_allocated()
57 | cost_time = time.time() - start_t
58 |
59 | logger.success(
60 | f"{desc} Elapsed time {cost_time:.2f} seconds, "
61 | f"peak gpu memory {max_memory/1024**3:.1f}G"
62 | )
63 |
--------------------------------------------------------------------------------
/xtuner/_lite/algorithms/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 |
--------------------------------------------------------------------------------
/xtuner/_lite/algorithms/ppo/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .dataset import (
3 | InferDataset,
4 | PPOTokenizeFunction,
5 | RewardBuffer,
6 | RewardBufferCollator,
7 | )
8 | from .loss import (
9 | CriticLoss,
10 | PPOPolicyLoss,
11 | compute_advantages_and_returns,
12 | compute_kl_rewards,
13 | gather_logprobs,
14 | )
15 | from .model import build_actor_model, build_reward_model
16 |
17 | __all__ = [
18 |     "InferDataset",
19 |     "RewardBuffer",
20 |     "RewardBufferCollator",
21 |     "PPOTokenizeFunction",
22 |     "CriticLoss",
23 |     "PPOPolicyLoss",
24 |     "compute_advantages_and_returns",
25 |     "compute_kl_rewards",
26 |     "gather_logprobs",
27 |     "build_actor_model",
28 |     "build_reward_model",
29 | ]
30 |
--------------------------------------------------------------------------------
/xtuner/_lite/algorithms/ppo/model.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import torch
3 | from transformers import AutoConfig, AutoModel, AutoModelForCausalLM
4 | from transformers.utils.import_utils import (
5 | is_flash_attn_2_available,
6 | is_torch_sdpa_available,
7 | )
8 |
9 | from xtuner._lite.accelerate import LoadWoInit
10 |
11 |
12 | def build_actor_model(model_path, dtype=torch.float32, trust_remote_code=True):
13 | config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
14 | if is_flash_attn_2_available():
15 | config.attn_implementation = "flash_attention_2"
16 | elif is_torch_sdpa_available():
17 | config.attn_implementation = "sdpa"
18 |
19 | with LoadWoInit():
20 | policy = AutoModelForCausalLM.from_pretrained(
21 | model_path,
22 | attn_implementation="flash_attention_2",
23 | torch_dtype=dtype,
24 | trust_remote_code=trust_remote_code,
25 | )
26 |
27 | return policy
28 |
29 |
30 | def build_reward_model(model_path, dtype=torch.float32, trust_remote_code=True):
31 | config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
32 | if is_flash_attn_2_available():
33 | config.attn_implementation = "flash_attention_2"
34 | elif is_torch_sdpa_available():
35 | config.attn_implementation = "sdpa"
36 |
37 | config.use_cache = False
38 | config.torch_dtype = dtype
39 | with LoadWoInit():
40 | reward = AutoModel.from_pretrained(
41 | model_path,
42 |             attn_implementation=getattr(config, "attn_implementation", None),
43 | torch_dtype=dtype,
44 | trust_remote_code=trust_remote_code,
45 | )
46 |
47 | reward.model.use_cache = False
48 |
49 | return reward
50 |
--------------------------------------------------------------------------------
/xtuner/_lite/algorithms/sft/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .dataset import SftCollator, SftTokenizeFunction
3 |
4 | __all__ = ["SftCollator", "SftTokenizeFunction"]
5 |
--------------------------------------------------------------------------------
/xtuner/_lite/chat/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .messages import ChatMessages
3 | from .templates import CHAT_TEMPLATE_MAP, ChatTemplate, HybridChatTemplate
4 |
5 | __all__ = ["ChatMessages", "CHAT_TEMPLATE_MAP", "ChatTemplate", "HybridChatTemplate"]
6 |
--------------------------------------------------------------------------------
/xtuner/_lite/chat/backends/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 |
--------------------------------------------------------------------------------
/xtuner/_lite/chat/messages/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .base import BaseMessages
3 | from .chat import ChatMessages
4 |
5 | __all__ = ["BaseMessages", "ChatMessages"]
6 |
--------------------------------------------------------------------------------
/xtuner/_lite/chat/messages/base.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from abc import abstractmethod
3 | from typing import Dict
4 |
5 | from pydantic import BaseModel
6 | from transformers import PreTrainedTokenizer
7 |
8 | from ..templates import ChatTemplate
9 |
10 |
11 | class BaseMessages(BaseModel):
12 | @abstractmethod
13 | def add(self, role: str, content):
14 | pass
15 |
16 | @abstractmethod
17 | def pop(self):
18 | pass
19 |
20 | @abstractmethod
21 | def get_prompt(self, chat_template: ChatTemplate) -> str:
22 | pass
23 |
24 | @abstractmethod
25 | def tokenize(
26 | self, tokenizer: PreTrainedTokenizer, chat_template: ChatTemplate
27 | ) -> Dict:
28 | pass
29 |
30 |     @classmethod
31 |     @abstractmethod
32 |     def from_dict(cls, item: Dict) -> "BaseMessages":
33 |         pass
34 | 
--------------------------------------------------------------------------------
/xtuner/_lite/chat/templates/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .chat import ChatTemplate
3 | from .hybrid import HybridChatTemplate
4 |
5 | CHAT_TEMPLATE_MAP = {
6 | "internlm2": HybridChatTemplate(
7 | system="<|im_start|>system\n{system}<|im_end|>\n",
8 | user="<|im_start|>user\n{user}<|im_end|>\n<|im_start|>assistant\n",
9 | assistant="{assistant}<|im_end|>",
10 | stop_words=["<|im_end|>"],
11 | ),
12 | "qwen2": HybridChatTemplate(
13 | system="<|im_start|>system\n{system}<|im_end|>\n",
14 | user="<|im_start|>user\n{user}<|im_end|>\n<|im_start|>assistant\n",
15 | assistant="{assistant}<|im_end|>",
16 | stop_words=["<|im_end|>", "<|endoftext|>"],
17 | ),
18 | "llama3": HybridChatTemplate(
19 | system=("<|start_header_id|>system<|end_header_id|>\n\n{system}" "<|eot_id|>"),
20 | user=(
21 | "<|start_header_id|>user<|end_header_id|>\n\n{user}<|eot_id|>"
22 | "<|start_header_id|>assistant<|end_header_id|>\n\n"
23 | ),
24 | assistant="{assistant}<|eot_id|>",
25 | sep="",
26 | stop_words=["<|eot_id|>"],
27 | ),
28 | }
29 |
30 | __all__ = ["ChatTemplate", "HybridChatTemplate"]
31 |
--------------------------------------------------------------------------------
/xtuner/_lite/chat/templates/chat.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from typing import List
3 |
4 | from pydantic import BaseModel, field_validator
5 |
6 |
7 | class ChatTemplate(BaseModel):
8 | """Define a Pydantic data model for a hybrid chat with attributes for
9 | system, user and assistant chat as well as function and interpreter calls
10 | and results."""
11 |
12 | # Normal Chat
13 | system: str # System message format
14 | user: str # User message format
15 | assistant: str # Assistant message format
16 | stop_words: List[str] # List of stop words
17 | sep: str = "\n"
18 |
19 | def decorate_system(self, text: str) -> str:
20 | """Decorate text with the `system` template."""
21 | return self.system.format(system=text)
22 |
23 | def decorate_assistant(self, text: str) -> str:
24 | """Decorate text with the `assistant` template."""
25 | return self.assistant.format(assistant=text)
26 |
27 | def decorate_user(self, text: str) -> str:
28 | """Decorate text with the `user` template."""
29 | return self.user.format(user=text)
30 |
31 | @field_validator("system")
32 | def check_system(cls, v: str) -> str:
33 | """Validate that `system` contains '{system}'.
34 |
35 | If not, raises a ValueError.
36 | """
37 | if v is not None and "{system}" not in v:
38 | raise ValueError("system must contain the keyword '{system}'")
39 | return v
40 |
41 | @field_validator("user")
42 | def check_user(cls, v: str) -> str:
43 | """Validate that `user` contains '{user}'.
44 |
45 | If not, raises a ValueError.
46 | """
47 | if v is not None and "{user}" not in v:
48 | raise ValueError("user must contain the keyword '{user}'")
49 | return v
50 |
51 | @field_validator("assistant")
52 | def check_assistant(cls, v: str) -> str:
53 | """Validate that `assistant` contains '{assistant}'.
54 |
55 | If not, raises a ValueError.
56 | """
57 | if v is not None and "{assistant}" not in v:
58 | raise ValueError("assistant must contain the keyword '{assistant}'")
59 | return v
60 |
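A minimal usage sketch of the class above (illustrative, not from the source), assuming `ChatTemplate` is imported via `from xtuner._lite.chat import ChatTemplate`: the `decorate_*` helpers fill in the placeholders, and the field validators reject format strings that are missing them.

```python
from pydantic import ValidationError

from xtuner._lite.chat import ChatTemplate

# A toy template; every format string must contain its own placeholder.
template = ChatTemplate(
    system="<|system|>\n{system}\n",
    user="<|user|>\n{user}\n<|assistant|>\n",
    assistant="{assistant}\n",
    stop_words=["<|end|>"],
)

print(template.decorate_system("You are a helpful assistant."))
print(template.decorate_user("What is 2 + 2?"))
print(template.decorate_assistant("4"))

# Missing placeholder -> pydantic raises a ValidationError.
try:
    ChatTemplate(system="oops", user="{user}", assistant="{assistant}", stop_words=[])
except ValidationError as err:
    print(err)
```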
--------------------------------------------------------------------------------
/xtuner/_lite/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .json import JsonDataset
3 | from .jsonl import JsonlDataset
4 | from .pack import SoftPackDataset
5 | from .utils import DATASET_CLS_MAP, OPENAI_CONVERT_MAP, load_datasets
6 |
7 | __all__ = [
8 | "JsonDataset",
9 | "JsonlDataset",
10 | "SoftPackDataset",
11 | "DATASET_CLS_MAP",
12 | "OPENAI_CONVERT_MAP",
13 | "load_datasets",
14 | ]
15 |
--------------------------------------------------------------------------------
/xtuner/_lite/datasets/streaming.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 |
3 |
4 | class Streaming:
5 | def __init__(self, file, max_epoch=1):
6 | self.file = file
7 | self.offset = 0
8 | self.epoch = 1
9 | self.max_epoch = max_epoch
10 |
11 | def __iter__(self):
12 | return self
13 |
14 |     def __next__(self):
15 |         with open(self.file) as f:
16 |             f.seek(self.offset)
17 |             line = f.readline()
18 |             self.offset = f.tell()
19 | 
20 |         if not line and self.epoch < self.max_epoch:
21 |             self.offset = 0
22 |             self.epoch += 1
23 |             return next(self)
24 | 
25 |         if not line and self.epoch == self.max_epoch:
26 |             raise StopIteration
27 | 
28 |         return line
29 |
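A small, hypothetical usage sketch (not part of the source): `Streaming` re-opens the file on every `__next__` call and remembers its byte offset, so a JSONL file can be iterated lazily; `max_epoch` controls how many passes are made before `StopIteration`. It assumes the import path `xtuner._lite.datasets.streaming`.

```python
import json
import tempfile

from xtuner._lite.datasets.streaming import Streaming

# Write a tiny JSONL file to stream from.
with tempfile.NamedTemporaryFile("w", suffix=".jsonl", delete=False) as f:
    for i in range(3):
        f.write(json.dumps({"id": i}) + "\n")
    path = f.name

# Two epochs: after the first pass the offset is reset and the file replays once.
for line in Streaming(path, max_epoch=2):
    print(json.loads(line))
```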
--------------------------------------------------------------------------------
/xtuner/_lite/datasets/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .convert import OPENAI_CONVERT_MAP
3 | from .load import DATASET_CLS_MAP, load_datasets
4 | from .utils import apply_exif_orientation, move_data_to_device
5 |
6 | __all__ = [
7 | "OPENAI_CONVERT_MAP",
8 | "DATASET_CLS_MAP",
9 | "load_datasets",
10 | "apply_exif_orientation",
11 | "move_data_to_device",
12 | ]
13 |
--------------------------------------------------------------------------------
/xtuner/_lite/datasets/utils/utils.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from collections.abc import Mapping
3 |
4 | import torch
5 | from PIL import Image
6 |
7 | _EXIF_ORIENT = 274 # exif 'Orientation' tag
8 |
9 |
10 | def apply_exif_orientation(image):
11 | """Applies the exif orientation correctly.
12 | 
13 |     This code exists because of the bug
14 |     https://github.com/python-pillow/Pillow/issues/3973
15 |     in the function `ImageOps.exif_transpose`: the Pillow implementation
16 |     raises errors with various methods, especially `tobytes`.
17 |
18 | Function based on:
19 | https://github.com/wkentaro/labelme/blob/v4.5.4/labelme/utils/image.py#L59
20 | https://github.com/python-pillow/Pillow/blob/7.1.2/src/PIL/ImageOps.py#L527
21 |
22 | Args:
23 | image (PIL.Image): a PIL image
24 |
25 | Returns:
26 | (PIL.Image): the PIL image with exif orientation applied, if applicable
27 | """
28 | if not hasattr(image, "getexif"):
29 | return image
30 |
31 | try:
32 | exif = image.getexif()
33 | except Exception: # https://github.com/facebookresearch/detectron2/issues/1885
34 | exif = None
35 |
36 | if exif is None:
37 | return image
38 |
39 | orientation = exif.get(_EXIF_ORIENT)
40 |
41 | method = {
42 | 2: Image.FLIP_LEFT_RIGHT,
43 | 3: Image.ROTATE_180,
44 | 4: Image.FLIP_TOP_BOTTOM,
45 | 5: Image.TRANSPOSE,
46 | 6: Image.ROTATE_270,
47 | 7: Image.TRANSVERSE,
48 | 8: Image.ROTATE_90,
49 | }.get(orientation)
50 |
51 | if method is not None:
52 | return image.transpose(method)
53 | return image
54 |
55 |
56 | def move_data_to_device(data, device="cuda"):
57 | """Prepares one `data` before feeding it to the model, be it a tensor or a
58 | nested list/dictionary of tensors."""
59 |     if isinstance(data, Mapping):
60 |         return type(data)(
61 |             {k: move_data_to_device(v, device) for k, v in data.items()}
62 |         )
63 |     elif isinstance(data, (tuple, list)):
64 |         return type(data)(move_data_to_device(v, device) for v in data)
65 |     elif isinstance(data, torch.Tensor):
66 |         kwargs = {"device": device}
67 |         return data.to(non_blocking=True, **kwargs)
68 |     return data
69 | 
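For illustration only (not from the source), `move_data_to_device` recurses through nested dicts and lists and moves only the tensors it finds; non-tensor leaves are returned unchanged. The snippet assumes the import path `xtuner._lite.datasets.utils`.

```python
import torch

from xtuner._lite.datasets.utils import move_data_to_device

batch = {
    "input_ids": torch.randint(0, 100, (2, 8)),
    "extras": [torch.zeros(2, 8), "not-a-tensor"],  # strings pass through untouched
}

# Fall back to CPU so the snippet also runs without an accelerator.
device = "cuda" if torch.cuda.is_available() else "cpu"
moved = move_data_to_device(batch, device=device)
print(moved["input_ids"].device, type(moved["extras"][1]))
```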
--------------------------------------------------------------------------------
/xtuner/_lite/device.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import torch
3 |
4 |
5 | def get_device():
6 | device = None
7 | if torch.cuda.is_available():
8 | device = "cuda"
9 | else:
10 | try:
11 | import torch_npu # noqa: F401
12 |
13 | device = "npu"
14 | except ImportError:
15 | pass
16 | try:
17 | import torch_mlu # noqa: F401
18 |
19 | device = "mlu"
20 | except ImportError:
21 | pass
22 |
23 | if device is None:
24 |         raise NotImplementedError(
25 |             "Supports only CUDA, NPU or MLU. If your device is CUDA, "
26 |             "NPU or MLU, please make sure that your environment is "
27 |             "configured correctly."
28 |         )
29 |
30 | return device
31 |
32 |
33 | def get_torch_device_module():
34 | device = get_device()
35 | if device == "cuda":
36 | return torch.cuda
37 | elif device == "npu":
38 | return torch.npu
39 | elif device == "mlu":
40 | return torch.mlu
41 | else:
42 | raise NotImplementedError
43 |
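A short, hypothetical call site for the two helpers above (it only runs on a machine where `get_device` succeeds): resolve the device string once, then use the matching torch device module for device-level calls.

```python
from xtuner._lite.device import get_device, get_torch_device_module

device = get_device()                      # "cuda", "npu" or "mlu"
device_module = get_torch_device_module()  # e.g. torch.cuda on CUDA machines

print(f"{device_module.device_count()} {device} device(s) visible")
device_module.empty_cache()                # free cached blocks on the accelerator
```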
--------------------------------------------------------------------------------
/xtuner/_lite/modelings/__init__.py:
--------------------------------------------------------------------------------
1 | from .internlm2 import InternLM2Config, InternLM2ForCausalLM
2 | from .internlm3 import InternLM3Config, InternLM3ForCausalLM, InternLM3Tokenizer
3 | from .llava.configuration_llava import EnhancedLlavaConfig
4 | from .llava.modeling_llava import LlavaForConditionalGeneration
5 | from .llava.processing_llava import LlavaProcessor
6 | 
7 | 
8 | def register_remote_code():
9 |     from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer
10 | 
11 |     AutoConfig.register("internlm2", InternLM2Config, exist_ok=True)
12 |     AutoModelForCausalLM.register(
13 |         InternLM2Config, InternLM2ForCausalLM, exist_ok=True
14 |     )
15 | 
16 |     AutoConfig.register("internlm3", InternLM3Config, exist_ok=True)
17 |     AutoModelForCausalLM.register(
18 |         InternLM3Config, InternLM3ForCausalLM, exist_ok=True
19 |     )
20 |     AutoTokenizer.register(InternLM3Config, InternLM3Tokenizer, exist_ok=True)
21 | 
--------------------------------------------------------------------------------
/xtuner/_lite/modelings/internlm2/__init__.py:
--------------------------------------------------------------------------------
1 | from .configuration_internlm2 import InternLM2Config
2 | from .modeling_internlm2 import InternLM2ForCausalLM
3 |
--------------------------------------------------------------------------------
/xtuner/_lite/modelings/internlm3/__init__.py:
--------------------------------------------------------------------------------
1 | from .configuration_internlm3 import InternLM3Config
2 | from .modeling_internlm3 import InternLM3ForCausalLM
3 | from .tokenization_internlm3 import InternLM3Tokenizer
4 |
--------------------------------------------------------------------------------
/xtuner/_lite/modelings/internvl2/__init__.py:
--------------------------------------------------------------------------------
1 | from .modeling_intern_vit import InternVisionModel
2 |
3 | __all__ = ["InternVisionModel"]
4 |
--------------------------------------------------------------------------------
/xtuner/_lite/modelings/llava/__init__.py:
--------------------------------------------------------------------------------
1 | from .configuration_llava import EnhancedLlavaConfig
2 | from .modeling_llava import LlavaForConditionalGeneration
3 | from .processing_llava import LlavaProcessor
4 |
--------------------------------------------------------------------------------
/xtuner/_lite/parallel/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .comm import all_to_all, all_to_all_list, barrier
3 | from .sampler import LengthGroupedSampler, ParallelSampler, VLMLengthGroupedSampler
4 | from .sequence import * # noqa: F401, F403
5 | from .setup import setup_parallel
6 |
7 | __all__ = [
8 | "ParallelSampler",
9 | "LengthGroupedSampler",
10 | "VLMLengthGroupedSampler",
11 | "all_to_all",
12 | "all_to_all_list",
13 | "setup_parallel",
14 | "barrier",
15 | ]
16 |
--------------------------------------------------------------------------------
/xtuner/_lite/parallel/sequence/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from mmengine.dist import init_dist
3 |
4 | from .attention import (
5 | post_process_for_sequence_parallel_attn,
6 | pre_process_for_sequence_parallel_attn,
7 | )
8 | from .ops import (
9 | gather_for_sequence_parallel,
10 | gather_forward_split_backward,
11 | split_for_sequence_parallel,
12 | split_forward_gather_backward,
13 | )
14 |
15 | __all__ = [
16 | "pre_process_for_sequence_parallel_attn",
17 | "post_process_for_sequence_parallel_attn",
18 | "split_for_sequence_parallel",
19 | "init_dist",
20 | "gather_for_sequence_parallel",
21 | "split_forward_gather_backward",
22 | "gather_forward_split_backward",
23 | ]
24 |
--------------------------------------------------------------------------------
/xtuner/_lite/parallel/sequence/attention.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import torch
3 | from torch.distributed.device_mesh import DeviceMesh
4 |
5 | from ..comm import all_to_all
6 |
7 |
8 | def pre_process_for_sequence_parallel_attn(
9 | query_states: torch.Tensor,
10 | key_states: torch.Tensor,
11 | value_states: torch.Tensor,
12 | sp_mesh: DeviceMesh,
13 | scatter_dim: int = 2,
14 | gather_dim: int = 1,
15 | ):
16 | sp_size = sp_mesh.size()
17 | n_head = query_states.shape[2]
18 | assert n_head % sp_size == 0, (
19 | "The number of attention heads should be divisible by "
20 | f"sequence_parallel_world_size. But got n_head = {n_head} and "
21 | f"sequence_parallel_world_size = {sp_size}."
22 | )
23 |
24 | # (b, s // sp_world_size, nd, dim) -> (b, s, nd // sp_world_size, dim)
25 | sp_group = sp_mesh.get_group()
26 | query_states = all_to_all(
27 | query_states, sp_group, scatter_dim=scatter_dim, gather_dim=gather_dim
28 | )
29 | key_states = all_to_all(
30 | key_states, sp_group, scatter_dim=scatter_dim, gather_dim=gather_dim
31 | )
32 | value_states = all_to_all(
33 | value_states, sp_group, scatter_dim=scatter_dim, gather_dim=gather_dim
34 | )
35 |
36 | return query_states, key_states, value_states
37 |
38 |
39 | def post_process_for_sequence_parallel_attn(
40 | attn_output: torch.Tensor, sp_mesh: DeviceMesh, scatter_dim=1, gather_dim=2
41 | ):
42 | # (b, s, nd // sp_world_size, dim) -> (b, s // sp_world_size, nd, dim)
43 | sp_group = sp_mesh.get_group()
44 | output = all_to_all(
45 | attn_output, sp_group, scatter_dim=scatter_dim, gather_dim=gather_dim
46 | )
47 | return output
48 |
--------------------------------------------------------------------------------
/xtuner/_lite/parallel/setup.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import torch
3 | import torch.distributed as dist
4 | from mmengine.dist import infer_launcher, init_dist
5 | from torch._C._distributed_c10d import ReduceOp
6 | from torch.distributed.c10d_logger import _exception_logger
7 |
8 | from xtuner._lite import get_device
9 |
10 | origin_reduce_scatter_tensor = torch.distributed.reduce_scatter_tensor
11 |
12 |
13 | # MLU's reduce_scatter_tensor does not support ReduceOp.AVG; emulate it with ReduceOp.SUM / group_world_size.
14 | @_exception_logger
15 | def mlu_reduce_scatter_tensor(
16 | output, input, op=ReduceOp.SUM, group=None, async_op=False
17 | ):
18 | if op == ReduceOp.AVG:
19 | result = origin_reduce_scatter_tensor(
20 | output, input, ReduceOp.SUM, group, async_op
21 | )
22 | output.div_(torch.distributed.get_world_size(group))
23 | return result
24 | else:
25 | return origin_reduce_scatter_tensor(output, input, op, group, async_op)
26 |
27 |
28 | def setup_parallel():
29 | if not dist.is_initialized():
30 | dist_launcher = infer_launcher()
31 | init_dist(dist_launcher)
32 |
33 | device = get_device()
34 |
35 | if device == "mlu":
36 | torch.distributed.reduce_scatter_tensor = mlu_reduce_scatter_tensor
37 |
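The patch above rests on a simple identity, sketched here without any process group (purely illustrative): averaging shards across ranks is the same as summing them and dividing by the group size, which is exactly what `mlu_reduce_scatter_tensor` does when it sees `ReduceOp.AVG`.

```python
import torch

# Pretend three ranks each contribute one shard of the same size.
world_size = 3
shards = [torch.tensor([1.0, 2.0]), torch.tensor([3.0, 4.0]), torch.tensor([5.0, 6.0])]

summed = torch.stack(shards).sum(dim=0)  # what ReduceOp.SUM would produce
averaged = summed / world_size           # the emulated ReduceOp.AVG
print(averaged)                          # tensor([3., 4.])
```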
--------------------------------------------------------------------------------
/xtuner/_lite/patches/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .auto import AutoPatch
3 | from .base import FSDPConfig
4 | from .utils import pad_to_max_length, pad_to_multiple_of
5 |
6 | __all__ = ["AutoPatch", "FSDPConfig", "pad_to_max_length", "pad_to_multiple_of"]
7 |
--------------------------------------------------------------------------------
/xtuner/_lite/patches/auto.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from transformers.models.llama import LlamaForCausalLM
3 | from transformers.models.qwen2 import Qwen2ForCausalLM
4 |
5 | from xtuner._lite.modelings.internlm3 import InternLM3ForCausalLM
6 |
7 | from .base import FSDPConfig, PatchedCausalLM
8 | from .internlm3 import (
9 | CUDAPatchedInternLM3ForCausalLM,
10 | MLUPatchedInternLM3ForCausalLM,
11 | MuxiPatchedInternLM3ForCausalLM,
12 | )
13 | from .llama import (
14 | CUDAPatchedLlamaForCausalLM,
15 | MLUPatchedLlamaForCausalLM,
16 | MuxiPatchedLlamaForCausalLM,
17 | )
18 | from .qwen2 import CUDAPatchedQwen2ForCausalLM
19 |
20 | CUDA_PATCH_MAP = {
21 | LlamaForCausalLM: CUDAPatchedLlamaForCausalLM,
22 | InternLM3ForCausalLM: CUDAPatchedInternLM3ForCausalLM,
23 | Qwen2ForCausalLM: CUDAPatchedQwen2ForCausalLM,
24 | }
25 |
26 | MLU_PATCH_MAP = {
27 | LlamaForCausalLM: MLUPatchedLlamaForCausalLM,
28 | InternLM3ForCausalLM: MLUPatchedInternLM3ForCausalLM,
29 | }
30 |
31 | MUXI_PATCH_MAP = {
32 | LlamaForCausalLM: MuxiPatchedLlamaForCausalLM,
33 | InternLM3ForCausalLM: MuxiPatchedInternLM3ForCausalLM,
34 | }
35 |
36 |
37 | class AutoPatch:
38 | @classmethod
39 | def from_causal_lm(
40 | cls, model, fsdp_config: FSDPConfig, device_type="cuda"
41 | ) -> PatchedCausalLM:
42 | if device_type == "cuda":
43 | patch_cls = CUDA_PATCH_MAP[type(model)]
44 | elif device_type == "mlu":
45 | patch_cls = MLU_PATCH_MAP[type(model)]
46 | elif device_type == "muxi":
47 | patch_cls = MUXI_PATCH_MAP[type(model)]
48 | else:
49 | raise NotImplementedError
50 |
51 | patched_model = patch_cls(model, fsdp_config)
52 |
53 | return patched_model
54 |
--------------------------------------------------------------------------------
/xtuner/_lite/patches/internlm3.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from xtuner._lite.chat import HybridChatTemplate
3 | from xtuner._lite.modelings.internlm3.modeling_internlm3 import (
4 | InternLM3Attention,
5 | InternLM3DecoderLayer,
6 | InternLM3ForCausalLM,
7 | InternLM3RotaryEmbedding,
8 | )
9 |
10 | from .llama import CUDAPatchedLlamaForCausalLM
11 |
12 |
13 | class CUDAPatchedInternLM3ForCausalLM(CUDAPatchedLlamaForCausalLM):
14 | rotary_emb_cls = InternLM3RotaryEmbedding
15 | attn_cls = InternLM3Attention
16 | layer_cls = InternLM3DecoderLayer
17 | causal_cls = InternLM3ForCausalLM
18 |
19 | chat_template = HybridChatTemplate(
20 | system="<|im_start|>system\n{system}<|im_end|>\n",
21 | user="<|im_start|>user\n{user}<|im_end|>\n<|im_start|>assistant\n",
22 | assistant="{assistant}<|im_end|>",
23 | stop_words=["<|im_end|>"],
24 | )
25 |
26 | def __init__(self, model, fsdp_config=None):
27 | super().__init__(model, fsdp_config)
28 |
29 |         if fsdp_config is not None and fsdp_config.max_length is not None:
30 | self.patched_model.config.rope_scaling = {"rope_type": "default"}
31 | ori_max_len = self.patched_model.config.max_position_embeddings
32 | self.patched_model.config.max_position_embeddings = max(
33 | fsdp_config.max_length, ori_max_len
34 | )
35 | self.patched_model.model.rotary_emb = InternLM3RotaryEmbedding(
36 | self.patched_model.config
37 | ).to(self.device_type)
38 |
39 |
40 | class MLUPatchedInternLM3ForCausalLM(CUDAPatchedInternLM3ForCausalLM):
41 | device_type = "mlu"
42 |
43 |
44 | class MuxiPatchedInternLM3ForCausalLM(CUDAPatchedInternLM3ForCausalLM):
45 | device_type = "muxi"
46 |
--------------------------------------------------------------------------------
/xtuner/_lite/patches/mixins/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .generate import GenerateMixin
3 |
4 | __all__ = ["GenerateMixin"]
5 |
--------------------------------------------------------------------------------
/xtuner/_lite/patches/utils.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from typing import List, Union
3 |
4 | import torch
5 |
6 |
7 | def pad_to_multiple_of(sequence, padding_value, multiple_of, dim=-1):
8 | length = sequence.shape[dim]
9 | if length % multiple_of == 0:
10 | return sequence
11 |
12 | pad_num = multiple_of - (length % multiple_of)
13 | pad_shape = (
14 | (*sequence.shape[:dim], pad_num, *sequence.shape[dim + 1 :])
15 | if dim != -1
16 | else (*sequence.shape[:dim], pad_num)
17 | )
18 | pad = torch.full(
19 | pad_shape, padding_value, dtype=sequence.dtype, device=sequence.device
20 | )
21 | sequence = torch.cat([sequence, pad], dim=dim)
22 | return sequence
23 |
24 |
25 | def pad_to_max_length(sequence, padding_value, max_length, dim=-1):
26 | length = sequence.shape[dim]
27 | assert length <= max_length
28 | pad_num = max_length - length
29 | pad_shape = (
30 | (*sequence.shape[:dim], pad_num, *sequence.shape[dim + 1 :])
31 | if dim != -1
32 | else (*sequence.shape[:dim], pad_num)
33 | )
34 | pad = torch.full(
35 | pad_shape, padding_value, dtype=sequence.dtype, device=sequence.device
36 | )
37 | sequence = torch.cat([sequence, pad], dim=dim)
38 | return sequence
39 |
40 |
41 | def unpack_sequence(packed: torch.Tensor, num_tokens: Union[torch.Tensor, List], dim=1):
42 | if isinstance(num_tokens, torch.Tensor):
43 | num_tokens = num_tokens.tolist()
44 | sequences = torch.split(packed, num_tokens, dim=dim)
45 | return sequences
46 |
47 |
48 | def pack_sequence(sequences, dim=1):
49 | num_tokens = torch.IntTensor([seq.size(dim) for seq in sequences])
50 | packed = torch.cat(sequences, dim=dim)
51 | return packed, num_tokens.to(packed.device)
52 |
53 |
54 | def packed_cumulative_length(num_tokens: torch.Tensor):
55 | device = num_tokens.device
56 | _zero_pad = torch.zeros(1, device=device)
57 | _pad_length = torch.cat([_zero_pad, num_tokens]).int()
58 | return torch.cumsum(_pad_length, 0).int()
59 |
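An illustrative round trip through these helpers (not from the source), assuming they are imported from `xtuner._lite.patches.utils`: sequences are padded up to a common multiple, packed along the sequence dimension, and recovered from the recorded token counts; `packed_cumulative_length` yields the offsets that varlen attention kernels typically expect.

```python
import torch

from xtuner._lite.patches.utils import (
    pack_sequence,
    packed_cumulative_length,
    pad_to_multiple_of,
    unpack_sequence,
)

a = torch.arange(5).reshape(1, 5)  # (batch=1, seq=5)
b = torch.arange(3).reshape(1, 3)  # (batch=1, seq=3)

# Pad each sequence length up to a multiple of 4, using 0 as the pad value.
a_pad = pad_to_multiple_of(a, padding_value=0, multiple_of=4)
b_pad = pad_to_multiple_of(b, padding_value=0, multiple_of=4)
print(a_pad.shape, b_pad.shape)  # (1, 8) and (1, 4)

# Pack along the sequence dimension and remember each sequence's length.
packed, num_tokens = pack_sequence([a_pad, b_pad], dim=1)
print(packed.shape, num_tokens)  # (1, 12) and tensor([8, 4], dtype=torch.int32)

# Cumulative offsets [0, 8, 12]; unpacking restores the padded sequences.
print(packed_cumulative_length(num_tokens))
print([t.shape for t in unpack_sequence(packed, num_tokens, dim=1)])
```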
--------------------------------------------------------------------------------
/xtuner/apis/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .datasets import * # noqa: F401, F403
3 | from .model import * # noqa: F401, F403
4 | from .training_args import * # noqa: F401, F403
5 |
--------------------------------------------------------------------------------
/xtuner/apis/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .alpaca import (
3 | alpaca_data_collator,
4 | alpaca_dataset,
5 | alpaca_enzh_data_collator,
6 | alpaca_enzh_dataset,
7 | alpaca_zh_data_collator,
8 | alpaca_zh_dataset,
9 | )
10 | from .arxiv import arxiv_data_collator, arxiv_dataset
11 | from .code_alpaca import code_alpaca_data_collator, code_alpaca_dataset
12 | from .colorist import colorist_data_collator, colorist_dataset
13 | from .lawyer import (
14 | lawyer_crime_data_collator,
15 | lawyer_crime_dataset,
16 | lawyer_data_collator,
17 | lawyer_dataset,
18 | lawyer_reference_data_collator,
19 | lawyer_reference_dataset,
20 | )
21 | from .medical import medical_data_collator, medical_dataset
22 | from .moss_003_sft import (
23 | moss_003_sft_data_collator,
24 | moss_003_sft_dataset,
25 | moss_003_sft_no_plugins_data_collator,
26 | moss_003_sft_no_plugins_dataset,
27 | moss_003_sft_plugins_data_collator,
28 | moss_003_sft_plugins_dataset,
29 | )
30 | from .oasst1 import oasst1_data_collator, oasst1_dataset
31 | from .open_orca import openorca_data_collator, openorca_dataset
32 | from .sql import sql_data_collator, sql_dataset
33 | from .tiny_codes import tiny_codes_data_collator, tiny_codes_dataset
34 | from .wizardlm import wizardlm_data_collator, wizardlm_dataset
35 |
36 | __all__ = [
37 | "alpaca_data_collator",
38 | "alpaca_dataset",
39 | "alpaca_enzh_data_collator",
40 | "alpaca_enzh_dataset",
41 | "alpaca_zh_data_collator",
42 | "alpaca_zh_dataset",
43 | "arxiv_data_collator",
44 | "arxiv_dataset",
45 | "medical_data_collator",
46 | "medical_dataset",
47 | "moss_003_sft_data_collator",
48 | "moss_003_sft_dataset",
49 | "moss_003_sft_no_plugins_data_collator",
50 | "moss_003_sft_no_plugins_dataset",
51 | "moss_003_sft_plugins_data_collator",
52 | "moss_003_sft_plugins_dataset",
53 | "oasst1_data_collator",
54 | "oasst1_dataset",
55 | "openorca_data_collator",
56 | "openorca_dataset",
57 | "lawyer_crime_dataset",
58 | "lawyer_crime_data_collator",
59 | "lawyer_reference_dataset",
60 | "lawyer_reference_data_collator",
61 | "lawyer_dataset",
62 | "lawyer_data_collator",
63 | "colorist_dataset",
64 | "colorist_data_collator",
65 | "sql_dataset",
66 | "sql_data_collator",
67 | "code_alpaca_dataset",
68 | "code_alpaca_data_collator",
69 | "tiny_codes_dataset",
70 | "tiny_codes_data_collator",
71 | "wizardlm_data_collator",
72 | "wizardlm_dataset",
73 | ]
74 |
--------------------------------------------------------------------------------
/xtuner/apis/datasets/arxiv.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from functools import partial
3 |
4 | from datasets import load_dataset
5 |
6 | from xtuner.dataset import process_hf_dataset
7 | from xtuner.dataset.collate_fns import default_collate_fn
8 | from xtuner.dataset.map_fns import arxiv_map_fn, template_map_fn_factory
9 | from xtuner.utils import PROMPT_TEMPLATE
10 |
11 |
12 | def arxiv_dataset(
13 | tokenizer,
14 | data_file=None,
15 | max_length=2048,
16 | prompt_template=PROMPT_TEMPLATE.default,
17 | remove_unused_columns=True,
18 | pack_to_max_length=True,
19 | ):
20 | template_map_fn = template_map_fn_factory(template=prompt_template)
21 | # 1. Download data from https://kaggle.com/datasets/Cornell-University/arxiv # noqa: E501
22 | # 2. Process data with `./tools/data_preprocess/arxiv.py`
23 | if data_file is None:
24 | data_file = "./data/arxiv_postprocess_csAIcsCLcsCV_20200101.json"
25 | dataset_org = load_dataset(path="json", data_files=dict(train=data_file))
26 | dataset = process_hf_dataset(
27 | dataset=dataset_org,
28 | tokenizer=tokenizer,
29 | max_length=max_length,
30 | dataset_map_fn=arxiv_map_fn,
31 | template_map_fn=template_map_fn,
32 | remove_unused_columns=remove_unused_columns,
33 | shuffle_before_pack=True,
34 | pack_to_max_length=pack_to_max_length,
35 | )
36 |
37 | return dataset
38 |
39 |
40 | def arxiv_data_collator(return_hf_format=False):
41 | return partial(default_collate_fn, return_hf_format=return_hf_format)
42 |
--------------------------------------------------------------------------------
/xtuner/apis/datasets/code_alpaca.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from functools import partial
3 |
4 | from datasets import load_dataset
5 |
6 | from xtuner.dataset import process_hf_dataset
7 | from xtuner.dataset.collate_fns import default_collate_fn
8 | from xtuner.dataset.map_fns import code_alpaca_map_fn, template_map_fn_factory
9 | from xtuner.utils import PROMPT_TEMPLATE
10 |
11 |
12 | def code_alpaca_dataset(
13 | tokenizer,
14 | path="HuggingFaceH4/CodeAlpaca_20K",
15 | max_length=2048,
16 | prompt_template=PROMPT_TEMPLATE.default,
17 | remove_unused_columns=True,
18 | pack_to_max_length=True,
19 | ):
20 | template_map_fn = template_map_fn_factory(template=prompt_template)
21 | dataset_org = load_dataset(path)
22 | dataset = process_hf_dataset(
23 | dataset=dataset_org,
24 | tokenizer=tokenizer,
25 | max_length=max_length,
26 | dataset_map_fn=code_alpaca_map_fn,
27 | template_map_fn=template_map_fn,
28 | remove_unused_columns=remove_unused_columns,
29 | shuffle_before_pack=True,
30 | pack_to_max_length=pack_to_max_length,
31 | )
32 |
33 | return dataset
34 |
35 |
36 | def code_alpaca_data_collator(return_hf_format=False):
37 | return partial(default_collate_fn, return_hf_format=return_hf_format)
38 |
--------------------------------------------------------------------------------
/xtuner/apis/datasets/colorist.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from functools import partial
3 |
4 | from datasets import load_dataset
5 |
6 | from xtuner.dataset import process_hf_dataset
7 | from xtuner.dataset.collate_fns import default_collate_fn
8 | from xtuner.dataset.map_fns import colors_map_fn, template_map_fn_factory
9 | from xtuner.utils import PROMPT_TEMPLATE
10 |
11 |
12 | def colorist_dataset(
13 | tokenizer,
14 | path="burkelibbey/colors",
15 | max_length=2048,
16 | prompt_template=PROMPT_TEMPLATE.default,
17 | remove_unused_columns=True,
18 | pack_to_max_length=True,
19 | ):
20 | template_map_fn = template_map_fn_factory(template=prompt_template)
21 | dataset_org = load_dataset(path)
22 | dataset = process_hf_dataset(
23 | dataset=dataset_org,
24 | tokenizer=tokenizer,
25 | max_length=max_length,
26 | dataset_map_fn=colors_map_fn,
27 | template_map_fn=template_map_fn,
28 | remove_unused_columns=remove_unused_columns,
29 | shuffle_before_pack=True,
30 | pack_to_max_length=pack_to_max_length,
31 | )
32 |
33 | return dataset
34 |
35 |
36 | def colorist_data_collator(return_hf_format=False):
37 | return partial(default_collate_fn, return_hf_format=return_hf_format)
38 |
--------------------------------------------------------------------------------
/xtuner/apis/datasets/medical.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from functools import partial
3 |
4 | from datasets import load_dataset
5 |
6 | from xtuner.dataset import process_hf_dataset
7 | from xtuner.dataset.collate_fns import default_collate_fn
8 | from xtuner.dataset.map_fns import medical_map_fn, template_map_fn_factory
9 | from xtuner.utils import PROMPT_TEMPLATE
10 |
11 |
12 | def medical_dataset(
13 | tokenizer,
14 | path="shibing624/medical",
15 | max_length=2048,
16 | prompt_template=PROMPT_TEMPLATE.default,
17 | remove_unused_columns=False,
18 | pack_to_max_length=True,
19 | ):
20 | template_map_fn = template_map_fn_factory(template=prompt_template)
21 | dataset_org = load_dataset(path)
22 | dataset = process_hf_dataset(
23 | dataset=dataset_org,
24 | tokenizer=tokenizer,
25 | max_length=max_length,
26 | dataset_map_fn=medical_map_fn,
27 | template_map_fn=template_map_fn,
28 | remove_unused_columns=remove_unused_columns,
29 | shuffle_before_pack=True,
30 | pack_to_max_length=pack_to_max_length,
31 | )
32 |
33 | return dataset
34 |
35 |
36 | def medical_data_collator(return_hf_format=False):
37 | return partial(default_collate_fn, return_hf_format=return_hf_format)
38 |
--------------------------------------------------------------------------------
/xtuner/apis/datasets/moss_003_sft.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from functools import partial
3 |
4 | from torch.utils.data import ConcatDataset
5 |
6 | from xtuner.dataset import MOSSSFTDataset
7 | from xtuner.dataset.collate_fns import default_collate_fn
8 |
9 |
10 | def moss_003_sft_dataset(
11 | tokenizer,
12 | plugins_data_file=None,
13 | no_plugins_data_file=None,
14 | bot_name=None,
15 | max_length=2048,
16 | ):
17 | plugins = moss_003_sft_plugins_dataset(
18 | tokenizer, data_file=plugins_data_file, bot_name=bot_name, max_length=max_length
19 | )
20 | no_plugins = moss_003_sft_no_plugins_dataset(
21 | tokenizer,
22 | data_file=no_plugins_data_file,
23 | bot_name=bot_name,
24 | max_length=max_length,
25 | )
26 | dataset = ConcatDataset([plugins, no_plugins])
27 | return dataset
28 |
29 |
30 | def moss_003_sft_data_collator(return_hf_format=False):
31 | return partial(default_collate_fn, return_hf_format=return_hf_format)
32 |
33 |
34 | def moss_003_sft_no_plugins_dataset(
35 | tokenizer, data_file=None, bot_name=None, max_length=2048
36 | ):
37 | # Download data from https://huggingface.co/datasets/fnlp/moss-003-sft-data
38 | if data_file is None:
39 | data_file = "./data/moss-003-sft-no-tools.jsonl"
40 | dataset = MOSSSFTDataset(
41 | data_file=data_file,
42 | bot_name=bot_name,
43 | tokenizer=tokenizer,
44 | max_length=max_length,
45 | )
46 |
47 | return dataset
48 |
49 |
50 | def moss_003_sft_no_plugins_data_collator(return_hf_format=False):
51 | return partial(default_collate_fn, return_hf_format=return_hf_format)
52 |
53 |
54 | def moss_003_sft_plugins_dataset(
55 | tokenizer, data_file=None, bot_name=None, max_length=2048
56 | ):
57 | # Download data from https://huggingface.co/datasets/fnlp/moss-003-sft-data
58 | if data_file is None:
59 | data_file = "./data/conversations_with_tools_with_inner_instruction_no_text2image_train_all_random_meta0.5_0.1_0.01_moss_0709.jsonl" # noqa: E501
60 | dataset = MOSSSFTDataset(
61 | data_file=data_file,
62 | bot_name=bot_name,
63 | tokenizer=tokenizer,
64 | max_length=max_length,
65 | )
66 |
67 | return dataset
68 |
69 |
70 | def moss_003_sft_plugins_data_collator(return_hf_format=False):
71 | return partial(default_collate_fn, return_hf_format=return_hf_format)
72 |
--------------------------------------------------------------------------------
/xtuner/apis/datasets/oasst1.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from functools import partial
3 |
4 | from datasets import load_dataset
5 |
6 | from xtuner.dataset import process_hf_dataset
7 | from xtuner.dataset.collate_fns import default_collate_fn
8 | from xtuner.dataset.map_fns import oasst1_map_fn, template_map_fn_factory
9 | from xtuner.utils import PROMPT_TEMPLATE
10 |
11 |
12 | def oasst1_dataset(
13 | tokenizer,
14 | path="timdettmers/openassistant-guanaco",
15 | max_length=2048,
16 | prompt_template=PROMPT_TEMPLATE.default,
17 | remove_unused_columns=False,
18 | pack_to_max_length=True,
19 | ):
20 | template_map_fn = template_map_fn_factory(template=prompt_template)
21 | dataset_org = load_dataset(path)
22 | dataset = process_hf_dataset(
23 | dataset=dataset_org,
24 | tokenizer=tokenizer,
25 | max_length=max_length,
26 | dataset_map_fn=oasst1_map_fn,
27 | template_map_fn=template_map_fn,
28 | remove_unused_columns=remove_unused_columns,
29 | shuffle_before_pack=True,
30 | pack_to_max_length=pack_to_max_length,
31 | )
32 |
33 | return dataset
34 |
35 |
36 | def oasst1_data_collator(return_hf_format=False):
37 | return partial(default_collate_fn, return_hf_format=return_hf_format)
38 |
--------------------------------------------------------------------------------
/xtuner/apis/datasets/open_orca.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from functools import partial
3 |
4 | from datasets import load_dataset
5 |
6 | from xtuner.dataset import process_hf_dataset
7 | from xtuner.dataset.collate_fns import default_collate_fn
8 | from xtuner.dataset.map_fns import openorca_map_fn, template_map_fn_factory
9 | from xtuner.utils import PROMPT_TEMPLATE
10 |
11 |
12 | def openorca_dataset(
13 | tokenizer,
14 | path="Open-Orca/OpenOrca",
15 | max_length=2048,
16 | prompt_template=PROMPT_TEMPLATE.default,
17 | remove_unused_columns=True,
18 | pack_to_max_length=True,
19 | ):
20 | template_map_fn = template_map_fn_factory(template=prompt_template)
21 | dataset_org = load_dataset(path)
22 | dataset = process_hf_dataset(
23 | dataset=dataset_org,
24 | tokenizer=tokenizer,
25 | max_length=max_length,
26 | dataset_map_fn=openorca_map_fn,
27 | template_map_fn=template_map_fn,
28 | remove_unused_columns=remove_unused_columns,
29 | shuffle_before_pack=True,
30 | pack_to_max_length=pack_to_max_length,
31 | )
32 |
33 | return dataset
34 |
35 |
36 | def openorca_data_collator(return_hf_format=False):
37 | return partial(default_collate_fn, return_hf_format=return_hf_format)
38 |
--------------------------------------------------------------------------------
/xtuner/apis/datasets/sql.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from functools import partial
3 |
4 | from datasets import load_dataset
5 |
6 | from xtuner.dataset import process_hf_dataset
7 | from xtuner.dataset.collate_fns import default_collate_fn
8 | from xtuner.dataset.map_fns import sql_map_fn, template_map_fn_factory
9 | from xtuner.utils import PROMPT_TEMPLATE
10 |
11 |
12 | def sql_dataset(
13 | tokenizer,
14 | path="b-mc2/sql-create-context",
15 | max_length=2048,
16 | prompt_template=PROMPT_TEMPLATE.default,
17 | remove_unused_columns=True,
18 | pack_to_max_length=True,
19 | ):
20 | template_map_fn = template_map_fn_factory(template=prompt_template)
21 | dataset_org = load_dataset(path)
22 | dataset = process_hf_dataset(
23 | dataset=dataset_org,
24 | tokenizer=tokenizer,
25 | max_length=max_length,
26 | dataset_map_fn=sql_map_fn,
27 | template_map_fn=template_map_fn,
28 | remove_unused_columns=remove_unused_columns,
29 | shuffle_before_pack=True,
30 | pack_to_max_length=pack_to_max_length,
31 | )
32 |
33 | return dataset
34 |
35 |
36 | def sql_data_collator(return_hf_format=False):
37 | return partial(default_collate_fn, return_hf_format=return_hf_format)
38 |
--------------------------------------------------------------------------------
/xtuner/apis/datasets/tiny_codes.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from functools import partial
3 |
4 | from datasets import load_dataset
5 |
6 | from xtuner.dataset import process_hf_dataset
7 | from xtuner.dataset.collate_fns import default_collate_fn
8 | from xtuner.dataset.map_fns import template_map_fn_factory, tiny_codes_map_fn
9 | from xtuner.utils import PROMPT_TEMPLATE
10 |
11 |
12 | def tiny_codes_dataset(
13 | tokenizer,
14 | path="nampdn-ai/tiny-codes",
15 | max_length=2048,
16 | prompt_template=PROMPT_TEMPLATE.default,
17 | remove_unused_columns=True,
18 | pack_to_max_length=True,
19 | ):
20 | template_map_fn = template_map_fn_factory(template=prompt_template)
21 | dataset_org = load_dataset(path)
22 | dataset = process_hf_dataset(
23 | dataset=dataset_org,
24 | tokenizer=tokenizer,
25 | max_length=max_length,
26 | dataset_map_fn=tiny_codes_map_fn,
27 | template_map_fn=template_map_fn,
28 | remove_unused_columns=remove_unused_columns,
29 | shuffle_before_pack=True,
30 | pack_to_max_length=pack_to_max_length,
31 | )
32 |
33 | return dataset
34 |
35 |
36 | def tiny_codes_data_collator(return_hf_format=False):
37 | return partial(default_collate_fn, return_hf_format=return_hf_format)
38 |
--------------------------------------------------------------------------------
/xtuner/apis/datasets/wizardlm.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from functools import partial
3 |
4 | from datasets import load_dataset
5 |
6 | from xtuner.dataset import process_hf_dataset
7 | from xtuner.dataset.collate_fns import default_collate_fn
8 | from xtuner.dataset.map_fns import template_map_fn_factory, wizardlm_map_fn
9 | from xtuner.utils import PROMPT_TEMPLATE
10 |
11 |
12 | def wizardlm_dataset(
13 | tokenizer,
14 | path="WizardLM/WizardLM_evol_instruct_V2_196k",
15 | max_length=2048,
16 | prompt_template=PROMPT_TEMPLATE.default,
17 | remove_unused_columns=False,
18 | pack_to_max_length=True,
19 | ):
20 | template_map_fn = template_map_fn_factory(template=prompt_template)
21 | dataset_org = load_dataset(path)
22 | dataset = process_hf_dataset(
23 | dataset=dataset_org,
24 | tokenizer=tokenizer,
25 | max_length=max_length,
26 | dataset_map_fn=wizardlm_map_fn,
27 | template_map_fn=template_map_fn,
28 | remove_unused_columns=remove_unused_columns,
29 | shuffle_before_pack=True,
30 | pack_to_max_length=pack_to_max_length,
31 | )
32 |
33 | return dataset
34 |
35 |
36 | def wizardlm_data_collator(return_hf_format=False):
37 | return partial(default_collate_fn, return_hf_format=return_hf_format)
38 |
--------------------------------------------------------------------------------
/xtuner/apis/model.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import torch
3 | from peft import LoraConfig
4 | from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
5 |
6 | from xtuner.model import SupervisedFinetune
7 |
8 | __all__ = ["build_model", "build_lora_model", "build_qlora_model"]
9 |
10 |
11 | def build_qlora_model(
12 | model_name_or_path,
13 | quantization_config=None,
14 | lora_config=None,
15 | return_tokenizer=True,
16 | ):
17 | if quantization_config is None:
18 | quantization_config = BitsAndBytesConfig(
19 | load_in_4bit=True,
20 | load_in_8bit=False,
21 | llm_int8_threshold=6.0,
22 | llm_int8_has_fp16_weight=False,
23 | bnb_4bit_compute_dtype=torch.float16,
24 | bnb_4bit_use_double_quant=True,
25 | bnb_4bit_quant_type="nf4",
26 | )
27 | if lora_config is None:
28 | lora_config = LoraConfig(
29 | r=64, lora_alpha=16, lora_dropout=0.1, bias="none", task_type="CAUSAL_LM"
30 | )
31 |
32 | llm = AutoModelForCausalLM.from_pretrained(
33 | model_name_or_path,
34 | torch_dtype=torch.float16,
35 | trust_remote_code=True,
36 | quantization_config=quantization_config,
37 | )
38 |
39 | model = SupervisedFinetune(llm, lora=lora_config)
40 |
41 | if return_tokenizer:
42 | tokenizer = AutoTokenizer.from_pretrained(
43 | model_name_or_path, trust_remote_code=True, encode_special_tokens=True
44 | )
45 | return model.llm, tokenizer
46 | else:
47 | return model.llm
48 |
49 |
50 | def build_lora_model(model_name_or_path, lora_config=None, return_tokenizer=True):
51 | if lora_config is None:
52 | lora_config = LoraConfig(
53 | r=64, lora_alpha=16, lora_dropout=0.1, bias="none", task_type="CAUSAL_LM"
54 | )
55 |
56 | llm = AutoModelForCausalLM.from_pretrained(
57 | model_name_or_path, torch_dtype=torch.float16, trust_remote_code=True
58 | )
59 |
60 | model = SupervisedFinetune(llm, lora=lora_config)
61 |
62 | if return_tokenizer:
63 | tokenizer = AutoTokenizer.from_pretrained(
64 | model_name_or_path, trust_remote_code=True, encode_special_tokens=True
65 | )
66 | return model.llm, tokenizer
67 | else:
68 | return model.llm
69 |
70 |
71 | def build_model(model_name_or_path, return_tokenizer=True):
72 | model = AutoModelForCausalLM.from_pretrained(
73 | model_name_or_path, torch_dtype=torch.float16, trust_remote_code=True
74 | )
75 |
76 | if return_tokenizer:
77 | tokenizer = AutoTokenizer.from_pretrained(
78 | model_name_or_path, trust_remote_code=True, encode_special_tokens=True
79 | )
80 | return model, tokenizer
81 | else:
82 | return model
83 |
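A hypothetical call site for the builders above (the model name is only an example, and running this downloads weights): each builder returns the underlying `llm` (with LoRA/QLoRA applied where relevant), plus a tokenizer unless `return_tokenizer=False`.

```python
from xtuner.apis.model import build_model, build_qlora_model

# QLoRA: 4-bit quantized base weights with the default LoRA settings (r=64, alpha=16).
model, tokenizer = build_qlora_model("internlm/internlm-7b")

# Plain fp16 model without adapters; skip the tokenizer.
model_only = build_model("internlm/internlm-7b", return_tokenizer=False)
```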
--------------------------------------------------------------------------------
/xtuner/apis/training_args.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from dataclasses import dataclass, field
3 | from typing import Optional, Union
4 |
5 | from transformers import TrainingArguments
6 | from transformers.trainer_utils import IntervalStrategy, SchedulerType
7 |
8 | __all__ = ["DefaultTrainingArguments"]
9 |
10 |
11 | @dataclass
12 | class DefaultTrainingArguments(TrainingArguments):
13 | # custom
14 |     model_name_or_path: Optional[str] = field(
15 | default=None,
16 | metadata={"help": "model name or path."},
17 | )
18 |     dataset_name_or_path: Optional[str] = field(
19 | default=None,
20 | metadata={"help": "dataset name or path."},
21 | )
22 |
23 | # huggingface
24 | default_output_dir = "./work_dirs"
25 | default_do_train = True
26 | default_per_device_train_batch_size = 1
27 | default_learning_rate = 2e-5
28 | default_save_strategy = "epoch"
29 | default_lr_scheduler_type = "cosine"
30 | default_logging_steps = 5
31 |
32 | output_dir: str = field(
33 | default=default_output_dir,
34 | metadata={
35 | "help": (
36 | "The output directory where the model predictions and "
37 | "checkpoints will be written."
38 | )
39 | },
40 | )
41 | do_train: bool = field(
42 | default=default_do_train, metadata={"help": "Whether to run training."}
43 | )
44 | per_device_train_batch_size: int = field(
45 | default=default_per_device_train_batch_size,
46 | metadata={"help": "Batch size per GPU/TPU core/CPU for training."},
47 | )
48 | learning_rate: float = field(
49 | default=default_learning_rate,
50 | metadata={"help": "The initial learning rate for AdamW."},
51 | )
52 | save_strategy: Union[IntervalStrategy, str] = field(
53 | default=default_save_strategy,
54 | metadata={"help": "The checkpoint save strategy to use."},
55 | )
56 | lr_scheduler_type: Union[SchedulerType, str] = field(
57 | default=default_lr_scheduler_type,
58 | metadata={"help": "The scheduler type to use."},
59 | )
60 | logging_steps: float = field(
61 | default=default_logging_steps,
62 | metadata={
63 | "help": (
64 | "Log every X updates steps. Should be an integer or a "
65 | "float in range `[0,1)`. If smaller than 1, will be "
66 | "interpreted as ratio of total training steps."
67 | )
68 | },
69 | )
70 |
--------------------------------------------------------------------------------
/xtuner/configs/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import os
3 |
4 |
5 | def get_cfgs_name_path():
6 | path = os.path.dirname(__file__)
7 | mapping = {}
8 | for root, dirs, files in os.walk(path):
9 | for file_ in files:
10 | if (
11 | file_.endswith((".py", ".json"))
12 | and not file_.startswith(".")
13 | and not file_.startswith("_")
14 | ):
15 | mapping[os.path.splitext(file_)[0]] = os.path.join(root, file_)
16 | return mapping
17 |
18 |
19 | cfgs_name_path = get_cfgs_name_path()
20 |
21 | __all__ = ["cfgs_name_path"]
22 |
--------------------------------------------------------------------------------
/xtuner/configs/cohere/README.md:
--------------------------------------------------------------------------------
1 | # Cohere 100B
2 |
3 | ## Install
4 |
5 | ```bash
6 | # Install the latest xtuner
7 | pip install -U 'xtuner[deepspeed]'
8 |
9 | # Cohere requires the latest version of transformers.
10 | pip install git+https://github.com/huggingface/transformers.git
11 |
12 | # Sequence parallel requires flash-attn
13 | pip install flash-attn
14 | ```
15 |
16 | ## Full Parameter Fine-tune
17 |
18 | Full-parameter fine-tuning requires 64 A100 80G GPUs.
19 |
20 | ### slurm
21 |
22 | Note: `$PARTITION` is the Slurm partition to submit the job to.
23 |
24 | ```bash
25 | srun -p $PARTITION --job-name=Cohere --nodes=8 --gres=gpu:8 --ntasks-per-node=8 xtuner train cohere_100b_128k_sp32 --deepspeed deepspeed_zero3 --launcher slurm
26 | ```
27 |
28 | ### torchrun
29 |
30 | Note: `$NODE_0_ADDR` is the IP address of node 0.
31 |
32 | ```bash
33 | # execute on node 0
34 | NPROC_PER_NODE=8 NNODES=8 PORT=29600 ADDR=$NODE_0_ADDR NODE_RANK=0 xtuner train cohere_100b_128k_sp32 --deepspeed deepspeed_zero3
35 |
36 | # execute on node 1
37 | NPROC_PER_NODE=8 NNODES=8 PORT=29600 ADDR=$NODE_0_ADDR NODE_RANK=1 xtuner train cohere_100b_128k_sp32 --deepspeed deepspeed_zero3
38 | ```
39 |
40 | ### Speed
41 |
42 | 64 ~ 256 * A100 80G:
43 |
44 | | Model | Sequence Length | GPUs Number | Sequence Parallel World Size | Tokens per Second | TFLOPs |
45 | | :---------: | :-------------: | :---------: | :--------------------------: | :---------------: | :----: |
46 | | Cohere_100b | 128k | 64 | 32 | 97.3 | 173.4 |
47 | | Cohere_100b | 128k | 128 | 16 | 102.1 | 182.7 |
48 | | Cohere_100b | 128k | 256 | 16 | 101.3 | 181.3 |
49 |
--------------------------------------------------------------------------------
/xtuner/configs/deepseek/README.md:
--------------------------------------------------------------------------------
1 | # DeepSeek V2
2 |
3 | ## Install
4 |
5 | ```bash
6 | # Git clone the latest xtuner
7 | git clone https://github.com/InternLM/xtuner.git
8 |
9 | # Install the latest xtuner
10 | cd xtuner
11 | pip install -e '.[all]'
12 |
13 | # DeepSeek V2 requires flash-attn
14 | pip install flash-attn
15 |
16 | # Install the latest transformers
17 | pip install -U transformers
18 | ```
19 |
20 | ## Full Parameter Fine-tune
21 |
22 | Full-parameter fine-tuning of DeepSeek V2 236B requires at least 64 A100 80G GPUs. The fine-tuned model will be saved to `${WORK_DIRS}/hf_model` by `HFCheckpointHook`.
23 |
24 | ### slurm
25 |
26 | Note: `$PARTITION` is the Slurm partition to submit the job to.
27 |
28 | ```bash
29 | srun -p $PARTITION --job-name=deepseek_v2 --nodes=8 --gres=gpu:8 --ntasks-per-node=8 xtuner train deepseek_v2_chat_full_alpaca_e3 --deepspeed deepspeed_zero3 --launcher slurm
30 | ```
31 |
32 | ### torchrun
33 |
34 | Note: `$NODE_0_ADDR` is the IP address of node 0.
35 |
36 | ```bash
37 | # execute on node 0
38 | NPROC_PER_NODE=8 NNODES=8 PORT=29600 ADDR=$NODE_0_ADDR NODE_RANK=0 xtuner train deepseek_v2_chat_full_alpaca_e3 --deepspeed deepspeed_zero3 --launcher pytorch
39 |
40 | # execute on node 1
41 | NPROC_PER_NODE=8 NNODES=8 PORT=29600 ADDR=$NODE_0_ADDR NODE_RANK=1 xtuner train deepseek_v2_chat_full_alpaca_e3 --deepspeed deepspeed_zero3 --launcher pytorch
42 |
43 | # execute on node 2, 3, ..., 7
44 | ```
45 |
46 | ### Speed
47 |
48 | 128 * A100 80G:
49 |
50 | | Model | Sequence Length | Use Varlen Attn | Sequence Parallel World Size | Tokens per Second |
51 | | :--------------------: | :-------------: | :-------------: | :--------------------------: | :---------------: |
52 | | deepseek v2 hf | 8k | False | 1 | 60 |
53 | | **deepseek v2 XTuner** | **8k** | **False** | **1** | **120 (2x)** |
54 | | deepseek v2 hf | 8k | True | 1 | 60 |
55 | | **deepseek v2 XTuner** | **8k** | **True** | **1** | **130 (2.2x)** |
56 | | deepseek v2 hf | 16k | False | 1 | OOM |
57 | | **deepseek v2 XTuner** | **16k** | **False** | **1** | **148** |
58 | | deepseek v2 hf | 16k | True | 1 | 95 |
59 | | **deepseek v2 XTuner** | **16k** | **True** | **1** | **180 (1.9x)** |
60 |
--------------------------------------------------------------------------------
/xtuner/configs/deepspeed/deepspeed_zero1.json:
--------------------------------------------------------------------------------
1 | {
2 | "gradient_accumulation_steps": "auto",
3 | "train_micro_batch_size_per_gpu": "auto",
4 | "gradient_clipping": "auto",
5 | "zero_allow_untested_optimizer": true,
6 | "zero_force_ds_cpu_optimizer": false,
7 | "zero_optimization": {
8 | "stage": 1,
9 | "overlap_comm": true
10 | },
11 | "fp16": {
12 | "enabled": "auto",
13 | "initial_scale_power": 16
14 | },
15 | "bf16": {
16 | "enabled": "auto"
17 | }
18 | }
19 |
--------------------------------------------------------------------------------
/xtuner/configs/deepspeed/deepspeed_zero2.json:
--------------------------------------------------------------------------------
1 | {
2 | "gradient_accumulation_steps": "auto",
3 | "train_micro_batch_size_per_gpu": "auto",
4 | "gradient_clipping": "auto",
5 | "zero_allow_untested_optimizer": true,
6 | "zero_force_ds_cpu_optimizer": false,
7 | "zero_optimization": {
8 | "stage": 2,
9 | "overlap_comm": true
10 | },
11 | "fp16": {
12 | "enabled": "auto",
13 | "initial_scale_power": 16
14 | },
15 | "bf16": {
16 | "enabled": "auto"
17 | }
18 | }
19 |
--------------------------------------------------------------------------------
/xtuner/configs/deepspeed/deepspeed_zero2_offload.json:
--------------------------------------------------------------------------------
1 | {
2 | "gradient_accumulation_steps": "auto",
3 | "train_micro_batch_size_per_gpu": "auto",
4 | "gradient_clipping": "auto",
5 | "zero_allow_untested_optimizer": true,
6 | "zero_force_ds_cpu_optimizer": false,
7 | "zero_optimization": {
8 | "stage": 2,
9 | "overlap_comm": true,
10 | "offload_optimizer": {
11 | "device": "cpu",
12 | "pin_memory": true
13 | }
14 | },
15 | "fp16": {
16 | "enabled": "auto",
17 | "initial_scale_power": 16
18 | },
19 | "bf16": {
20 | "enabled": "auto"
21 | }
22 | }
23 |
--------------------------------------------------------------------------------
/xtuner/configs/deepspeed/deepspeed_zero3.json:
--------------------------------------------------------------------------------
1 | {
2 | "gradient_accumulation_steps": "auto",
3 | "train_micro_batch_size_per_gpu": "auto",
4 | "gradient_clipping": "auto",
5 | "zero_allow_untested_optimizer": true,
6 | "zero_force_ds_cpu_optimizer": false,
7 | "zero_optimization": {
8 | "stage": 3,
9 | "overlap_comm": true,
10 | "stage3_gather_16bit_weights_on_model_save": true
11 | },
12 | "fp16": {
13 | "enabled": "auto",
14 | "initial_scale_power": 16
15 | },
16 | "bf16": {
17 | "enabled": "auto"
18 | }
19 | }
20 |
--------------------------------------------------------------------------------
/xtuner/configs/deepspeed/deepspeed_zero3_offload.json:
--------------------------------------------------------------------------------
1 | {
2 | "gradient_accumulation_steps": "auto",
3 | "train_micro_batch_size_per_gpu": "auto",
4 | "gradient_clipping": "auto",
5 | "zero_allow_untested_optimizer": true,
6 | "zero_force_ds_cpu_optimizer": false,
7 | "zero_optimization": {
8 | "stage": 3,
9 | "overlap_comm": true,
10 | "offload_optimizer": {
11 | "device": "cpu",
12 | "pin_memory": true
13 | },
14 | "offload_param": {
15 | "device": "cpu",
16 | "pin_memory": true
17 | },
18 | "stage3_gather_16bit_weights_on_model_save": true
19 | },
20 | "fp16": {
21 | "enabled": "auto",
22 | "initial_scale_power": 16
23 | },
24 | "bf16": {
25 | "enabled": "auto"
26 | }
27 | }
28 |
--------------------------------------------------------------------------------
/xtuner/configs/internlm/internlm_7b/internlm_7b_qlora_oasst1_e3_hf.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import torch
3 | from datasets import load_dataset
4 | from peft import LoraConfig
5 | from transformers import (
6 | AutoModelForCausalLM,
7 | AutoTokenizer,
8 | BitsAndBytesConfig,
9 | Trainer,
10 | TrainingArguments,
11 | )
12 |
13 | from xtuner.dataset import process_hf_dataset
14 | from xtuner.dataset.map_fns import oasst1_map_fn, template_map_fn_factory
15 | from xtuner.utils import PROMPT_TEMPLATE
16 |
17 | framework = "huggingface"
18 | pretrained_model_name_or_path = "internlm/internlm-7b"
19 | dataset_name_or_path = "timdettmers/openassistant-guanaco"
20 | max_length = 2048
21 | pack_to_max_length = True
22 | prompt_template = PROMPT_TEMPLATE.default
23 |
24 | trainer = Trainer
25 |
26 | training_args = dict(
27 | type=TrainingArguments,
28 | do_train=True,
29 | learning_rate=2e-4,
30 | weight_decay=0,
31 | lr_scheduler_type="cosine",
32 | warmup_steps=100,
33 | per_device_train_batch_size=1,
34 | gradient_accumulation_steps=16,
35 | num_train_epochs=3,
36 | fp16=True,
37 | logging_steps=1,
38 | optim="paged_adamw_32bit",
39 | save_strategy="steps",
40 | save_steps=1000,
41 | save_total_limit=2,
42 | ddp_find_unused_parameters=False,
43 | )
44 |
45 | tokenizer = dict(
46 | type=AutoTokenizer.from_pretrained,
47 | pretrained_model_name_or_path=pretrained_model_name_or_path,
48 | trust_remote_code=True,
49 | padding_side="right",
50 | )
51 |
52 | model = dict(
53 | type=AutoModelForCausalLM.from_pretrained,
54 | pretrained_model_name_or_path=pretrained_model_name_or_path,
55 | trust_remote_code=True,
56 | torch_dtype=torch.float16,
57 | quantization_config=dict(
58 | type=BitsAndBytesConfig,
59 | load_in_4bit=True,
60 | load_in_8bit=False,
61 | llm_int8_threshold=6.0,
62 | llm_int8_has_fp16_weight=False,
63 | bnb_4bit_compute_dtype=torch.float16,
64 | bnb_4bit_use_double_quant=True,
65 | bnb_4bit_quant_type="nf4",
66 | ),
67 | )
68 |
69 | lora = dict(
70 | type=LoraConfig,
71 | r=64,
72 | lora_alpha=16,
73 | lora_dropout=0.1,
74 | bias="none",
75 | task_type="CAUSAL_LM",
76 | )
77 |
78 | train_dataset = dict(
79 | type=process_hf_dataset,
80 | dataset=dict(type=load_dataset, path=dataset_name_or_path),
81 | tokenizer=tokenizer,
82 | max_length=max_length,
83 | dataset_map_fn=oasst1_map_fn,
84 | template_map_fn=dict(type=template_map_fn_factory, template=prompt_template),
85 | remove_unused_columns=True,
86 | shuffle_before_pack=True,
87 | pack_to_max_length=pack_to_max_length,
88 | )
89 |
--------------------------------------------------------------------------------
/xtuner/configs/internvl/v1_5/convert_to_official.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import argparse
3 | import os.path as osp
4 |
5 | import torch
6 | from mmengine.config import Config
7 | from transformers import AutoTokenizer
8 |
9 | from xtuner.model.utils import LoadWoInit
10 | from xtuner.registry import BUILDER
11 |
12 |
13 | def convert_to_official(config, trained_path, save_path):
14 | cfg = Config.fromfile(config)
15 | cfg.model.pretrained_pth = trained_path
16 | cfg.model.quantization_vit = False
17 | cfg.model.quantization_llm = False
18 |
19 | with LoadWoInit():
20 | model = BUILDER.build(cfg.model)
21 | model.to(torch.bfloat16)
22 |
23 | if model.use_visual_encoder_lora:
24 | vision_model = model.model.vision_model.merge_and_unload()
25 | model.model.vision_model = vision_model
26 |
27 | if model.use_llm_lora:
28 | language_model = model.model.language_model.merge_and_unload()
29 | model.model.language_model = language_model
30 |
31 | model.model.save_pretrained(save_path)
32 |
33 | tokenizer = AutoTokenizer.from_pretrained(
34 | cfg.model.model_path, trust_remote_code=True
35 | )
36 | tokenizer.save_pretrained(save_path)
37 |
38 | print(model)
39 |
40 |
41 | def main():
42 | parser = argparse.ArgumentParser(
43 | description="Convert the pth model to HuggingFace model"
44 | )
45 | parser.add_argument("config", help="config file name or path.")
46 | parser.add_argument("trained_model_pth", help="The trained model path.")
47 | parser.add_argument("save_path", help="The path to save the converted model.")
48 | args = parser.parse_args()
49 |
50 | if osp.realpath(args.trained_model_pth) == osp.realpath(args.save_path):
51 | raise ValueError("The trained path and save path should not be the same.")
52 |
53 | convert_to_official(args.config, args.trained_model_pth, args.save_path)
54 |
55 |
56 | if __name__ == "__main__":
57 | main()
58 |
--------------------------------------------------------------------------------
/xtuner/configs/llama/llama2_70b/llama2_70b_int8_lora_open_platypus_e1_hf.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import torch
3 | from datasets import load_dataset
4 | from peft import LoraConfig
5 | from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments
6 |
7 | from xtuner.dataset import process_hf_dataset
8 | from xtuner.dataset.map_fns import alpaca_map_fn, template_map_fn_factory
9 | from xtuner.utils import PROMPT_TEMPLATE
10 |
11 | framework = "huggingface"
12 | pretrained_model_name_or_path = "meta-llama/Llama-2-70b-hf"
13 | dataset_name_or_path = "garage-bAInd/Open-Platypus"
14 | max_length = 2048
15 | pack_to_max_length = True
16 | prompt_template = PROMPT_TEMPLATE.llama2_chat
17 |
18 | trainer = Trainer
19 |
20 | training_args = dict(
21 | type=TrainingArguments,
22 | do_train=True,
23 | learning_rate=3e-4,
24 | weight_decay=0,
25 | lr_scheduler_type="cosine",
26 | warmup_steps=100,
27 | per_device_train_batch_size=1,
28 | gradient_accumulation_steps=16,
29 | num_train_epochs=1,
30 | fp16=True,
31 | logging_steps=1,
32 | optim="adamw_torch",
33 | save_strategy="steps",
34 | save_steps=1000,
35 | save_total_limit=2,
36 | ddp_find_unused_parameters=False,
37 | )
38 |
39 | tokenizer = dict(
40 | type=AutoTokenizer.from_pretrained,
41 | pretrained_model_name_or_path=pretrained_model_name_or_path,
42 | trust_remote_code=True,
43 | padding_side="right",
44 | )
45 |
46 | model = dict(
47 | type=AutoModelForCausalLM.from_pretrained,
48 | pretrained_model_name_or_path=pretrained_model_name_or_path,
49 | trust_remote_code=True,
50 | torch_dtype=torch.float16,
51 | load_in_8bit=True,
52 | )
53 |
54 | lora = dict(
55 | type=LoraConfig,
56 | r=16,
57 | lora_alpha=16,
58 | lora_dropout=0.05,
59 | target_modules=["gate_proj", "down_proj", "up_proj"],
60 | bias="none",
61 | task_type="CAUSAL_LM",
62 | )
63 |
64 | train_dataset = dict(
65 | type=process_hf_dataset,
66 | dataset=dict(type=load_dataset, path=dataset_name_or_path),
67 | tokenizer=tokenizer,
68 | max_length=max_length,
69 | dataset_map_fn=alpaca_map_fn,
70 | template_map_fn=dict(type=template_map_fn_factory, template=prompt_template),
71 | remove_unused_columns=True,
72 | shuffle_before_pack=True,
73 | pack_to_max_length=pack_to_max_length,
74 | )
75 |
--------------------------------------------------------------------------------
/xtuner/configs/llama/llama2_70b/llama2_70b_qlora_open_platypus_e1_hf.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import torch
3 | from datasets import load_dataset
4 | from peft import LoraConfig
5 | from transformers import (
6 | AutoModelForCausalLM,
7 | AutoTokenizer,
8 | BitsAndBytesConfig,
9 | Trainer,
10 | TrainingArguments,
11 | )
12 |
13 | from xtuner.dataset import process_hf_dataset
14 | from xtuner.dataset.map_fns import alpaca_map_fn, template_map_fn_factory
15 | from xtuner.utils import PROMPT_TEMPLATE
16 |
17 | framework = "huggingface"
18 | pretrained_model_name_or_path = "meta-llama/Llama-2-70b-hf"
19 | dataset_name_or_path = "garage-bAInd/Open-Platypus"
20 | max_length = 2048
21 | pack_to_max_length = True
22 | prompt_template = PROMPT_TEMPLATE.llama2_chat
23 |
24 | trainer = Trainer
25 |
26 | training_args = dict(
27 | type=TrainingArguments,
28 | do_train=True,
29 | learning_rate=3e-4,
30 | weight_decay=0,
31 | lr_scheduler_type="cosine",
32 | warmup_steps=100,
33 | per_device_train_batch_size=1,
34 | gradient_accumulation_steps=16,
35 | num_train_epochs=1,
36 | fp16=True,
37 | logging_steps=1,
38 | optim="adamw_torch",
39 | save_strategy="steps",
40 | save_steps=1000,
41 | save_total_limit=2,
42 | ddp_find_unused_parameters=False,
43 | )
44 |
45 | tokenizer = dict(
46 | type=AutoTokenizer.from_pretrained,
47 | pretrained_model_name_or_path=pretrained_model_name_or_path,
48 | trust_remote_code=True,
49 | padding_side="right",
50 | )
51 |
52 | model = dict(
53 | type=AutoModelForCausalLM.from_pretrained,
54 | pretrained_model_name_or_path=pretrained_model_name_or_path,
55 | trust_remote_code=True,
56 | torch_dtype=torch.float16,
57 | quantization_config=dict(
58 | type=BitsAndBytesConfig,
59 | load_in_4bit=True,
60 | load_in_8bit=False,
61 | llm_int8_threshold=6.0,
62 | llm_int8_has_fp16_weight=False,
63 | bnb_4bit_compute_dtype=torch.float16,
64 | bnb_4bit_use_double_quant=True,
65 | bnb_4bit_quant_type="nf4",
66 | ),
67 | )
68 |
69 | lora = dict(
70 | type=LoraConfig,
71 | r=64,
72 | lora_alpha=16,
73 | lora_dropout=0.1,
74 | target_modules=["gate_proj", "down_proj", "up_proj"],
75 | bias="none",
76 | task_type="CAUSAL_LM",
77 | )
78 |
79 | train_dataset = dict(
80 | type=process_hf_dataset,
81 | dataset=dict(type=load_dataset, path=dataset_name_or_path),
82 | tokenizer=tokenizer,
83 | max_length=max_length,
84 | dataset_map_fn=alpaca_map_fn,
85 | template_map_fn=dict(type=template_map_fn_factory, template=prompt_template),
86 | remove_unused_columns=True,
87 | shuffle_before_pack=True,
88 | pack_to_max_length=pack_to_max_length,
89 | )
90 |
--------------------------------------------------------------------------------
/xtuner/configs/llama/llama3_8b/README.md:
--------------------------------------------------------------------------------
1 | # Llama3 8B
2 |
3 | ## Install
4 |
5 | ```bash
6 | # Install the latest xtuner
7 | pip install -U 'xtuner[deepspeed]'
8 |
9 | # install the latest transformers
10 | pip install -U transformers
11 | ```
12 |
13 | ## QLoRA Fine-tune
14 |
15 | QLoRA fine-tuning needs only a single A100-80G.
16 |
17 | ```bash
18 | xtuner train llama3_8b_instruct_qlora_alpaca_e3
19 | ```
20 |
21 | ## Full Parameter Fine-tune
22 |
23 | Full-parameter fine-tuning of Llama3 8B with an 8k context requires only 2 * A100-80G.
24 |
25 | ### torchrun
26 |
27 | ```bash
28 | NPROC_PER_NODE=${GPU_NUM} xtuner train llama3_8b_instruct_full_alpaca_e3 --deepspeed deepspeed_zero2
29 | ```
30 |
31 | ### slurm
32 |
33 | ```bash
34 | srun ${SRUN_ARGS} xtuner train llama3_8b_instruct_full_alpaca_e3 --launcher slurm --deepspeed deepspeed_zero3
35 | ```
36 |
37 | ### Speed
38 |
39 | | Model | Sequence Length | GPU Number | ZeRO | Sequence Parallel | Tokens per Second | TFLOPs |
40 | | :-------: | :-------------: | :--------: | :----: | :---------------: | :---------------: | :----: |
41 | | Llama3 8B | 8k | 2 | ZeRO-3 | 2 | 1037.0 | 76.8 |
42 | | Llama3 8B | 8k | 4 | ZeRO-3 | 1 | 2331.3 | 172.6 |
43 | | Llama3 8B | 8k | 8 | ZeRO-3 | 1 | 2771.2 | 205.1 |
44 |
45 | | Model | Sequence Length | GPU Number | ZeRO | Sequence Parallel | Tokens per Second | TFLOPs |
46 | | :-------: | :-------------: | :--------: | :----: | :---------------: | :---------------: | :----: |
47 | | Llama3 8B | 8k | 8 | ZeRO-3 | 1 | 2771.2 | 205.1 |
48 | | Llama3 8B | 16k | 8 | ZeRO-3 | 2 | 2320.7 | 191.7 |
49 | | Llama3 8B | 32k | 8 | ZeRO-3 | 4 | 1870.2 | 186.6 |
50 | | Llama3 8B | 64k | 8 | ZeRO-3 | 8 | 1356.4 | 182.0 |
51 | | Llama3 8B | 128k | 8 | ZeRO-3 | 8 | 875.7 | 177.7 |
52 |
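The "Sequence Parallel" column above corresponds to the `sequence_parallel_size` argument consumed by xtuner's `DeepSpeedStrategy` (see `xtuner/engine/_strategy/deepspeed.py`). A minimal, hypothetical config fragment, assuming the usual mmengine-style `strategy` field, might look like:

```python
# Hypothetical config fragment (not a file from this repo): selects 2-way
# sequence parallelism, matching the "Sequence Parallel = 2" rows above.
from xtuner.engine import DeepSpeedStrategy

strategy = dict(
    type=DeepSpeedStrategy,
    sequence_parallel_size=2,
)
```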
--------------------------------------------------------------------------------
/xtuner/configs/mixtral/README.md:
--------------------------------------------------------------------------------
1 | # Mixtral 8x7B
2 |
3 | ## Install
4 |
5 | ```bash
6 | # Install the latest xtuner
7 | pip install -U 'xtuner[deepspeed]'
8 |
9 | # Mixtral requires flash-attn
10 | pip install flash-attn
11 |
12 | # install the latest transformers
13 | pip install -U transformers
14 | ```
15 |
16 | ## QLoRA Fine-tune
17 |
18 | QLoRA fine-tuning needs only a single A100-80G.
19 |
20 | ```bash
21 | xtuner train mixtral_8x7b_instruct_qlora_oasst1_e3 --deepspeed deepspeed_zero2
22 | ```
23 |
24 | ## Full Parameter Fine-tune
25 |
26 | Full-parameter fine-tuning needs 16 * A100-80G.
27 |
28 | ### slurm
29 |
30 | Note: `$PARTITION` is the Slurm partition to submit the job to.
31 |
32 | ```bash
33 | srun -p $PARTITION --job-name=mixtral --nodes=2 --gres=gpu:8 --ntasks-per-node=8 xtuner train mixtral_8x7b_instruct_full_oasst1_e3 --deepspeed deepspeed_zero3 --launcher slurm
34 | ```
35 |
36 | ### torchrun
37 |
38 | Note: `$NODE_0_ADDR` is the IP address of node 0.
39 |
40 | ```bash
41 | # execute on node 0
42 | NPROC_PER_NODE=8 NNODES=2 PORT=29600 ADDR=$NODE_0_ADDR NODE_RANK=0 xtuner train mixtral_8x7b_instruct_full_oasst1_e3 --deepspeed deepspeed_zero3
43 |
44 | # execute on node 1
45 | NPROC_PER_NODE=8 NNODES=2 PORT=29600 ADDR=$NODE_0_ADDR NODE_RANK=1 xtuner train mixtral_8x7b_instruct_full_oasst1_e3 --deepspeed deepspeed_zero3
46 | ```
47 |
48 | ### Speed
49 |
50 | 16 * A100 80G:
51 |
52 | | Model | Sequence Length | Use Varlen Attn | Sequence Parallel World Size | Tokens per Second |
53 | | :----------: | :-------------: | :-------------: | :--------------------------: | :---------------: |
54 | | mixtral_8x7b | 32k | False | 1 | 853.7 |
55 | | mixtral_8x7b | 32k | True | 1 | 910.1 |
56 | | mixtral_8x7b | 32k | False | 2 | 635.2 |
57 | | mixtral_8x7b | 32k | True | 2 | 650.9 |
58 |
--------------------------------------------------------------------------------
/xtuner/configs/qwen/qwen1_5/qwen1_5_110b_chat/README.md:
--------------------------------------------------------------------------------
1 | # Qwen 110B
2 |
3 | ## Install
4 |
5 | ```bash
6 | # Install the latest xtuner
7 | pip install -U 'xtuner[deepspeed]'
8 |
9 | # We recommend installing flash_attn
10 | # pip install flash-attn
11 |
12 | # install the latest transformers
13 | pip install -U transformers
14 | ```
15 |
16 | ## QLoRA Fine-tune
17 |
18 | QLoRA fine-tuning of Qwen 110B with a 32k context requires only 2 * A100-80G.
19 |
20 | ```bash
21 | xtuner train xtuner/configs/qwen/qwen1_5/qwen1_5_110b_chat/qwen1_5_110b_chat_qlora_alpaca_e3_16k_2gpus.py --deepspeed deepspeed_zero3
22 | ```
23 |
24 |
25 |
26 |
27 |
--------------------------------------------------------------------------------
/xtuner/dataset/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import warnings
3 |
4 | from .concat_dataset import ConcatDataset
5 | from .huggingface import process_hf_dataset
6 | from .intern_repo import (
7 | build_packed_dataset,
8 | load_intern_repo_tokenized_dataset,
9 | load_intern_repo_untokenized_dataset,
10 | )
11 | from .internvl_dataset import InternVL_V1_5_Dataset
12 | from .json_dataset import load_json_file
13 | from .llava import LLaVADataset
14 | from .modelscope import process_ms_dataset
15 | from .moss_sft import MOSSSFTDataset
16 | from .refcoco_json import (
17 | InvRefCOCOJsonDataset,
18 | RefCOCOJsonDataset,
19 | RefCOCOJsonEvalDataset,
20 | )
21 | from .utils import decode_base64_to_image, expand2square, load_image
22 |
23 | # ignore FutureWarning in hf datasets
24 | warnings.simplefilter(action="ignore", category=FutureWarning)
25 |
26 | __all__ = [
27 | "process_hf_dataset",
28 | "ConcatDataset",
29 | "MOSSSFTDataset",
30 | "process_ms_dataset",
31 | "LLaVADataset",
32 | "expand2square",
33 | "decode_base64_to_image",
34 | "load_image",
35 | "load_intern_repo_tokenized_dataset",
36 | "load_intern_repo_untokenized_dataset",
37 | "build_packed_dataset",
38 | "RefCOCOJsonDataset",
39 | "RefCOCOJsonEvalDataset",
40 | "InvRefCOCOJsonDataset",
41 | "load_json_file",
42 | "InternVL_V1_5_Dataset",
43 | ]
44 |
--------------------------------------------------------------------------------
/xtuner/dataset/collate_fns/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .default_collate_fn import default_collate_fn
3 | from .mmlu_collate_fn import mmlu_collate_fn
4 |
5 | __all__ = ["default_collate_fn", "mmlu_collate_fn"]
6 |
--------------------------------------------------------------------------------
/xtuner/dataset/collate_fns/mmlu_collate_fn.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from typing import Dict, Sequence
3 |
4 | import torch
5 | from torch.nn.utils.rnn import pad_sequence
6 |
7 | from xtuner.utils import DEFAULT_PAD_TOKEN_INDEX, IGNORE_INDEX
8 |
9 |
10 | def mmlu_collate_fn(
11 | instances: Sequence[Dict],
12 | pad_index: int = DEFAULT_PAD_TOKEN_INDEX,
13 | return_hf_format: bool = False,
14 | ) -> Dict[str, torch.Tensor]:
15 | input_ids = []
16 | labels = []
17 | data_samples = {"labels": [], "subjects": []}
18 | for example in instances:
19 | input_ids.append(torch.tensor(example["input_ids"]))
20 | labels.append(torch.tensor(example["labels"]))
21 | data_samples["labels"].append(example["output"])
22 | data_samples["subjects"].append(example["subject"])
23 | if len(instances) > 1:
24 | input_ids = pad_sequence(input_ids, batch_first=True, padding_value=pad_index)
25 | labels = pad_sequence(labels, batch_first=True, padding_value=IGNORE_INDEX)
26 | else:
27 | input_ids = torch.stack(input_ids)
28 | labels = torch.stack(labels)
29 |
30 | data_dict = {
31 | "input_ids": input_ids,
32 | "attention_mask": input_ids.ne(pad_index),
33 | "labels": labels,
34 | }
35 |
36 | if return_hf_format:
37 | return data_dict
38 | else:
39 | return {"data": data_dict, "data_samples": data_samples}
40 |
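As a quick illustration of the padding behaviour above, here is a minimal usage sketch (the two instances and their field values are invented; the field names follow the function):

```python
from xtuner.dataset.collate_fns import mmlu_collate_fn

# Two invented instances with unequal lengths; the shorter one gets padded.
instances = [
    {"input_ids": [1, 2, 3], "labels": [-100, -100, 3], "output": "A", "subject": "math"},
    {"input_ids": [1, 2], "labels": [-100, 2], "output": "B", "subject": "law"},
]
batch = mmlu_collate_fn(instances)
# batch["data"]["input_ids"] has shape (2, 3) after padding, and
# batch["data_samples"] keeps the raw answers/subjects: ["A", "B"], ["math", "law"].
```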
--------------------------------------------------------------------------------
/xtuner/dataset/concat_dataset.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from torch.utils.data import ConcatDataset as TorchConcatDataset
3 |
4 | from xtuner.registry import BUILDER
5 |
6 |
7 | class ConcatDataset(TorchConcatDataset):
8 | def __init__(self, datasets):
9 | datasets_instance = []
10 | for cfg in datasets:
11 | datasets_instance.append(BUILDER.build(cfg))
12 | super().__init__(datasets=datasets_instance)
13 |
14 | def __repr__(self):
15 | main_str = "Dataset as a concatenation of multiple datasets. \n"
16 | main_str += ",\n".join([f"{repr(dataset)}" for dataset in self.datasets])
17 | return main_str
18 |
--------------------------------------------------------------------------------
/xtuner/dataset/json_dataset.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import json
3 | import os
4 |
5 | from datasets import Dataset, concatenate_datasets
6 |
7 |
8 | def load_json_file(data_files=None, data_dir=None, suffix=None):
9 | assert (data_files is not None) != (data_dir is not None)
10 | if data_dir is not None:
11 | data_files = os.listdir(data_dir)
12 | data_files = [os.path.join(data_dir, fn) for fn in data_files]
13 | if suffix is not None:
14 | data_files = [fp for fp in data_files if fp.endswith(suffix)]
15 | elif isinstance(data_files, str):
16 | data_files = [data_files]
17 |
18 | dataset_list = []
19 | for fp in data_files:
20 | with open(fp, encoding="utf-8") as file:
21 | data = json.load(file)
22 | ds = Dataset.from_list(data)
23 | dataset_list.append(ds)
24 | dataset = concatenate_datasets(dataset_list)
25 | return dataset
26 |
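A small usage sketch for `load_json_file` (paths and suffix are hypothetical): each file must contain a JSON list of records, exactly one of `data_files` / `data_dir` may be given, and the per-file datasets are concatenated.

```python
from xtuner.dataset import load_json_file

# Load explicit files ...
ds = load_json_file(data_files=["data/part1.json", "data/part2.json"])

# ... or everything under a directory, filtered by suffix.
ds = load_json_file(data_dir="data/", suffix=".json")
print(len(ds), ds.column_names)
```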
--------------------------------------------------------------------------------
/xtuner/dataset/map_fns/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .dataset_map_fns import * # noqa: F401, F403
3 | from .template_map_fn import template_map_fn # noqa: F401
4 | from .template_map_fn import template_map_fn_factory # noqa: F401
5 |
--------------------------------------------------------------------------------
/xtuner/dataset/map_fns/dataset_map_fns/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .alpaca_map_fn import alpaca_map_fn
3 | from .alpaca_zh_map_fn import alpaca_zh_map_fn
4 | from .arxiv_map_fn import arxiv_map_fn
5 | from .code_alpaca_map_fn import code_alpaca_map_fn
6 | from .colors_map_fn import colors_map_fn
7 | from .crime_kg_assitant_map_fn import crime_kg_assitant_map_fn
8 | from .default_map_fn import default_map_fn
9 | from .law_reference_map_fn import law_reference_map_fn
10 | from .llava_map_fn import llava_image_only_map_fn, llava_map_fn
11 | from .medical_map_fn import medical_map_fn
12 | from .msagent_map_fn import msagent_react_map_fn
13 | from .oasst1_map_fn import oasst1_map_fn
14 | from .openai_map_fn import openai_map_fn
15 | from .openorca_map_fn import openorca_map_fn
16 | from .pretrain_map_fn import pretrain_map_fn
17 | from .sql_map_fn import sql_map_fn
18 | from .stack_exchange_map_fn import stack_exchange_map_fn
19 | from .tiny_codes_map_fn import tiny_codes_map_fn
20 | from .wizardlm_map_fn import wizardlm_map_fn
21 |
22 | DATASET_FORMAT_MAPPING = dict(
23 | alpaca=alpaca_map_fn,
24 | alpaca_zh=alpaca_zh_map_fn,
25 | arxiv=arxiv_map_fn,
26 | code_alpaca=code_alpaca_map_fn,
27 | colors=colors_map_fn,
28 | crime_kg_assitan=crime_kg_assitant_map_fn,
29 | default=default_map_fn,
30 | law_reference=law_reference_map_fn,
31 | llava_image_only=llava_image_only_map_fn,
32 | llava=llava_map_fn,
33 | medical=medical_map_fn,
34 | msagent_react=msagent_react_map_fn,
35 | oasst1=oasst1_map_fn,
36 | openai=openai_map_fn,
37 | openorca=openorca_map_fn,
38 | pretrain=pretrain_map_fn,
39 | sql=sql_map_fn,
40 | stack_exchange=stack_exchange_map_fn,
41 | tiny_codes=tiny_codes_map_fn,
42 | wizardlm=wizardlm_map_fn,
43 | )
44 |
45 | __all__ = [
46 | "alpaca_map_fn",
47 | "alpaca_zh_map_fn",
48 | "oasst1_map_fn",
49 | "arxiv_map_fn",
50 | "medical_map_fn",
51 | "openorca_map_fn",
52 | "code_alpaca_map_fn",
53 | "tiny_codes_map_fn",
54 | "colors_map_fn",
55 | "law_reference_map_fn",
56 | "crime_kg_assitant_map_fn",
57 | "sql_map_fn",
58 | "openai_map_fn",
59 | "wizardlm_map_fn",
60 | "stack_exchange_map_fn",
61 | "msagent_react_map_fn",
62 | "pretrain_map_fn",
63 | "default_map_fn",
64 | "llava_image_only_map_fn",
65 | "llava_map_fn",
66 | "DATASET_FORMAT_MAPPING",
67 | ]
68 |
--------------------------------------------------------------------------------
/xtuner/dataset/map_fns/dataset_map_fns/alpaca_map_fn.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 |
3 |
4 | def alpaca_map_fn(example):
5 | if example.get("output") == "":
6 | return {"conversation": []}
7 | else:
8 | return {
9 | "conversation": [
10 | {
11 | "input": f"{example['instruction']}\n{example['input']}",
12 | "output": example["output"],
13 | }
14 | ]
15 | }
16 |
--------------------------------------------------------------------------------
/xtuner/dataset/map_fns/dataset_map_fns/alpaca_zh_map_fn.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 |
3 |
4 | def alpaca_zh_map_fn(example):
5 | return {
6 | "conversation": [
7 | {
8 | "input": f"{example['instruction_zh']}\n{example['input_zh']}",
9 | "output": example["output_zh"],
10 | }
11 | ]
12 | }
13 |
--------------------------------------------------------------------------------
/xtuner/dataset/map_fns/dataset_map_fns/arxiv_map_fn.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from xtuner.utils import SYSTEM_TEMPLATE
3 |
4 |
5 | def arxiv_map_fn(example):
6 | return {
7 | "conversation": [
8 | {
9 | "system": SYSTEM_TEMPLATE.arxiv_gentile,
10 | "input": example["abstract"],
11 | "output": example["title"],
12 | }
13 | ]
14 | }
15 |
--------------------------------------------------------------------------------
/xtuner/dataset/map_fns/dataset_map_fns/code_alpaca_map_fn.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from xtuner.utils import SYSTEM_TEMPLATE
3 |
4 |
5 | def code_alpaca_map_fn(example):
6 | return {
7 | "conversation": [
8 | {
9 | "system": SYSTEM_TEMPLATE.coder,
10 | "input": example["prompt"],
11 | "output": example["completion"],
12 | }
13 | ]
14 | }
15 |
--------------------------------------------------------------------------------
/xtuner/dataset/map_fns/dataset_map_fns/colors_map_fn.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from xtuner.utils import SYSTEM_TEMPLATE
3 |
4 |
5 | def colors_map_fn(example):
6 | desc = ":".join(example["description"].split(":")[1:]).strip()
7 | return {
8 | "conversation": [
9 | {
10 | "system": SYSTEM_TEMPLATE.colorist,
11 | "input": desc,
12 | "output": example["color"],
13 | }
14 | ]
15 | }
16 |
--------------------------------------------------------------------------------
/xtuner/dataset/map_fns/dataset_map_fns/crime_kg_assitant_map_fn.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from xtuner.utils import SYSTEM_TEMPLATE
3 |
4 |
5 | def crime_kg_assitant_map_fn(example):
6 | return {
7 | "conversation": [
8 | {
9 | "system": SYSTEM_TEMPLATE.lawyer,
10 | "input": example["input"],
11 | "output": example["output"],
12 | }
13 | ]
14 | }
15 |
--------------------------------------------------------------------------------
/xtuner/dataset/map_fns/dataset_map_fns/default_map_fn.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | def default_map_fn(example):
3 | return {"conversation": [{"input": example["input"], "output": example["output"]}]}
4 |
--------------------------------------------------------------------------------
/xtuner/dataset/map_fns/dataset_map_fns/law_reference_map_fn.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from xtuner.utils import SYSTEM_TEMPLATE
3 |
4 |
5 | def law_reference_map_fn(example):
6 | return {
7 | "conversation": [
8 | {
9 | "system": SYSTEM_TEMPLATE.lawyer,
10 | "input": example["question"],
11 | "output": example["answer"],
12 | }
13 | ]
14 | }
15 |
--------------------------------------------------------------------------------
/xtuner/dataset/map_fns/dataset_map_fns/llava_map_fn.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from xtuner.utils import DEFAULT_IMAGE_TOKEN
3 |
4 |
5 | def llava_image_only_map_fn(example):
6 | # input contains the DEFAULT_IMAGE_TOKEN only
7 | messages = example["conversations"]
8 | input = ""
9 | conversation = []
10 | while messages and messages[0]["from"] == "gpt":
11 | # Skip the first one if it is from gpt
12 | messages = messages[1:]
13 | for msg in messages:
14 | if msg["from"] == "human":
15 | assert DEFAULT_IMAGE_TOKEN in msg["value"]
16 | input += DEFAULT_IMAGE_TOKEN
17 | elif msg["from"] == "gpt":
18 | conversation.append({"input": input, "output": msg["value"]})
19 | input = ""
20 | else:
21 | raise NotImplementedError
22 | return {"conversation": conversation}
23 |
24 |
25 | def llava_map_fn(example):
26 | messages = example["conversations"]
27 | input = ""
28 | conversation = []
29 | while messages and messages[0]["from"] == "gpt":
30 | # Skip the first one if it is from gpt
31 | messages = messages[1:]
32 | for msg in messages:
33 | if msg["from"] == "human":
34 | if DEFAULT_IMAGE_TOKEN in msg["value"]:
35 | msg["value"] = msg["value"].replace(DEFAULT_IMAGE_TOKEN, "").strip()
36 | msg["value"] = DEFAULT_IMAGE_TOKEN + "\n" + msg["value"]
37 | msg["value"] = msg["value"].strip()
38 | input += msg["value"]
39 |
40 | elif msg["from"] == "gpt":
41 | conversation.append({"input": input, "output": msg["value"]})
42 | input = ""
43 | else:
44 | raise NotImplementedError
45 | return {"conversation": conversation}
46 |
--------------------------------------------------------------------------------
/xtuner/dataset/map_fns/dataset_map_fns/medical_map_fn.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from xtuner.utils import SYSTEM_TEMPLATE
3 |
4 |
5 | def medical_map_fn(example):
6 | return {
7 | "conversation": [
8 | {
9 | "system": SYSTEM_TEMPLATE.medical,
10 | "input": "{instruction}\n{input}".format(**example),
11 | "output": example["output"],
12 | }
13 | ]
14 | }
15 |
--------------------------------------------------------------------------------
/xtuner/dataset/map_fns/dataset_map_fns/oasst1_map_fn.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | def oasst1_map_fn(example):
3 | r"""Example before preprocessing:
4 | example['text'] = '### Human: Can you explain xxx'
5 | '### Assistant: Sure! xxx'
6 | '### Human: I didn't understand how xxx'
7 | '### Assistant: It has to do with a process xxx.'
8 |
9 | Example after preprocessing:
10 | example['conversation'] = [
11 | {
12 | 'input': 'Can you explain xxx',
13 | 'output': 'Sure! xxx'
14 | },
15 | {
16 | 'input': 'I didn't understand how xxx',
17 | 'output': 'It has to do with a process xxx.'
18 | }
19 | ]
20 | """
21 | data = []
22 | for sentence in example["text"].strip().split("###"):
23 | sentence = sentence.strip()
24 | if sentence[:6] == "Human:":
25 | data.append(sentence[6:].strip())
26 | elif sentence[:10] == "Assistant:":
27 | data.append(sentence[10:].strip())
28 | if len(data) % 2:
29 | # The last round of conversation solely consists of input
30 | # without any output.
31 | # Discard the input part of the last round, as this part is ignored in
32 | # the loss calculation.
33 | data.pop()
34 | conversation = []
35 | for i in range(0, len(data), 2):
36 | single_turn_conversation = {"input": data[i], "output": data[i + 1]}
37 | conversation.append(single_turn_conversation)
38 | return {"conversation": conversation}
39 |
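To make the docstring's before/after concrete, here is a minimal sketch of calling the map function directly (the conversation text is invented):

```python
from xtuner.dataset.map_fns import oasst1_map_fn

example = {
    "text": "### Human: Can you explain X?"
            "### Assistant: Sure! X works like this."
            "### Human: Thanks!"  # unanswered final human turn is discarded
}
print(oasst1_map_fn(example))
# {'conversation': [{'input': 'Can you explain X?',
#                    'output': 'Sure! X works like this.'}]}
```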
--------------------------------------------------------------------------------
/xtuner/dataset/map_fns/dataset_map_fns/openai_map_fn.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | def openai_map_fn(example):
3 | """
4 | Example before preprocessing:
5 | example["messages"] = [
6 | { "role": "system", "content": "You are an assistant that
7 | occasionally misspells words." },
8 | { "role": "user", "content": "Tell me a story." },
9 | { "role": "assistant", "content": "One day a student
10 | went to schoool." }
11 | ]
12 | Example after preprocessing:
13 | example["conversation"] = [
14 | {
15 | "system": "You are an assistant that occasionally misspells
16 | words.",
17 | "input": "Tell me a story.",
18 | "output": "One day a student went to schoool."
19 | }
20 | ]
21 | """
22 | messages = example["messages"]
23 | system = ""
24 | input = ""
25 | conversation = []
26 | while messages and messages[0]["role"] == "assistant":
27 | # Skip the first one if it is from assistant
28 | messages = messages[1:]
29 | for msg in messages:
30 | if msg["role"] == "system":
31 | system = msg["content"]
32 | elif msg["role"] == "user":
33 | input += msg["content"]
34 | elif msg["role"] == "assistant":
35 | output_with_loss = msg.get("loss", "True")
36 | output_with_loss = str(output_with_loss)
37 | output_with_loss = output_with_loss.lower() == "true"
38 | conversation.append(
39 | {
40 | "system": system,
41 | "input": input,
42 | "output": msg["content"],
43 | "output_with_loss": output_with_loss,
44 | }
45 | )
46 | system = ""
47 | input = ""
48 | else:
49 | raise NotImplementedError
50 | return {"conversation": conversation}
51 |
--------------------------------------------------------------------------------
/xtuner/dataset/map_fns/dataset_map_fns/openorca_map_fn.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | def openorca_map_fn(example):
3 | return {
4 | "conversation": [
5 | {
6 | "system": example["system_prompt"],
7 | "input": example["question"],
8 | "output": example["response"],
9 | }
10 | ]
11 | }
12 |
--------------------------------------------------------------------------------
/xtuner/dataset/map_fns/dataset_map_fns/pretrain_map_fn.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | def pretrain_map_fn(example):
3 | r"""Example before preprocessing:
4 | example['text'] = 'xxx'
5 |
6 | Example after preprocessing:
7 | example['conversation'] = [
8 | {
9 | 'input': '',
10 | 'output': 'xxx'
11 | },
12 | ]
13 | """
14 | return {
15 | "conversation": [
16 | {"input": "", "output": example["text"].strip(), "need_eos_token": False}
17 | ]
18 | }
19 |
--------------------------------------------------------------------------------
/xtuner/dataset/map_fns/dataset_map_fns/sql_map_fn.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from xtuner.utils import SYSTEM_TEMPLATE
3 |
4 |
5 | def sql_map_fn(example):
6 | return {
7 | "conversation": [
8 | {
9 | "system": SYSTEM_TEMPLATE.sql,
10 | "input": "{context}\n{question}".format(**example),
11 | "output": example["answer"],
12 | }
13 | ]
14 | }
15 |
--------------------------------------------------------------------------------
/xtuner/dataset/map_fns/dataset_map_fns/stack_exchange_map_fn.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | def stack_exchange_map_fn(example):
3 | return {
4 | "conversation": [{"input": example["question"], "output": example["response"]}]
5 | }
6 |
--------------------------------------------------------------------------------
/xtuner/dataset/map_fns/dataset_map_fns/tiny_codes_map_fn.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from xtuner.utils import SYSTEM_TEMPLATE
3 |
4 |
5 | def tiny_codes_map_fn(example):
6 | return {
7 | "conversation": [
8 | {
9 | "system": SYSTEM_TEMPLATE.coder,
10 | "input": example["prompt"],
11 | "output": example["response"],
12 | }
13 | ]
14 | }
15 |
--------------------------------------------------------------------------------
/xtuner/dataset/map_fns/dataset_map_fns/wizardlm_map_fn.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | def wizardlm_map_fn(example):
3 | messages = example["conversations"]
4 | input = ""
5 | conversation = []
6 | while messages and messages[0]["from"] == "gpt":
7 | # Skip the first one if it is from gpt
8 | messages = messages[1:]
9 | for msg in messages:
10 | if msg["from"] == "human":
11 | input += msg["value"]
12 | elif msg["from"] == "gpt":
13 | conversation.append({"input": input, "output": msg["value"]})
14 | input = ""
15 | else:
16 | raise NotImplementedError
17 | return {"conversation": conversation}
18 |
--------------------------------------------------------------------------------
/xtuner/dataset/map_fns/template_map_fn.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from functools import partial
3 |
4 | from mmengine.utils.misc import get_object_from_string
5 |
6 |
7 | def template_map_fn(example, template):
8 | conversation = example.get("conversation", [])
9 | for i, single_turn_conversation in enumerate(conversation):
10 | input = single_turn_conversation.get("input", "")
11 | if input is None:
12 | input = ""
13 | input_text = template.INSTRUCTION.format(input=input, round=i + 1)
14 | system = single_turn_conversation.get("system", "")
15 | if system != "" and system is not None:
16 | system = template.SYSTEM.format(system=system)
17 | input_text = system + input_text
18 | single_turn_conversation["input"] = input_text
19 |
20 | if template.get("SUFFIX", None):
21 | output_text = single_turn_conversation.get("output", "")
22 | output_text += template.SUFFIX
23 | single_turn_conversation["output"] = output_text
24 |
25 | # SUFFIX_AS_EOS is False ==> need_eos_token is True
26 | single_turn_conversation["need_eos_token"] = not template.get(
27 | "SUFFIX_AS_EOS", False
28 | )
29 | single_turn_conversation["sep"] = template.get("SEP", "")
30 |
31 | return {"conversation": conversation}
32 |
33 |
34 | def template_map_fn_factory(template):
35 | if isinstance(template, str): # for resume
36 | template = get_object_from_string(template)
37 | return partial(template_map_fn, template=template)
38 |
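A minimal sketch of `template_map_fn_factory` in action. The configs earlier in this repo pass one of xtuner's built-in `PROMPT_TEMPLATE`s; here a hand-written `ConfigDict` template with invented strings is used instead, so only the field names (`SYSTEM`, `INSTRUCTION`, `SUFFIX`, `SUFFIX_AS_EOS`, `SEP`) follow the code above:

```python
from mmengine.config import ConfigDict

from xtuner.dataset.map_fns import template_map_fn_factory

template = ConfigDict(
    SYSTEM="<|system|>{system}\n",
    INSTRUCTION="<|user|>{input}\n<|assistant|>",
    SUFFIX="</s>",
    SUFFIX_AS_EOS=True,
    SEP="\n",
)
map_fn = template_map_fn_factory(template)

example = {"conversation": [{"system": "Be brief.", "input": "Hi", "output": "Hello"}]}
print(map_fn(example))
# input  -> '<|system|>Be brief.\n<|user|>Hi\n<|assistant|>'
# output -> 'Hello</s>', need_eos_token False (SUFFIX_AS_EOS is True), sep '\n'
```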
--------------------------------------------------------------------------------
/xtuner/dataset/modelscope.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from mmengine.config import Config, ConfigDict
3 |
4 | from xtuner.registry import BUILDER
5 |
6 | from .huggingface import process_hf_dataset
7 |
8 |
9 | def process_ms_dataset(dataset, split="train", *args, **kwargs):
10 | """Post-process the dataset loaded from the ModelScope Hub."""
11 |
12 | if isinstance(dataset, (Config, ConfigDict)):
13 | dataset = BUILDER.build(dataset)
14 | if isinstance(dataset, dict):
15 | dataset = dataset[split]
16 | dataset = dataset.to_hf_dataset()
17 | return process_hf_dataset(dataset, *args, **kwargs)
18 |
--------------------------------------------------------------------------------
/xtuner/dataset/samplers/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .intern_repo import InternlmRepoSampler, InternRepoSampler
3 | from .length_grouped import LengthGroupedSampler
4 |
5 | __all__ = ["LengthGroupedSampler", "InternRepoSampler", "InternlmRepoSampler"]
6 |
--------------------------------------------------------------------------------
/xtuner/engine/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from ._strategy import DeepSpeedStrategy
3 | from .hooks import (
4 | DatasetInfoHook,
5 | EvaluateChatHook,
6 | ThroughputHook,
7 | VarlenAttnArgsToMessageHubHook,
8 | )
9 | from .runner import TrainLoop
10 |
11 | __all__ = [
12 | "EvaluateChatHook",
13 | "DatasetInfoHook",
14 | "ThroughputHook",
15 | "VarlenAttnArgsToMessageHubHook",
16 | "DeepSpeedStrategy",
17 | "TrainLoop",
18 | ]
19 |
--------------------------------------------------------------------------------
/xtuner/engine/_strategy/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .deepspeed import DeepSpeedStrategy
3 |
4 | __all__ = ["DeepSpeedStrategy"]
5 |
--------------------------------------------------------------------------------
/xtuner/engine/_strategy/deepspeed.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from typing import Optional
3 |
4 | from mmengine._strategy import DeepSpeedStrategy as MMEngineDeepSpeedStrategy
5 |
6 | from xtuner import DS_CEPH_DIR
7 | from xtuner.parallel.sequence import init_sequence_parallel
8 | from xtuner.utils.device import get_device
9 | from xtuner.utils.fileio import patch_fileio
10 |
11 |
12 | class DeepSpeedStrategy(MMEngineDeepSpeedStrategy):
13 | def __init__(self, *args, **kwargs):
14 | sequence_parallel_size = kwargs.pop("sequence_parallel_size", 1)
15 | self.sequence_parallel_size = sequence_parallel_size
16 |
17 | super().__init__(*args, **kwargs)
18 |
19 | from transformers.integrations.deepspeed import HfDeepSpeedConfig
20 |
21 | # hf_deepspeed_config has to be saved as an attribute.
22 | self.hf_deepspeed_config = HfDeepSpeedConfig(self.config)
23 |
24 | def _wrap_model(self, model):
25 | wrapper = super()._wrap_model(model)
26 | # hard code for deepspeed zero3
27 | # When utilizing Zero3, the model isn't allocated to CUDA within the
28 | # `deepspeed.initialize` process.
29 | assert hasattr(wrapper.model, "data_preprocessor")
30 | wrapper.model.data_preprocessor.to(get_device())
31 | return wrapper
32 |
33 | def save_checkpoint(self, *args, **kwargs) -> None:
34 | if DS_CEPH_DIR:
35 | from os import path as osp
36 |
37 | work_dir_prefix = osp.split(self.work_dir)[0]
38 |
39 | filename = kwargs["filename"].replace(work_dir_prefix, DS_CEPH_DIR)
40 | kwargs["filename"] = filename
41 | with patch_fileio():
42 | super().save_checkpoint(*args, **kwargs)
43 | else:
44 | super().save_checkpoint(*args, **kwargs)
45 |
46 | def load_checkpoint(self, *args, **kwargs) -> None:
47 | if DS_CEPH_DIR:
48 | with patch_fileio():
49 | checkpoint = super().load_checkpoint(*args, **kwargs)
50 | else:
51 | checkpoint = super().load_checkpoint(*args, **kwargs)
52 | return checkpoint
53 |
54 | def resume(self, *args, **kwargs) -> None:
55 | if DS_CEPH_DIR:
56 | with patch_fileio():
57 | checkpoint = super().resume(*args, **kwargs)
58 | else:
59 | checkpoint = super().resume(*args, **kwargs)
60 | return checkpoint
61 |
62 | def _setup_distributed( # type: ignore
63 | self,
64 | launcher: Optional[str] = None,
65 | backend: str = "nccl",
66 | **kwargs,
67 | ):
68 | super()._setup_distributed(launcher, backend, **kwargs)
69 | init_sequence_parallel(self.sequence_parallel_size)
70 |
--------------------------------------------------------------------------------
/xtuner/engine/hooks/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .dataset_info_hook import DatasetInfoHook
3 | from .evaluate_chat_hook import EvaluateChatHook
4 | from .hf_checkpoint_hook import HFCheckpointHook
5 | from .throughput_hook import ThroughputHook
6 | from .varlen_attn_args_to_messagehub_hook import VarlenAttnArgsToMessageHubHook
7 |
8 | __all__ = [
9 | "EvaluateChatHook",
10 | "DatasetInfoHook",
11 | "ThroughputHook",
12 | "VarlenAttnArgsToMessageHubHook",
13 | "HFCheckpointHook",
14 | ]
15 |
--------------------------------------------------------------------------------
/xtuner/engine/hooks/dataset_info_hook.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from mmengine.hooks import Hook
3 |
4 | from xtuner.registry import BUILDER
5 | from xtuner.utils import DEFAULT_IMAGE_TOKEN, IMAGE_TOKEN_INDEX
6 |
7 |
8 | def split_list(lst, value):
9 | res = []
10 | tmp_res = []
11 | for i in lst:
12 | if i == value:
13 | res.append(tmp_res)
14 | tmp_res = []
15 | else:
16 | tmp_res.append(i)
17 | res.append(tmp_res)
18 | return res
19 |
20 |
21 | class DatasetInfoHook(Hook):
22 | def __init__(self, tokenizer, is_intern_repo_dataset=False):
23 | self.tokenizer = BUILDER.build(tokenizer)
24 | self.is_intern_repo_dataset = is_intern_repo_dataset
25 |
26 | def log(self, runner, dataset, mode="train"):
27 | def _log(input_ids, log_prefix=""):
28 | if self.is_intern_repo_dataset:
29 | input_ids = [abs(x) for x in input_ids]
30 | # Try to split list to be compatible with IMAGE token
31 | input_ids = split_list(input_ids, IMAGE_TOKEN_INDEX)
32 | text = log_prefix
33 | for idx, ids in enumerate(input_ids):
34 | text += self.tokenizer.decode(ids)
35 | if idx != len(input_ids) - 1:
36 | text += DEFAULT_IMAGE_TOKEN
37 | runner.logger.info(text)
38 |
39 | runner.logger.info(f"Num {mode} samples {len(dataset)}")
40 | runner.logger.info(f"{mode} example:")
41 | if "chosen_ids" in dataset[0]:
42 | _log(dataset[0]["chosen_ids"], log_prefix="chosen: ")
43 | _log(dataset[0]["rejected_ids"], log_prefix="rejected: ")
44 | else:
45 | _log(dataset[0]["input_ids"])
46 |
47 | def before_train(self, runner) -> None:
48 | do_train = runner.train_loop is not None
49 | do_eval = runner.val_loop is not None
50 | if do_train:
51 | train_dataset = runner.train_dataloader.dataset
52 | self.log(runner, train_dataset, mode="train")
53 | if do_eval:
54 | eval_dataset = runner.val_dataloader.dataset
55 | self.log(runner, eval_dataset, mode="eval")
56 |
57 | def before_val(self, runner) -> None:
58 | eval_dataset = runner.val_dataloader.dataset
59 | self.log(runner, eval_dataset, mode="eval")
60 |
61 | def before_test(self, runner) -> None:
62 | test_dataset = runner.test_dataloader.dataset
63 | self.log(runner, test_dataset, mode="test")
64 |
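The image-token handling in `_log` above relies on `split_list`; a tiny sketch of its behaviour (the ids and the split marker are invented placeholders):

```python
from xtuner.engine.hooks.dataset_info_hook import split_list

# A placeholder marker splits the id stream into text chunks; _log decodes each
# chunk and re-joins them with DEFAULT_IMAGE_TOKEN.
print(split_list([101, 102, -200, 103], -200))
# [[101, 102], [103]]
```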
--------------------------------------------------------------------------------
/xtuner/engine/runner/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .loops import TrainLoop
3 |
4 | __all__ = ["TrainLoop"]
5 |
--------------------------------------------------------------------------------
/xtuner/engine/runner/loops.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from typing import Dict, Optional, Union
3 |
4 | from mmengine.runner import IterBasedTrainLoop
5 | from torch.utils.data import DataLoader
6 |
7 |
8 | class TrainLoop(IterBasedTrainLoop):
9 | def __init__(
10 | self,
11 | runner,
12 | dataloader: Union[DataLoader, Dict],
13 | max_iters: Optional[int] = None,
14 | max_epochs: Union[int, float] = None,
15 | **kwargs,
16 | ) -> None:
17 | if max_iters is None and max_epochs is None:
18 | raise RuntimeError(
19 | "Please specify the `max_iters` or " "`max_epochs` in `train_cfg`."
20 | )
21 | elif max_iters is not None and max_epochs is not None:
22 | raise RuntimeError(
23 | "Only one of `max_iters` or `max_epochs` can " "exist in `train_cfg`."
24 | )
25 | else:
26 | if max_iters is not None:
27 | iters = int(max_iters)
28 | assert iters == max_iters, (
29 | "`max_iters` should be an integer " f"number, but got {max_iters}"
30 | )
31 | elif max_epochs is not None:
32 | if isinstance(dataloader, dict):
33 | diff_rank_seed = runner._randomness_cfg.get("diff_rank_seed", False)
34 | dataloader = runner.build_dataloader(
35 | dataloader, seed=runner.seed, diff_rank_seed=diff_rank_seed
36 | )
37 | iters = max_epochs * len(dataloader)
38 | else:
39 | raise NotImplementedError
40 | super().__init__(
41 | runner=runner, dataloader=dataloader, max_iters=iters, **kwargs
42 | )
43 |
--------------------------------------------------------------------------------
/xtuner/evaluation/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .metrics import MMLUMetric
3 |
4 | __all__ = ["MMLUMetric"]
5 |
--------------------------------------------------------------------------------
/xtuner/evaluation/metrics/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .mmlu_metric import MMLUMetric
3 |
4 | __all__ = ["MMLUMetric"]
5 |
--------------------------------------------------------------------------------
/xtuner/model/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .internvl import InternVL_V1_5
3 | from .llava import LLaVAModel
4 | from .sft import SupervisedFinetune
5 |
6 | __all__ = ["SupervisedFinetune", "LLaVAModel", "InternVL_V1_5"]
7 |
--------------------------------------------------------------------------------
/xtuner/model/modules/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .dispatch import dispatch_modules
3 | from .projector import ProjectorConfig, ProjectorModel
4 |
5 | __all__ = ["dispatch_modules", "ProjectorConfig", "ProjectorModel"]
6 |
--------------------------------------------------------------------------------
/xtuner/model/modules/dispatch/triton_kernels/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .layer_norm import layer_norm_forward
3 | from .rms_norm import rms_norm_forward
4 | from .rotary import apply_rotary_emb
5 |
6 | __all__ = ["rms_norm_forward", "layer_norm_forward", "apply_rotary_emb"]
7 |
--------------------------------------------------------------------------------
/xtuner/model/modules/dispatch/triton_kernels/layer_norm.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import torch
3 | import torch.nn.functional as F
4 |
5 |
6 | def layer_norm_forward(self, hidden_states):
7 | input_dtype = hidden_states.dtype
8 | hidden_states = hidden_states.to(torch.float32)
9 | hidden_states = F.layer_norm(
10 | hidden_states, (hidden_states.shape[-1],), eps=self.variance_epsilon
11 | )
12 | hidden_states = self.weight.to(torch.float32) * hidden_states
13 | return hidden_states.to(input_dtype)
14 |
--------------------------------------------------------------------------------
/xtuner/model/modules/dispatch/utils.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import torch
3 | import torch.nn.functional as F
4 |
5 | try:
6 | from flash_attn.bert_padding import index_first_axis, unpad_input
7 | except ImportError:
8 | pass
9 |
10 |
11 | def _get_unpad_data(attention_mask):
12 | seqlens_in_batch = attention_mask.sum(dim=-1, dtype=torch.int32)
13 | indices = torch.nonzero(attention_mask.flatten(), as_tuple=False).flatten()
14 | max_seqlen_in_batch = seqlens_in_batch.max().item()
15 | cu_seqlens = F.pad(
16 | torch.cumsum(seqlens_in_batch, dim=0, dtype=torch.torch.int32), (1, 0)
17 | )
18 | return (
19 | indices,
20 | cu_seqlens,
21 | max_seqlen_in_batch,
22 | )
23 |
24 |
25 | def upad_qkv(query_layer, key_layer, value_layer, attention_mask, query_length):
26 | indices_k, cu_seqlens_k, max_seqlen_in_batch_k = _get_unpad_data(attention_mask)
27 | batch_size, kv_seq_len, num_key_value_heads, head_dim = key_layer.shape
28 |
29 | key_layer = index_first_axis(
30 | key_layer.reshape(batch_size * kv_seq_len, num_key_value_heads, head_dim),
31 | indices_k,
32 | )
33 | value_layer = index_first_axis(
34 | value_layer.reshape(batch_size * kv_seq_len, num_key_value_heads, head_dim),
35 | indices_k,
36 | )
37 | if query_length == kv_seq_len:
38 | # Different from the origin version as sequence parallel change
39 | # the number of attention heads.
40 | query_layer = index_first_axis(
41 | query_layer.reshape(batch_size * kv_seq_len, -1, head_dim), indices_k
42 | )
43 | cu_seqlens_q = cu_seqlens_k
44 | max_seqlen_in_batch_q = max_seqlen_in_batch_k
45 | indices_q = indices_k
46 | elif query_length == 1:
47 | max_seqlen_in_batch_q = 1
48 | cu_seqlens_q = torch.arange(
49 | batch_size + 1, dtype=torch.int32, device=query_layer.device
50 | ) # There is a memcpy here, that is very bad.
51 | indices_q = cu_seqlens_q[:-1]
52 | query_layer = query_layer.squeeze(1)
53 | else:
54 | # The -q_len: slice assumes left padding.
55 | attention_mask = attention_mask[:, -query_length:]
56 | query_layer, indices_q, cu_seqlens_q, max_seqlen_in_batch_q = unpad_input(
57 | query_layer, attention_mask
58 | )
59 |
60 | return (
61 | query_layer,
62 | key_layer,
63 | value_layer,
64 | indices_q,
65 | (cu_seqlens_q, cu_seqlens_k),
66 | (max_seqlen_in_batch_q, max_seqlen_in_batch_k),
67 | )
68 |
--------------------------------------------------------------------------------
/xtuner/model/modules/projector/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from transformers import AutoConfig, AutoModel
3 |
4 | from .configuration_projector import ProjectorConfig
5 | from .modeling_projector import ProjectorModel
6 |
7 | AutoConfig.register("projector", ProjectorConfig)
8 | AutoModel.register(ProjectorConfig, ProjectorModel)
9 |
10 | __all__ = ["ProjectorConfig", "ProjectorModel"]
11 |
--------------------------------------------------------------------------------
/xtuner/model/modules/projector/configuration_projector.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from transformers import PretrainedConfig
3 |
4 |
5 | class ProjectorConfig(PretrainedConfig):
6 | model_type = "projector"
7 | _auto_class = "AutoConfig"
8 |
9 | def __init__(
10 | self,
11 | visual_hidden_size=4096,
12 | llm_hidden_size=4096,
13 | depth=2,
14 | hidden_act="gelu",
15 | bias=True,
16 | **kwargs,
17 | ):
18 | self.visual_hidden_size = visual_hidden_size
19 | self.llm_hidden_size = llm_hidden_size
20 | self.depth = depth
21 | self.hidden_act = hidden_act
22 | self.bias = bias
23 | super().__init__(**kwargs)
24 |
--------------------------------------------------------------------------------
/xtuner/model/modules/projector/modeling_projector.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import torch
3 | import torch.nn as nn
4 | from transformers import PreTrainedModel
5 | from transformers.activations import ACT2FN
6 |
7 | from .configuration_projector import ProjectorConfig
8 |
9 |
10 | class ProjectorModel(PreTrainedModel):
11 | _auto_class = "AutoModel"
12 | config_class = ProjectorConfig
13 | base_model_prefix = "model"
14 | supports_gradient_checkpointing = True
15 |
16 | def __init__(self, config: ProjectorConfig) -> None:
17 | super().__init__(config)
18 | self.gradient_checkpointing = False
19 |
20 | modules = [
21 | nn.Linear(
22 | config.visual_hidden_size, config.llm_hidden_size, bias=config.bias
23 | )
24 | ]
25 | for _ in range(1, config.depth):
26 | modules.append(ACT2FN[config.hidden_act])
27 | modules.append(
28 | nn.Linear(
29 | config.llm_hidden_size, config.llm_hidden_size, bias=config.bias
30 | )
31 | )
32 | self.model = nn.Sequential(*modules)
33 |
34 | def enable_input_require_grads(self):
35 | def make_inputs_require_grad(module, input, output):
36 | output.requires_grad_(True)
37 |
38 | self.model.register_forward_hook(make_inputs_require_grad)
39 |
40 | def _set_gradient_checkpointing(self, module, value=False):
41 | if isinstance(module, ProjectorModel):
42 | module.gradient_checkpointing = value
43 |
44 | def forward(self, x):
45 | if self.gradient_checkpointing and self.training:
46 | layer_outputs = torch.utils.checkpoint.checkpoint(self.model, x)
47 | else:
48 | layer_outputs = self.model(x)
49 | return layer_outputs
50 |
--------------------------------------------------------------------------------
/xtuner/model/transformers_models/__init__.py:
--------------------------------------------------------------------------------
1 | from .deepseek_v2 import DeepseekTokenizerFast, DeepseekV2Config, DeepseekV2ForCausalLM, DeepseekV2Model
2 | from .mixtral import MixtralConfig, MixtralForCausalLM, MixtralModel
3 |
4 | __all__ = [
5 | 'DeepseekTokenizerFast', 'DeepseekV2Config', 'DeepseekV2ForCausalLM', 'DeepseekV2Model', 'MixtralConfig',
6 | 'MixtralForCausalLM', 'MixtralModel'
7 | ]
8 |
--------------------------------------------------------------------------------
/xtuner/model/transformers_models/deepseek_v2/__init__.py:
--------------------------------------------------------------------------------
1 | from .configuration_deepseek import DeepseekV2Config
2 | from .modeling_deepseek import DeepseekV2ForCausalLM, DeepseekV2Model
3 | from .tokenization_deepseek_fast import DeepseekTokenizerFast
4 |
5 | __all__ = ['DeepseekV2ForCausalLM', 'DeepseekV2Model', 'DeepseekV2Config', 'DeepseekTokenizerFast']
6 |
--------------------------------------------------------------------------------
/xtuner/model/transformers_models/deepseek_v2/tokenization_deepseek_fast.py:
--------------------------------------------------------------------------------
1 | from typing import List, Optional, Union
2 |
3 | from transformers.models.llama import LlamaTokenizerFast
4 |
5 |
6 | class DeepseekTokenizerFast(LlamaTokenizerFast):
7 |
8 | def convert_ids_to_tokens(self,
9 | ids: Union[int, List[int]],
10 | skip_special_tokens: bool = False) -> Union[str, List[str]]:
11 | """Converts a single index or a sequence of indices in a token or a
12 | sequence of tokens, using the vocabulary and added tokens.
13 |
14 | Args:
15 | ids (`int` or `List[int]`):
16 | The token id (or token ids) to convert to tokens.
17 | skip_special_tokens (`bool`, *optional*, defaults to `False`):
18 | Whether or not to remove special tokens in the decoding.
19 |
20 | Returns:
21 | `str` or `List[str]`: The decoded token(s).
22 | """
23 | if isinstance(ids, int):
24 | return self._convert_id_to_token(ids)
25 | tokens = []
26 | for index in ids:
27 | index = int(index)
28 | if skip_special_tokens and index in self.all_special_ids:
29 | continue
30 | token = self._tokenizer.id_to_token(index)
31 | tokens.append(token if token is not None else '')
32 | return tokens
33 |
34 | def _convert_id_to_token(self, index: int) -> Optional[str]:
35 | token = self._tokenizer.id_to_token(int(index))
36 | return token if token is not None else ''
37 |
--------------------------------------------------------------------------------
/xtuner/model/transformers_models/mixtral/__init__.py:
--------------------------------------------------------------------------------
1 | from .configuration_mixtral import MixtralConfig
2 | from .modeling_mixtral import MixtralForCausalLM, MixtralModel
3 |
4 | __all__ = ['MixtralForCausalLM', 'MixtralModel', 'MixtralConfig']
5 |
--------------------------------------------------------------------------------
/xtuner/parallel/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .sequence import * # noqa: F401, F403
3 |
--------------------------------------------------------------------------------
/xtuner/parallel/sequence/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from mmengine.dist import init_dist
3 |
4 | from .attention import (
5 | post_process_for_sequence_parallel_attn,
6 | pre_process_for_sequence_parallel_attn,
7 | sequence_parallel_wrapper,
8 | )
9 | from .comm import (
10 | all_to_all,
11 | gather_for_sequence_parallel,
12 | gather_forward_split_backward,
13 | split_for_sequence_parallel,
14 | split_forward_gather_backward,
15 | )
16 | from .data_collate import (
17 | pad_cumulative_len_for_sequence_parallel,
18 | pad_for_sequence_parallel,
19 | )
20 | from .reduce_loss import reduce_sequence_parallel_loss
21 | from .sampler import SequenceParallelSampler
22 | from .setup_distributed import (
23 | get_data_parallel_group,
24 | get_data_parallel_rank,
25 | get_data_parallel_world_size,
26 | get_inner_sequence_parallel_group,
27 | get_inner_sequence_parallel_rank,
28 | get_inner_sequence_parallel_world_size,
29 | get_sequence_parallel_group,
30 | get_sequence_parallel_rank,
31 | get_sequence_parallel_world_size,
32 | init_inner_sequence_parallel,
33 | init_sequence_parallel,
34 | is_inner_sequence_parallel_initialized,
35 | )
36 |
37 | __all__ = [
38 | "sequence_parallel_wrapper",
39 | "pre_process_for_sequence_parallel_attn",
40 | "post_process_for_sequence_parallel_attn",
41 | "pad_for_sequence_parallel",
42 | "split_for_sequence_parallel",
43 | "SequenceParallelSampler",
44 | "init_sequence_parallel",
45 | "get_sequence_parallel_group",
46 | "get_sequence_parallel_world_size",
47 | "get_sequence_parallel_rank",
48 | "get_data_parallel_group",
49 | "get_data_parallel_world_size",
50 | "get_data_parallel_rank",
51 | "reduce_sequence_parallel_loss",
52 | "init_dist",
53 | "all_to_all",
54 | "gather_for_sequence_parallel",
55 | "split_forward_gather_backward",
56 | "gather_forward_split_backward",
57 | "get_inner_sequence_parallel_group",
58 | "get_inner_sequence_parallel_rank",
59 | "get_inner_sequence_parallel_world_size",
60 | "init_inner_sequence_parallel",
61 | "is_inner_sequence_parallel_initialized",
62 | "pad_cumulative_len_for_sequence_parallel",
63 | ]
64 |
--------------------------------------------------------------------------------
/xtuner/parallel/sequence/data_collate.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import torch
3 |
4 | from .setup_distributed import get_sequence_parallel_world_size
5 |
6 |
7 | def pad_for_sequence_parallel(tensor, padding_value, dim=-1):
8 | length = tensor.shape[dim]
9 | seq_parallel_world_size = get_sequence_parallel_world_size()
10 | if length % seq_parallel_world_size == 0:
11 | return tensor
12 |
13 | pad_num = seq_parallel_world_size - (length % seq_parallel_world_size)
14 | pad_shape = (
15 | (*tensor.shape[:dim], pad_num, *tensor.shape[dim + 1 :])
16 | if dim != -1
17 | else (*tensor.shape[:dim], pad_num)
18 | )
19 | pad = torch.full(pad_shape, padding_value, dtype=tensor.dtype, device=tensor.device)
20 | tensor = torch.cat([tensor, pad], dim=dim)
21 | return tensor
22 |
23 |
24 | # This function is only used when both of the following conditions are met:
25 | # 1. use_varlen_attn = True
26 | # 2. pack_to_max_length = True and the lengths of the packed sequences differ
27 | def pad_cumulative_len_for_sequence_parallel(cumulative_len):
28 | assert len(cumulative_len) == 1
29 | seqlen = cumulative_len[0][-1]
30 | seq_parallel_world_size = get_sequence_parallel_world_size()
31 | if seqlen % seq_parallel_world_size == 0:
32 | return cumulative_len, None
33 |
34 | bs = len(cumulative_len)
35 | pad_len = seq_parallel_world_size - (seqlen % seq_parallel_world_size)
36 | seqlen_new = seqlen + pad_len
37 | attention_mask = torch.zeros(
38 | bs, seqlen_new, dtype=torch.bool, device=cumulative_len[0].device
39 | )
40 | attention_mask[:, :seqlen] = True
41 |
42 | for i, cu_len in enumerate(cumulative_len):
43 | pad = torch.tensor([seqlen_new], device=cu_len.device, dtype=cu_len.dtype)
44 | cumulative_len[i] = torch.cat([cu_len, pad], dim=0)
45 |
46 | return cumulative_len, attention_mask
47 |
--------------------------------------------------------------------------------
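A hedged sketch of how the two padding helpers above are used on a packed sample. It assumes a distributed launch (e.g. via torchrun) where init_dist() and init_sequence_parallel(4) have already been called, so the sequence-parallel world size is 4; the sizes are made up.

import torch

from xtuner.parallel.sequence import (
    pad_cumulative_len_for_sequence_parallel,
    pad_for_sequence_parallel,
)
from xtuner.utils import DEFAULT_PAD_TOKEN_INDEX, IGNORE_INDEX

# Pad input_ids / labels so the length (13) becomes divisible by the
# sequence-parallel world size (4), i.e. 16.
input_ids = torch.randint(0, 32000, (1, 13))
labels = input_ids.clone()
input_ids = pad_for_sequence_parallel(input_ids, DEFAULT_PAD_TOKEN_INDEX)
labels = pad_for_sequence_parallel(labels, IGNORE_INDEX)

# cumulative_len marks the boundaries of the sub-sequences packed into the
# sample; the pad positions become one extra segment that the returned
# attention_mask marks as invalid.
cumulative_len = [torch.tensor([0, 5, 9, 13])]
cumulative_len, attention_mask = pad_cumulative_len_for_sequence_parallel(cumulative_len)
# cumulative_len -> [tensor([0, 5, 9, 13, 16])]; attention_mask is False on the 3 pad positions.
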
/xtuner/parallel/sequence/reduce_loss.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import torch
3 | import torch.distributed as dist
4 |
5 | from .setup_distributed import get_sequence_parallel_group
6 |
7 |
8 | class _ReduceLoss(torch.autograd.Function):
9 | @staticmethod
10 | def forward(ctx, mean_loss, loss_scale, process_group):
11 | ctx.mode = process_group
12 | if loss_scale == 0:
13 | # convert nan to 0 just for logging
14 | mean_loss = torch.nan_to_num(mean_loss)
15 | loss_sum = mean_loss * loss_scale
16 | dist.all_reduce(loss_sum, group=process_group)
17 | dist.all_reduce(loss_scale, group=process_group)
18 | loss = loss_sum / loss_scale
19 | return loss
20 |
21 | @staticmethod
22 | def backward(ctx, grad_output):
23 | return grad_output, None, None
24 |
25 |
26 | def reduce_sequence_parallel_loss(
27 | mean_loss, loss_scale, sp_group: dist.ProcessGroup = None
28 | ):
29 | if dist.get_world_size(sp_group) == 1:
30 | return mean_loss
31 | if sp_group is None:
32 | # avoid bc breaking
33 | sp_group = get_sequence_parallel_group()
34 | return _ReduceLoss.apply(mean_loss, loss_scale, sp_group)
35 |
--------------------------------------------------------------------------------
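A hedged sketch of reduce_sequence_parallel_loss. Each sequence-parallel rank computes a mean loss over its own shard of the tokens, and loss_scale carries the number of tokens behind that mean, so the two all-reduces in the autograd function recover the token-weighted global mean. The numbers are placeholders, and the snippet assumes the sequence-parallel group was initialised under a distributed launch.

import torch

from xtuner.parallel.sequence import reduce_sequence_parallel_loss

# Per-rank values: mean cross-entropy over this rank's shard and the number of
# non-ignored label tokens that contributed to it.
local_mean_loss = torch.tensor(2.3, requires_grad=True)
num_local_tokens = torch.tensor(123.0)

# loss_sum = mean_loss * loss_scale and loss_scale are all-reduced separately,
# then divided, so ranks with more valid tokens weigh more.
global_loss = reduce_sequence_parallel_loss(local_mean_loss, num_local_tokens)
global_loss.backward()
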
/xtuner/parallel/sequence/sampler.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import math
3 | from typing import Optional, Sized
4 |
5 | from mmengine.dataset import DefaultSampler
6 | from mmengine.dist import sync_random_seed
7 |
8 | from .setup_distributed import get_data_parallel_rank, get_data_parallel_world_size
9 |
10 |
11 | class SequenceParallelSampler(DefaultSampler):
12 | def __init__(
13 | self,
14 | dataset: Sized,
15 | shuffle: bool = True,
16 | seed: Optional[int] = None,
17 | round_up: bool = True,
18 | ) -> None:
19 | rank = get_data_parallel_rank()
20 | world_size = get_data_parallel_world_size()
21 | self.rank = rank
22 | self.world_size = world_size
23 |
24 | self.dataset = dataset
25 | self.shuffle = shuffle
26 | if seed is None:
27 | seed = sync_random_seed()
28 | self.seed = seed
29 | self.epoch = 0
30 | self.round_up = round_up
31 |
32 | if self.round_up:
33 | self.num_samples = math.ceil(len(self.dataset) / world_size)
34 | self.total_size = self.num_samples * self.world_size
35 | else:
36 | self.num_samples = math.ceil((len(self.dataset) - rank) / world_size)
37 | self.total_size = len(self.dataset)
38 |
--------------------------------------------------------------------------------
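A short sketch of the sampler above; note that it shards indices by data-parallel rank (not global rank), so every rank inside one sequence-parallel group draws the same samples and only splits them along the sequence dimension. The toy dataset is a placeholder, and the snippet assumes init_sequence_parallel() has been called under a distributed launch.

from torch.utils.data import DataLoader

from xtuner.parallel.sequence import SequenceParallelSampler

train_dataset = list(range(1000))  # placeholder for a real Sized dataset
sampler = SequenceParallelSampler(train_dataset, shuffle=True, seed=1024)
loader = DataLoader(train_dataset, batch_size=1, sampler=sampler)
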
/xtuner/registry.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from mmengine.registry import Registry
3 |
4 | __all__ = ["BUILDER", "MAP_FUNC"]
5 |
6 | BUILDER = Registry("builder")
7 | MAP_FUNC = Registry("map_fn")
8 |
--------------------------------------------------------------------------------
/xtuner/tools/copy_cfg.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import argparse
3 | import os.path as osp
4 | import shutil
5 |
6 | from mmengine.utils import mkdir_or_exist
7 |
8 | from xtuner.configs import cfgs_name_path
9 |
10 |
11 | def parse_args():
12 | parser = argparse.ArgumentParser()
13 | parser.add_argument("config_name", help="config name")
14 | parser.add_argument("save_dir", help="save directory for copied config")
15 | args = parser.parse_args()
16 | return args
17 |
18 |
19 | def add_copy_suffix(string):
20 | file_name, ext = osp.splitext(string)
21 | return f"{file_name}_copy{ext}"
22 |
23 |
24 | def main():
25 | args = parse_args()
26 | mkdir_or_exist(args.save_dir)
27 | config_path = cfgs_name_path[args.config_name]
28 | save_path = osp.join(args.save_dir, add_copy_suffix(osp.basename(config_path)))
29 | shutil.copyfile(config_path, save_path)
30 | print(f"Copy to {save_path}")
31 |
32 |
33 | if __name__ == "__main__":
34 | main()
35 |
--------------------------------------------------------------------------------
/xtuner/tools/data_preprocess/arxiv.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import argparse
3 | import json
4 | from datetime import datetime
5 |
6 |
7 | def parse_args():
8 | parser = argparse.ArgumentParser()
9 | parser.add_argument("src_file", help="source file path")
10 | parser.add_argument("dst_file", help="destination file path")
11 | parser.add_argument(
12 | "--categories",
13 | nargs="+",
14 | default=["cs.AI", "cs.CL", "cs.CV"],
15 | help="target categories",
16 | )
17 | parser.add_argument(
18 | "--start-date", default="2020-01-01", help="start date (format: YYYY-MM-DD)"
19 | )
20 |
21 | args = parser.parse_args()
22 | return args
23 |
24 |
25 | def has_intersection(list1, list2):
26 | set1 = set(list1)
27 | set2 = set(list2)
28 | return len(set1.intersection(set2)) > 0
29 |
30 |
31 | def read_json_file(file_path):
32 | data = []
33 | with open(file_path) as file:
34 | for line in file:
35 | try:
36 | json_data = json.loads(line)
37 | data.append(json_data)
38 | except json.JSONDecodeError:
39 | print(f"Failed to parse line: {line}")
40 | return data
41 |
42 |
43 | def main():
44 | args = parse_args()
45 | json_data = read_json_file(args.src_file)
46 | from_time = datetime.strptime(args.start_date, "%Y-%m-%d")
47 | filtered_data = [
48 | item
49 | for item in json_data
50 | if has_intersection(args.categories, item["categories"].split())
51 | and datetime.strptime(item["update_date"], "%Y-%m-%d") >= from_time
52 | ]
53 |
54 | with open(args.dst_file, "w") as file:
55 | json.dump(filtered_data, file)
56 |
57 | print(f"Save to {args.dst_file}\n{len(filtered_data)} items")
58 |
59 |
60 | if __name__ == "__main__":
61 | main()
62 |
--------------------------------------------------------------------------------
/xtuner/tools/data_preprocess/convert_refcoco.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import argparse
3 | import json
4 |
5 | from xtuner.dataset.refcoco_json import RefCOCOJsonDataset
6 |
7 |
8 | def parse_args():
9 | parser = argparse.ArgumentParser()
10 | parser.add_argument(
11 | "--ann-path",
12 | default="data/refcoco_annotations",
13 | help="Refcoco annotation path",
14 | )
15 | parser.add_argument(
16 | "--image-path",
17 | default="data/llava_data/llava_images/coco/train2017",
18 | help="COCO image path",
19 | )
20 | parser.add_argument(
21 | "--save-path", default="./", help="The folder to save converted data"
22 | )
23 | args = parser.parse_args()
24 | return args
25 |
26 |
27 | if __name__ == "__main__":
28 | args = parse_args()
29 |
30 | data_info = [
31 | ("refcoco", "unc"),
32 | ("refcoco+", "unc"),
33 | ("refcocog", "umd"),
34 | ]
35 | all_data = []
36 | for dataset, split in data_info:
37 | data = RefCOCOJsonDataset.get_data_json(
38 | ann_path=args.ann_path,
39 | image_path=args.image_path,
40 | dataset=dataset,
41 | splitBy=split,
42 | )[0]
43 | all_data.extend(data)
44 | save_path = args.save_path + "/train.json"
45 | with open(save_path, "w") as f:
46 | print(f"save to {save_path} with {len(all_data)} items.")
47 | print(all_data[0])
48 | json.dump(all_data, f, indent=4)
49 |
--------------------------------------------------------------------------------
/xtuner/tools/get_data_order.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import argparse
3 | import os
4 |
5 |
6 | def parse_args():
7 | parser = argparse.ArgumentParser()
8 | parser.add_argument("--data-folder", help="Data folder")
9 | parser.add_argument("--save-folder", help="The folder to save data order.")
10 | parser.add_argument(
11 | "--file-type",
12 | default=".bin",
13 | help="We want to get the order of the file in this type.",
14 | )
15 | args = parser.parse_args()
16 | return args
17 |
18 |
19 | def save_data_order(data_folder, save_folder, file_type=".bin"):
20 | assert os.path.exists(data_folder), f"{data_folder} does not exist."
21 | triples = list(os.walk(data_folder, followlinks=True))
22 | data_order = []
23 | for root, dirs, files in triples:
24 | dirs.sort()
25 | print(f"Reading {root}...")
26 | for fn in sorted(files):
27 | if fn.endswith(file_type):
28 | fp = os.path.join(root, fn)
29 | # Using relative paths so that you can get the same result
30 | # on different clusters
31 | fp = fp.replace(data_folder, "")[1:]
32 | data_order.append(fp)
33 |
34 | save_path = os.path.join(save_folder, "data_order.txt")
35 | with open(save_path, "w") as f:
36 | for fp in data_order:
37 | f.write(fp + "\n")
38 |
39 |
40 | if __name__ == "__main__":
41 | args = parse_args()
42 | save_data_order(args.data_folder, args.save_folder, args.file_type)
43 |
--------------------------------------------------------------------------------
/xtuner/tools/list_cfg.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import argparse
3 |
4 | from xtuner.configs import cfgs_name_path
5 |
6 |
7 | def parse_args():
8 | parser = argparse.ArgumentParser()
9 | parser.add_argument(
10 | "-p", "--pattern", default=None, help="Pattern for fuzzy matching"
11 | )
12 | args = parser.parse_args()
13 | return args
14 |
15 |
16 | def main(pattern=None):
17 | args = parse_args()
18 | configs_names = sorted(list(cfgs_name_path.keys()))
19 | print("==========================CONFIGS===========================")
20 | if args.pattern is not None:
21 | print(f"PATTERN: {args.pattern}")
22 | print("-------------------------------")
23 | for name in configs_names:
24 | if args.pattern is None or args.pattern.lower() in name.lower():
25 | print(name)
26 | print("=============================================================")
27 |
28 |
29 | if __name__ == "__main__":
30 | main()
31 |
--------------------------------------------------------------------------------
/xtuner/tools/list_dataset_format.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from xtuner.dataset.map_fns import DATASET_FORMAT_MAPPING
3 |
4 |
5 | def main():
6 | dataset_format = DATASET_FORMAT_MAPPING.keys()
7 | print("======================DATASET_FORMAT======================")
8 | for format in dataset_format:
9 | print(format)
10 | print("==========================================================")
11 |
12 |
13 | if __name__ == "__main__":
14 | main()
15 |
--------------------------------------------------------------------------------
/xtuner/tools/log_dataset.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import argparse
3 |
4 | from mmengine.config import Config
5 |
6 | from xtuner.registry import BUILDER
7 |
8 |
9 | def parse_args():
10 | parser = argparse.ArgumentParser(description="Log processed dataset.")
11 | parser.add_argument("config", help="config file name or path.")
12 |     # choose which kind of dataset style to show
13 | parser.add_argument(
14 | "--show",
15 | default="text",
16 | choices=["text", "masked_text", "input_ids", "labels", "all"],
17 | help="which kind of dataset style to show",
18 | )
19 | args = parser.parse_args()
20 | return args
21 |
22 |
23 | def main():
24 | args = parse_args()
25 |
26 | cfg = Config.fromfile(args.config)
27 |
28 | tokenizer = BUILDER.build(cfg.tokenizer)
29 | if cfg.get("framework", "mmengine").lower() == "huggingface":
30 | train_dataset = BUILDER.build(cfg.train_dataset)
31 | else:
32 | train_dataset = BUILDER.build(cfg.train_dataloader.dataset)
33 |
34 | if args.show == "text" or args.show == "all":
35 | print("#" * 20 + " text " + "#" * 20)
36 | print(tokenizer.decode(train_dataset[0]["input_ids"]))
37 | if args.show == "masked_text" or args.show == "all":
38 | print("#" * 20 + " text(masked) " + "#" * 20)
39 | masked_text = " ".join(
40 | ["[-100]" for i in train_dataset[0]["labels"] if i == -100]
41 | )
42 | unmasked_text = tokenizer.decode(
43 | [i for i in train_dataset[0]["labels"] if i != -100]
44 | )
45 | print(masked_text + " " + unmasked_text)
46 | if args.show == "input_ids" or args.show == "all":
47 | print("#" * 20 + " input_ids " + "#" * 20)
48 | print(train_dataset[0]["input_ids"])
49 | if args.show == "labels" or args.show == "all":
50 | print("#" * 20 + " labels " + "#" * 20)
51 | print(train_dataset[0]["labels"])
52 |
53 |
54 | if __name__ == "__main__":
55 | main()
56 |
--------------------------------------------------------------------------------
/xtuner/tools/model_converters/modeling_internlm2_reward/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 |
--------------------------------------------------------------------------------
/xtuner/tools/model_converters/split.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import argparse
3 | import copy
4 | import json
5 | import os
6 | import os.path as osp
7 | import shutil
8 |
9 | import torch
10 | from mmengine.utils import mkdir_or_exist
11 |
12 | from xtuner.utils.device import get_device_name, get_torch_device
13 |
14 |
15 | def parse_args():
16 | parser = argparse.ArgumentParser(
17 | description="Split a HuggingFace model to the smallest sharded one"
18 | )
19 | parser.add_argument("src_dir", help="the directory of the model")
20 | parser.add_argument("dst_dir", help="the directory to save the new model")
21 | args = parser.parse_args()
22 | return args
23 |
24 |
25 | def main():
26 | args = parse_args()
27 | mkdir_or_exist(args.dst_dir)
28 |
29 | all_files = os.listdir(args.src_dir)
30 | for name in all_files:
31 | if not name.startswith(("pytorch_model", ".")):
32 | src_path = osp.join(args.src_dir, name)
33 | dst_path = osp.join(args.dst_dir, name)
34 | shutil.copy(src_path, dst_path)
35 |
36 | with open(osp.join(args.src_dir, "pytorch_model.bin.index.json")) as f:
37 | index = json.load(f)
38 |
39 | n_shard = len(index["weight_map"])
40 | new_index = copy.deepcopy(index)
41 | new_index["weight_map"] = {}
42 | cnt = 1
43 |
44 | checkpoints = set(index["weight_map"].values())
45 | for ckpt in checkpoints:
46 | state_dict = torch.load(
47 | osp.join(args.src_dir, ckpt), map_location=get_device_name()
48 | )
49 | keys = sorted(list(state_dict.keys()))
50 | for k in keys:
51 | new_state_dict_name = f"pytorch_model-{cnt:05d}-of-{n_shard:05d}.bin"
52 | new_index["weight_map"][k] = new_state_dict_name
53 | new_state_dict = {k: state_dict[k]}
54 | torch.save(new_state_dict, osp.join(args.dst_dir, new_state_dict_name))
55 | cnt += 1
56 | del state_dict
57 | get_torch_device().empty_cache()
58 | with open(osp.join(args.dst_dir, "pytorch_model.bin.index.json"), "w") as f:
59 | json.dump(new_index, f)
60 | assert (
61 | new_index["weight_map"].keys() == index["weight_map"].keys()
62 | ), "Mismatch on `weight_map`!"
63 |
64 |
65 | if __name__ == "__main__":
66 | main()
67 |
--------------------------------------------------------------------------------
/xtuner/tools/plugins/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .api import plugins_api
3 |
4 | __all__ = ["plugins_api"]
5 |
--------------------------------------------------------------------------------
/xtuner/tools/plugins/api.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import re
3 |
4 |
5 | def plugins_api(input_str, calculate_open=True, solve_open=True, search_open=True):
6 | pattern = r'(Solve|solve|Solver|solver|Calculate|calculate|Calculator|calculator|Search)\("([^"]*)"\)' # noqa: E501
7 |
8 | matches = re.findall(pattern, input_str)
9 |
10 | converted_str = "<|Results|>:\n"
11 |
12 | for i in range(len(matches)):
13 | if matches[i][0] in ["Calculate", "calculate" "Calculator", "calculator"]:
14 | if calculate_open:
15 | from .calculate import Calculate
16 |
17 | result = Calculate(matches[i][1])
18 | else:
19 | result = None
20 | converted_str += f'Calculate("{matches[i][1]}") => {result}\n'
21 | elif matches[i][0] in ["Solve", "solve", "Solver", "solver"]:
22 | if solve_open:
23 | from .solve import Solve
24 |
25 | result = Solve(matches[i][1])
26 | else:
27 | result = None
28 | converted_str += f'Solve("{matches[i][1]}") =>\n{result}\n'
29 | elif matches[i][0] == "Search":
30 | if search_open:
31 | from .search import Search
32 |
33 | result = Search(matches[i][1])
34 | else:
35 | result = None
36 | converted_str += f'Search("{matches[i][1]}") =>\n{result}'
37 |
38 | converted_str += "\n"
39 | return converted_str
40 |
--------------------------------------------------------------------------------
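A small illustration of the call pattern the regex above looks for. With solve_open and search_open disabled, those plugins are never imported, so this runs without sympy or a SERPER_API_KEY; the output format follows the f-strings in plugins_api.

from xtuner.tools.plugins import plugins_api

model_output = 'The area is Calculate("3.14 * 5^2") square meters.'
print(plugins_api(model_output, calculate_open=True, solve_open=False, search_open=False))
# <|Results|>:
# Calculate("3.14 * 5^2") => 78.50
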
/xtuner/tools/plugins/calculate.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from math import * # noqa: F401, F403
3 |
4 |
5 | def Calculate(expression):
6 | res = ""
7 | for exp in expression.split(";"):
8 | try:
9 | res += "{:.2f};".format(eval(exp.replace("^", "**")))
10 | except Exception:
11 | res += "No result."
12 | if res[-1] == ";":
13 | res = res[:-1]
14 | return res
15 |
--------------------------------------------------------------------------------
/xtuner/tools/plugins/search.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import os
3 | import sys
4 |
5 | import requests
6 |
7 | try:
8 | SERPER_API_KEY = os.environ["SERPER_API_KEY"]
9 | except Exception:
10 | print(
11 | "Please obtain the `SERPER_API_KEY` from https://serper.dev and "
12 | "set it using `export SERPER_API_KEY=xxx`."
13 | )
14 | sys.exit(1)
15 |
16 |
17 | def parse_results(results, k=10):
18 | snippets = []
19 |
20 | for result in results["organic"][:k]:
21 | if "snippet" in result:
22 | snippets.append(result["snippet"])
23 | for attribute, value in result.get("attributes", {}).items():
24 | snippets.append(f"{attribute}: {value}.")
25 | return snippets
26 |
27 |
28 | def search(api_key, search_term, **kwargs):
29 | headers = {
30 | "X-API-KEY": api_key,
31 | "Content-Type": "application/json",
32 | }
33 | params = {
34 | "q": search_term,
35 | **{key: value for key, value in kwargs.items() if value is not None},
36 | }
37 | try:
38 | response = requests.post(
39 | "https://google.serper.dev/search",
40 | headers=headers,
41 | params=params,
42 | timeout=5,
43 | )
44 | except Exception as e:
45 | return -1, str(e)
46 | return response.status_code, response.json()
47 |
48 |
49 | def Search(q, k=10):
50 | status_code, response = search(SERPER_API_KEY, q)
51 | if status_code != 200:
52 | ret = "None\n"
53 | else:
54 | text = parse_results(response, k=k)
55 | ret = ""
56 | for idx, res in enumerate(text):
57 | ret += f"<|{idx+1}|>: '{res}'\n"
58 | return ret
59 |
--------------------------------------------------------------------------------
/xtuner/tools/plugins/solve.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import math
3 | import re
4 | from math import * # noqa: F401, F403
5 |
6 | from sympy import Eq, solve, symbols
7 |
8 | from .calculate import Calculate
9 |
10 |
11 | def Solve(equations_str):
12 | try:
13 | equations_str = equations_str.replace(" ", "")
14 | equations_ori = re.split(r"[,;]+", equations_str)
15 | equations_str = equations_str.replace("^", "**")
16 | equations_str = re.sub(r"(\(.*\))([a-zA-Z])", r"\1 * \2", equations_str)
17 | equations_str = re.sub(r"(\d+)([a-zA-Z])", r"\1 * \2", equations_str)
18 | equations_str = equations_str.replace("pi", str(math.pi))
19 | equations = re.split(r"[,;]+", equations_str)
20 | vars_list = list(set(re.findall(r"[a-zA-Z]+", equations_str)))
21 | vars = {var: symbols(var) for var in vars_list}
22 |
23 | output = ""
24 | eqs = []
25 | for eq in equations:
26 | if "=" in eq:
27 | left, right = eq.split("=")
28 | eqs.append(
29 | Eq(eval(left.strip(), {}, vars), eval(right.strip(), {}, vars))
30 | )
31 | solutions = solve(eqs, vars, dict=True)
32 |
33 | vars_values = {var: [] for var in vars_list}
34 | if isinstance(solutions, list):
35 | for idx, solution in enumerate(solutions):
36 | for var, sol in solution.items():
37 | output += f"{var}_{idx} = {sol}\n"
38 | vars_values[str(var)].append(sol)
39 | else:
40 | for var, sol in solutions.items():
41 | output += f"{var} = {sol}\n"
42 | vars_values[str(var)].append(sol)
43 | for eq, eq_o in zip(equations, equations_ori):
44 | if "=" not in eq:
45 | for var in vars_list:
46 | need_note = True if len(vars_values[var]) > 1 else False
47 | for idx, value in enumerate(vars_values[var]):
48 | eq_to_calc = eq.replace(var, str(value))
49 | calc_result = Calculate(eq_to_calc)
50 | if need_note:
51 | eq_name = eq_o.replace(var, f"{var}_{idx}")
52 | else:
53 | eq_name = eq_o
54 | if calc_result != "No results.":
55 | output += f"{eq_name} = {calc_result}\n"
56 |
57 | return output.strip()
58 | except Exception:
59 | return "No result."
60 |
--------------------------------------------------------------------------------
/xtuner/tools/process_untokenized_llava_data.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import argparse
3 | import warnings
4 |
5 | from mmengine import Config
6 |
7 | from xtuner.registry import BUILDER
8 |
9 | # ignore FutureWarning in hf datasets
10 | warnings.simplefilter(action="ignore", category=FutureWarning)
11 |
12 |
13 | def parse_args():
14 | parser = argparse.ArgumentParser()
15 | parser.add_argument("config", help="config file name or path.")
16 | parser.add_argument("--save-folder", help="The folder to save data order.")
17 | args = parser.parse_args()
18 | return args
19 |
20 |
21 | def build_llava_dataset(config):
22 | dataset = BUILDER.build(config.train_dataloader.dataset)
23 | return dataset
24 |
25 |
26 | if __name__ == "__main__":
27 | args = parse_args()
28 | cfg = Config.fromfile(args.config)
29 |
30 | llava_dataset = build_llava_dataset(cfg)
31 | text_data = llava_dataset.text_data
32 |
33 | text_data.save_to_disk(args.save_folder)
34 |
--------------------------------------------------------------------------------
/xtuner/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .constants import (
3 | DEFAULT_IMAGE_TOKEN,
4 | DEFAULT_PAD_TOKEN_INDEX,
5 | IGNORE_INDEX,
6 | IMAGE_TOKEN_INDEX,
7 | )
8 | from .handle_moe_load_and_save import (
9 | SUPPORT_MODELS,
10 | get_origin_state_dict,
11 | load_state_dict_into_model,
12 | )
13 | from .stop_criteria import StopWordStoppingCriteria
14 | from .templates import PROMPT_TEMPLATE, SYSTEM_TEMPLATE
15 |
16 | __all__ = [
17 | "IGNORE_INDEX",
18 | "DEFAULT_PAD_TOKEN_INDEX",
19 | "PROMPT_TEMPLATE",
20 | "DEFAULT_IMAGE_TOKEN",
21 | "SYSTEM_TEMPLATE",
22 | "StopWordStoppingCriteria",
23 | "IMAGE_TOKEN_INDEX",
24 | "load_state_dict_into_model",
25 | "get_origin_state_dict",
26 | "SUPPORT_MODELS",
27 | ]
28 |
--------------------------------------------------------------------------------
/xtuner/utils/constants.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | IGNORE_INDEX = -100
3 | DEFAULT_PAD_TOKEN_INDEX = 0
4 | IMAGE_TOKEN_INDEX = -200
5 | DEFAULT_IMAGE_TOKEN = ""
6 |
--------------------------------------------------------------------------------
/xtuner/utils/device.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | # This code is inspired by the torchtune.
3 | # https://github.com/pytorch/torchtune/blob/main/torchtune/utils/_device.py
4 |
5 | import logging
6 | from typing import Any, Optional
7 |
8 | import torch
9 |
10 | logger = logging.getLogger(__name__)
11 |
12 |
13 | def is_torch_npu_available() -> bool:
14 | """Check the availability of NPU."""
15 | try:
16 | import torch_npu # noqa: F401
17 |
18 | return torch.npu.is_available()
19 | except ImportError:
20 | return False
21 |
22 |
23 | is_cuda_available = torch.cuda.is_available()
24 | is_npu_available = is_torch_npu_available()
25 |
26 |
27 | def get_device_name() -> str:
28 | """Function that gets the torch.device based on the current machine.
29 |
30 | This currently only supports CPU, CUDA, NPU.
31 |
32 | Returns:
33 | device
34 | """
35 | if is_cuda_available:
36 | device = "cuda"
37 | elif is_npu_available:
38 | device = "npu"
39 | else:
40 | device = "cpu"
41 | return device
42 |
43 |
44 | def get_device(device_name: Optional[str] = None) -> torch.device:
45 | """Function that takes an optional device string, verifies it's correct and
46 | available given the machine and distributed settings, and returns a
47 | :func:`~torch.device`. If device string is not provided, this function will
48 | infer the device based on the environment.
49 |
50 | If CUDA-like is available and being used, this function also sets the CUDA-like device.
51 |
52 | Args:
53 | device (Optional[str]): The name of the device to use, e.g. "cuda" or "cpu" or "npu".
54 |
55 | Example:
56 | >>> device = get_device("cuda")
57 | >>> device
58 | device(type='cuda', index=0)
59 |
60 | Returns:
61 | torch.device: Device
62 | """
63 | if device_name is None:
64 | device_name = get_device_name()
65 | device = torch.device(device_name)
66 | return device
67 |
68 |
69 | def get_torch_device() -> Any:
70 | """Return the corresponding torch attribute based on the device type
71 | string.
72 |
73 | Returns:
74 | module: The corresponding torch device namespace, or torch.cuda if not found.
75 | """
76 | device_name = get_device_name()
77 | try:
78 | return getattr(torch, device_name)
79 | except AttributeError:
80 | logger.warning(
81 | f"Device namespace '{device_name}' not found in torch, try to load torch.cuda."
82 | )
83 | return torch.cuda
84 |
--------------------------------------------------------------------------------
/xtuner/utils/stop_criteria.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from transformers import StoppingCriteria
3 |
4 |
5 | class StopWordStoppingCriteria(StoppingCriteria):
6 | """StopWord stopping criteria."""
7 |
8 | def __init__(self, tokenizer, stop_word):
9 | self.tokenizer = tokenizer
10 | self.stop_word = stop_word
11 | self.length = len(self.stop_word)
12 |
13 | def __call__(self, input_ids, *args, **kwargs) -> bool:
14 | cur_text = self.tokenizer.decode(input_ids[0])
15 | cur_text = cur_text.replace("\r", "").replace("\n", "")
16 | return cur_text[-self.length :] == self.stop_word
17 |
--------------------------------------------------------------------------------
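A hedged sketch of plugging StopWordStoppingCriteria into transformers' generate; the model name and stop word are placeholders for whatever chat template is in use.

from transformers import AutoModelForCausalLM, AutoTokenizer, StoppingCriteriaList

from xtuner.utils import StopWordStoppingCriteria

model_name = "internlm/internlm2-chat-7b"  # placeholder
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)

# One criteria object per stop word; generation halts once the decoded tail of
# the running sequence equals the stop word.
stop_criteria = StoppingCriteriaList(
    [StopWordStoppingCriteria(tokenizer, word) for word in ("<|im_end|>",)]
)

inputs = tokenizer("Hello, who are you?", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=64, stopping_criteria=stop_criteria)
print(tokenizer.decode(outputs[0]))
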
/xtuner/version.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | __version__ = "0.2.0rc0"
3 | short_version = __version__
4 |
5 |
6 | def parse_version_info(version_str):
7 | """Parse a version string into a tuple.
8 |
9 | Args:
10 | version_str (str): The version string.
11 | Returns:
12 | tuple[int or str]: The version info, e.g., "1.3.0" is parsed into
13 | (1, 3, 0), and "2.0.0rc1" is parsed into (2, 0, 0, 'rc1').
14 | """
15 | version_info = []
16 | for x in version_str.split("."):
17 | if x.isdigit():
18 | version_info.append(int(x))
19 | elif x.find("rc") != -1:
20 | patch_version = x.split("rc")
21 | version_info.append(int(patch_version[0]))
22 | version_info.append(f"rc{patch_version[1]}")
23 | return tuple(version_info)
24 |
25 |
26 | version_info = parse_version_info(__version__)
27 |
--------------------------------------------------------------------------------