├── .gitignore ├── .gitmodules ├── LICENSE ├── README.md ├── eval ├── README.md ├── configs │ └── eval_qwen2_linear_moe.py ├── lm_eval_linear_moe.sh ├── models │ └── qwen2_linear_moe.py └── run.py ├── examples ├── linear_llama3 │ ├── README.md │ ├── pretrain_llama.py │ └── run_pretrain_mcore_llama.sh ├── linear_moe_deepseek_v2 │ ├── README.md │ ├── pretrain_deepseek.py │ └── run_pretrain_deepseek.sh ├── linear_moe_mixtral │ ├── README.md │ ├── pretrain_mcore_mistral.py │ └── run_pretrain_mistral.sh └── linear_moe_qwen2 │ ├── README.md │ ├── check_tensorboard.sh │ ├── evaluate_huggingface_qwen.py │ ├── evaluate_mcore_qwen.py │ ├── pretrain_qwen.py │ ├── run_evaluate_huggingface_qwen.sh │ ├── run_evaluate_mcore_qwen.sh │ └── run_pretrain_qwen.sh ├── images ├── linear-moe-fig1.png └── linear-moe-fig2.png ├── linear_moe ├── __init__.py ├── arguments.py ├── data │ ├── __init__.py │ ├── bloom.py │ ├── glm.py │ ├── llama.py │ ├── llava │ │ ├── constants.py │ │ ├── conversation.py │ │ ├── cvcuda_image_processing_clip.py │ │ ├── mm_pretrain_dataset.py │ │ └── mm_utils.py │ ├── qwen_vl.py │ ├── starcoder.py │ └── utils.py ├── finetune_utils.py ├── generation │ ├── api.py │ ├── generation.py │ ├── gpt_predictor.py │ ├── megatron.md │ └── tokenization.py ├── initialize.py ├── lm_evaluate.py ├── model │ ├── __init__.py │ ├── common_modules │ │ ├── __init__.py │ │ ├── activations.py │ │ ├── feature_map.py │ │ ├── l2norm.py │ │ ├── layernorm.py │ │ └── rotary.py │ ├── deepseek_v2 │ │ ├── __init__.py │ │ ├── hybrid │ │ │ ├── hybrid_model.py │ │ │ └── hybrid_transformer_block.py │ │ ├── layer_specs.py │ │ ├── model.py │ │ ├── moe │ │ │ ├── __init__.py │ │ │ ├── experts.py │ │ │ ├── moe_layer.py │ │ │ ├── router.py │ │ │ ├── router_old.py │ │ │ └── token_dispatcher.py │ │ ├── rms_norm.py │ │ ├── transformer │ │ │ ├── attention.py │ │ │ └── mlp.py │ │ ├── transformer_block.py │ │ ├── transformer_config.py │ │ ├── transformer_layer.py │ │ └── yarn_rotary_pos_embedding.py │ ├── llama3 │ │ ├── __init__.py │ │ ├── gpt_model.py │ │ ├── hybrid │ │ │ ├── hybrid_model.py │ │ │ └── hybrid_transformer_block.py │ │ ├── language_model.py │ │ ├── layer_specs.py │ │ ├── model.py │ │ ├── rms_norm.py │ │ ├── transformer │ │ │ ├── attention.py │ │ │ └── mlp.py │ │ ├── transformer_config.py │ │ ├── transformer_layer.py │ │ └── transformer_legacy.py │ ├── mixtral │ │ ├── __init__.py │ │ ├── hybrid │ │ │ ├── hybrid_model.py │ │ │ └── hybrid_transformer_block.py │ │ ├── layer_specs.py │ │ ├── model.py │ │ ├── moe │ │ │ ├── __init__.py │ │ │ ├── experts.py │ │ │ ├── grouped_gemm_util.py │ │ │ ├── moe_layer.py │ │ │ ├── moe_utils.py │ │ │ ├── router.py │ │ │ └── token_dispatcher.py │ │ ├── rms_norm.py │ │ ├── transformer │ │ │ ├── attention.py │ │ │ └── mlp.py │ │ ├── transformer_block.py │ │ ├── transformer_config.py │ │ └── transformer_layer.py │ └── qwen2 │ │ ├── hybrid │ │ ├── hybrid_model.py │ │ └── hybrid_transformer_block.py │ │ ├── layer_specs.py │ │ ├── model.py │ │ ├── moe │ │ ├── __init__.py │ │ ├── experts.py │ │ ├── moe_layer.py │ │ ├── router.py │ │ └── token_dispatcher.py │ │ ├── rms_norm.py │ │ ├── transformer │ │ ├── attention.py │ │ └── mlp.py │ │ ├── transformer_block.py │ │ ├── transformer_config.py │ │ └── transformer_layer.py ├── sequence_modeling │ ├── attention │ │ ├── __init__.py │ │ └── dot_product_attention.py │ ├── based │ │ ├── __init__.py │ │ └── based.py │ ├── basic_linear_attention │ │ ├── __init__.py │ │ └── basic_linear_attention.py │ ├── deltanet │ │ ├── __init__.py │ │ └── deltanet.py │ ├── 
gated_deltanet │ │ ├── __init__.py │ │ └── gated_deltanet.py │ ├── gla │ │ ├── __init__.py │ │ ├── gla.py │ │ └── gla_gate.py │ ├── hgrn2 │ │ ├── __init__.py │ │ └── hgrn2.py │ ├── lasp2 │ │ ├── __init__.py │ │ ├── lasp2.py │ │ ├── lasp2_with_mask_triton_op.py │ │ └── lasp2_without_mask_triton_op.py │ ├── lightning_attention │ │ ├── __init__.py │ │ └── lightning_attention.py │ ├── linear_attention.py │ ├── linear_rnn.py │ ├── mamba2 │ │ ├── __init__.py │ │ ├── mamba_block.py │ │ ├── mamba_hybrid_layer_allocation.py │ │ ├── mamba_layer.py │ │ ├── mamba_mixer.py │ │ ├── mamba_model.py │ │ └── triton_cache_manager.py │ ├── mom_linear_attention.py │ ├── rebased │ │ ├── __init__.py │ │ └── rebased.py │ ├── retention │ │ ├── __init__.py │ │ └── retention.py │ ├── rwkv6 │ │ ├── __init__.py │ │ ├── dd_lerp_linear.py │ │ └── rwkv6.py │ ├── rwkv7 │ │ ├── __init__.py │ │ ├── lora_mlp.py │ │ └── rwkv7.py │ └── ssm.py ├── tokenizer │ ├── __init__.py │ ├── icetk_glm130b_tokenizer.py │ ├── jiebabpe_tokenizer.py │ ├── tokenization_baichuan.py │ ├── tokenization_qwen_vl.py │ └── tokenization_yi.py ├── training.py └── utils.py ├── requirements.txt └── toolkits ├── model_checkpoints_convertor ├── README.md ├── baichuan │ ├── checkpoint_reshaping_and_interoperability.py │ ├── configuration_baichuan.py │ ├── hf2te.py │ ├── model_convertor.sh │ └── te_model_convertor.sh ├── baichuan2 │ ├── checkpoint_reshaping_and_interoperability.py │ ├── configuration_baichuan.py │ ├── hf2te.py │ ├── hf2te_convertor.sh │ └── model_convertor.sh ├── bloom │ ├── checkpoint_reshaping_and_interoperability.py │ ├── deepspeed_to_megatron.py │ ├── deepspeed_to_megatron_ori.py │ ├── model_convertor_huggingface_megatron.sh │ ├── reward_model_convertor_megatron.sh │ ├── reward_model_to_megatron.py │ ├── run_convert_deepspeed_to_megatron.sh │ └── run_convert_deepspeed_to_transformers.sh ├── chatglm │ ├── checkpoint_reshaping_and_interoperability.py │ └── run_convert_huggingface_to_megatron.sh ├── deepseek │ ├── hf2mcore_deepseek_v2_moe.py │ └── hf2mcore_deepseek_v2_moe_convertor.sh ├── falcon │ ├── checkpoint_reshaping_and_interoperability.py │ ├── configuration_RW.py │ └── model_convertor.sh ├── falcon40b │ ├── checkpoint_reshaping_and_interoperability.py │ ├── configuration_RW.py │ └── model_convertor.sh ├── galactica │ ├── checkpoint_reshaping_and_interoperability.py │ └── run_convert_huggingface_to_megatron.sh ├── glm │ ├── checkpoint_reshaping_and_interoperability.py │ └── run_convert_transformers_to_megatron.sh ├── glm130b │ ├── checkpoint_reshaping_and_interoperability.py │ ├── merge_130b_ckpts.py │ └── run_convert_transformers_to_megatron.sh ├── llama │ ├── hf2mcore.py │ ├── hf2mcore_70b.py │ ├── hf2mcore_convertor.sh │ ├── hf2megatron.py │ ├── hf2megatron_convertor.sh │ └── hf_llama_moe │ │ ├── config_TEMPLATE.json │ │ └── llama_moe.py ├── mistral │ ├── hf2mcore.py │ ├── hf2mcore_convertor.sh │ ├── hf2mcore_mixtral.py │ ├── hf2megatron.py │ ├── hf2megatron_convertor.sh │ └── hf_mistral_moe │ │ └── config_TEMPLATE.json ├── qwen │ ├── hf2mcore_qwen1.5_dense_convertor.sh │ ├── hf2mcore_qwen1.5_dense_gqa.py │ ├── hf2mcore_qwen1.5_dense_mha.py │ ├── hf2mcore_qwen1.5_dense_mha_to_moe.py │ ├── hf2mcore_qwen1.5_dense_to_moe_convertor.sh │ ├── hf2mcore_qwen1.5_moe.py │ ├── hf2mcore_qwen1.5_moe_convertor.sh │ ├── hf2mcore_qwen2_convertor.sh │ ├── hf2mcore_qwen2_dense_and_moe_gqa.py │ ├── hf2megablocks_qwen1.5.py │ ├── hf2megablocks_qwen1.5_convertor.sh │ ├── hf2megatron_convertor.sh │ ├── hf2megatron_qwen1.0.py │ └── 
hf2megatron_qwen1.5.py ├── starcoder │ ├── checkpoint_reshaping_and_interoperability.py │ └── model_convertor.sh └── yi │ ├── checkpoint_reshaping_and_interoperability.py │ └── model_convertor.sh └── pretrain_data_preprocessing ├── README.md ├── clean_raw_text.py ├── convert_json_to_list.py ├── img.png ├── preprocess_data.py ├── preprocess_data_megatron.py ├── preprocess_wudao2.py ├── qwen_hf_preprocess_datasets.py ├── run_make_pretraining_dataset.sh ├── run_make_pretraining_dataset_megatron.sh ├── run_make_pretraining_dataset_megatron_slimpajama.sh ├── run_make_pretraining_dataset_megatron_slimpajama_chunk1_chunk2.sh ├── run_prepare_dataset.sh └── run_prepare_wudao.sh /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/.gitignore -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/.gitmodules -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/README.md -------------------------------------------------------------------------------- /eval/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/eval/README.md -------------------------------------------------------------------------------- /eval/configs/eval_qwen2_linear_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/eval/configs/eval_qwen2_linear_moe.py -------------------------------------------------------------------------------- /eval/lm_eval_linear_moe.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/eval/lm_eval_linear_moe.sh -------------------------------------------------------------------------------- /eval/models/qwen2_linear_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/eval/models/qwen2_linear_moe.py -------------------------------------------------------------------------------- /eval/run.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/eval/run.py -------------------------------------------------------------------------------- /examples/linear_llama3/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/examples/linear_llama3/README.md -------------------------------------------------------------------------------- /examples/linear_llama3/pretrain_llama.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/examples/linear_llama3/pretrain_llama.py -------------------------------------------------------------------------------- /examples/linear_llama3/run_pretrain_mcore_llama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/examples/linear_llama3/run_pretrain_mcore_llama.sh -------------------------------------------------------------------------------- /examples/linear_moe_deepseek_v2/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/examples/linear_moe_deepseek_v2/README.md -------------------------------------------------------------------------------- /examples/linear_moe_deepseek_v2/pretrain_deepseek.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/examples/linear_moe_deepseek_v2/pretrain_deepseek.py -------------------------------------------------------------------------------- /examples/linear_moe_deepseek_v2/run_pretrain_deepseek.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/examples/linear_moe_deepseek_v2/run_pretrain_deepseek.sh -------------------------------------------------------------------------------- /examples/linear_moe_mixtral/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/examples/linear_moe_mixtral/README.md -------------------------------------------------------------------------------- /examples/linear_moe_mixtral/pretrain_mcore_mistral.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/examples/linear_moe_mixtral/pretrain_mcore_mistral.py -------------------------------------------------------------------------------- /examples/linear_moe_mixtral/run_pretrain_mistral.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/examples/linear_moe_mixtral/run_pretrain_mistral.sh -------------------------------------------------------------------------------- /examples/linear_moe_qwen2/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/examples/linear_moe_qwen2/README.md -------------------------------------------------------------------------------- /examples/linear_moe_qwen2/check_tensorboard.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/examples/linear_moe_qwen2/check_tensorboard.sh -------------------------------------------------------------------------------- /examples/linear_moe_qwen2/evaluate_huggingface_qwen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/examples/linear_moe_qwen2/evaluate_huggingface_qwen.py -------------------------------------------------------------------------------- /examples/linear_moe_qwen2/evaluate_mcore_qwen.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/examples/linear_moe_qwen2/evaluate_mcore_qwen.py -------------------------------------------------------------------------------- /examples/linear_moe_qwen2/pretrain_qwen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/examples/linear_moe_qwen2/pretrain_qwen.py -------------------------------------------------------------------------------- /examples/linear_moe_qwen2/run_evaluate_huggingface_qwen.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/examples/linear_moe_qwen2/run_evaluate_huggingface_qwen.sh -------------------------------------------------------------------------------- /examples/linear_moe_qwen2/run_evaluate_mcore_qwen.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/examples/linear_moe_qwen2/run_evaluate_mcore_qwen.sh -------------------------------------------------------------------------------- /examples/linear_moe_qwen2/run_pretrain_qwen.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/examples/linear_moe_qwen2/run_pretrain_qwen.sh -------------------------------------------------------------------------------- /images/linear-moe-fig1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/images/linear-moe-fig1.png -------------------------------------------------------------------------------- /images/linear-moe-fig2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/images/linear-moe-fig2.png -------------------------------------------------------------------------------- /linear_moe/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/__init__.py -------------------------------------------------------------------------------- /linear_moe/arguments.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/arguments.py -------------------------------------------------------------------------------- /linear_moe/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/data/__init__.py -------------------------------------------------------------------------------- /linear_moe/data/bloom.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/data/bloom.py -------------------------------------------------------------------------------- /linear_moe/data/glm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/data/glm.py -------------------------------------------------------------------------------- 
/linear_moe/data/llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/data/llama.py -------------------------------------------------------------------------------- /linear_moe/data/llava/constants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/data/llava/constants.py -------------------------------------------------------------------------------- /linear_moe/data/llava/conversation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/data/llava/conversation.py -------------------------------------------------------------------------------- /linear_moe/data/llava/cvcuda_image_processing_clip.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/data/llava/cvcuda_image_processing_clip.py -------------------------------------------------------------------------------- /linear_moe/data/llava/mm_pretrain_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/data/llava/mm_pretrain_dataset.py -------------------------------------------------------------------------------- /linear_moe/data/llava/mm_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/data/llava/mm_utils.py -------------------------------------------------------------------------------- /linear_moe/data/qwen_vl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/data/qwen_vl.py -------------------------------------------------------------------------------- /linear_moe/data/starcoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/data/starcoder.py -------------------------------------------------------------------------------- /linear_moe/data/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/data/utils.py -------------------------------------------------------------------------------- /linear_moe/finetune_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/finetune_utils.py -------------------------------------------------------------------------------- /linear_moe/generation/api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/generation/api.py -------------------------------------------------------------------------------- /linear_moe/generation/generation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/generation/generation.py 
-------------------------------------------------------------------------------- /linear_moe/generation/gpt_predictor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/generation/gpt_predictor.py -------------------------------------------------------------------------------- /linear_moe/generation/megatron.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/generation/megatron.md -------------------------------------------------------------------------------- /linear_moe/generation/tokenization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/generation/tokenization.py -------------------------------------------------------------------------------- /linear_moe/initialize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/initialize.py -------------------------------------------------------------------------------- /linear_moe/lm_evaluate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/lm_evaluate.py -------------------------------------------------------------------------------- /linear_moe/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/model/__init__.py -------------------------------------------------------------------------------- /linear_moe/model/common_modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/model/common_modules/__init__.py -------------------------------------------------------------------------------- /linear_moe/model/common_modules/activations.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/model/common_modules/activations.py -------------------------------------------------------------------------------- /linear_moe/model/common_modules/feature_map.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/model/common_modules/feature_map.py -------------------------------------------------------------------------------- /linear_moe/model/common_modules/l2norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/model/common_modules/l2norm.py -------------------------------------------------------------------------------- /linear_moe/model/common_modules/layernorm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/model/common_modules/layernorm.py -------------------------------------------------------------------------------- /linear_moe/model/common_modules/rotary.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/model/common_modules/rotary.py -------------------------------------------------------------------------------- /linear_moe/model/deepseek_v2/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/model/deepseek_v2/__init__.py -------------------------------------------------------------------------------- /linear_moe/model/deepseek_v2/hybrid/hybrid_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/model/deepseek_v2/hybrid/hybrid_model.py -------------------------------------------------------------------------------- /linear_moe/model/deepseek_v2/hybrid/hybrid_transformer_block.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/model/deepseek_v2/hybrid/hybrid_transformer_block.py -------------------------------------------------------------------------------- /linear_moe/model/deepseek_v2/layer_specs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/model/deepseek_v2/layer_specs.py -------------------------------------------------------------------------------- /linear_moe/model/deepseek_v2/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/model/deepseek_v2/model.py -------------------------------------------------------------------------------- /linear_moe/model/deepseek_v2/moe/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /linear_moe/model/deepseek_v2/moe/experts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/model/deepseek_v2/moe/experts.py -------------------------------------------------------------------------------- /linear_moe/model/deepseek_v2/moe/moe_layer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/model/deepseek_v2/moe/moe_layer.py -------------------------------------------------------------------------------- /linear_moe/model/deepseek_v2/moe/router.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/model/deepseek_v2/moe/router.py -------------------------------------------------------------------------------- /linear_moe/model/deepseek_v2/moe/router_old.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/model/deepseek_v2/moe/router_old.py -------------------------------------------------------------------------------- /linear_moe/model/deepseek_v2/moe/token_dispatcher.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/model/deepseek_v2/moe/token_dispatcher.py -------------------------------------------------------------------------------- /linear_moe/model/deepseek_v2/rms_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/model/deepseek_v2/rms_norm.py -------------------------------------------------------------------------------- /linear_moe/model/deepseek_v2/transformer/attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/model/deepseek_v2/transformer/attention.py -------------------------------------------------------------------------------- /linear_moe/model/deepseek_v2/transformer/mlp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/model/deepseek_v2/transformer/mlp.py -------------------------------------------------------------------------------- /linear_moe/model/deepseek_v2/transformer_block.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/model/deepseek_v2/transformer_block.py -------------------------------------------------------------------------------- /linear_moe/model/deepseek_v2/transformer_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/model/deepseek_v2/transformer_config.py -------------------------------------------------------------------------------- /linear_moe/model/deepseek_v2/transformer_layer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/model/deepseek_v2/transformer_layer.py -------------------------------------------------------------------------------- /linear_moe/model/deepseek_v2/yarn_rotary_pos_embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/model/deepseek_v2/yarn_rotary_pos_embedding.py -------------------------------------------------------------------------------- /linear_moe/model/llama3/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/model/llama3/__init__.py -------------------------------------------------------------------------------- /linear_moe/model/llama3/gpt_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/model/llama3/gpt_model.py -------------------------------------------------------------------------------- /linear_moe/model/llama3/hybrid/hybrid_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/model/llama3/hybrid/hybrid_model.py -------------------------------------------------------------------------------- /linear_moe/model/llama3/hybrid/hybrid_transformer_block.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/model/llama3/hybrid/hybrid_transformer_block.py -------------------------------------------------------------------------------- /linear_moe/model/llama3/language_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/model/llama3/language_model.py -------------------------------------------------------------------------------- /linear_moe/model/llama3/layer_specs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/model/llama3/layer_specs.py -------------------------------------------------------------------------------- /linear_moe/model/llama3/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/model/llama3/model.py -------------------------------------------------------------------------------- /linear_moe/model/llama3/rms_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/model/llama3/rms_norm.py -------------------------------------------------------------------------------- /linear_moe/model/llama3/transformer/attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/model/llama3/transformer/attention.py -------------------------------------------------------------------------------- /linear_moe/model/llama3/transformer/mlp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/model/llama3/transformer/mlp.py -------------------------------------------------------------------------------- /linear_moe/model/llama3/transformer_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/model/llama3/transformer_config.py -------------------------------------------------------------------------------- /linear_moe/model/llama3/transformer_layer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/model/llama3/transformer_layer.py -------------------------------------------------------------------------------- /linear_moe/model/llama3/transformer_legacy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/model/llama3/transformer_legacy.py -------------------------------------------------------------------------------- /linear_moe/model/mixtral/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/model/mixtral/__init__.py -------------------------------------------------------------------------------- /linear_moe/model/mixtral/hybrid/hybrid_model.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/model/mixtral/hybrid/hybrid_model.py -------------------------------------------------------------------------------- /linear_moe/model/mixtral/hybrid/hybrid_transformer_block.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/model/mixtral/hybrid/hybrid_transformer_block.py -------------------------------------------------------------------------------- /linear_moe/model/mixtral/layer_specs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/model/mixtral/layer_specs.py -------------------------------------------------------------------------------- /linear_moe/model/mixtral/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/model/mixtral/model.py -------------------------------------------------------------------------------- /linear_moe/model/mixtral/moe/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /linear_moe/model/mixtral/moe/experts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/model/mixtral/moe/experts.py -------------------------------------------------------------------------------- /linear_moe/model/mixtral/moe/grouped_gemm_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/model/mixtral/moe/grouped_gemm_util.py -------------------------------------------------------------------------------- /linear_moe/model/mixtral/moe/moe_layer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/model/mixtral/moe/moe_layer.py -------------------------------------------------------------------------------- /linear_moe/model/mixtral/moe/moe_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/model/mixtral/moe/moe_utils.py -------------------------------------------------------------------------------- /linear_moe/model/mixtral/moe/router.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/model/mixtral/moe/router.py -------------------------------------------------------------------------------- /linear_moe/model/mixtral/moe/token_dispatcher.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/model/mixtral/moe/token_dispatcher.py -------------------------------------------------------------------------------- /linear_moe/model/mixtral/rms_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/model/mixtral/rms_norm.py 
-------------------------------------------------------------------------------- /linear_moe/model/mixtral/transformer/attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/model/mixtral/transformer/attention.py -------------------------------------------------------------------------------- /linear_moe/model/mixtral/transformer/mlp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/model/mixtral/transformer/mlp.py -------------------------------------------------------------------------------- /linear_moe/model/mixtral/transformer_block.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/model/mixtral/transformer_block.py -------------------------------------------------------------------------------- /linear_moe/model/mixtral/transformer_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/model/mixtral/transformer_config.py -------------------------------------------------------------------------------- /linear_moe/model/mixtral/transformer_layer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/model/mixtral/transformer_layer.py -------------------------------------------------------------------------------- /linear_moe/model/qwen2/hybrid/hybrid_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/model/qwen2/hybrid/hybrid_model.py -------------------------------------------------------------------------------- /linear_moe/model/qwen2/hybrid/hybrid_transformer_block.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/model/qwen2/hybrid/hybrid_transformer_block.py -------------------------------------------------------------------------------- /linear_moe/model/qwen2/layer_specs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/model/qwen2/layer_specs.py -------------------------------------------------------------------------------- /linear_moe/model/qwen2/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/model/qwen2/model.py -------------------------------------------------------------------------------- /linear_moe/model/qwen2/moe/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /linear_moe/model/qwen2/moe/experts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/model/qwen2/moe/experts.py -------------------------------------------------------------------------------- /linear_moe/model/qwen2/moe/moe_layer.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/model/qwen2/moe/moe_layer.py -------------------------------------------------------------------------------- /linear_moe/model/qwen2/moe/router.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/model/qwen2/moe/router.py -------------------------------------------------------------------------------- /linear_moe/model/qwen2/moe/token_dispatcher.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/model/qwen2/moe/token_dispatcher.py -------------------------------------------------------------------------------- /linear_moe/model/qwen2/rms_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/model/qwen2/rms_norm.py -------------------------------------------------------------------------------- /linear_moe/model/qwen2/transformer/attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/model/qwen2/transformer/attention.py -------------------------------------------------------------------------------- /linear_moe/model/qwen2/transformer/mlp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/model/qwen2/transformer/mlp.py -------------------------------------------------------------------------------- /linear_moe/model/qwen2/transformer_block.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/model/qwen2/transformer_block.py -------------------------------------------------------------------------------- /linear_moe/model/qwen2/transformer_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/model/qwen2/transformer_config.py -------------------------------------------------------------------------------- /linear_moe/model/qwen2/transformer_layer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/model/qwen2/transformer_layer.py -------------------------------------------------------------------------------- /linear_moe/sequence_modeling/attention/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/sequence_modeling/attention/__init__.py -------------------------------------------------------------------------------- /linear_moe/sequence_modeling/attention/dot_product_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/sequence_modeling/attention/dot_product_attention.py -------------------------------------------------------------------------------- /linear_moe/sequence_modeling/based/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/sequence_modeling/based/__init__.py -------------------------------------------------------------------------------- /linear_moe/sequence_modeling/based/based.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/sequence_modeling/based/based.py -------------------------------------------------------------------------------- /linear_moe/sequence_modeling/basic_linear_attention/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/sequence_modeling/basic_linear_attention/__init__.py -------------------------------------------------------------------------------- /linear_moe/sequence_modeling/basic_linear_attention/basic_linear_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/sequence_modeling/basic_linear_attention/basic_linear_attention.py -------------------------------------------------------------------------------- /linear_moe/sequence_modeling/deltanet/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/sequence_modeling/deltanet/__init__.py -------------------------------------------------------------------------------- /linear_moe/sequence_modeling/deltanet/deltanet.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/sequence_modeling/deltanet/deltanet.py -------------------------------------------------------------------------------- /linear_moe/sequence_modeling/gated_deltanet/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/sequence_modeling/gated_deltanet/__init__.py -------------------------------------------------------------------------------- /linear_moe/sequence_modeling/gated_deltanet/gated_deltanet.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/sequence_modeling/gated_deltanet/gated_deltanet.py -------------------------------------------------------------------------------- /linear_moe/sequence_modeling/gla/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/sequence_modeling/gla/__init__.py -------------------------------------------------------------------------------- /linear_moe/sequence_modeling/gla/gla.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/sequence_modeling/gla/gla.py -------------------------------------------------------------------------------- /linear_moe/sequence_modeling/gla/gla_gate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/sequence_modeling/gla/gla_gate.py 
-------------------------------------------------------------------------------- /linear_moe/sequence_modeling/hgrn2/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/sequence_modeling/hgrn2/__init__.py -------------------------------------------------------------------------------- /linear_moe/sequence_modeling/hgrn2/hgrn2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/sequence_modeling/hgrn2/hgrn2.py -------------------------------------------------------------------------------- /linear_moe/sequence_modeling/lasp2/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/sequence_modeling/lasp2/__init__.py -------------------------------------------------------------------------------- /linear_moe/sequence_modeling/lasp2/lasp2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/sequence_modeling/lasp2/lasp2.py -------------------------------------------------------------------------------- /linear_moe/sequence_modeling/lasp2/lasp2_with_mask_triton_op.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/sequence_modeling/lasp2/lasp2_with_mask_triton_op.py -------------------------------------------------------------------------------- /linear_moe/sequence_modeling/lasp2/lasp2_without_mask_triton_op.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/sequence_modeling/lasp2/lasp2_without_mask_triton_op.py -------------------------------------------------------------------------------- /linear_moe/sequence_modeling/lightning_attention/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/sequence_modeling/lightning_attention/__init__.py -------------------------------------------------------------------------------- /linear_moe/sequence_modeling/lightning_attention/lightning_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/sequence_modeling/lightning_attention/lightning_attention.py -------------------------------------------------------------------------------- /linear_moe/sequence_modeling/linear_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/sequence_modeling/linear_attention.py -------------------------------------------------------------------------------- /linear_moe/sequence_modeling/linear_rnn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/sequence_modeling/linear_rnn.py -------------------------------------------------------------------------------- /linear_moe/sequence_modeling/mamba2/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /linear_moe/sequence_modeling/mamba2/mamba_block.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/sequence_modeling/mamba2/mamba_block.py -------------------------------------------------------------------------------- /linear_moe/sequence_modeling/mamba2/mamba_hybrid_layer_allocation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/sequence_modeling/mamba2/mamba_hybrid_layer_allocation.py -------------------------------------------------------------------------------- /linear_moe/sequence_modeling/mamba2/mamba_layer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/sequence_modeling/mamba2/mamba_layer.py -------------------------------------------------------------------------------- /linear_moe/sequence_modeling/mamba2/mamba_mixer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/sequence_modeling/mamba2/mamba_mixer.py -------------------------------------------------------------------------------- /linear_moe/sequence_modeling/mamba2/mamba_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/sequence_modeling/mamba2/mamba_model.py -------------------------------------------------------------------------------- /linear_moe/sequence_modeling/mamba2/triton_cache_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/sequence_modeling/mamba2/triton_cache_manager.py -------------------------------------------------------------------------------- /linear_moe/sequence_modeling/mom_linear_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/sequence_modeling/mom_linear_attention.py -------------------------------------------------------------------------------- /linear_moe/sequence_modeling/rebased/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/sequence_modeling/rebased/__init__.py -------------------------------------------------------------------------------- /linear_moe/sequence_modeling/rebased/rebased.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/sequence_modeling/rebased/rebased.py -------------------------------------------------------------------------------- /linear_moe/sequence_modeling/retention/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/sequence_modeling/retention/__init__.py -------------------------------------------------------------------------------- 
/linear_moe/sequence_modeling/retention/retention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/sequence_modeling/retention/retention.py -------------------------------------------------------------------------------- /linear_moe/sequence_modeling/rwkv6/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/sequence_modeling/rwkv6/__init__.py -------------------------------------------------------------------------------- /linear_moe/sequence_modeling/rwkv6/dd_lerp_linear.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/sequence_modeling/rwkv6/dd_lerp_linear.py -------------------------------------------------------------------------------- /linear_moe/sequence_modeling/rwkv6/rwkv6.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/sequence_modeling/rwkv6/rwkv6.py -------------------------------------------------------------------------------- /linear_moe/sequence_modeling/rwkv7/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/sequence_modeling/rwkv7/__init__.py -------------------------------------------------------------------------------- /linear_moe/sequence_modeling/rwkv7/lora_mlp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/sequence_modeling/rwkv7/lora_mlp.py -------------------------------------------------------------------------------- /linear_moe/sequence_modeling/rwkv7/rwkv7.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/sequence_modeling/rwkv7/rwkv7.py -------------------------------------------------------------------------------- /linear_moe/sequence_modeling/ssm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/sequence_modeling/ssm.py -------------------------------------------------------------------------------- /linear_moe/tokenizer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/tokenizer/__init__.py -------------------------------------------------------------------------------- /linear_moe/tokenizer/icetk_glm130b_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/tokenizer/icetk_glm130b_tokenizer.py -------------------------------------------------------------------------------- /linear_moe/tokenizer/jiebabpe_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/tokenizer/jiebabpe_tokenizer.py -------------------------------------------------------------------------------- /linear_moe/tokenizer/tokenization_baichuan.py: 
/linear_moe/tokenizer/tokenization_qwen_vl.py: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/tokenizer/tokenization_qwen_vl.py
/linear_moe/tokenizer/tokenization_yi.py: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/tokenizer/tokenization_yi.py
/linear_moe/training.py: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/training.py
/linear_moe/utils.py: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/linear_moe/utils.py
/requirements.txt: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/requirements.txt
/toolkits/model_checkpoints_convertor/README.md: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/model_checkpoints_convertor/README.md
/toolkits/model_checkpoints_convertor/baichuan/checkpoint_reshaping_and_interoperability.py: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/model_checkpoints_convertor/baichuan/checkpoint_reshaping_and_interoperability.py
/toolkits/model_checkpoints_convertor/baichuan/configuration_baichuan.py: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/model_checkpoints_convertor/baichuan/configuration_baichuan.py
/toolkits/model_checkpoints_convertor/baichuan/hf2te.py: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/model_checkpoints_convertor/baichuan/hf2te.py
/toolkits/model_checkpoints_convertor/baichuan/model_convertor.sh: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/model_checkpoints_convertor/baichuan/model_convertor.sh
/toolkits/model_checkpoints_convertor/baichuan/te_model_convertor.sh: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/model_checkpoints_convertor/baichuan/te_model_convertor.sh
/toolkits/model_checkpoints_convertor/baichuan2/checkpoint_reshaping_and_interoperability.py: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/model_checkpoints_convertor/baichuan2/checkpoint_reshaping_and_interoperability.py
/toolkits/model_checkpoints_convertor/baichuan2/configuration_baichuan.py: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/model_checkpoints_convertor/baichuan2/configuration_baichuan.py
/toolkits/model_checkpoints_convertor/baichuan2/hf2te.py: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/model_checkpoints_convertor/baichuan2/hf2te.py
/toolkits/model_checkpoints_convertor/baichuan2/hf2te_convertor.sh: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/model_checkpoints_convertor/baichuan2/hf2te_convertor.sh
/toolkits/model_checkpoints_convertor/baichuan2/model_convertor.sh: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/model_checkpoints_convertor/baichuan2/model_convertor.sh
/toolkits/model_checkpoints_convertor/bloom/checkpoint_reshaping_and_interoperability.py: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/model_checkpoints_convertor/bloom/checkpoint_reshaping_and_interoperability.py
/toolkits/model_checkpoints_convertor/bloom/deepspeed_to_megatron.py: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/model_checkpoints_convertor/bloom/deepspeed_to_megatron.py
/toolkits/model_checkpoints_convertor/bloom/deepspeed_to_megatron_ori.py: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/model_checkpoints_convertor/bloom/deepspeed_to_megatron_ori.py
/toolkits/model_checkpoints_convertor/bloom/model_convertor_huggingface_megatron.sh: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/model_checkpoints_convertor/bloom/model_convertor_huggingface_megatron.sh
/toolkits/model_checkpoints_convertor/bloom/reward_model_convertor_megatron.sh: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/model_checkpoints_convertor/bloom/reward_model_convertor_megatron.sh
/toolkits/model_checkpoints_convertor/bloom/reward_model_to_megatron.py: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/model_checkpoints_convertor/bloom/reward_model_to_megatron.py
/toolkits/model_checkpoints_convertor/bloom/run_convert_deepspeed_to_megatron.sh: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/model_checkpoints_convertor/bloom/run_convert_deepspeed_to_megatron.sh
/toolkits/model_checkpoints_convertor/bloom/run_convert_deepspeed_to_transformers.sh: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/model_checkpoints_convertor/bloom/run_convert_deepspeed_to_transformers.sh
/toolkits/model_checkpoints_convertor/chatglm/checkpoint_reshaping_and_interoperability.py: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/model_checkpoints_convertor/chatglm/checkpoint_reshaping_and_interoperability.py
/toolkits/model_checkpoints_convertor/chatglm/run_convert_huggingface_to_megatron.sh: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/model_checkpoints_convertor/chatglm/run_convert_huggingface_to_megatron.sh
/toolkits/model_checkpoints_convertor/deepseek/hf2mcore_deepseek_v2_moe.py: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/model_checkpoints_convertor/deepseek/hf2mcore_deepseek_v2_moe.py
/toolkits/model_checkpoints_convertor/deepseek/hf2mcore_deepseek_v2_moe_convertor.sh: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/model_checkpoints_convertor/deepseek/hf2mcore_deepseek_v2_moe_convertor.sh
/toolkits/model_checkpoints_convertor/falcon/checkpoint_reshaping_and_interoperability.py: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/model_checkpoints_convertor/falcon/checkpoint_reshaping_and_interoperability.py
/toolkits/model_checkpoints_convertor/falcon/configuration_RW.py: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/model_checkpoints_convertor/falcon/configuration_RW.py
/toolkits/model_checkpoints_convertor/falcon/model_convertor.sh: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/model_checkpoints_convertor/falcon/model_convertor.sh
/toolkits/model_checkpoints_convertor/falcon40b/checkpoint_reshaping_and_interoperability.py: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/model_checkpoints_convertor/falcon40b/checkpoint_reshaping_and_interoperability.py
/toolkits/model_checkpoints_convertor/falcon40b/configuration_RW.py: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/model_checkpoints_convertor/falcon40b/configuration_RW.py
/toolkits/model_checkpoints_convertor/falcon40b/model_convertor.sh: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/model_checkpoints_convertor/falcon40b/model_convertor.sh
/toolkits/model_checkpoints_convertor/galactica/checkpoint_reshaping_and_interoperability.py: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/model_checkpoints_convertor/galactica/checkpoint_reshaping_and_interoperability.py
/toolkits/model_checkpoints_convertor/galactica/run_convert_huggingface_to_megatron.sh: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/model_checkpoints_convertor/galactica/run_convert_huggingface_to_megatron.sh
/toolkits/model_checkpoints_convertor/glm/checkpoint_reshaping_and_interoperability.py: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/model_checkpoints_convertor/glm/checkpoint_reshaping_and_interoperability.py
/toolkits/model_checkpoints_convertor/glm/run_convert_transformers_to_megatron.sh: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/model_checkpoints_convertor/glm/run_convert_transformers_to_megatron.sh
/toolkits/model_checkpoints_convertor/glm130b/checkpoint_reshaping_and_interoperability.py: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/model_checkpoints_convertor/glm130b/checkpoint_reshaping_and_interoperability.py
/toolkits/model_checkpoints_convertor/glm130b/merge_130b_ckpts.py: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/model_checkpoints_convertor/glm130b/merge_130b_ckpts.py
/toolkits/model_checkpoints_convertor/glm130b/run_convert_transformers_to_megatron.sh: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/model_checkpoints_convertor/glm130b/run_convert_transformers_to_megatron.sh
/toolkits/model_checkpoints_convertor/llama/hf2mcore.py: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/model_checkpoints_convertor/llama/hf2mcore.py
/toolkits/model_checkpoints_convertor/llama/hf2mcore_70b.py: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/model_checkpoints_convertor/llama/hf2mcore_70b.py
/toolkits/model_checkpoints_convertor/llama/hf2mcore_convertor.sh: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/model_checkpoints_convertor/llama/hf2mcore_convertor.sh
/toolkits/model_checkpoints_convertor/llama/hf2megatron.py: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/model_checkpoints_convertor/llama/hf2megatron.py
/toolkits/model_checkpoints_convertor/llama/hf2megatron_convertor.sh: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/model_checkpoints_convertor/llama/hf2megatron_convertor.sh
/toolkits/model_checkpoints_convertor/llama/hf_llama_moe/config_TEMPLATE.json: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/model_checkpoints_convertor/llama/hf_llama_moe/config_TEMPLATE.json
/toolkits/model_checkpoints_convertor/llama/hf_llama_moe/llama_moe.py: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/model_checkpoints_convertor/llama/hf_llama_moe/llama_moe.py
/toolkits/model_checkpoints_convertor/mistral/hf2mcore.py: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/model_checkpoints_convertor/mistral/hf2mcore.py
/toolkits/model_checkpoints_convertor/mistral/hf2mcore_convertor.sh: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/model_checkpoints_convertor/mistral/hf2mcore_convertor.sh
/toolkits/model_checkpoints_convertor/mistral/hf2mcore_mixtral.py: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/model_checkpoints_convertor/mistral/hf2mcore_mixtral.py
/toolkits/model_checkpoints_convertor/mistral/hf2megatron.py: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/model_checkpoints_convertor/mistral/hf2megatron.py
/toolkits/model_checkpoints_convertor/mistral/hf2megatron_convertor.sh: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/model_checkpoints_convertor/mistral/hf2megatron_convertor.sh
/toolkits/model_checkpoints_convertor/mistral/hf_mistral_moe/config_TEMPLATE.json: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/model_checkpoints_convertor/mistral/hf_mistral_moe/config_TEMPLATE.json
/toolkits/model_checkpoints_convertor/qwen/hf2mcore_qwen1.5_dense_convertor.sh: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/model_checkpoints_convertor/qwen/hf2mcore_qwen1.5_dense_convertor.sh
/toolkits/model_checkpoints_convertor/qwen/hf2mcore_qwen1.5_dense_gqa.py: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/model_checkpoints_convertor/qwen/hf2mcore_qwen1.5_dense_gqa.py
/toolkits/model_checkpoints_convertor/qwen/hf2mcore_qwen1.5_dense_mha.py: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/model_checkpoints_convertor/qwen/hf2mcore_qwen1.5_dense_mha.py
/toolkits/model_checkpoints_convertor/qwen/hf2mcore_qwen1.5_dense_mha_to_moe.py: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/model_checkpoints_convertor/qwen/hf2mcore_qwen1.5_dense_mha_to_moe.py
/toolkits/model_checkpoints_convertor/qwen/hf2mcore_qwen1.5_dense_to_moe_convertor.sh: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/model_checkpoints_convertor/qwen/hf2mcore_qwen1.5_dense_to_moe_convertor.sh
/toolkits/model_checkpoints_convertor/qwen/hf2mcore_qwen1.5_moe.py: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/model_checkpoints_convertor/qwen/hf2mcore_qwen1.5_moe.py
/toolkits/model_checkpoints_convertor/qwen/hf2mcore_qwen1.5_moe_convertor.sh: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/model_checkpoints_convertor/qwen/hf2mcore_qwen1.5_moe_convertor.sh
/toolkits/model_checkpoints_convertor/qwen/hf2mcore_qwen2_convertor.sh: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/model_checkpoints_convertor/qwen/hf2mcore_qwen2_convertor.sh
/toolkits/model_checkpoints_convertor/qwen/hf2mcore_qwen2_dense_and_moe_gqa.py: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/model_checkpoints_convertor/qwen/hf2mcore_qwen2_dense_and_moe_gqa.py
/toolkits/model_checkpoints_convertor/qwen/hf2megablocks_qwen1.5.py: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/model_checkpoints_convertor/qwen/hf2megablocks_qwen1.5.py
/toolkits/model_checkpoints_convertor/qwen/hf2megablocks_qwen1.5_convertor.sh: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/model_checkpoints_convertor/qwen/hf2megablocks_qwen1.5_convertor.sh
/toolkits/model_checkpoints_convertor/qwen/hf2megatron_convertor.sh: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/model_checkpoints_convertor/qwen/hf2megatron_convertor.sh
/toolkits/model_checkpoints_convertor/qwen/hf2megatron_qwen1.0.py: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/model_checkpoints_convertor/qwen/hf2megatron_qwen1.0.py
/toolkits/model_checkpoints_convertor/qwen/hf2megatron_qwen1.5.py: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/model_checkpoints_convertor/qwen/hf2megatron_qwen1.5.py
/toolkits/model_checkpoints_convertor/starcoder/checkpoint_reshaping_and_interoperability.py: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/model_checkpoints_convertor/starcoder/checkpoint_reshaping_and_interoperability.py
/toolkits/model_checkpoints_convertor/starcoder/model_convertor.sh: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/model_checkpoints_convertor/starcoder/model_convertor.sh
/toolkits/model_checkpoints_convertor/yi/checkpoint_reshaping_and_interoperability.py: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/model_checkpoints_convertor/yi/checkpoint_reshaping_and_interoperability.py
/toolkits/model_checkpoints_convertor/yi/model_convertor.sh: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/model_checkpoints_convertor/yi/model_convertor.sh
/toolkits/pretrain_data_preprocessing/README.md: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/pretrain_data_preprocessing/README.md
/toolkits/pretrain_data_preprocessing/clean_raw_text.py: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/pretrain_data_preprocessing/clean_raw_text.py
/toolkits/pretrain_data_preprocessing/convert_json_to_list.py: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/pretrain_data_preprocessing/convert_json_to_list.py
/toolkits/pretrain_data_preprocessing/img.png: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/pretrain_data_preprocessing/img.png
/toolkits/pretrain_data_preprocessing/preprocess_data.py: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/pretrain_data_preprocessing/preprocess_data.py
/toolkits/pretrain_data_preprocessing/preprocess_data_megatron.py: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/pretrain_data_preprocessing/preprocess_data_megatron.py
/toolkits/pretrain_data_preprocessing/preprocess_wudao2.py: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/pretrain_data_preprocessing/preprocess_wudao2.py
/toolkits/pretrain_data_preprocessing/qwen_hf_preprocess_datasets.py: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/pretrain_data_preprocessing/qwen_hf_preprocess_datasets.py
/toolkits/pretrain_data_preprocessing/run_make_pretraining_dataset.sh: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/pretrain_data_preprocessing/run_make_pretraining_dataset.sh
/toolkits/pretrain_data_preprocessing/run_make_pretraining_dataset_megatron.sh: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/pretrain_data_preprocessing/run_make_pretraining_dataset_megatron.sh
/toolkits/pretrain_data_preprocessing/run_make_pretraining_dataset_megatron_slimpajama.sh: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/pretrain_data_preprocessing/run_make_pretraining_dataset_megatron_slimpajama.sh
/toolkits/pretrain_data_preprocessing/run_make_pretraining_dataset_megatron_slimpajama_chunk1_chunk2.sh: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/pretrain_data_preprocessing/run_make_pretraining_dataset_megatron_slimpajama_chunk1_chunk2.sh
/toolkits/pretrain_data_preprocessing/run_prepare_dataset.sh: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/pretrain_data_preprocessing/run_prepare_dataset.sh
/toolkits/pretrain_data_preprocessing/run_prepare_wudao.sh: https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/HEAD/toolkits/pretrain_data_preprocessing/run_prepare_wudao.sh