├── .gitignore ├── .gitmodules ├── LICENSE ├── README.md ├── eval ├── README.md ├── configs │ └── eval_qwen2_linear_moe.py ├── lm_eval_linear_moe.sh ├── models │ └── qwen2_linear_moe.py └── run.py ├── examples ├── linear_llama3 │ ├── README.md │ ├── pretrain_llama.py │ └── run_pretrain_mcore_llama.sh ├── linear_moe_deepseek_v2 │ ├── README.md │ ├── pretrain_deepseek.py │ └── run_pretrain_deepseek.sh ├── linear_moe_mixtral │ ├── README.md │ ├── pretrain_mcore_mistral.py │ └── run_pretrain_mistral.sh └── linear_moe_qwen2 │ ├── README.md │ ├── check_tensorboard.sh │ ├── evaluate_huggingface_qwen.py │ ├── evaluate_mcore_qwen.py │ ├── pretrain_qwen.py │ ├── run_evaluate_huggingface_qwen.sh │ ├── run_evaluate_mcore_qwen.sh │ └── run_pretrain_qwen.sh ├── images ├── linear-moe-fig1.png └── linear-moe-fig2.png ├── linear_moe ├── __init__.py ├── arguments.py ├── data │ ├── __init__.py │ ├── bloom.py │ ├── glm.py │ ├── llama.py │ ├── llava │ │ ├── constants.py │ │ ├── conversation.py │ │ ├── cvcuda_image_processing_clip.py │ │ ├── mm_pretrain_dataset.py │ │ └── mm_utils.py │ ├── qwen_vl.py │ ├── starcoder.py │ └── utils.py ├── finetune_utils.py ├── generation │ ├── api.py │ ├── generation.py │ ├── gpt_predictor.py │ ├── megatron.md │ └── tokenization.py ├── initialize.py ├── lm_evaluate.py ├── model │ ├── __init__.py │ ├── common_modules │ │ ├── __init__.py │ │ ├── activations.py │ │ ├── feature_map.py │ │ ├── l2norm.py │ │ ├── layernorm.py │ │ └── rotary.py │ ├── deepseek_v2 │ │ ├── __init__.py │ │ ├── hybrid │ │ │ ├── hybrid_model.py │ │ │ └── hybrid_transformer_block.py │ │ ├── layer_specs.py │ │ ├── model.py │ │ ├── moe │ │ │ ├── __init__.py │ │ │ ├── experts.py │ │ │ ├── moe_layer.py │ │ │ ├── router.py │ │ │ ├── router_old.py │ │ │ └── token_dispatcher.py │ │ ├── rms_norm.py │ │ ├── transformer │ │ │ ├── attention.py │ │ │ └── mlp.py │ │ ├── transformer_block.py │ │ ├── transformer_config.py │ │ ├── transformer_layer.py │ │ └── yarn_rotary_pos_embedding.py │ ├── llama3 │ │ ├── __init__.py │ │ ├── gpt_model.py │ │ ├── hybrid │ │ │ ├── hybrid_model.py │ │ │ └── hybrid_transformer_block.py │ │ ├── language_model.py │ │ ├── layer_specs.py │ │ ├── model.py │ │ ├── rms_norm.py │ │ ├── transformer │ │ │ ├── attention.py │ │ │ └── mlp.py │ │ ├── transformer_config.py │ │ ├── transformer_layer.py │ │ └── transformer_legacy.py │ ├── mixtral │ │ ├── __init__.py │ │ ├── hybrid │ │ │ ├── hybrid_model.py │ │ │ └── hybrid_transformer_block.py │ │ ├── layer_specs.py │ │ ├── model.py │ │ ├── moe │ │ │ ├── __init__.py │ │ │ ├── experts.py │ │ │ ├── grouped_gemm_util.py │ │ │ ├── moe_layer.py │ │ │ ├── moe_utils.py │ │ │ ├── router.py │ │ │ └── token_dispatcher.py │ │ ├── rms_norm.py │ │ ├── transformer │ │ │ ├── attention.py │ │ │ └── mlp.py │ │ ├── transformer_block.py │ │ ├── transformer_config.py │ │ └── transformer_layer.py │ └── qwen2 │ │ ├── hybrid │ │ ├── hybrid_model.py │ │ └── hybrid_transformer_block.py │ │ ├── layer_specs.py │ │ ├── model.py │ │ ├── moe │ │ ├── __init__.py │ │ ├── experts.py │ │ ├── moe_layer.py │ │ ├── router.py │ │ └── token_dispatcher.py │ │ ├── rms_norm.py │ │ ├── transformer │ │ ├── attention.py │ │ └── mlp.py │ │ ├── transformer_block.py │ │ ├── transformer_config.py │ │ └── transformer_layer.py ├── sequence_modeling │ ├── attention │ │ ├── __init__.py │ │ └── dot_product_attention.py │ ├── based │ │ ├── __init__.py │ │ └── based.py │ ├── basic_linear_attention │ │ ├── __init__.py │ │ └── basic_linear_attention.py │ ├── deltanet │ │ ├── __init__.py │ │ └── deltanet.py │ ├── 
gated_deltanet │ │ ├── __init__.py │ │ └── gated_deltanet.py │ ├── gla │ │ ├── __init__.py │ │ ├── gla.py │ │ └── gla_gate.py │ ├── hgrn2 │ │ ├── __init__.py │ │ └── hgrn2.py │ ├── lasp2 │ │ ├── __init__.py │ │ ├── lasp2.py │ │ ├── lasp2_with_mask_triton_op.py │ │ └── lasp2_without_mask_triton_op.py │ ├── lightning_attention │ │ ├── __init__.py │ │ └── lightning_attention.py │ ├── linear_attention.py │ ├── linear_rnn.py │ ├── mamba2 │ │ ├── __init__.py │ │ ├── mamba_block.py │ │ ├── mamba_hybrid_layer_allocation.py │ │ ├── mamba_layer.py │ │ ├── mamba_mixer.py │ │ ├── mamba_model.py │ │ └── triton_cache_manager.py │ ├── mom_linear_attention.py │ ├── rebased │ │ ├── __init__.py │ │ └── rebased.py │ ├── retention │ │ ├── __init__.py │ │ └── retention.py │ ├── rwkv6 │ │ ├── __init__.py │ │ ├── dd_lerp_linear.py │ │ └── rwkv6.py │ ├── rwkv7 │ │ ├── __init__.py │ │ ├── lora_mlp.py │ │ └── rwkv7.py │ └── ssm.py ├── tokenizer │ ├── __init__.py │ ├── icetk_glm130b_tokenizer.py │ ├── jiebabpe_tokenizer.py │ ├── tokenization_baichuan.py │ ├── tokenization_qwen_vl.py │ └── tokenization_yi.py ├── training.py └── utils.py ├── requirements.txt └── toolkits ├── model_checkpoints_convertor ├── README.md ├── baichuan │ ├── checkpoint_reshaping_and_interoperability.py │ ├── configuration_baichuan.py │ ├── hf2te.py │ ├── model_convertor.sh │ └── te_model_convertor.sh ├── baichuan2 │ ├── checkpoint_reshaping_and_interoperability.py │ ├── configuration_baichuan.py │ ├── hf2te.py │ ├── hf2te_convertor.sh │ └── model_convertor.sh ├── bloom │ ├── checkpoint_reshaping_and_interoperability.py │ ├── deepspeed_to_megatron.py │ ├── deepspeed_to_megatron_ori.py │ ├── model_convertor_huggingface_megatron.sh │ ├── reward_model_convertor_megatron.sh │ ├── reward_model_to_megatron.py │ ├── run_convert_deepspeed_to_megatron.sh │ └── run_convert_deepspeed_to_transformers.sh ├── chatglm │ ├── checkpoint_reshaping_and_interoperability.py │ └── run_convert_huggingface_to_megatron.sh ├── deepseek │ ├── hf2mcore_deepseek_v2_moe.py │ └── hf2mcore_deepseek_v2_moe_convertor.sh ├── falcon │ ├── checkpoint_reshaping_and_interoperability.py │ ├── configuration_RW.py │ └── model_convertor.sh ├── falcon40b │ ├── checkpoint_reshaping_and_interoperability.py │ ├── configuration_RW.py │ └── model_convertor.sh ├── galactica │ ├── checkpoint_reshaping_and_interoperability.py │ └── run_convert_huggingface_to_megatron.sh ├── glm │ ├── checkpoint_reshaping_and_interoperability.py │ └── run_convert_transformers_to_megatron.sh ├── glm130b │ ├── checkpoint_reshaping_and_interoperability.py │ ├── merge_130b_ckpts.py │ └── run_convert_transformers_to_megatron.sh ├── llama │ ├── hf2mcore.py │ ├── hf2mcore_70b.py │ ├── hf2mcore_convertor.sh │ ├── hf2megatron.py │ ├── hf2megatron_convertor.sh │ └── hf_llama_moe │ │ ├── config_TEMPLATE.json │ │ └── llama_moe.py ├── mistral │ ├── hf2mcore.py │ ├── hf2mcore_convertor.sh │ ├── hf2mcore_mixtral.py │ ├── hf2megatron.py │ ├── hf2megatron_convertor.sh │ └── hf_mistral_moe │ │ └── config_TEMPLATE.json ├── qwen │ ├── hf2mcore_qwen1.5_dense_convertor.sh │ ├── hf2mcore_qwen1.5_dense_gqa.py │ ├── hf2mcore_qwen1.5_dense_mha.py │ ├── hf2mcore_qwen1.5_dense_mha_to_moe.py │ ├── hf2mcore_qwen1.5_dense_to_moe_convertor.sh │ ├── hf2mcore_qwen1.5_moe.py │ ├── hf2mcore_qwen1.5_moe_convertor.sh │ ├── hf2mcore_qwen2_convertor.sh │ ├── hf2mcore_qwen2_dense_and_moe_gqa.py │ ├── hf2megablocks_qwen1.5.py │ ├── hf2megablocks_qwen1.5_convertor.sh │ ├── hf2megatron_convertor.sh │ ├── hf2megatron_qwen1.0.py │ └── 
hf2megatron_qwen1.5.py ├── starcoder │ ├── checkpoint_reshaping_and_interoperability.py │ └── model_convertor.sh └── yi │ ├── checkpoint_reshaping_and_interoperability.py │ └── model_convertor.sh └── pretrain_data_preprocessing ├── README.md ├── clean_raw_text.py ├── convert_json_to_list.py ├── img.png ├── preprocess_data.py ├── preprocess_data_megatron.py ├── preprocess_wudao2.py ├── qwen_hf_preprocess_datasets.py ├── run_make_pretraining_dataset.sh ├── run_make_pretraining_dataset_megatron.sh ├── run_make_pretraining_dataset_megatron_slimpajama.sh ├── run_make_pretraining_dataset_megatron_slimpajama_chunk1_chunk2.sh ├── run_prepare_dataset.sh └── run_prepare_wudao.sh /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | cmake 9 | 10 | # Distribution / packaging 11 | .Python 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | cover/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | db.sqlite3 63 | db.sqlite3-journal 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | 75 | # PyBuilder 76 | .pybuilder/ 77 | target/ 78 | 79 | # Jupyter Notebook 80 | .ipynb_checkpoints 81 | 82 | # IPython 83 | profile_default/ 84 | ipython_config.py 85 | 86 | # pyenv 87 | # For a library or package, you might want to ignore these files since the code is 88 | # intended to run in multiple environments; otherwise, check them in: 89 | # .python-version 90 | 91 | # pipenv 92 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 93 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 94 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 95 | # install all needed dependencies. 96 | #Pipfile.lock 97 | 98 | # poetry 99 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 100 | # This is especially recommended for binary packages to ensure reproducibility, and is more 101 | # commonly ignored for libraries. 102 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 103 | #poetry.lock 104 | 105 | # pdm 106 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 107 | #pdm.lock 108 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 109 | # in version control. 
110 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 111 | .pdm.toml 112 | .pdm-python 113 | .pdm-build/ 114 | 115 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 116 | __pypackages__/ 117 | 118 | # Celery stuff 119 | celerybeat-schedule 120 | celerybeat.pid 121 | 122 | # SageMath parsed files 123 | *.sage.py 124 | 125 | # Environments 126 | .env 127 | .venv 128 | env/ 129 | venv/ 130 | ENV/ 131 | env.bak/ 132 | venv.bak/ 133 | 134 | # Spyder project settings 135 | .spyderproject 136 | .spyproject 137 | 138 | # Rope project settings 139 | .ropeproject 140 | 141 | # mkdocs documentation 142 | /site 143 | 144 | # mypy 145 | .mypy_cache/ 146 | .dmypy.json 147 | dmypy.json 148 | 149 | # Pyre type checker 150 | .pyre/ 151 | 152 | # pytype static type analyzer 153 | .pytype/ 154 | 155 | # Cython debug symbols 156 | cython_debug/ 157 | 158 | # PyCharm 159 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 160 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 161 | # and can be added to the global gitignore or merged into this file. For a more nuclear 162 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 163 | #.idea/ 164 | 165 | # Linear-MoE 166 | .idea 167 | Megatron-LM* 168 | LM-Evaluation-Harness* 169 | *.pyc 170 | output 171 | outputs 172 | checkpoint 173 | data-cache 174 | tensorboard 175 | log 176 | triton-cache 177 | data 178 | *.zip -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "third_party/Megatron-LM-0.4.0"] 2 | path = third_party/Megatron-LM-0.4.0 3 | url = https://github.com/NVIDIA/Megatron-LM.git 4 | [submodule "third_party/Megatron-LM-0.9.0"] 5 | path = third_party/Megatron-LM-0.9.0 6 | url = https://github.com/NVIDIA/Megatron-LM.git 7 | [submodule "third_party/flash-linear-attention-1018"] 8 | path = third_party/flash-linear-attention-1018 9 | url = https://github.com/sustcsonglin/flash-linear-attention.git 10 | [submodule "third_party/lm-evaluation-harness"] 11 | path = third_party/lm-evaluation-harness 12 | url = https://github.com/weigao266/lm-evaluation-harness.git 13 | [submodule "third_party/flash-linear-attention-250303"] 14 | path = third_party/flash-linear-attention-250303 15 | url = https://github.com/fla-org/flash-linear-attention 16 | -------------------------------------------------------------------------------- /eval/README.md: -------------------------------------------------------------------------------- 1 | # Linear-MoE Evaluation 2 | 3 | First you should install `lm-evaluation-harness` in [third_party/lm-evaluation-harness](third_party/lm-evaluation-harness) by: 4 | 5 | ```bash 6 | cd third_party/lm-evaluation-harness 7 | pip install -e . 8 | ``` 9 | 10 | Edit `lm_eval_linear_moe.sh` to set checkpoint path like: 11 | 12 | ```bash 13 | CHECKPOINT_DIR=/your/checkpoint/dir 14 | CHECKPOINT_PATH=${CHECKPOINT_DIR}/pretrain-mcore-linear_attention-qwen2-A0.3B-lr-1e-4-minlr-1e-5-bs-8-gbs-64-seqlen-2048-pr-bf16-tp-1-pp-1-ac-sel-do-true-sp-false-tt-15000000000-wt-10000 15 | ``` 16 | 17 | and set the model and training configurations like: 18 | 19 | ```bash 20 | MODEL_SIZE=A0.3B 21 | SEQ_LEN=2048 22 | PAD_LEN=2048 23 | PR=bf16 24 | TP=1 25 | PP=1 26 | EP=1 27 | 28 | ... 
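# LA_MODULE selects the linear sequence-modeling module and BASE_MODEL the backbone
# it plugs into. In LAYER_TYPE_LIST, "L" denotes a linear-module layer and "N" a
# standard (softmax) attention layer, so an all-"L" list gives a pure linear model
# while mixing in "N" layers gives the hybrid model mentioned below.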
29 | 30 | LA_MODULE="linear_attention" 31 | BASE_MODEL="qwen2" 32 | 33 | # set LAYER_TYPE_LIST="LLLNLLLNLLLN" for hybrid model 34 | LAYER_TYPE_LIST="LLLLLLLLLLLL" 35 | 36 | # Linear-MoE options 37 | linear_moe_options=" \ 38 | --use-la-module \ 39 | --la-module ${LA_MODULE} \ 40 | --la-mode fused_chunk \ 41 | --base-model ${BASE_MODEL} \ 42 | --la-feature-map elu \ 43 | --la-output-norm rmsnorm \ 44 | --la-gate-fn swish \ 45 | --layer-type-list ${LAYER_TYPE_LIST} \ 46 | " 47 | ``` 48 | 49 | then set the evaluation task and other configurations for `lm-evaluation-harness`: 50 | 51 | ```bash 52 | run_cmd="torchrun $DISTRIBUTED_ARGS --no-python lm_eval \ 53 | --model linear_moe \ 54 | --model_args path=${CHECKPOINT_PATH} max_length=2048 \ 55 | --tasks piqa \ 56 | --device cuda \ 57 | --batch_size 16 \ 58 | --output_path lm_eval_result \ 59 | ${megatron_options} ${pr_options} ${load_options} ${input_options} ${te_options} ${activation_checkpoint_options} ${do_options} ${flash_options} ${sp_options} ${moe_options} ${linear_moe_options}" 60 | ``` 61 | 62 | After finishing the above settings, evaluate Linear-MoE models by: 63 | 64 | ```bash 65 | sh lm_eval_linear_moe.sh 66 | ``` 67 | 68 | The evaluation results would be presented like below: 69 | 70 | ```bash 71 | linear_moe (path=/your/checkpoint/dir/pretrain-mcore-linear_attention-qwen2-A0.3B-lr-1e-4-minlr-1e-5-bs-8-gbs-64-seqlen-2048-pr-bf16-tp-1-pp-1-ac-sel-do-true-sp-false-tt-15000000000-wt-10000), gen_kwargs: (None), limit: None, num_fewshot: None, batch_size: 16 72 | |Tasks|Version|Filter|n-shot| Metric | |Value | |Stderr| 73 | |-----|------:|------|-----:|--------|---|-----:|---|-----:| 74 | |piqa | 1|none | 0|acc |↑ |0.6436|± |0.0112| 75 | | | |none | 0|acc_norm|↑ |0.6436|± |0.0112| 76 | ``` 77 | -------------------------------------------------------------------------------- /eval/configs/eval_qwen2_linear_moe.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from opencompass.configs.datasets.wikitext.wikitext_103_raw_ppl import wikitext_103_raw_datasets 5 | from opencompass.configs.datasets.lambada.lambada_gen import lambada_datasets 6 | from opencompass.configs.datasets.piqa.piqa_gen import piqa_datasets 7 | from opencompass.configs.datasets.hellaswag.hellaswag_gen import hellaswag_datasets 8 | from opencompass.configs.datasets.winogrande.winogrande_gen import winogrande_datasets 9 | from opencompass.configs.datasets.ARC_e.ARC_e_gen import ARC_e_datasets # ARC-easy 10 | from opencompass.configs.datasets.ARC_c.ARC_c_gen import ARC_c_datasets # ARC-challenge 11 | 12 | datasets = winogrande_datasets 13 | 14 | from eval.models.qwen2_linear_moe import Qwen2LinearMoe 15 | 16 | models = [ 17 | dict( 18 | type=Qwen2LinearMoe, 19 | path="", #'huggyllama/llama-7b', 20 | model_kwargs=dict(device_map='auto'), 21 | tokenizer_path="", # 'huggyllama/llama-7b', 22 | tokenizer_kwargs=dict(padding_side='left', truncation_side='left'), 23 | max_seq_len=2048, 24 | max_out_len=512, 25 | run_cfg=dict(num_gpus=1, num_procs=1), 26 | batch_size=32, 27 | ) 28 | ] 29 | 30 | -------------------------------------------------------------------------------- /eval/run.py: -------------------------------------------------------------------------------- 1 | from cli.main import main 2 | 3 | if __name__ == '__main__': 4 | main() 5 | -------------------------------------------------------------------------------- /examples/linear_moe_qwen2/check_tensorboard.sh: 
-------------------------------------------------------------------------------- 1 | 2 | tensorboard --logdir=./output/tensorboard/ -------------------------------------------------------------------------------- /examples/linear_moe_qwen2/run_evaluate_huggingface_qwen.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # sh run_evaluate_huggingface_qwen.sh 0.5B 1 256 256 bf16 /mnt/qwen-datasets/alpaca_zh-qwen-valid.json /mnt/qwen-ckpts/Qwen2-0.5B 3 | 4 | set -e 5 | CURRENT_DIR="$( cd "$( dirname "$0" )" && pwd )" 6 | MEGATRON_PATH=$( dirname $( dirname ${CURRENT_DIR})) 7 | export PYTHONPATH=$PYTHONPATH:${MEGATRON_PATH}:${MEGATRON_PATH}/Megatron-LM-240612 8 | export CUDA_DEVICE_MAX_CONNECTIONS=1 9 | 10 | export CUDA_VISIBLE_DEVICES=6 11 | MASTER_ADDR=localhost 12 | MASTER_PORT=$(shuf -n 1 -i 10000-65535) 13 | NNODES=1 14 | NODE_RANK=0 15 | GPUS_PER_NODE=1 16 | 17 | DISTRIBUTED_ARGS="--nproc_per_node $GPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT" 18 | 19 | MODEL_SIZE=$1 20 | BATCH_SIZE=$2 21 | SEQ_LEN=$3 22 | PAD_LEN=$4 23 | PR=$5 24 | DATASET_PATH=$6 25 | PRETRAIN_CHECKPOINT_PATH=$7 26 | 27 | 28 | if [ $MODEL_SIZE = 0.5B ]; then 29 | 30 | HIDDEN_SIZE=896 31 | INTERMEDIATE_SIZE=4864 32 | MAX_POSITION_EMBEDDINGS=131072 33 | MAX_WINDOW_LAYERS=24 34 | NUM_ATTENTION_HEADS=14 35 | NUM_HIDDEN_LAYERS=24 36 | NUM_KEY_VALUE_HEADS=2 37 | RMS_NORM_EPS=1e-6 38 | ROPE_THETA=1000000 39 | SLIDING_WINDOW=131072 40 | EXTRA_VOCAB_SIZE=293 41 | 42 | moe_options=" \ 43 | " 44 | 45 | elif [ $MODEL_SIZE = 1.5B ]; then 46 | 47 | HIDDEN_SIZE=1536 48 | INTERMEDIATE_SIZE=8960 49 | MAX_POSITION_EMBEDDINGS=131072 50 | MAX_WINDOW_LAYERS=28 51 | NUM_ATTENTION_HEADS=12 52 | NUM_HIDDEN_LAYERS=28 53 | NUM_KEY_VALUE_HEADS=2 54 | RMS_NORM_EPS=1e-6 55 | ROPE_THETA=1000000 56 | SLIDING_WINDOW=131072 57 | EXTRA_VOCAB_SIZE=293 58 | 59 | moe_options=" \ 60 | " 61 | 62 | elif [ $MODEL_SIZE = 7B ]; then 63 | 64 | HIDDEN_SIZE=3584 65 | INTERMEDIATE_SIZE=18944 66 | MAX_POSITION_EMBEDDINGS=131072 67 | MAX_WINDOW_LAYERS=28 68 | NUM_ATTENTION_HEADS=28 69 | NUM_HIDDEN_LAYERS=28 70 | NUM_KEY_VALUE_HEADS=4 71 | RMS_NORM_EPS=1e-6 72 | ROPE_THETA=1000000 73 | SLIDING_WINDOW=131072 74 | EXTRA_VOCAB_SIZE=421 75 | 76 | moe_options=" \ 77 | " 78 | 79 | elif [ $MODEL_SIZE = 72B ]; then 80 | 81 | HIDDEN_SIZE=8192 82 | INTERMEDIATE_SIZE=29568 83 | MAX_POSITION_EMBEDDINGS=131072 84 | MAX_WINDOW_LAYERS=80 85 | NUM_ATTENTION_HEADS=64 86 | NUM_HIDDEN_LAYERS=80 87 | NUM_KEY_VALUE_HEADS=8 88 | RMS_NORM_EPS=1e-5 89 | ROPE_THETA=1000000 90 | SLIDING_WINDOW=131072 91 | EXTRA_VOCAB_SIZE=421 92 | 93 | moe_options=" \ 94 | " 95 | 96 | fi 97 | 98 | if [ $PR = fp16 ]; then 99 | pr_options=" \ 100 | --fp16" 101 | elif [ $PR = bf16 ]; then 102 | pr_options=" \ 103 | --bf16" 104 | fi 105 | 106 | if [ $PRETRAIN_CHECKPOINT_PATH != none ]; then 107 | load_options=" \ 108 | --load $PRETRAIN_CHECKPOINT_PATH" 109 | fi 110 | 111 | 112 | megatron_options=" \ 113 | --transformer-type huggingface \ 114 | --valid-data-path ${DATASET_PATH} 115 | --micro-batch-size ${BATCH_SIZE} \ 116 | --num-layers ${NUM_HIDDEN_LAYERS} \ 117 | --hidden-size ${HIDDEN_SIZE} \ 118 | --num-attention-heads ${NUM_ATTENTION_HEADS} \ 119 | --seq-length ${SEQ_LEN} \ 120 | --max-position-embeddings ${SEQ_LEN} \ 121 | --log-interval 1 \ 122 | --eval-interval 100 \ 123 | --eval-iters 10 \ 124 | --tensor-model-parallel-size 1 \ 125 | --pipeline-model-parallel-size 1 \ 126 | --no-load-optim \ 
127 | --num-workers 0 \ 128 | --dataset LLama-SFT \ 129 | --use-distributed-optimizer \ 130 | --max-padding-length ${PAD_LEN} \ 131 | --extra-vocab-size ${EXTRA_VOCAB_SIZE} \ 132 | --patch-tokenizer-type LLamaTokenizer 133 | " 134 | 135 | run_cmd="torchrun $DISTRIBUTED_ARGS evaluate_huggingface_qwen.py 136 | ${megatron_options} ${pr_options} ${load_options}" 137 | 138 | echo ${run_cmd} 139 | eval ${run_cmd} 140 | set +x 141 | -------------------------------------------------------------------------------- /images/linear-moe-fig1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/b312754b9b8a2cb9eb15e373baabe263b8d409c1/images/linear-moe-fig1.png -------------------------------------------------------------------------------- /images/linear-moe-fig2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/b312754b9b8a2cb9eb15e373baabe263b8d409c1/images/linear-moe-fig2.png -------------------------------------------------------------------------------- /linear_moe/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Alibaba PAI team. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /linear_moe/data/bloom.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Alibaba PAI Team. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import json 16 | import numpy as np 17 | import torch 18 | 19 | from linear_moe.tokenizer import get_tokenizer 20 | 21 | class BloomRawDataset(torch.utils.data.Dataset): 22 | """A class for processing a Bloom text dataset""" 23 | def __init__(self, datapaths, max_seq_length): 24 | """ 25 | Initializes the dataset. 26 | Args: 27 | path(str): The path of the dataset file. 28 | tokenizer(object): The tokenizer object. 29 | max_seq_length(int): The maximum length of sequences. 
30 | """ 31 | self.tokenizer = get_tokenizer() 32 | self.max_seq_length = max_seq_length 33 | self.prompt = '' 34 | self.samples = [] 35 | for datapath in datapaths: 36 | self.samples.extend( 37 | self.process_samples_from_single_path(datapath)) 38 | print(' >> total number of samples: {}'.format(len(self.samples))) 39 | 40 | def __len__(self): 41 | return len(self.samples) 42 | 43 | def __getitem__(self, idx): 44 | raw_sample = self.samples[idx] 45 | return self.gpt_convert_example_to_feature(raw_sample, self.tokenizer, 46 | self.max_seq_length) 47 | 48 | def truncate(self, tokenizer, array, max_length): 49 | """ 50 | Truncates an array to a maximum length or pads it with zeros if its length is less than `max_length`. 51 | Args: 52 | tokenizer: The tokenizer used to encode the input. 53 | array: The numpy array to truncate or pad. 54 | max_length: The maximum length of the array. 55 | Returns: 56 | A numpy array of length `max_length` containing the contents of `array`, truncated if necessary or padded with zeros. 57 | """ 58 | 59 | if len(array) < max_length: 60 | return np.pad(array, (0, max_length - len(array)), 61 | constant_values=tokenizer.eod) 62 | else: 63 | return array[:max_length] 64 | 65 | def process_samples_from_single_path(self, filename): 66 | """ 67 | Process a single file containing prompt-answer pairs and return a list of samples. 68 | """ 69 | 70 | print(' > Processing {} ...'.format(filename)) 71 | samples = [] 72 | total = 0 73 | with open(filename, encoding='utf-8-sig') as f: 74 | for example in f: 75 | text = json.loads(example)['text'] 76 | sample = { 77 | 'prompt': 78 | text + '' if not text.endswith('') else text, 79 | 'answer': text, 80 | } 81 | total += 1 82 | samples.append(sample) 83 | 84 | print(' >> processed {} samples.'.format(len(samples))) 85 | return samples 86 | 87 | def gpt_convert_example_to_feature(self, sample, tokenizer, 88 | max_seq_length): 89 | """ 90 | Convert a single sample containing a prompt-answer pair into a format suitable for GPT training. 91 | """ 92 | 93 | tokens = tokenizer(sample['prompt']) 94 | input_ids = tokens['input_ids'] 95 | input_ids = self.truncate(tokenizer, input_ids, max_seq_length + 1) 96 | train_sample = {'input_ids': np.array(input_ids)} 97 | return train_sample -------------------------------------------------------------------------------- /linear_moe/data/llava/constants.py: -------------------------------------------------------------------------------- 1 | CONTROLLER_HEART_BEAT_EXPIRATION = 30 2 | WORKER_HEART_BEAT_INTERVAL = 15 3 | 4 | LOGDIR = "." 5 | 6 | # Model Constants 7 | IGNORE_INDEX = -100 8 | IMAGE_TOKEN_INDEX = -200 9 | DEFAULT_IMAGE_TOKEN = "" 10 | DEFAULT_IMAGE_PATCH_TOKEN = "" 11 | DEFAULT_IM_START_TOKEN = "" 12 | DEFAULT_IM_END_TOKEN = "" 13 | -------------------------------------------------------------------------------- /linear_moe/data/llava/mm_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Alibaba PAI Team. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from PIL import Image 16 | from io import BytesIO 17 | import base64 18 | import torch 19 | from transformers import StoppingCriteria 20 | from linear_moe.data.llava.constants import IMAGE_TOKEN_INDEX 21 | 22 | 23 | def load_image_from_base64(image): 24 | return Image.open(BytesIO(base64.b64decode(image))) 25 | 26 | 27 | def expand2square(pil_img, background_color): 28 | width, height = pil_img.size 29 | if width == height: 30 | return pil_img 31 | elif width > height: 32 | result = Image.new(pil_img.mode, (width, width), background_color) 33 | result.paste(pil_img, (0, (width - height) // 2)) 34 | return result 35 | else: 36 | result = Image.new(pil_img.mode, (height, height), background_color) 37 | result.paste(pil_img, ((height - width) // 2, 0)) 38 | return result 39 | 40 | 41 | def process_images(images, image_processor, model_cfg): 42 | image_aspect_ratio = getattr(model_cfg, "image_aspect_ratio", None) 43 | new_images = [] 44 | if image_aspect_ratio == 'pad': 45 | for image in images: 46 | image = expand2square(image, tuple(int(x*255) for x in image_processor.image_mean)) 47 | image = image_processor.preprocess(image, return_tensors='pt')['pixel_values'][0] 48 | new_images.append(image) 49 | else: 50 | return image_processor(images, return_tensors='pt')['pixel_values'] 51 | if all(x.shape == new_images[0].shape for x in new_images): 52 | new_images = torch.stack(new_images, dim=0) 53 | return new_images 54 | 55 | 56 | def tokenizer_image_token(prompt, tokenizer, image_token_index=IMAGE_TOKEN_INDEX, return_tensors=None): 57 | prompt_chunks = [tokenizer(chunk).input_ids for chunk in prompt.split('')] 58 | def insert_separator(X, sep): 59 | return [ele for sublist in zip(X, [sep]*len(X)) for ele in sublist][:-1] 60 | input_ids = [] 61 | offset = 0 62 | if len(prompt_chunks) > 0 and len(prompt_chunks[0]) > 0 and prompt_chunks[0][0] == tokenizer.bos_token_id: 63 | offset = 1 64 | input_ids.append(prompt_chunks[0][0]) 65 | 66 | for x in insert_separator(prompt_chunks, [image_token_index] * (offset + 1)): 67 | input_ids.extend(x[offset:]) 68 | if return_tensors is not None: 69 | if return_tensors == 'pt': 70 | return torch.tensor(input_ids, dtype=torch.long) 71 | raise ValueError(f'Unsupported tensor type: {return_tensors}') 72 | 73 | return input_ids 74 | 75 | 76 | def get_model_name_from_path(model_path): 77 | model_path = model_path.strip("/") 78 | model_paths = model_path.split("/") 79 | if model_paths[-1].startswith('checkpoint-'): 80 | return model_paths[-2] + "_" + model_paths[-1] 81 | else: 82 | return model_paths[-1] 83 | 84 | 85 | 86 | 87 | class KeywordsStoppingCriteria(StoppingCriteria): 88 | def __init__(self, keywords, tokenizer, input_ids): 89 | self.keywords = keywords 90 | self.keyword_ids = [] 91 | self.max_keyword_len = 0 92 | for keyword in keywords: 93 | cur_keyword_ids = tokenizer(keyword).input_ids 94 | if len(cur_keyword_ids) > 1 and cur_keyword_ids[0] == tokenizer.bos_token_id: 95 | cur_keyword_ids = cur_keyword_ids[1:] 96 | if len(cur_keyword_ids) > self.max_keyword_len: 97 | self.max_keyword_len = len(cur_keyword_ids) 98 | self.keyword_ids.append(torch.tensor(cur_keyword_ids)) 99 | self.tokenizer = tokenizer 100 | self.start_len = input_ids.shape[1] 101 | 102 | def __call__(self, output_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool: 103 | assert output_ids.shape[0] == 1, "Only support batch size 1 (yet)" # TODO 104 | 
offset = min(output_ids.shape[1] - self.start_len, self.max_keyword_len) 105 | self.keyword_ids = [keyword_id.to(output_ids.device) for keyword_id in self.keyword_ids] 106 | for keyword_id in self.keyword_ids: 107 | if (output_ids[0, -keyword_id.shape[0]:] == keyword_id).all(): 108 | return True 109 | outputs = self.tokenizer.batch_decode(output_ids[:, -offset:], skip_special_tokens=True)[0] 110 | for keyword in self.keywords: 111 | if keyword in outputs: 112 | return True 113 | return False -------------------------------------------------------------------------------- /linear_moe/data/qwen_vl.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Alibaba PAI Team. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import json 16 | from typing import Dict 17 | import torch 18 | from torch.utils.data import Dataset 19 | import transformers 20 | from transformers.trainer_pt_utils import LabelSmoother 21 | try: 22 | from megatron import get_args 23 | except: 24 | from megatron.training import get_args 25 | 26 | from linear_moe.tokenizer import get_tokenizer 27 | 28 | IGNORE_TOKEN_ID = LabelSmoother.ignore_index 29 | 30 | def preprocess( 31 | sources, 32 | tokenizer: transformers.PreTrainedTokenizer, 33 | max_len: int, 34 | system_message: str = "You are a helpful assistant." 35 | ) -> Dict: 36 | """ 37 | Preprocess conversation data for the model input. 38 | 39 | Parameters: 40 | sources (List[Dict]): A list of conversation segments. 41 | tokenizer (PreTrainedTokenizer): A tokenizer instance. 42 | max_len (int): The maximum sequence length. 43 | system_message (str, optional): A default system message. 44 | 45 | Returns: 46 | Dict: A dictionary with 'input_ids', 'labels', and 'attention_mask'. 
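    Note:
        Conversations are rendered in Qwen's ChatML layout
        (<|im_start|>role\n...<|im_end|>\n). In `labels`, the system/user spans and the
        assistant role header are masked to IGNORE_TOKEN_ID so loss is only computed on
        assistant responses, and every sequence is padded or truncated to `max_len`.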
47 | """ 48 | roles = {"user": "<|im_start|>user", "assistant": "<|im_start|>assistant"} 49 | im_start = tokenizer.im_start_id 50 | im_end = tokenizer.im_end_id 51 | nl_tokens = tokenizer('\n').input_ids 52 | _system = tokenizer('system').input_ids + nl_tokens 53 | _user = tokenizer('user').input_ids + nl_tokens 54 | _assistant = tokenizer('assistant').input_ids + nl_tokens 55 | 56 | # Apply prompt templates 57 | input_ids, targets = [], [] 58 | for i, source in enumerate(sources): 59 | if roles[source[0]["from"]] != roles["user"]: 60 | source = source[1:] 61 | 62 | input_id, target = [], [] 63 | system = [im_start] + _system + tokenizer(system_message).input_ids + [im_end] + nl_tokens 64 | input_id += system 65 | target += [im_start] + [IGNORE_TOKEN_ID] * (len(system)-3) + [im_end] + nl_tokens 66 | assert len(input_id) == len(target) 67 | for sentence in enumerate(source): 68 | role = roles[sentence["from"]] 69 | _input_id = tokenizer(role).input_ids + nl_tokens + \ 70 | tokenizer(sentence["value"]).input_ids + [im_end] + nl_tokens 71 | input_id += _input_id 72 | if role == '<|im_start|>user': 73 | _target = [im_start] + [IGNORE_TOKEN_ID] * (len(_input_id)-3) + [im_end] + nl_tokens 74 | elif role == '<|im_start|>assistant': 75 | _target = [im_start] + [IGNORE_TOKEN_ID] * len(tokenizer(role).input_ids) + \ 76 | _input_id[len(tokenizer(role).input_ids)+1:-2] + [im_end] + nl_tokens 77 | else: 78 | raise NotImplementedError 79 | target += _target 80 | assert len(input_id) == len(target) 81 | input_id += [tokenizer.pad_token_id] * (max_len - len(input_id)) 82 | target += [IGNORE_TOKEN_ID] * (max_len - len(target)) 83 | input_ids.append(input_id[:max_len]) 84 | targets.append(target[:max_len]) 85 | input_ids = torch.tensor(input_ids, dtype=torch.int) 86 | targets = torch.tensor(targets, dtype=torch.int) 87 | 88 | return dict( 89 | input_ids=input_ids, 90 | labels=targets, 91 | attention_mask=input_ids.ne(tokenizer.pad_token_id), 92 | ) 93 | 94 | class LazySupervisedDataset(Dataset): 95 | """ 96 | A dataset class for supervised fine-tuning. 97 | 98 | Attributes: 99 | data_path (str): Path to the JSON file containing the conversational data. 100 | """ 101 | 102 | def __init__(self, data_path: str): 103 | super(LazySupervisedDataset, self).__init__() 104 | self.args = get_args() 105 | self.tokenizer = get_tokenizer() 106 | self.max_len = self.args.max_padding_length 107 | self.raw_data = json.load(open(data_path[0], "r")) 108 | self.cached_data_dict = {} 109 | 110 | def __len__(self): 111 | return len(self.raw_data) 112 | 113 | def __getitem__(self, i) -> Dict[str, torch.Tensor]: 114 | if i in self.cached_data_dict: 115 | return self.cached_data_dict[i] 116 | 117 | ret = preprocess([self.raw_data[i]["conversations"]], self.tokenizer, self.max_len) 118 | ret = dict( 119 | input_ids=ret["input_ids"][0], 120 | labels=ret["labels"][0], 121 | attention_mask=ret["attention_mask"][0], 122 | ) 123 | self.cached_data_dict[i] = ret 124 | 125 | return ret -------------------------------------------------------------------------------- /linear_moe/data/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Alibaba PAI and Nvidia Megatron-LM Team. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import torch 16 | from megatron.core import mpu 17 | try: 18 | from megatron import get_args 19 | except: 20 | from megatron.training import get_args 21 | try: 22 | from megatron.utils import get_ltor_masks_and_position_ids 23 | except: 24 | from megatron.training.utils import get_ltor_masks_and_position_ids 25 | 26 | from linear_moe.tokenizer import get_tokenizer 27 | 28 | 29 | def get_batch_on_this_tp_rank_original(data_iterator): 30 | args = get_args() 31 | tokenizer = get_tokenizer() 32 | def _broadcast(item): 33 | torch.distributed.broadcast(item, mpu.get_tensor_model_parallel_src_rank(), 34 | group=mpu.get_tensor_model_parallel_group()) 35 | 36 | if mpu.get_tensor_model_parallel_rank() == 0: 37 | 38 | if isinstance(data_iterator, dict): 39 | data = data_iterator 40 | else: 41 | data = next(data_iterator) 42 | 43 | tokens_ = data['input_ids'].long() 44 | labels_ = data['labels'].long() 45 | tokens = tokens_[:, :-1].contiguous() 46 | labels = labels_[:, 1:].contiguous() 47 | # core/tensor_parallel/cross_entropy.py, target_mask = (target < vocab_start_index) | (target >= vocab_end_index) 48 | labels[labels == tokenizer.eos_token_id] = -100 49 | labels[labels == tokenizer.pad_token_id] = -100 50 | 51 | attention_mask, loss_mask, position_ids = get_ltor_masks_and_position_ids( 52 | labels, 53 | -100, 54 | args.reset_position_ids, 55 | args.reset_attention_mask, 56 | args.eod_mask_loss) 57 | 58 | batch = { 59 | 'tokens': tokens.cuda(non_blocking=True), 60 | 'labels': labels.cuda(non_blocking=True), 61 | 'loss_mask': loss_mask.cuda(non_blocking=True), 62 | 'attention_mask': attention_mask.cuda(non_blocking=True), 63 | 'position_ids': position_ids.cuda(non_blocking=True) 64 | } 65 | 66 | if args.pipeline_model_parallel_size == 1: 67 | _broadcast(batch['tokens']) 68 | _broadcast(batch['labels']) 69 | _broadcast(batch['loss_mask']) 70 | _broadcast(batch['attention_mask']) 71 | _broadcast(batch['position_ids']) 72 | 73 | elif mpu.is_pipeline_first_stage(): 74 | _broadcast(batch['tokens']) 75 | _broadcast(batch['attention_mask']) 76 | _broadcast(batch['position_ids']) 77 | 78 | elif mpu.is_pipeline_last_stage(): 79 | _broadcast(batch['labels']) 80 | _broadcast(batch['loss_mask']) 81 | _broadcast(batch['attention_mask']) 82 | 83 | else: 84 | 85 | tokens = torch.empty((args.micro_batch_size, args.seq_length), dtype=torch.int64, 86 | device=torch.cuda.current_device()) 87 | labels = torch.empty((args.micro_batch_size, args.seq_length), dtype=torch.int64, 88 | device=torch.cuda.current_device()) 89 | loss_mask = torch.empty((args.micro_batch_size, args.seq_length), dtype=torch.float32, 90 | device=torch.cuda.current_device()) 91 | attention_mask = torch.empty((args.micro_batch_size, 1, args.seq_length, args.seq_length), dtype=torch.bool, 92 | device=torch.cuda.current_device()) 93 | position_ids = torch.empty((args.micro_batch_size, args.seq_length), dtype=torch.int64, 94 | device=torch.cuda.current_device()) 95 | 96 | if args.pipeline_model_parallel_size == 1: 97 | _broadcast(tokens) 98 | _broadcast(labels) 99 | _broadcast(loss_mask) 100 | 
_broadcast(attention_mask) 101 | _broadcast(position_ids) 102 | 103 | elif mpu.is_pipeline_first_stage(): 104 | labels = None 105 | loss_mask = None 106 | 107 | _broadcast(tokens) 108 | _broadcast(attention_mask) 109 | _broadcast(position_ids) 110 | 111 | elif mpu.is_pipeline_last_stage(): 112 | tokens = None 113 | position_ids = None 114 | 115 | _broadcast(labels) 116 | _broadcast(loss_mask) 117 | _broadcast(attention_mask) 118 | 119 | batch = { 120 | 'tokens': tokens, 121 | 'labels': labels, 122 | 'loss_mask': loss_mask, 123 | 'attention_mask': attention_mask, 124 | 'position_ids': position_ids 125 | } 126 | 127 | return batch -------------------------------------------------------------------------------- /linear_moe/generation/gpt_predictor.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Alibaba PAI and Nvidia Megatron-LM Team. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import json 16 | import torch 17 | 18 | from megatron.core.enums import ModelType 19 | from megatron.training import get_args 20 | from megatron.training import get_timers 21 | from megatron.training import get_model 22 | from megatron.training.checkpointing import load_checkpoint 23 | 24 | from linear_moe.generation.api import generate_and_post_process 25 | from linear_moe.tokenizer import build_tokenizer 26 | 27 | class GPTPredictor(): 28 | """A Predictor for model.""" 29 | def __init__(self): 30 | super().__init__() 31 | 32 | def predict(self): 33 | """Run predict process """ 34 | 35 | args = get_args() 36 | build_tokenizer(args) 37 | timers = get_timers() 38 | 39 | args.train_iters = 1 40 | # Model, optimizer, and learning rate. 
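        # The model is built without DDP wrapping and only its weights are restored
        # from --load; optimizer and scheduler state are skipped for inference, which
        # is why load_checkpoint is called as load_checkpoint(model, None, None).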
41 | timers('model-and-optimizer-setup', log_level=0).start(barrier=True) 42 | model = get_model(self.model_provider, 43 | model_type=ModelType.encoder_or_decoder, 44 | wrap_with_ddp=False) 45 | assert args.load is not None 46 | if args.load is not None and args.no_load_optim: 47 | load_checkpoint(model, None, None) 48 | timers('model-and-optimizer-setup').stop() 49 | torch.distributed.barrier() 50 | 51 | timers = get_timers() 52 | timers('load-checkpoint', log_level=0).start(barrier=True) 53 | timers('load-checkpoint').stop() 54 | timers.log(['load-checkpoint']) 55 | timers.log(['model-and-optimizer-setup']) 56 | 57 | if not isinstance(model, list): 58 | model = [model] 59 | 60 | assert len(model) == 1, 'Above condition should have caught this' 61 | model = model[0] 62 | if args.text_generate_input_file != '': 63 | num_examples = len(open(args.text_generate_input_file).readlines()) 64 | prompts = [] 65 | pred_outputs = [] 66 | with open(args.text_generate_input_file, 67 | encoding='utf-8') as reader,\ 68 | open(args.text_generate_output_file, 69 | 'w', encoding='utf-8') as writer: 70 | buffer = [] 71 | 72 | for idx, line in enumerate(reader): 73 | line = line.strip() 74 | json_obj = json.loads(line) 75 | line = json_obj['query'][:args.seq_length] 76 | prompts.append(line) 77 | if len(buffer) < args.micro_batch_size: 78 | buffer.append(line) 79 | 80 | if len( 81 | buffer 82 | ) == args.micro_batch_size or idx == num_examples - 1: 83 | sl = args.out_seq_length 84 | tk = args.top_k 85 | tp = args.top_p 86 | temperature = args.temperature 87 | prompts_plus_generations, _, _, _ = \ 88 | generate_and_post_process(model, 89 | prompts=buffer, 90 | tokens_to_generate=sl, 91 | top_k_sampling=tk, 92 | temperature=temperature, 93 | top_p_sampling=tp) 94 | 95 | for prompt, p_and_g in zip(buffer, 96 | prompts_plus_generations): 97 | generation = p_and_g.replace('<|endoftext|>', '') 98 | print(p_and_g) 99 | writer.write(generation + '\n') 100 | pred_outputs.append(generation) 101 | buffer.clear() 102 | 103 | if idx % args.micro_batch_size == 0: 104 | print('processed {} examples'.format(idx)) 105 | -------------------------------------------------------------------------------- /linear_moe/generation/megatron.md: -------------------------------------------------------------------------------- 1 | ## Megatron推理 2 | 此处复用了Megatron-LM中的推理框架。 3 | 改动: 4 | - 修改tokenizer处理数据的部分,适配huggingface的tokenizer 5 | - 增加推理过程中对重复生成的处理,支持repetition_penalty. 
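
The repetition_penalty handling mentioned above can be illustrated with a minimal sketch of how such a penalty is commonly applied to next-token logits (the usual convention: values above 1 make already-generated tokens less likely). The function below is an illustrative assumption for clarity, not the implementation used in this repository:

```python
import torch

def apply_repetition_penalty(logits: torch.Tensor,
                             generated_ids: torch.Tensor,
                             penalty: float = 1.2) -> torch.Tensor:
    """Penalize tokens that already appear in `generated_ids`.

    logits: [vocab_size] next-token logits; generated_ids: 1-D LongTensor of token ids.
    """
    if penalty == 1.0 or generated_ids.numel() == 0:
        return logits
    logits = logits.clone()
    scores = logits[generated_ids]
    # Divide positive logits and multiply negative ones, so repeated tokens
    # always become less probable when penalty > 1.
    logits[generated_ids] = torch.where(scores > 0, scores / penalty, scores * penalty)
    return logits
```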
6 | 7 | 8 | ## 模型推理示例 9 | 对于Megatron-LM训练的模型,可以直接用Megatron-LM框架进行推理。 10 | 参数如下 11 | ```bash 12 | ENV=$1 # 运行环境: dlc, dsw 13 | MEGATRON_PATH=$2 # 设置开源Megatron的代码路径 14 | LINEAR_MOE_PATH=$3 # 设置LINEAR_MOE的代码路径 15 | CHECKPOINT_PATH=$4 # 模型微调阶段的模型保存路径 16 | MODEL_SIZE=$5 # 模型结构参数量级: 1.1B, 1.7B, 7.1B 17 | TP=$6 # 模型并行度 18 | BS=$7 # 每卡推理一次迭代样本数: 1, 4, 8 19 | SEQ_LEN=$8 # 序列长度: 256, 512, 1024 20 | PAD_LEN=$9 # PAD长度:需要将文本拼接到的长度 21 | EXTRA_VOCAB_SIZE=${10} # 模型转换时增加的token数量 22 | PR=${11} # 推理采用的精度: fp16, bf16 23 | TOP_K=${12} # 采样策略中选择排在前面的候选词数量(0-n): 0, 5, 10, 20 24 | INPUT_SEQ_LEN=${13} # 输入序列长度: 512 25 | OUTPUT_SEQ_LEN=${14} # 输出序列长度: 256 26 | INPUT_FILE=${15} # 需要推理的文本文件: input.txt, 每行为一个样本 27 | OUTPUT_FILE=${16} # 推理输出的文件: output.txt 28 | # TOP_K和TOP_P必须有一个为0 29 | TOP_P=${17} # 采样策略中选择排在前面的候选词百分比(0-1): 0, 0.85, 0.95 30 | TEMPERATURE=${18} # 采样策略中温度惩罚: 1-n 31 | REPETITION_PENALTY=${19} # 避免生成是产生大量重复,可以设置为(1-2)默认为1.2 32 | ``` 33 | 运行以下命令进行模型推理。 34 | 35 | 以下有监督微调过程保存模型的推理代码,需要将run_text_generation_megatron_llama.sh脚本中CUDA_VISIBLE_DEVICES参数设置为0;GPUS_PER_NODE参数设置为1;同时使用下列代码进行推理。此时使用单卡进行推理。注意:此处模型tp为1,可使用单卡推理;如果tp>1,则需使用相应卡数进行推理。 36 | ```bash 37 | export WORK_DIR=/mnt/workspace 38 | cd ${WORK_DIR}/PAI-Megatron-Patch/examples/llama2 39 | bash run_text_generation_megatron_llama.sh \ 40 | dsw \ 41 | /root/Megatron-LM-23.04 \ 42 | ${WORK_DIR}/PAI-Megatron-Patch \ 43 | ../../../llama2-train \ 44 | 7B \ 45 | 1 \ 46 | 1 \ 47 | 1024 \ 48 | 1024 \ 49 | 0 \ 50 | fp16 \ 51 | 10 \ 52 | 512 \ 53 | 512 \ 54 | ${WORK_DIR}/pred_input.jsonl \ 55 | ${WORK_DIR}/llama2_pred.txt \ 56 | 0 \ 57 | 1.0 \ 58 | 1.2 59 | ``` 60 | 61 | -------------------------------------------------------------------------------- /linear_moe/model/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Alibaba PAI Team. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | -------------------------------------------------------------------------------- /linear_moe/model/common_modules/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from .layernorm import (GroupNorm, GroupNormLinear, LayerNorm, 4 | LayerNormLinear, RMSNorm, RMSNormLinear) 5 | from fla.modules.rotary import RotaryEmbedding 6 | from .l2norm import l2_norm_fn 7 | 8 | __all__ = [ 9 | 'GroupNorm', 'GroupNormLinear', 'LayerNorm', 'LayerNormLinear', 'RMSNorm', 'RMSNormLinear', 10 | 'RotaryEmbedding' 11 | 'l2_norm_fn' 12 | ] 13 | -------------------------------------------------------------------------------- /linear_moe/model/deepseek_v2/__init__.py: -------------------------------------------------------------------------------- 1 | from .model import GPTModel 2 | -------------------------------------------------------------------------------- /linear_moe/model/deepseek_v2/moe/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/b312754b9b8a2cb9eb15e373baabe263b8d409c1/linear_moe/model/deepseek_v2/moe/__init__.py -------------------------------------------------------------------------------- /linear_moe/model/deepseek_v2/moe/moe_layer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Alibaba PAI and Nvidia Megatron-LM Team. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from abc import ABC, abstractmethod 16 | import torch 17 | import torch.nn.functional as F 18 | 19 | from megatron.core import parallel_state 20 | from megatron.core.transformer.module import MegatronModule 21 | from megatron.core.transformer.transformer_config import TransformerConfig 22 | 23 | from .experts import GroupedMLP, SequentialMLP 24 | from .router import TopKRouter 25 | from .token_dispatcher import ( 26 | MoEAllGatherTokenDispatcher, 27 | MoEAlltoAllTokenDispatcher, 28 | ) 29 | from ..transformer.mlp import MLPSubmodules, MLP 30 | 31 | class BaseMoELayer(MegatronModule, ABC): 32 | """Base class for a mixture of experts layer. 33 | 34 | Args: 35 | config (TransformerConfig): Configuration object for the transformer model. 
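    On construction, each expert-parallel rank derives its own slice of experts
    (config.num_moe_experts // expert_model_parallel_size per rank) and records their
    global positions in `local_expert_indices`; subclasses then wire up the router,
    the experts and the token dispatcher.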
36 | """ 37 | 38 | def __init__(self, config: TransformerConfig, layer_number: int = None): 39 | super(BaseMoELayer, self).__init__(config) 40 | self.config = config 41 | self.expert_parallel_size = parallel_state.get_expert_model_parallel_world_size() 42 | assert self.expert_parallel_size > 0, "Expected non-negative expert parallel size" 43 | assert self.config.num_moe_experts % self.expert_parallel_size == 0 44 | self.num_local_experts = self.config.num_moe_experts // self.expert_parallel_size 45 | local_expert_indices_offset = ( 46 | parallel_state.get_expert_model_parallel_rank() * self.num_local_experts 47 | ) 48 | self.local_expert_indices = [ 49 | local_expert_indices_offset + i for i in range(self.num_local_experts) 50 | ] 51 | assert all(map(lambda x: x < self.config.num_moe_experts, self.local_expert_indices)) 52 | self.router = None 53 | self.experts = None 54 | self.token_dispatcher = None 55 | self.layer_number = layer_number 56 | 57 | @abstractmethod 58 | def forward(self, hidden_states): 59 | pass 60 | 61 | def set_layer_number(self, layer_number: int): 62 | self.layer_number = layer_number 63 | self.router.set_layer_number(layer_number) 64 | 65 | 66 | class MoELayer(BaseMoELayer): 67 | """Mixture of experts Layer **currently only supports no token dropping**. 68 | 69 | Args: 70 | BaseMoELayer (MegatronModule): Base class for MoE layers 71 | """ 72 | 73 | def __init__( 74 | self, config: TransformerConfig, submodules: MLPSubmodules = None, layer_number: int = None 75 | ): 76 | self.submodules = submodules 77 | super(MoELayer, self).__init__(config=config, layer_number=layer_number) 78 | self.router = TopKRouter(config=self.config) 79 | self.enable_shared_experts = config.enable_shared_expert 80 | if config.enable_shared_expert: 81 | self.shared_expert = MLP(self.config, submodules, is_expert=False, is_shared_expert=True) 82 | 83 | if self.config.moe_grouped_gemm: 84 | self.experts = GroupedMLP(self.num_local_experts, self.config) 85 | else: 86 | assert isinstance(self.submodules, MLPSubmodules) 87 | self.experts = SequentialMLP(self.num_local_experts, self.config, self.submodules) 88 | if config.moe_token_dispatcher_type == "allgather": 89 | self.token_dispatcher = MoEAllGatherTokenDispatcher( 90 | self.num_local_experts, self.local_expert_indices, config=self.config 91 | ) 92 | elif config.moe_token_dispatcher_type == "alltoall": 93 | self.token_dispatcher = MoEAlltoAllTokenDispatcher( 94 | self.num_local_experts, self.local_expert_indices, config=self.config 95 | ) 96 | else: 97 | raise ValueError( 98 | f"Unsupported token dispatcher type: {config.moe_token_dispatcher_type}" 99 | ) 100 | 101 | def forward(self, hidden_states: torch.Tensor): 102 | # process MoE 103 | scores, indices = self.router(hidden_states) 104 | (dispatched_input, tokens_per_expert) = self.token_dispatcher.token_permutation( 105 | hidden_states, scores, indices 106 | ) 107 | expert_output, mlp_bias = self.experts(dispatched_input, tokens_per_expert) 108 | output, mlp_bias = self.token_dispatcher.token_unpermutation(expert_output, mlp_bias) 109 | if self.enable_shared_experts: 110 | shared_expert_output, shared_bias = self.shared_expert(hidden_states) 111 | output = output + shared_expert_output.view(-1, hidden_states.shape[-2], hidden_states.shape[-1]) 112 | 113 | return output, mlp_bias 114 | -------------------------------------------------------------------------------- /linear_moe/model/deepseek_v2/rms_norm.py: -------------------------------------------------------------------------------- 1 | 
import torch 2 | from torch import nn 3 | 4 | class DeepseekV2RMSNorm(nn.Module): 5 | def __init__(self, hidden_size, eps=1e-6, config=None): 6 | """ 7 | DeepseekV2RMSNorm is equivalent to T5LayerNorm 8 | """ 9 | super().__init__() 10 | self.weight = nn.Parameter(torch.ones(hidden_size)) 11 | self.variance_epsilon = eps 12 | 13 | def forward(self, hidden_states): 14 | input_dtype = hidden_states.dtype 15 | hidden_states = hidden_states.to(torch.float32) 16 | variance = hidden_states.pow(2).mean(-1, keepdim=True) 17 | hidden_states = hidden_states * torch.rsqrt(variance + self.variance_epsilon) 18 | return self.weight * hidden_states.to(input_dtype) -------------------------------------------------------------------------------- /linear_moe/model/deepseek_v2/transformer_config.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from megatron.core.transformer import TransformerConfig 3 | 4 | 5 | @dataclass 6 | class DeepSeekV2TransformerConfig(TransformerConfig): 7 | 8 | moe_ffn_hidden_size: int = None 9 | 10 | enable_shared_expert: bool = False 11 | 12 | q_lora_rank: int = None 13 | 14 | kv_lora_rank: int = None 15 | 16 | qk_nope_head_dim: int = None 17 | 18 | qk_rope_head_dim: int = None 19 | 20 | v_head_dim: int = None 21 | 22 | num_shared_experts: int = None 23 | 24 | moe_layer_freq: int = None 25 | 26 | rotary_base: int = None 27 | 28 | rotary_scaling_factor: int = None 29 | 30 | max_position_embeddings: int = None 31 | 32 | moe_aux_loss_coeff: float = 0.0 33 | 34 | use_la_module: bool = False 35 | 36 | la_module: str = None 37 | 38 | la_mode: str = None 39 | 40 | base_model: str = None 41 | 42 | la_feature_map: str = None 43 | 44 | la_tie_feature_map_qk: bool = False 45 | 46 | la_norm_q: bool = False 47 | 48 | la_norm_k: bool = False 49 | 50 | la_do_feature_map_norm: bool = False 51 | 52 | la_output_norm: str = None 53 | 54 | la_checkpointing: bool = False 55 | 56 | la_elementwise_affine: bool = True 57 | 58 | la_norm_eps: float = 1e-5 59 | 60 | gla_la_gate_logit_normalizer: int = 16 61 | 62 | gla_la_gate_low_rank_dim: int = 16 63 | 64 | gla_la_clamp_min: float = None 65 | 66 | rwkv6_la_proj_low_rank_dim: int = 32 67 | 68 | rwkv6_la_gate_low_rank_dim: int = 64 69 | 70 | rwkv_7_la_decay_low_rank_dim: int = 64 71 | 72 | rwkv_7_la_gate_low_rank_dim: int = 128 73 | 74 | rwkv_7_a_low_rank_dim: int = 64 75 | 76 | la_gate_fn: str = 'swish' 77 | 78 | expand_k: float = 1.0 79 | 80 | expand_v: float = 1.0 81 | 82 | layer_type_list: str = None 83 | 84 | num_memories: int = 4 85 | 86 | topk: int = 2 87 | 88 | capacity: float = 1.0 89 | 90 | shared_mem: bool = True 91 | -------------------------------------------------------------------------------- /linear_moe/model/llama3/__init__.py: -------------------------------------------------------------------------------- 1 | from .model import GPTModel 2 | -------------------------------------------------------------------------------- /linear_moe/model/llama3/rms_norm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | class Llama3RMSNorm(nn.Module): 5 | def __init__(self, hidden_size, eps=1e-5, config=None): 6 | """ 7 | Llama3RMSNorm is equivalent to T5LayerNorm 8 | """ 9 | super().__init__() 10 | self.weight = nn.Parameter(torch.ones(hidden_size)) 11 | self.variance_epsilon = eps 12 | 13 | def forward(self, hidden_states): 14 | input_dtype = hidden_states.dtype 15 | hidden_states = 
hidden_states.to(torch.float32) 16 | variance = hidden_states.pow(2).mean(-1, keepdim=True) 17 | hidden_states = hidden_states * torch.rsqrt(variance + self.variance_epsilon) 18 | return self.weight * hidden_states.to(input_dtype) -------------------------------------------------------------------------------- /linear_moe/model/llama3/transformer_config.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 Alibaba PAI and Nvidia Megatron-LM Team. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from dataclasses import dataclass 16 | from megatron.core.transformer import TransformerConfig 17 | 18 | 19 | @dataclass 20 | class Llama3TransformerConfig(TransformerConfig): 21 | 22 | transformer_impl: str = 'transformer_engine' 23 | 24 | moe_ffn_hidden_size: int = None 25 | 26 | shared_moe_ffn_hidden_size: int = None 27 | 28 | enable_shared_expert: bool = False 29 | 30 | num_shared_experts: int = None 31 | 32 | moe_layer_freq: int = None 33 | 34 | moe_megablocks: bool = False 35 | """When set to True, use Megablocks for MoE layer.""" 36 | 37 | moe_train_capacity_factor: float = None 38 | 39 | moe_eval_capacity_factor: float = None 40 | 41 | moe_token_dropping: bool = False 42 | 43 | rotary_base: int = None 44 | 45 | rotary_scaling_factor: int = None 46 | 47 | max_position_embeddings: int = None 48 | 49 | moe_aux_loss_coeff: float = 0.0 50 | 51 | use_la_module: bool = False 52 | 53 | megatron_hybrid_mamba_method: bool = False 54 | 55 | la_module: str = None 56 | 57 | la_mode: str = None 58 | 59 | base_model: str = None 60 | 61 | la_feature_map: str = None 62 | 63 | la_tie_feature_map_qk: bool = False 64 | 65 | la_norm_q: bool = False 66 | 67 | la_norm_k: bool = False 68 | 69 | la_do_feature_map_norm: bool = False 70 | 71 | la_output_norm: str = None 72 | 73 | la_checkpointing: bool = False 74 | 75 | la_elementwise_affine: bool = True 76 | 77 | la_norm_eps: float = 1e-5 78 | 79 | gla_la_gate_logit_normalizer: int = 16 80 | 81 | gla_la_gate_low_rank_dim: int = 16 82 | 83 | gla_la_clamp_min: float = None 84 | 85 | rwkv6_la_proj_low_rank_dim: int = 32 86 | 87 | rwkv6_la_gate_low_rank_dim: int = 64 88 | 89 | rwkv_7_la_decay_low_rank_dim: int = 64 90 | 91 | rwkv_7_la_gate_low_rank_dim: int = 128 92 | 93 | rwkv_7_a_low_rank_dim: int = 64 94 | 95 | la_gate_fn: str = 'swish' 96 | 97 | expand_k: float = 1.0 98 | 99 | expand_v: float = 1.0 100 | 101 | layer_type_list: str = None 102 | 103 | num_memories: int = 4 104 | 105 | topk: int = 2 106 | 107 | capacity: float = 1.0 108 | 109 | shared_mem: bool = True 110 | 111 | def __post_init__(self): 112 | super().__post_init__() 113 | 114 | if self.moe_megablocks and self.moe_grouped_gemm: 115 | raise ValueError("moe_megablocks and moe_grouped_gemm cannot be both True.") 116 | -------------------------------------------------------------------------------- /linear_moe/model/mixtral/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .model import GPTModel 2 | -------------------------------------------------------------------------------- /linear_moe/model/mixtral/moe/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/b312754b9b8a2cb9eb15e373baabe263b8d409c1/linear_moe/model/mixtral/moe/__init__.py -------------------------------------------------------------------------------- /linear_moe/model/mixtral/moe/grouped_gemm_util.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Alibaba PAI and Nvidia Megatron-LM Team. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | try: 16 | import grouped_gemm 17 | except ImportError: 18 | grouped_gemm = None 19 | 20 | def grouped_gemm_is_available(): 21 | return grouped_gemm is not None 22 | 23 | def assert_grouped_gemm_is_available(): 24 | assert grouped_gemm_is_available(), ( 25 | "Grouped GEMM is not available. Please run " 26 | "`pip install git+https://github.com/fanshiqing/grouped_gemm@main`." 27 | ) 28 | 29 | ops = grouped_gemm.ops if grouped_gemm_is_available() else None 30 | -------------------------------------------------------------------------------- /linear_moe/model/mixtral/moe/moe_layer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Alibaba PAI and Nvidia Megatron-LM Team. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from abc import ABC, abstractmethod 16 | import torch 17 | 18 | from megatron.core import parallel_state 19 | from megatron.core.transformer.module import MegatronModule 20 | 21 | from .experts import GroupedMLP, SequentialMLP 22 | from .router import TopKRouter 23 | from .token_dispatcher import MoEDroplessTokenDispatcher 24 | from ..transformer_config import TransformerConfig 25 | from ..transformer.mlp import MLPSubmodules 26 | 27 | class BaseMoELayer(MegatronModule, ABC): 28 | """Base class for a mixture of experts layer. 29 | 30 | Args: 31 | config (TransformerConfig): Configuration object for the transformer model. 
32 | """ 33 | 34 | def __init__(self, config: TransformerConfig): 35 | super(BaseMoELayer, self).__init__(config) 36 | self.config = config 37 | self.expert_parallel_size = parallel_state.get_expert_model_parallel_world_size() 38 | assert self.config.num_moe_experts % self.expert_parallel_size == 0 39 | self.num_local_experts = self.config.num_moe_experts // self.expert_parallel_size 40 | local_expert_indices_offset = ( 41 | parallel_state.get_expert_model_parallel_rank() * self.num_local_experts 42 | ) 43 | self.local_expert_indices = [ 44 | local_expert_indices_offset + i for i in range(self.num_local_experts) 45 | ] 46 | self.router = None 47 | self.experts = None 48 | self.token_dispatcher = None 49 | 50 | @abstractmethod 51 | def forward(self, hidden_states): 52 | pass 53 | 54 | 55 | class MoELayer(BaseMoELayer): 56 | """Mixture of experts Layer **currently only supports no token dropping**. 57 | 58 | Args: 59 | BaseMoELayer (MegatronModule): Base class for MoE layers 60 | """ 61 | 62 | def __init__(self, config: TransformerConfig, submodules: MLPSubmodules = None): 63 | self.submodules = submodules 64 | super(MoELayer, self).__init__(config=config) 65 | self.router = TopKRouter( 66 | self.num_local_experts, self.local_expert_indices, config=self.config 67 | ) 68 | if self.config.moe_grouped_gemm: 69 | self.experts = GroupedMLP(self.num_local_experts, self.config) 70 | else: 71 | assert isinstance(self.submodules, MLPSubmodules) 72 | self.experts = SequentialMLP(self.num_local_experts, self.config, self.submodules) 73 | self.token_dispatcher = MoEDroplessTokenDispatcher( 74 | self.num_local_experts, self.local_expert_indices, config=self.config 75 | ) 76 | 77 | def forward(self, hidden_states: torch.Tensor): 78 | """ 79 | Forward pass for the MoE layer. 80 | 81 | The method routes input tokens to the appropriate expert networks, 82 | processes the tokens with the experts, and then combines the outputs. 83 | 84 | Args: 85 | hidden_states (torch.Tensor): The input tensor containing the hidden states 86 | from the previous layer of the transformer model.This tensor is expected to 87 | have a shape compatible with the expectations of the MoE layer, typically 88 | [batch_size, sequence_length, hidden_size]. 89 | 90 | Returns: 91 | Tupletorch.Tensor, torch.Tensor: A tuple containing two elements: 92 | - The first element is the output tensor after processing by the MoE layer. 93 | It has the same shape as the input hidden_states. 94 | - The second element is the bias introduced by the MLP experts, which may 95 | need to be accounted for in subsequent layers or loss calculations. 96 | """ 97 | # process MoE 98 | scores, indices = self.router(hidden_states) 99 | ( 100 | dispatched_input, 101 | tokens_per_expert, 102 | scores, 103 | indices, 104 | global_local_map, 105 | ) = self.token_dispatcher.token_permutation(hidden_states, scores, indices) 106 | expert_output, mlp_bias = self.experts(dispatched_input, tokens_per_expert) 107 | output, mlp_bias = self.token_dispatcher.token_unpermutation( 108 | expert_output, scores, indices, global_local_map, mlp_bias 109 | ) 110 | return output, mlp_bias 111 | -------------------------------------------------------------------------------- /linear_moe/model/mixtral/moe/moe_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Alibaba PAI and Nvidia Megatron-LM Team. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import torch 16 | 17 | def switch_load_balancing_loss_func(gates, mask, moe_aux_loss_coeff): 18 | """Calculate the auxiliary loss for better load balacing. 19 | Please refer to the Switch Transformer paper (https://arxiv.org/abs/2101.03961) for details. 20 | 21 | Args: 22 | gates (torch.Tensor): The gates tensor representing the routing probabilities for each expert. 23 | mask (torch.Tensor): The 2D mask tensor indicating which experts are selected. 24 | 25 | Returns: 26 | torch.Tensor: The auxiliary loss for load balancing. 27 | """ 28 | num_experts = mask.size(-1) 29 | gates_mean = gates.mean(dim=0) 30 | selection_mean = mask.float().mean(dim=0) 31 | aux_loss = torch.sum(gates_mean * selection_mean) * num_experts 32 | aux_loss *= moe_aux_loss_coeff 33 | return aux_loss 34 | 35 | 36 | def z_loss_func(logits, z_loss_coeff): 37 | """Encourages the router's logits to remain small to enhance stability. 38 | Please refer to the ST-MoE paper (https://arxiv.org/pdf/2202.08906.pdf) for details. 39 | 40 | Args: 41 | logits (torch.Tensor): The logits of the router. 42 | 43 | Returns: 44 | torch.Tensor: The logits after applying the z-loss. 45 | """ 46 | 47 | z_loss = torch.mean(torch.square(torch.logsumexp(logits, dim=-1))) * z_loss_coeff 48 | return z_loss 49 | 50 | 51 | def sinkhorn(cost: torch.Tensor, tol: float = 0.0001): 52 | """Sinkhorn based MoE routing function 53 | 54 | Args: 55 | cost: A 2D tensor representing the cost matrix to be normalized. 56 | tol: A float value specifying the tolerance for convergence. Default is 0.0001. 57 | 58 | Returns: 59 | A 2D tensor representing the doubly stochastic matrix after Sinkhorn normalization. 60 | """ 61 | cost = torch.exp(cost) 62 | d0 = torch.ones(cost.size(0), device=cost.device, dtype=cost.dtype) 63 | d1 = torch.ones(cost.size(1), device=cost.device, dtype=cost.dtype) 64 | 65 | eps = 0.00000001 66 | error = 1e9 67 | d1_old = d1 68 | while error > tol: 69 | d0 = (1 / d0.size(0)) * 1 / (torch.sum(d1 * cost, 1) + eps) 70 | d1 = (1 / d1.size(0)) * 1 / (torch.sum(d0.unsqueeze(1) * cost, 0) + eps) 71 | error = torch.mean(torch.abs(d1_old - d1)) 72 | d1_old = d1 73 | return d1 * cost * d0.unsqueeze(1) 74 | 75 | class MoEAuxLossAutoScaler(torch.autograd.Function): 76 | """An AutoScaler that compute and scales the grad for auxiliary loss. 77 | 78 | """ 79 | 80 | main_loss_backward_scale: int = 1 81 | 82 | @staticmethod 83 | def forward(ctx, output: torch.Tensor, aux_loss: torch.Tensor): 84 | """Preserve the aux_loss by storing it in the context to avoid garbage collection. 85 | 86 | Args: 87 | output (torch.Tensor): The output tensor. 88 | aux_loss (torch.Tensor): The auxiliary loss tensor. 89 | 90 | Returns: 91 | torch.Tensor: The output tensor. 92 | """ 93 | ctx.save_for_backward(aux_loss) 94 | return output 95 | 96 | @staticmethod 97 | def backward(ctx, grad_output: torch.Tensor): 98 | """Compute and scale the gradient for auxiliary loss.. 
99 | 100 | Args: 101 | grad_output (torch.Tensor): The gradient of the output. 102 | 103 | Returns: 104 | Tuple[torch.Tensor, torch.Tensor]: The gradient of the output, scaled auxiliary loss gradient. 105 | """ 106 | (aux_loss,) = ctx.saved_tensors 107 | aux_loss_backward_scale = MoEAuxLossAutoScaler.main_loss_backward_scale 108 | scaled_aux_loss_grad = torch.ones_like(aux_loss) * aux_loss_backward_scale 109 | return grad_output, scaled_aux_loss_grad 110 | 111 | @staticmethod 112 | def set_loss_scale(scale: int): 113 | """set the scale of the aux loss. 114 | 115 | Args: 116 | scale (int): The scale value to set. Please ensure that the scale passed in matches the scale of the main_loss. 117 | """ 118 | MoEAuxLossAutoScaler.main_loss_backward_scale = scale 119 | -------------------------------------------------------------------------------- /linear_moe/model/mixtral/rms_norm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | class MixtralRMSNorm(nn.Module): 5 | def __init__(self, hidden_size, eps=1e-6, config=None): 6 | """ 7 | Mixtral is equivalent to T5LayerNorm 8 | """ 9 | super().__init__() 10 | self.weight = nn.Parameter(torch.ones(hidden_size)) 11 | self.variance_epsilon = eps 12 | 13 | def forward(self, hidden_states): 14 | input_dtype = hidden_states.dtype 15 | hidden_states = hidden_states.to(torch.float32) 16 | variance = hidden_states.pow(2).mean(-1, keepdim=True) 17 | hidden_states = hidden_states * torch.rsqrt(variance + self.variance_epsilon) 18 | return self.weight * hidden_states.to(input_dtype) -------------------------------------------------------------------------------- /linear_moe/model/mixtral/transformer_config.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 Alibaba PAI and Nvidia Megatron-LM Team. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from dataclasses import dataclass 16 | from megatron.core.transformer import TransformerConfig 17 | 18 | 19 | @dataclass 20 | class MixtralTransformerConfig(TransformerConfig): 21 | 22 | transformer_impl: str = 'transformer_engine' 23 | 24 | moe_ffn_hidden_size: int = None 25 | 26 | shared_moe_ffn_hidden_size: int = None 27 | 28 | enable_shared_expert: bool = False 29 | 30 | num_shared_experts: int = None 31 | 32 | moe_layer_freq: int = None 33 | 34 | moe_megablocks: bool = False 35 | """When set to True, use Megablocks for MoE layer.""" 36 | 37 | moe_train_capacity_factor: float = None 38 | 39 | moe_eval_capacity_factor: float = None 40 | 41 | moe_token_dropping: bool = False 42 | 43 | rotary_base: int = None 44 | 45 | rotary_scaling_factor: int = None 46 | 47 | max_position_embeddings: int = None 48 | 49 | moe_aux_loss_coeff: float = 0.0 50 | 51 | use_la_module: bool = False 52 | 53 | megatron_hybrid_mamba_method: bool = False 54 | 55 | la_module: str = None 56 | 57 | la_mode: str = None 58 | 59 | base_model: str = None 60 | 61 | la_feature_map: str = None 62 | 63 | la_tie_feature_map_qk: bool = False 64 | 65 | la_norm_q: bool = False 66 | 67 | la_norm_k: bool = False 68 | 69 | la_do_feature_map_norm: bool = False 70 | 71 | la_output_norm: str = None 72 | 73 | la_checkpointing: bool = False 74 | 75 | la_elementwise_affine: bool = True 76 | 77 | la_norm_eps: float = 1e-5 78 | 79 | gla_la_gate_logit_normalizer: int = 16 80 | 81 | gla_la_gate_low_rank_dim: int = 16 82 | 83 | gla_la_clamp_min: float = None 84 | 85 | rwkv6_la_proj_low_rank_dim: int = 32 86 | 87 | rwkv6_la_gate_low_rank_dim: int = 64 88 | 89 | rwkv_7_la_decay_low_rank_dim: int = 64 90 | 91 | rwkv_7_la_gate_low_rank_dim: int = 128 92 | 93 | rwkv_7_a_low_rank_dim: int = 64 94 | 95 | la_gate_fn: str = 'swish' 96 | 97 | expand_k: float = 1.0 98 | 99 | expand_v: float = 1.0 100 | 101 | layer_type_list: str = None 102 | 103 | num_memories: int = 4 104 | 105 | topk: int = 2 106 | 107 | capacity: float = 1.0 108 | 109 | shared_mem: bool = True 110 | 111 | def __post_init__(self): 112 | super().__post_init__() 113 | 114 | if self.moe_megablocks and self.moe_grouped_gemm: 115 | raise ValueError("moe_megablocks and moe_grouped_gemm cannot be both True.") 116 | -------------------------------------------------------------------------------- /linear_moe/model/qwen2/moe/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/b312754b9b8a2cb9eb15e373baabe263b8d409c1/linear_moe/model/qwen2/moe/__init__.py -------------------------------------------------------------------------------- /linear_moe/model/qwen2/rms_norm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | class Qwen2RMSNorm(nn.Module): 5 | def __init__(self, hidden_size, eps=1e-6, config=None): 6 | """ 7 | Qwen2RMSNorm is equivalent to T5LayerNorm 8 | """ 9 | super().__init__() 10 | self.weight = nn.Parameter(torch.ones(hidden_size)) 11 | self.variance_epsilon = eps 12 | 13 | def forward(self, hidden_states): 14 | input_dtype = hidden_states.dtype 15 | hidden_states = hidden_states.to(torch.float32) 16 | variance = hidden_states.pow(2).mean(-1, keepdim=True) 17 | hidden_states = hidden_states * torch.rsqrt(variance + self.variance_epsilon) 18 | return self.weight * hidden_states.to(input_dtype) -------------------------------------------------------------------------------- 
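The DeepseekV2RMSNorm, Llama3RMSNorm, MixtralRMSNorm and Qwen2RMSNorm modules above implement the same forward computation: cast to float32, multiply by the reciprocal root mean square over the last dimension, cast back to the input dtype, then scale by the learned weight. A minimal usage sketch of the Qwen2 variant (assuming the linear_moe package is importable; the hidden size, tensor shapes and bf16 dtype are illustrative assumptions, not repository defaults):

import torch
from linear_moe.model.qwen2.rms_norm import Qwen2RMSNorm

# Illustrative input: [batch, seq_len, hidden_size] activations cast to bf16.
hidden = torch.randn(2, 16, 1024).to(torch.bfloat16)
norm = Qwen2RMSNorm(hidden_size=1024, eps=1e-6)
out = norm(hidden)

# Reference: y = weight * (x / sqrt(mean(x^2) + eps)), computed in float32 and cast
# back to the input dtype, mirroring the forward pass defined above.
ref = hidden.to(torch.float32)
ref = ref * torch.rsqrt(ref.pow(2).mean(-1, keepdim=True) + 1e-6)
ref = norm.weight * ref.to(hidden.dtype)
assert torch.allclose(out, ref)

--------------------------------------------------------------------------------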
/linear_moe/model/qwen2/transformer_config.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 Alibaba PAI and Nvidia Megatron-LM Team. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from dataclasses import dataclass 16 | from megatron.core.transformer import TransformerConfig 17 | 18 | 19 | @dataclass 20 | class Qwen2TransformerConfig(TransformerConfig): 21 | 22 | transformer_impl: str = 'transformer_engine' 23 | 24 | moe_ffn_hidden_size: int = None 25 | 26 | shared_moe_ffn_hidden_size: int = None 27 | 28 | enable_shared_expert: bool = False 29 | 30 | num_shared_experts: int = None 31 | 32 | moe_layer_freq: int = None 33 | 34 | moe_megablocks: bool = False 35 | """When set to True, use Megablocks for MoE layer.""" 36 | 37 | moe_train_capacity_factor: float = None 38 | 39 | moe_eval_capacity_factor: float = None 40 | 41 | moe_token_dropping: bool = False 42 | 43 | rotary_base: int = None 44 | 45 | rotary_scaling_factor: int = None 46 | 47 | max_position_embeddings: int = None 48 | 49 | moe_aux_loss_coeff: float = 0.0 50 | 51 | use_la_module: bool = False 52 | 53 | megatron_hybrid_mamba_method: bool = False 54 | 55 | la_module: str = None 56 | 57 | la_mode: str = None 58 | 59 | base_model: str = None 60 | 61 | la_feature_map: str = None 62 | 63 | la_tie_feature_map_qk: bool = False 64 | 65 | la_norm_q: bool = False 66 | 67 | la_norm_k: bool = False 68 | 69 | la_do_feature_map_norm: bool = False 70 | 71 | la_output_norm: str = None 72 | 73 | la_checkpointing: bool = False 74 | 75 | la_elementwise_affine: bool = True 76 | 77 | la_norm_eps: float = 1e-5 78 | 79 | gla_la_gate_logit_normalizer: int = 16 80 | 81 | gla_la_gate_low_rank_dim: int = 16 82 | 83 | gla_la_clamp_min: float = None 84 | 85 | rwkv6_la_proj_low_rank_dim: int = 32 86 | 87 | rwkv6_la_gate_low_rank_dim: int = 64 88 | 89 | rwkv_7_la_decay_low_rank_dim: int = 64 90 | 91 | rwkv_7_la_gate_low_rank_dim: int = 128 92 | 93 | rwkv_7_a_low_rank_dim: int = 64 94 | 95 | la_gate_fn: str = 'swish' 96 | 97 | expand_k: float = 1.0 98 | 99 | expand_v: float = 1.0 100 | 101 | layer_type_list: str = None 102 | 103 | num_memories: int = 4 104 | 105 | topk: int = 2 106 | 107 | capacity: float = 1.0 108 | 109 | shared_mem: bool = True 110 | 111 | def __post_init__(self): 112 | super().__post_init__() 113 | 114 | if self.moe_megablocks and self.moe_grouped_gemm: 115 | raise ValueError("moe_megablocks and moe_grouped_gemm cannot be both True.") 116 | -------------------------------------------------------------------------------- /linear_moe/sequence_modeling/attention/__init__.py: -------------------------------------------------------------------------------- 1 | from .dot_product_attention import ( 2 | DotProductAttention 3 | ) 4 | 5 | __all__ = [ 6 | "DotProductAttention" 7 | ] 8 | -------------------------------------------------------------------------------- /linear_moe/sequence_modeling/based/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .based import ( 2 | Based 3 | ) 4 | 5 | __all__ = [ 6 | "Based" 7 | ] 8 | -------------------------------------------------------------------------------- /linear_moe/sequence_modeling/based/based.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | import torch 4 | from typing import Optional 5 | from einops import rearrange 6 | from megatron.core.transformer.module import MegatronModule 7 | from linear_moe.model.common_modules.feature_map import TaylorFeatureMap 8 | from fla.ops.based import parallel_based 9 | from fla.ops.linear_attn import chunk_linear_attn, fused_chunk_linear_attn 10 | 11 | 12 | class Based(MegatronModule): 13 | 14 | def __init__( 15 | self, 16 | config, 17 | expand_k: float = 1.0, 18 | expand_v: float = 1.0, 19 | ): 20 | super().__init__(config) 21 | 22 | self.la_mode = config.la_mode 23 | self.hidden_size = config.hidden_size 24 | self.key_dim = int(config.hidden_size * expand_k) 25 | self.value_dim = int(config.hidden_size * expand_v) 26 | self.num_heads = config.num_attention_heads 27 | # num_kv_heads here means num_query_groups 28 | self.num_kv_heads = config.num_query_groups if config.num_query_groups is not None else config.num_attention_heads 29 | self.num_kv_groups = self.num_heads // self.num_kv_heads 30 | self.head_qk_dim = self.key_dim // self.num_heads 31 | self.head_v_dim = self.value_dim // self.num_heads 32 | self.la_feature_map_fn = TaylorFeatureMap(self.head_qk_dim) 33 | 34 | assert self.la_mode in ['chunk', 'fused_chunk', 'parallel'], f"Not supported mode `{self.la_mode}`." 35 | assert self.key_dim % self.num_heads == 0, f"key dim must be divisible by num_heads of {self.num_heads}" 36 | assert self.value_dim % self.num_heads == 0, f"value dim must be divisible by num_heads of {self.num_heads}" 37 | 38 | if self.la_mode == 'chunk': 39 | self._la_impl = chunk_linear_attn 40 | elif self.la_mode == 'fused_chunk': 41 | self._la_impl = fused_chunk_linear_attn 42 | elif self.la_mode == 'parallel': 43 | self._la_impl = parallel_based 44 | 45 | self.apply(self._initialize_weights) 46 | 47 | def _initialize_weights(self, module: torch.nn.Module): 48 | if getattr(module, "_is_hf_initialized", False): 49 | return 50 | if isinstance(module, torch.nn.Linear): 51 | torch.nn.init.xavier_uniform_(module.weight, gain=2 ** -2.5) 52 | if module.bias is not None: 53 | torch.nn.init.zeros_(module.bias) 54 | module._is_hf_initialized = True 55 | 56 | 57 | def forward( 58 | self, 59 | q: torch.Tensor, 60 | k: torch.Tensor, 61 | v: torch.Tensor, 62 | ) -> torch.Tensor: 63 | # torch.Size([128, 4, 16, 32]) 64 | q, k, v = (rearrange(x, 'n b h d -> b h n d') for x in (q, k, v)) 65 | 66 | # expects q: B, H, T, K 67 | if self.la_mode in ['chunk', 'fused_chunk']: 68 | q, k = map(self.la_feature_map_fn, (q, k)) 69 | output, _ = self._la_impl(q, k, v, normalize=True, scale=1) 70 | elif self.la_mode == 'parallel': 71 | assert q.shape[-1] <= 128 72 | output, _ = self._la_impl(q, k, v, True, True) 73 | 74 | output = rearrange(output, 'b h n d -> n b (h d)') 75 | 76 | return output 77 | -------------------------------------------------------------------------------- /linear_moe/sequence_modeling/basic_linear_attention/__init__.py: -------------------------------------------------------------------------------- 1 | from .basic_linear_attention import ( 2 | BasicLinearAttention 3 | ) 4 | 5 | __all__ = [ 6 | "BasicLinearAttention" 7 | ] 8 | -------------------------------------------------------------------------------- /linear_moe/sequence_modeling/deltanet/__init__.py: -------------------------------------------------------------------------------- 1 | from .deltanet import DeltaNet 2 | 3 | __all__ = [ 4 | "DeltaNet" 5 | ] 6 | -------------------------------------------------------------------------------- /linear_moe/sequence_modeling/deltanet/deltanet.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | import torch 4 | from typing import Optional 5 | from einops import rearrange 6 | import torch.nn.functional as F 7 | from megatron.core.transformer.module import MegatronModule 8 | from linear_moe.model.common_modules.activations import ACT2FN 9 | 10 | from linear_moe.model.common_modules import RMSNorm, l2_norm_fn 11 | from fla.ops.delta_rule import (chunk_delta_rule, fused_chunk_delta_rule, 12 | fused_recurrent_delta_rule) 13 | 14 | def simple_norm(x): 15 | return (F.normalize(x, dim=-1) * x.shape[-1] ** 0.5).to(x) 16 | 17 | 18 | # @torch.jit.script 19 | def elu_p1(x): 20 | return (F.elu(x, 1., False) + 1.).to(x) 21 | 22 | 23 | # @torch.jit.script 24 | def sum_norm(x): 25 | return (x / x.sum(-1, keepdim=True)).to(x) 26 | 27 | 28 | # @torch.jit.script 29 | def elu_norm(x): 30 | dtype = x.dtype 31 | x = F.elu(x, 1., False) + 1. 32 | return (x / x.sum(-1, keepdim=True)).to(dtype) 33 | 34 | class DeltaNet(MegatronModule): 35 | 36 | def __init__( 37 | self, 38 | config, 39 | expand_k: float = 1.0, 40 | expand_v: float = 1.0, 41 | chunk_size: int = 64, 42 | qk_activation: str = 'silu', 43 | qk_norm: str = 'l2', 44 | ): 45 | super().__init__(config) 46 | 47 | self.la_mode = config.la_mode 48 | self.hidden_size = config.hidden_size 49 | self.num_heads = config.num_attention_heads 50 | # num_kv_heads here means num_query_groups 51 | self.num_kv_heads = config.num_query_groups if config.num_query_groups is not None else config.num_attention_heads 52 | self.num_kv_groups = self.num_heads // self.num_kv_heads 53 | 54 | self.qk_activation = qk_activation 55 | self.qk_norm = qk_norm 56 | self.chunk_size = chunk_size 57 | 58 | assert self.qk_activation in ['silu', 'relu', 'elu', 'identity'] 59 | assert self.qk_norm in ['l2', 'sum'] 60 | self.key_dim = int(config.hidden_size * expand_k) 61 | self.value_dim = int(config.hidden_size * expand_v) 62 | 63 | assert self.la_mode in ['chunk', 'fused_chunk', 'fused_recurrent'], f"Not supported mode `{self.la_mode}`." 64 | assert self.key_dim % self.num_heads == 0, f"key dim must be divisible by num_heads of {self.num_heads}" 65 | assert self.value_dim % self.num_heads == 0, f"value dim must be divisible by num_heads of {self.num_heads}" 66 | 67 | self.head_qk_dim = self.key_dim // self.num_heads 68 | self.head_v_dim = self.value_dim // self.num_heads 69 | 70 | if config.la_output_norm == 'rmsnorm': 71 | self.la_output_norm = RMSNorm(hidden_size=self.head_v_dim, elementwise_affine=config.la_elementwise_affine, eps=config.la_norm_eps) 72 | elif config.la_output_norm == 'identity': 73 | self.la_output_norm = torch.nn.Identity() 74 | else: 75 | raise NotImplementedError(f"Not supported output norm `{config.la_output_norm}`.") 76 | 77 | if self.la_mode == 'chunk': 78 | self._la_impl = chunk_delta_rule 79 | elif self.la_mode == 'fused_chunk': 80 | self._la_impl = fused_chunk_delta_rule 81 | elif self.la_mode == 'fused_recurrent': 82 | self._la_impl = fused_recurrent_delta_rule 83 | 84 | self.apply(self._initialize_weights) 85 | 86 | def _initialize_weights(self, module: torch.nn.Module): 87 | if getattr(module, "_is_hf_initialized", False): 88 | return 89 | if isinstance(module, torch.nn.Linear): 90 | torch.nn.init.xavier_uniform_(module.weight, gain=2 ** -2.5) 91 | if module.bias is not None: 92 | torch.nn.init.zeros_(module.bias) 93 | module._is_hf_initialized = True 94 | 95 | 96 | def forward( 97 | self, 98 | q: torch.Tensor, 99 | k: torch.Tensor, 100 | v: torch.Tensor, 101 | beta: torch.Tensor, 102 | ) -> torch.Tensor: 103 | 104 | # torch.Size([128, 4, 16, 32]) 105 | q, k, v = (rearrange(x, 'n b h d -> b h n d') for x in (q, k, v)) 106 | 107 | if self.qk_activation != 'silu': 108 | if self.qk_activation == 'relu': 109 | q, k = q.relu(), k.relu() 110 | elif self.qk_activation == 'elu': 111 | q, k = elu_p1(q), elu_p1(k) 112 | elif self.qk_activation == 'identity': 113 | pass 114 | else: 115 | raise NotImplementedError 116 | 117 | if self.qk_norm is not None: 118 | if self.qk_norm == 'l2': 119 | q = l2_norm_fn(q) 120 | k = l2_norm_fn(k) 121 | elif self.qk_norm == 'sum': 122 | q = sum_norm(q).to(v) 123 | k = sum_norm(k).to(v) 124 | 125 | # expects q: B, H, T, K 126 | if self.la_mode == 'fused_recurrent': 127 | output, _ = self._la_impl(q, k, v, beta) 128 | else: 129 | assert self.chunk_size in [16, 32, 64] 130 | output, _ = self._la_impl(q, k, v, beta, self.chunk_size) 131 | 132 | output = self.la_output_norm(output) 133 | output = rearrange(output, 'b h n d -> n b (h d)') 134 | 135 | return output 136 | -------------------------------------------------------------------------------- /linear_moe/sequence_modeling/gated_deltanet/__init__.py: -------------------------------------------------------------------------------- 1 | from .gated_deltanet import GatedDeltaNet 2 | 3 | __all__ = [ 4 | "GatedDeltaNet" 5 | ] 6 | -------------------------------------------------------------------------------- /linear_moe/sequence_modeling/gated_deltanet/gated_deltanet.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | import torch 4 | from typing import Optional 5 | from einops import rearrange 6 | import torch.nn.functional as F 7 | from megatron.core.transformer.module import MegatronModule 8 | from linear_moe.model.common_modules.activations import ACT2FN 9 | 10 | from linear_moe.model.common_modules import RMSNorm, l2_norm_fn 11 | from fla.ops.gated_delta_rule import chunk_gated_delta_rule, fused_recurrent_gated_delta_rule 12 | 13 | def
simple_norm(x): 14 | return (F.normalize(x, dim=-1) * x.shape[-1] ** 0.5).to(x) 15 | 16 | 17 | # @torch.jit.script 18 | def elu_p1(x): 19 | return (F.elu(x, 1., False) + 1.).to(x) 20 | 21 | 22 | # @torch.jit.script 23 | def sum_norm(x): 24 | return (x / x.sum(-1, keepdim=True)).to(x) 25 | 26 | 27 | # @torch.jit.script 28 | def elu_norm(x): 29 | dtype = x.dtype 30 | x = F.elu(x, 1., False) + 1. 31 | return (x / x.sum(-1, keepdim=True)).to(dtype) 32 | 33 | class GatedDeltaNet(MegatronModule): 34 | 35 | def __init__( 36 | self, 37 | config, 38 | expand_k: float = 1.0, 39 | expand_v: float = 1.0, 40 | chunk_size: int = 64, 41 | qk_activation: str = 'silu', 42 | qk_norm: str = 'l2', 43 | ): 44 | super().__init__(config) 45 | 46 | self.la_mode = config.la_mode 47 | self.hidden_size = config.hidden_size 48 | self.num_heads = config.num_attention_heads 49 | # num_kv_heads here mains num_query_groups 50 | self.num_kv_heads = config.num_query_groups if config.num_query_groups is not None else config.num_attention_heads 51 | self.num_kv_groups = self.num_heads // self.num_kv_heads 52 | 53 | self.qk_activation = qk_activation 54 | self.qk_norm = qk_norm 55 | self.chunk_size = chunk_size 56 | 57 | assert self.qk_activation in ['silu', 'relu', 'elu', 'identity'] 58 | assert self.qk_norm in ['l2', 'sum'] 59 | self.key_dim = int(config.hidden_size * expand_k) 60 | self.value_dim = int(config.hidden_size * expand_v) 61 | 62 | assert self.la_mode in ['chunk', 'fused_chunk', 'fused_recurrent'], f"Not supported mode `{self.la_mode}`." 63 | assert self.key_dim % self.num_heads == 0, f"key dim must be divisible by num_heads of {self.num_heads}" 64 | assert self.value_dim % self.num_heads == 0, f"value dim must be divisible by num_heads of {self.num_heads}" 65 | 66 | self.head_qk_dim = self.key_dim // self.num_heads 67 | self.head_v_dim = self.value_dim // self.num_heads 68 | 69 | 70 | if self.la_mode == 'chunk': 71 | self._la_impl = chunk_gated_delta_rule 72 | elif self.la_mode == 'fused_recurrent': 73 | self._la_impl = fused_recurrent_gated_delta_rule 74 | else: 75 | raise NotImplementedError('Not supported la_mode') 76 | 77 | self.apply(self._initialize_weights) 78 | 79 | def _initialize_weights(self, module: torch.nn.Module): 80 | if getattr(module, "_is_hf_initialized", False): 81 | return 82 | if isinstance(module, torch.nn.Linear): 83 | torch.nn.init.xavier_uniform_(module.weight, gain=2 ** -2.5) 84 | if module.bias is not None: 85 | torch.nn.init.zeros_(module.bias) 86 | module._is_hf_initialized = True 87 | 88 | 89 | def forward( 90 | self, 91 | q: torch.Tensor, 92 | k: torch.Tensor, 93 | v: torch.Tensor, 94 | beta: torch.Tensor, 95 | gk: torch.Tensor, 96 | ) -> torch.Tensor: 97 | 98 | # torch.Size([128, 4, 16, 32]) 99 | q, k, v, beta, gk = (x.transpose(0, 1).contiguous() for x in (q, k, v, beta, gk)) 100 | 101 | q, k, v = torch.nn.SiLU()(q), torch.nn.SiLU()(k), torch.nn.SiLU()(v) 102 | 103 | if self.qk_norm is not None: 104 | if self.qk_norm == 'l2': 105 | q = l2_norm_fn(q) 106 | k = l2_norm_fn(k) 107 | elif self.qk_norm == 'sum': 108 | q = sum_norm(q).to(v) 109 | k = sum_norm(k).to(v) 110 | 111 | # expects q: B, H, T, K 112 | output, _ = self._la_impl(q, k, v, gk, beta, head_first=False) 113 | 114 | output = rearrange(output, 'b n h d -> n b (h d)').contiguous() 115 | 116 | return output 117 | -------------------------------------------------------------------------------- /linear_moe/sequence_modeling/gla/__init__.py: -------------------------------------------------------------------------------- 1 
| from .gla import GLA 2 | from .gla_gate import GLAGate 3 | 4 | __all__ = [ 5 | "GLA", 6 | "GLAGate" 7 | ] 8 | -------------------------------------------------------------------------------- /linear_moe/sequence_modeling/gla/gla.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | import torch 4 | from typing import Optional 5 | from einops import rearrange 6 | import torch.nn.functional as F 7 | from megatron.core.transformer.module import MegatronModule 8 | from linear_moe.model.common_modules.activations import ACT2FN 9 | 10 | from linear_moe.model.common_modules import RMSNorm 11 | from fla.ops.gla import chunk_gla, fused_chunk_gla, fused_recurrent_gla 12 | 13 | 14 | class GLA(MegatronModule): 15 | 16 | def __init__( 17 | self, 18 | config, 19 | expand_k: float = 1.0, 20 | expand_v: float = 1.0, 21 | ): 22 | super().__init__(config) 23 | 24 | self.la_mode = config.la_mode 25 | self.hidden_size = config.hidden_size 26 | self.num_heads = config.num_attention_heads 27 | # num_kv_heads here means num_query_groups 28 | self.num_kv_heads = config.num_query_groups if config.num_query_groups is not None else config.num_attention_heads 29 | self.num_kv_groups = self.num_heads // self.num_kv_heads 30 | 31 | self.la_feature_map = config.la_feature_map 32 | self.la_feature_map_fn = ACT2FN[self.la_feature_map] if self.la_feature_map is not None else None 33 | 34 | self.key_dim = int(config.hidden_size * expand_k) 35 | self.value_dim = int(config.hidden_size * expand_v) 36 | 37 | assert self.la_mode in ['chunk', 'fused_chunk', 'fused_recurrent'], f"Not supported mode `{self.la_mode}`." 38 | assert self.key_dim % self.num_heads == 0, f"key dim must be divisible by num_heads of {self.num_heads}" 39 | assert self.value_dim % self.num_heads == 0, f"value dim must be divisible by num_heads of {self.num_heads}" 40 | 41 | self.head_qk_dim = self.key_dim // self.num_heads 42 | self.head_v_dim = self.value_dim // self.num_heads 43 | 44 | if config.la_output_norm == 'rmsnorm': 45 | self.la_output_norm = RMSNorm(hidden_size=self.head_v_dim, elementwise_affine=config.la_elementwise_affine, eps=config.la_norm_eps) 46 | elif config.la_output_norm == 'identity': 47 | self.la_output_norm = torch.nn.Identity() 48 | else: 49 | raise NotImplementedError(f"Not supported output norm `{config.la_output_norm}`.") 50 | 51 | self.gla_la_gate_logit_normalizer = config.gla_la_gate_logit_normalizer 52 | self.gla_la_clamp_min = config.gla_la_clamp_min 53 | 54 | if self.la_mode == 'chunk': 55 | self._la_impl = chunk_gla 56 | elif self.la_mode == 'fused_chunk': 57 | self._la_impl = fused_chunk_gla 58 | elif self.la_mode == 'fused_recurrent': 59 | self._la_impl = fused_recurrent_gla 60 | 61 | self.apply(self._initialize_weights) 62 | 63 | def _initialize_weights(self, module: torch.nn.Module): 64 | if getattr(module, "_is_hf_initialized", False): 65 | return 66 | if isinstance(module, torch.nn.Linear): 67 | torch.nn.init.xavier_uniform_(module.weight, gain=2 ** -2.5) 68 | if module.bias is not None: 69 | torch.nn.init.zeros_(module.bias) 70 | module._is_hf_initialized = True 71 | 72 | 73 | def forward( 74 | self, 75 | q: torch.Tensor, 76 | k: torch.Tensor, 77 | v: torch.Tensor, 78 | gk: torch.Tensor, 79 | ) -> torch.Tensor: 80 | 81 | # torch.Size([128, 4, 16, 32]) 82 | q, k, v = (rearrange(x, 'n b h d -> b h n d').contiguous() for x in (q, k, v)) 83 | 84 | gk = rearrange(gk, 'n b (h d) -> b h n d', h=self.num_kv_heads).contiguous() 85 | gk =
F.logsigmoid(gk) / self.gla_la_gate_logit_normalizer 86 | 87 | if self.la_feature_map_fn is not None: 88 | q, k = map(self.la_feature_map_fn, (q, k)) 89 | 90 | if self.gla_la_clamp_min is not None: 91 | gk = torch.clamp_min(gk, self.gla_la_clamp_min) 92 | 93 | # expects q: B, H, T, K 94 | output, _ = self._la_impl(q, k, v, gk) 95 | output = self.la_output_norm(output) 96 | 97 | output = rearrange(output, 'b h n d -> n b (h d)').contiguous() 98 | 99 | return output 100 | -------------------------------------------------------------------------------- /linear_moe/sequence_modeling/gla/gla_gate.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | import torch 4 | from typing import Optional 5 | from einops import rearrange 6 | from megatron.core.transformer.module import MegatronModule 7 | 8 | 9 | class GLAGate(MegatronModule): 10 | 11 | def __init__(self, config): 12 | super().__init__(config) 13 | 14 | self.hidden_size = config.hidden_size 15 | self.key_dim = config.hidden_size 16 | self.num_heads = config.num_attention_heads 17 | # num_kv_heads here mains num_query_groups 18 | self.num_kv_heads = config.num_query_groups if config.num_query_groups is not None else config.num_attention_heads 19 | self.num_kv_groups = self.num_heads // self.num_kv_heads 20 | self.key_dim_per_group = self.key_dim // self.num_kv_groups 21 | self.gla_la_gate_low_rank_dim = config.gla_la_gate_low_rank_dim 22 | self.gk_proj = torch.nn.Sequential(torch.nn.Linear(self.hidden_size, self.gla_la_gate_low_rank_dim, bias=False), 23 | torch.nn.Linear(self.gla_la_gate_low_rank_dim, self.key_dim_per_group, bias=True)) 24 | 25 | def forward( 26 | self, 27 | hidden_states: torch.Tensor, 28 | ) -> torch.Tensor: 29 | gk = self.gk_proj(hidden_states) 30 | 31 | return gk 32 | -------------------------------------------------------------------------------- /linear_moe/sequence_modeling/hgrn2/__init__.py: -------------------------------------------------------------------------------- 1 | from .hgrn2 import HGRN2 2 | 3 | __all__ = [ 4 | "HGRN2", 5 | ] 6 | -------------------------------------------------------------------------------- /linear_moe/sequence_modeling/hgrn2/hgrn2.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | import torch 4 | from typing import Optional 5 | from einops import rearrange 6 | import torch.nn.functional as F 7 | from megatron.core.transformer.module import MegatronModule 8 | from transformers.activations import ACT2FN 9 | 10 | from linear_moe.model.common_modules import RMSNorm 11 | from fla.ops.gla import chunk_gla, fused_chunk_gla, fused_recurrent_gla 12 | 13 | 14 | class HGRN2(MegatronModule): 15 | 16 | def __init__( 17 | self, 18 | config, 19 | expand_ratio: Optional[int] = 128, 20 | ): 21 | super().__init__(config) 22 | 23 | self.la_mode = config.la_mode 24 | self.hidden_size = config.hidden_size 25 | self.num_heads = config.num_attention_heads 26 | self.head_dim = self.hidden_size // self.num_heads 27 | # num_kv_heads here mains num_query_groups 28 | self.num_kv_heads = config.num_query_groups if config.num_query_groups is not None else config.num_attention_heads 29 | self.num_kv_groups = self.num_heads // self.num_kv_heads 30 | 31 | if config.hidden_size is not None and config.num_attention_heads is not None: 32 | expand_ratio = self.hidden_size // self.num_heads 33 | self.expand_ratio = expand_ratio 34 | self.forget_dim = 
int(self.num_heads * self.expand_ratio) 35 | self.input_dim = self.hidden_size 36 | 37 | assert self.la_mode in ['chunk', 'fused_recurrent', 'fused_chunk'], f"Not supported mode `{self.la_mode}`." 38 | assert self.forget_dim % self.num_heads == 0, f"forget key dim must be divisible by num_heads of {self.num_heads}" 39 | assert self.input_dim % self.num_heads == 0, f"input value dim must be divisible by num_heads of {self.num_heads}" 40 | 41 | if config.la_output_norm == 'rmsnorm': 42 | self.la_output_norm = RMSNorm(hidden_size=self.head_dim, elementwise_affine=config.la_elementwise_affine, eps=config.la_norm_eps) 43 | elif config.la_output_norm == 'identity': 44 | self.la_output_norm = torch.nn.Identity() 45 | else: 46 | raise NotImplementedError(f"Not supported output norm `{self.la_output_norm}`.") 47 | 48 | if self.la_mode == 'chunk': 49 | self._la_impl = chunk_gla 50 | elif self.la_mode == 'fused_chunk': 51 | self._la_impl = fused_chunk_gla 52 | elif self.la_mode == 'fused_recurrent': 53 | self._la_impl = fused_recurrent_gla 54 | 55 | self.apply(self._initialize_weights) 56 | 57 | def _initialize_weights(self, module: torch.nn.Module): 58 | if getattr(module, "_is_hf_initialized", False): 59 | return 60 | if isinstance(module, torch.nn.Linear): 61 | torch.nn.init.xavier_uniform_(module.weight, gain=2 ** -2.5) 62 | if module.bias is not None: 63 | torch.nn.init.zeros_(module.bias) 64 | module._is_hf_initialized = True 65 | 66 | 67 | def forward( 68 | self, 69 | q: torch.Tensor, 70 | k: torch.Tensor, 71 | i: torch.Tensor, 72 | g: torch.Tensor, 73 | ) -> torch.Tensor: 74 | 75 | # expects q: b, h, n, d 76 | output, _ = self._la_impl(q, k, i, g) 77 | # import pdb; pdb.set_trace() 78 | output = self.la_output_norm(output) 79 | output = rearrange(output, 'b h n d -> n b (h d)') 80 | 81 | return output 82 | -------------------------------------------------------------------------------- /linear_moe/sequence_modeling/lasp2/__init__.py: -------------------------------------------------------------------------------- 1 | from .lasp2 import ( 2 | LASP2 3 | ) 4 | 5 | __all__ = [ 6 | "LASP2" 7 | ] 8 | -------------------------------------------------------------------------------- /linear_moe/sequence_modeling/lasp2/lasp2.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from einops import rearrange 3 | from megatron.core.transformer.module import MegatronModule 4 | from transformers.activations import ACT2FN 5 | 6 | from linear_moe.model.common_modules import RMSNorm 7 | from .lasp2_with_mask_triton_op import lasp2_with_mask_triton_op 8 | from .lasp2_without_mask_triton_op import lasp2_without_mask_triton_op 9 | from megatron.core.parallel_state import get_context_parallel_group 10 | 11 | 12 | class LASP2(MegatronModule): 13 | 14 | def __init__( 15 | self, 16 | config, 17 | expand_k: float = 1.0, 18 | expand_v: float = 1.0, 19 | ): 20 | super().__init__(config) 21 | 22 | self.la_mode = config.la_mode 23 | self.hidden_size = config.hidden_size 24 | self.num_heads = config.num_attention_heads 25 | # num_kv_heads here mains num_query_groups 26 | self.num_kv_heads = config.num_query_groups if config.num_query_groups is not None else config.num_attention_heads 27 | self.num_kv_groups = self.num_heads // self.num_kv_heads 28 | 29 | self.la_feature_map = config.la_feature_map 30 | self.la_feature_map_fn = ACT2FN[self.la_feature_map] if self.la_feature_map is not None else None 31 | 32 | self.key_dim = int(config.hidden_size * expand_k) 33 | 
self.value_dim = int(config.hidden_size * expand_v) 34 | 35 | assert self.la_mode in ['chunk', 'fused_chunk', 'parallel', 'fused_recurrent'], f"Not supported mode `{self.la_mode}`." 36 | assert self.key_dim % self.num_heads == 0, f"key dim must be divisible by num_heads of {self.num_heads}" 37 | assert self.value_dim % self.num_heads == 0, f"value dim must be divisible by num_heads of {self.num_heads}" 38 | 39 | self.head_qk_dim = self.key_dim // self.num_heads 40 | self.head_v_dim = self.value_dim // self.num_heads 41 | 42 | if config.la_output_norm == 'rmsnorm': 43 | self.la_output_norm = RMSNorm(hidden_size=self.head_v_dim, elementwise_affine=config.la_elementwise_affine, eps=config.la_norm_eps) 44 | elif config.la_output_norm == 'identity': 45 | self.la_output_norm = torch.nn.Identity() 46 | else: 47 | raise NotImplementedError(f"Not supported output norm `{self.la_output_norm}`.") 48 | 49 | self._la_impl = lasp2_without_mask_triton_op 50 | 51 | self.apply(self._initialize_weights) 52 | 53 | def _initialize_weights(self, module: torch.nn.Module): 54 | if getattr(module, "_is_hf_initialized", False): 55 | return 56 | if isinstance(module, torch.nn.Linear): 57 | torch.nn.init.xavier_uniform_(module.weight, gain=2 ** -2.5) 58 | if module.bias is not None: 59 | torch.nn.init.zeros_(module.bias) 60 | module._is_hf_initialized = True 61 | 62 | def forward( 63 | self, 64 | q: torch.Tensor, 65 | k: torch.Tensor, 66 | v: torch.Tensor, 67 | ) -> torch.Tensor: 68 | q, k, v = (rearrange(x, 'n b h d -> b h n d') for x in (q, k, v)) 69 | 70 | if self.la_feature_map_fn is not None: 71 | q, k = map(self.la_feature_map_fn, (q, k)) 72 | 73 | # expects q: b, h, n, d 74 | output = self._la_impl(q, k, v, get_context_parallel_group()) 75 | 76 | output = self.la_output_norm(output) 77 | output = rearrange(output, 'b h n d -> n b (h d)') 78 | 79 | return output 80 | -------------------------------------------------------------------------------- /linear_moe/sequence_modeling/lightning_attention/__init__.py: -------------------------------------------------------------------------------- 1 | from .lightning_attention import ( 2 | LightningAttention 3 | ) 4 | 5 | __all__ = [ 6 | "LightningAttention" 7 | ] 8 | -------------------------------------------------------------------------------- /linear_moe/sequence_modeling/mamba2/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/b312754b9b8a2cb9eb15e373baabe263b8d409c1/linear_moe/sequence_modeling/mamba2/__init__.py -------------------------------------------------------------------------------- /linear_moe/sequence_modeling/mamba2/mamba_layer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, Tri Dao, Albert Gu. 2 | # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. 3 | 4 | # Some of this code was adopted from https://github.com/state-spaces/mamba/ 5 | # This source code is licensed under the Apache license found in the 6 | # LICENSE file in the root directory of this source tree. 
7 | 8 | from dataclasses import dataclass 9 | from typing import Union 10 | 11 | import torch 12 | from torch import Tensor 13 | 14 | from megatron.core.transformer.identity_op import IdentityOp 15 | from megatron.core.transformer.module import MegatronModule 16 | from megatron.core.transformer.spec_utils import ModuleSpec, build_module 17 | from megatron.core.transformer.transformer_config import TransformerConfig 18 | 19 | 20 | @dataclass 21 | class MambaLayerSubmodules: 22 | norm: Union[ModuleSpec, type] = IdentityOp 23 | mixer: Union[ModuleSpec, type] = IdentityOp 24 | mamba_bda: Union[ModuleSpec, type] = IdentityOp 25 | 26 | 27 | class MambaLayer(MegatronModule): 28 | def __init__( 29 | self, 30 | config: TransformerConfig, 31 | submodules: MambaLayerSubmodules, 32 | mamba_ssm_ngroups=8, 33 | layer_number: int = 1, 34 | residual_in_fp32=False, 35 | ): 36 | """ 37 | Top level Mamba Layer 38 | """ 39 | super().__init__(config) 40 | self.config = config 41 | self.layer_number = layer_number 42 | self.residual_in_fp32 = residual_in_fp32 43 | self.hidden_dropout = config.hidden_dropout 44 | self.mixer = build_module( 45 | submodules.mixer, 46 | self.config, 47 | d_model=self.config.hidden_size, 48 | ngroups=mamba_ssm_ngroups, 49 | layer_number=layer_number, 50 | ) 51 | self.norm = build_module(submodules.norm, self.config, self.config.hidden_size) 52 | self.mamba_bda = build_module(submodules.mamba_bda) 53 | self.bias_dropout_add_exec_handler = torch.enable_grad 54 | 55 | def forward( 56 | self, 57 | hidden_states: Tensor, 58 | attention_mask: Tensor, # Not used in MambaLayer 59 | inference_params=None, 60 | rotary_pos_emb: Tensor = None, # Not used in MambaLayer 61 | ): 62 | 63 | residual = hidden_states 64 | if self.residual_in_fp32: 65 | residual = residual.to(torch.float32) 66 | 67 | hidden_states = hidden_states.to(dtype=self.config.params_dtype) 68 | hidden_states = self.norm(hidden_states) 69 | 70 | mixer_out_with_bias = self.mixer(hidden_states, inference_params=inference_params) 71 | 72 | with self.bias_dropout_add_exec_handler(): 73 | hidden_states = self.mamba_bda(self.training, self.config.bias_dropout_fusion)( 74 | mixer_out_with_bias, residual, self.hidden_dropout 75 | ) 76 | 77 | return hidden_states 78 | 79 | def allocate_inference_cache(self, batch_size, max_seqlen, dtype=None): 80 | return self.mixer.allocate_inference_cache(batch_size, max_seqlen, dtype=dtype) 81 | -------------------------------------------------------------------------------- /linear_moe/sequence_modeling/mamba2/triton_cache_manager.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. 2 | 3 | import os 4 | import socket 5 | from pathlib import Path 6 | 7 | import torch 8 | 9 | try: 10 | from triton.runtime.cache import FileCacheManager 11 | except ImportError: 12 | raise ImportError("triton is required by the Mamba model but cannot be imported") 13 | 14 | 15 | def get_rank(): 16 | return torch.distributed.get_rank() 17 | 18 | 19 | def default_cache_dir(): 20 | return os.path.join(Path.home(), ".triton", "cache") 21 | 22 | 23 | class ParallelFileCacheManager(FileCacheManager): 24 | 25 | # See https://github.com/triton-lang/triton/blob/main/python/triton/runtime/cache.py 26 | 27 | # When running Triton with multiple ranks, they each create their own cache manager. 
Their input 28 | # keys to that class are mostly (but not entirely) the same across ranks, which leads many ranks 29 | # to write to the same 'key' directories in the cache dir at the same time during compilation, 30 | # leading to conflicts. This works around that by making each cache dir be rank specific by 31 | # adding "rank__" to the cache directory. 32 | 33 | def __init__(self, key): 34 | self.key = key 35 | self.lock_path = None 36 | # create cache directory if it doesn't exist 37 | self.cache_dir = os.environ.get('TRITON_CACHE_DIR', default_cache_dir()) 38 | self.cache_dir = os.path.join( 39 | self.cache_dir, "rank_{}_{}".format(socket.gethostname(), os.getpid()) 40 | ) 41 | if self.cache_dir: 42 | self.cache_dir = os.path.join(self.cache_dir, self.key) 43 | self.lock_path = os.path.join(self.cache_dir, "lock") 44 | os.makedirs(self.cache_dir, exist_ok=True) 45 | -------------------------------------------------------------------------------- /linear_moe/sequence_modeling/rebased/__init__.py: -------------------------------------------------------------------------------- 1 | from .rebased import ( 2 | Rebased 3 | ) 4 | 5 | __all__ = [ 6 | "Rebased" 7 | ] 8 | -------------------------------------------------------------------------------- /linear_moe/sequence_modeling/rebased/rebased.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | import torch 4 | from typing import Optional 5 | from einops import rearrange 6 | from megatron.core.transformer.module import MegatronModule 7 | from linear_moe.model.common_modules.feature_map import RebasedFeatureMap 8 | from fla.ops.linear_attn import chunk_linear_attn, fused_chunk_linear_attn 9 | from fla.ops.rebased import parallel_rebased 10 | 11 | 12 | class Rebased(MegatronModule): 13 | 14 | def __init__( 15 | self, 16 | config, 17 | expand_k: float = 1.0, 18 | expand_v: float = 1.0, 19 | use_gamma: Optional[bool] = True, 20 | use_beta: Optional[bool] = True, 21 | normalize: Optional[bool] = True, 22 | eps: float = 1e-5, 23 | ): 24 | super().__init__(config) 25 | 26 | self.la_mode = config.la_mode 27 | self.hidden_size = config.hidden_size 28 | self.key_dim = int(config.hidden_size * expand_k) 29 | self.value_dim = int(config.hidden_size * expand_v) 30 | self.num_heads = config.num_attention_heads 31 | # num_kv_heads here mains num_query_groups 32 | self.num_kv_heads = config.num_query_groups if config.num_query_groups is not None else config.num_attention_heads 33 | self.num_kv_groups = self.num_heads // self.num_kv_heads 34 | self.head_qk_dim = self.key_dim // self.num_heads 35 | self.head_v_dim = self.value_dim // self.num_heads 36 | self.la_eps = eps 37 | self.la_feature_map_fn = RebasedFeatureMap(self.head_qk_dim, use_gamma, use_beta, normalize) 38 | 39 | 40 | assert self.la_mode in ['chunk', 'fused_chunk', 'parallel'], f"Not supported mode `{self.la_mode}`." 
41 | assert self.key_dim % self.num_heads == 0, f"key dim must be divisible by num_heads of {self.num_heads}" 42 | assert self.value_dim % self.num_heads == 0, f"value dim must be divisible by num_heads of {self.num_heads}" 43 | 44 | if self.la_mode == 'chunk': 45 | self._la_impl = chunk_linear_attn 46 | elif self.la_mode == 'fused_chunk': 47 | self._la_impl = fused_chunk_linear_attn 48 | elif self.la_mode == 'parallel': 49 | self._la_impl = parallel_rebased 50 | 51 | self.apply(self._initialize_weights) 52 | 53 | def _initialize_weights(self, module: torch.nn.Module): 54 | if getattr(module, "_is_hf_initialized", False): 55 | return 56 | if isinstance(module, torch.nn.Linear): 57 | torch.nn.init.xavier_uniform_(module.weight, gain=2 ** -2.5) 58 | if module.bias is not None: 59 | torch.nn.init.zeros_(module.bias) 60 | module._is_hf_initialized = True 61 | 62 | 63 | def forward( 64 | self, 65 | q: torch.Tensor, 66 | k: torch.Tensor, 67 | v: torch.Tensor, 68 | ) -> torch.Tensor: 69 | # torch.Size([128, 4, 16, 32]) 70 | q, k, v = (rearrange(x, 'n b h d -> b h n d') for x in (q, k, v)) 71 | 72 | q, k = self.la_feature_map_fn(q, flatten=(self.la_mode != 'parallel')), self.la_feature_map_fn(k, flatten=(self.la_mode != 'parallel')) 73 | 74 | # expects q: B, H, T, K 75 | if self.la_mode in ['chunk', 'fused_chunk']: 76 | output, _ = self._la_impl(q, k, v, normalize=True, scale=1) 77 | elif self.la_mode == 'parallel': 78 | assert q.shape[-1] <= 128 79 | output, _ = self._la_impl(q, k, v, self.la_eps, True, True) 80 | 81 | output = rearrange(output, 'b h n d -> n b (h d)') 82 | return output 83 | -------------------------------------------------------------------------------- /linear_moe/sequence_modeling/retention/__init__.py: -------------------------------------------------------------------------------- 1 | from .retention import ( 2 | Retention 3 | ) 4 | 5 | __all__ = [ 6 | "Retention" 7 | ] 8 | -------------------------------------------------------------------------------- /linear_moe/sequence_modeling/retention/retention.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | import torch 4 | from typing import Optional 5 | from einops import rearrange 6 | from megatron.core.transformer.module import MegatronModule 7 | from transformers.activations import ACT2FN 8 | import torch.nn.functional as F 9 | 10 | from linear_moe.model.common_modules import RMSNorm 11 | from fla.ops.retention import (chunk_retention, fused_chunk_retention, 12 | fused_recurrent_retention, parallel_retention) 13 | 14 | 15 | class Retention(MegatronModule): 16 | 17 | def __init__( 18 | self, 19 | config, 20 | expand_k: float = 1.0, 21 | expand_v: float = 1.0, 22 | ): 23 | super().__init__(config) 24 | 25 | self.la_mode = config.la_mode 26 | self.hidden_size = config.hidden_size 27 | self.num_heads = config.num_attention_heads 28 | # num_kv_heads here mains num_query_groups 29 | self.num_kv_heads = config.num_query_groups if config.num_query_groups is not None else config.num_attention_heads 30 | self.num_kv_groups = self.num_heads // self.num_kv_heads 31 | 32 | self.la_feature_map = config.la_feature_map 33 | if self.la_feature_map == 'elu': 34 | def elu(x): 35 | return F.elu(x) + 1 36 | self.la_feature_map_fn = elu 37 | else: 38 | self.la_feature_map_fn = ACT2FN[self.la_feature_map] if self.la_feature_map is not None else None 39 | 40 | self.key_dim = int(config.hidden_size * expand_k) 41 | self.value_dim = int(config.hidden_size * expand_v) 42 
| 43 | assert self.la_mode in ['chunk', 'fused_chunk', 'parallel', 'fused_recurrent'], f"Not supported mode `{self.la_mode}`." 44 | assert self.key_dim % self.num_heads == 0, f"key dim must be divisible by num_heads of {self.num_heads}" 45 | assert self.value_dim % self.num_heads == 0, f"value dim must be divisible by num_heads of {self.num_heads}" 46 | 47 | self.head_qk_dim = self.key_dim // self.num_heads 48 | self.head_v_dim = self.value_dim // self.num_heads 49 | 50 | if config.la_output_norm == 'rmsnorm': 51 | self.la_output_norm = RMSNorm(hidden_size=self.head_v_dim, elementwise_affine=config.la_elementwise_affine, eps=config.la_norm_eps) 52 | elif config.la_output_norm == 'identity': 53 | self.la_output_norm = torch.nn.Identity() 54 | else: 55 | raise NotImplementedError(f"Not supported output norm `{self.la_output_norm}`.") 56 | 57 | if self.la_mode == 'chunk': 58 | self._la_impl = chunk_retention 59 | elif self.la_mode == 'fused_chunk': 60 | self._la_impl = fused_chunk_retention 61 | elif self.la_mode == 'fused_recurrent': 62 | self._la_impl = fused_recurrent_retention 63 | elif self.la_mode == 'parallel': 64 | self._la_impl = parallel_retention 65 | 66 | self.apply(self._initialize_weights) 67 | 68 | def _initialize_weights(self, module: torch.nn.Module): 69 | if getattr(module, "_is_hf_initialized", False): 70 | return 71 | if isinstance(module, torch.nn.Linear): 72 | torch.nn.init.xavier_uniform_(module.weight, gain=2 ** -2.5) 73 | if module.bias is not None: 74 | torch.nn.init.zeros_(module.bias) 75 | module._is_hf_initialized = True 76 | 77 | 78 | def forward( 79 | self, 80 | q: torch.Tensor, 81 | k: torch.Tensor, 82 | v: torch.Tensor, 83 | ) -> torch.Tensor: 84 | # torch.Size([128, 4, 16, 32]) 85 | q, k, v = (rearrange(x, 'n b h d -> b h n d') for x in (q, k, v)) 86 | 87 | if self.la_feature_map_fn is not None: 88 | q, k = map(self.la_feature_map_fn, (q, k)) 89 | 90 | # expects q: B, H, T, K 91 | output, _ = self._la_impl(q, k, v) 92 | output = self.la_output_norm(output) 93 | 94 | output = rearrange(output, 'b h n d -> n b (h d)') 95 | 96 | return output 97 | -------------------------------------------------------------------------------- /linear_moe/sequence_modeling/rwkv6/__init__.py: -------------------------------------------------------------------------------- 1 | from .rwkv6 import RWKV6 2 | from .dd_lerp_linear import LerpLinear, DDLerpLinear 3 | 4 | __all__ = [ 5 | "RWKV6", 6 | "LerpLinear", 7 | "DDLerpLinear", 8 | ] 9 | -------------------------------------------------------------------------------- /linear_moe/sequence_modeling/rwkv6/dd_lerp_linear.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | import torch 4 | import torch.nn as nn 5 | from typing import Optional 6 | from einops import rearrange 7 | from megatron.core.transformer.module import MegatronModule 8 | 9 | 10 | class LerpLinear(nn.Module): 11 | 12 | def __init__( 13 | self, 14 | input_dim: int, 15 | output_dim: int, 16 | low_rank_dim: Optional[int] = None 17 | ): 18 | super().__init__() 19 | 20 | self.input_dim = input_dim 21 | self.output_dim = output_dim 22 | self.low_rank_dim = low_rank_dim 23 | 24 | self.time_shift = nn.ZeroPad2d((0, 0, 1, -1)) 25 | if low_rank_dim is None: 26 | self.linear = nn.Linear(input_dim, output_dim, bias=False) 27 | else: 28 | self.linear = LoRA(input_dim, output_dim, low_rank_dim) 29 | self.mu = nn.Parameter(torch.zeros(input_dim)) 30 | 31 | def __repr__(self) -> str: 32 | s = 
f"{self.__class__.__name__}({self.input_dim}, {self.output_dim}" 33 | if self.low_rank_dim is not None: 34 | s += f", low_rank_dim={self.low_rank_dim}" 35 | s += ")" 36 | return s 37 | 38 | def forward(self, x: torch.Tensor, delta: Optional[torch.Tensor] = None) -> torch.Tensor: 39 | if delta is None: 40 | shifted = self.time_shift(x) 41 | if len(shifted.shape) == 2: 42 | shifted = shifted.unsqueeze(1) 43 | delta = shifted - x 44 | return self.linear(x + delta * self.mu) 45 | 46 | 47 | class DDLerpLinear(MegatronModule): 48 | 49 | def __init__( 50 | self, 51 | config, 52 | input_dim: int, 53 | output_dim: int, 54 | low_rank_dim: Optional[int] = None 55 | ): 56 | super().__init__(config) 57 | 58 | self.input_dim = input_dim 59 | self.output_dim = output_dim 60 | self.low_rank_dim = low_rank_dim 61 | 62 | self.time_shift = nn.ZeroPad2d((0, 0, 1, -1)) 63 | if low_rank_dim is None: 64 | self.linear = nn.Linear(input_dim, output_dim, bias=False) 65 | else: 66 | self.linear = LoRA(input_dim, output_dim, low_rank_dim) 67 | 68 | def __repr__(self) -> str: 69 | s = f"{self.__class__.__name__}({self.input_dim}, {self.output_dim}" 70 | if self.low_rank_dim is not None: 71 | s += f", low_rank_dim={self.low_rank_dim}" 72 | s += ")" 73 | return s 74 | 75 | def forward(self, x: torch.Tensor, mu: torch.Tensor, delta: Optional[torch.Tensor] = None) -> torch.Tensor: 76 | if delta is None: 77 | shifted = self.time_shift(x) 78 | if len(shifted.shape) == 2: 79 | shifted = shifted.unsqueeze(1) 80 | delta = shifted - x 81 | return self.linear(x + delta * mu) 82 | 83 | 84 | class LoRA(nn.Module): 85 | 86 | def __init__( 87 | self, 88 | input_dim: int, 89 | output_dim: int, 90 | low_rank_dim: int, 91 | bias: Optional[bool] = True 92 | ): 93 | super().__init__() 94 | 95 | self.input_dim = input_dim 96 | self.output_dim = output_dim 97 | self.low_rank_dim = low_rank_dim 98 | self.bias = bias 99 | 100 | self.lora = nn.Sequential( 101 | nn.Linear(input_dim, low_rank_dim, bias=False), 102 | nn.Tanh(), 103 | nn.Linear(low_rank_dim, output_dim, bias=bias) 104 | ) 105 | 106 | def __repr__(self) -> str: 107 | s = f"{self.__class__.__name__}(" 108 | s += f"input_dim={self.input_dim}, low_rank_dim={self.low_rank_dim}, output_dim={self.output_dim}" 109 | if not self.bias: 110 | s += f", bias={self.bias}" 111 | s += ")" 112 | return s 113 | 114 | def forward(self, x: torch.Tensor) -> torch.Tensor: 115 | return self.lora(x) -------------------------------------------------------------------------------- /linear_moe/sequence_modeling/rwkv6/rwkv6.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | import torch 4 | from typing import Optional 5 | from einops import rearrange 6 | import torch.nn.functional as F 7 | from megatron.core.transformer.module import MegatronModule 8 | from transformers.activations import ACT2FN 9 | 10 | from linear_moe.model.common_modules import RMSNorm, GroupNorm 11 | from fla.ops.rwkv6 import chunk_rwkv6, fused_recurrent_rwkv6 12 | 13 | 14 | class RWKV6(MegatronModule): 15 | 16 | def __init__( 17 | self, 18 | config, 19 | expand_k: float = 0.5, 20 | expand_v: float = 1.0, 21 | ): 22 | super().__init__(config) 23 | 24 | self.la_mode = config.la_mode 25 | self.hidden_size = config.hidden_size 26 | self.num_heads = config.num_attention_heads 27 | # num_kv_heads here mains num_query_groups 28 | self.num_kv_heads = config.num_query_groups if config.num_query_groups is not None else config.num_attention_heads 29 | 
self.num_kv_groups = self.num_heads // self.num_kv_heads 30 | 31 | # self.la_feature_map = config.la_feature_map 32 | # self.la_feature_map_fn = ACT2FN[self.la_feature_map] if self.la_feature_map is not None else None 33 | 34 | self.key_dim = int(config.hidden_size * expand_k) 35 | self.value_dim = int(config.hidden_size * expand_v) 36 | 37 | assert self.la_mode in ['chunk', 'fused_recurrent'], f"Not supported mode `{self.la_mode}`." 38 | assert self.key_dim % self.num_heads == 0, f"key dim must be divisible by num_heads of {self.num_heads}" 39 | assert self.value_dim % self.num_heads == 0, f"value dim must be divisible by num_heads of {self.num_heads}" 40 | 41 | self.head_dim = self.hidden_size // self.num_heads 42 | self.head_qk_dim = self.key_dim // self.num_heads 43 | self.head_v_dim = self.value_dim // self.num_heads 44 | 45 | if config.la_output_norm == 'rmsnorm': 46 | self.la_output_norm = RMSNorm(hidden_size=self.head_v_dim, elementwise_affine=config.la_elementwise_affine, eps=config.la_norm_eps) 47 | elif config.la_output_norm == 'identity': 48 | self.la_output_norm = torch.nn.Identity() 49 | elif config.la_output_norm == 'groupnorm': 50 | self.la_output_norm = GroupNorm(self.num_heads, self.value_dim, elementwise_affine=config.la_elementwise_affine, bias=True, eps=config.la_norm_eps) 51 | else: 52 | raise NotImplementedError(f"Not supported output norm `{self.la_output_norm}`.") 53 | 54 | if self.la_mode == 'chunk': 55 | self._la_impl = chunk_rwkv6 56 | elif self.la_mode == 'fused_recurrent': 57 | self._la_impl = fused_recurrent_rwkv6 58 | 59 | self.apply(self._initialize_weights) 60 | 61 | def _initialize_weights(self, module: torch.nn.Module): 62 | if getattr(module, "_is_hf_initialized", False): 63 | return 64 | if isinstance(module, torch.nn.Linear): 65 | torch.nn.init.xavier_uniform_(module.weight, gain=2 ** -2.5) 66 | if module.bias is not None: 67 | torch.nn.init.zeros_(module.bias) 68 | module._is_hf_initialized = True 69 | 70 | 71 | def forward( 72 | self, 73 | r: torch.Tensor, 74 | k: torch.Tensor, 75 | v: torch.Tensor, 76 | w: torch.Tensor, 77 | u: torch.Tensor, 78 | scale: float, 79 | ) -> torch.Tensor: 80 | 81 | # expects q: b, h, n, d 82 | output, _ = self._la_impl(r, k, v, w, u, scale) 83 | if isinstance(self.la_output_norm, GroupNorm): 84 | output = self.la_output_norm(rearrange(output, 'b h n d -> b n (h d)')) 85 | output = rearrange(output, 'b n (h d) -> n b (h d)', h = self.head_dim) 86 | elif isinstance(self.la_output_norm, RMSNorm): 87 | output = self.la_output_norm(output) 88 | output = rearrange(output, 'b h n d -> n b (h d)') 89 | 90 | return output 91 | -------------------------------------------------------------------------------- /linear_moe/sequence_modeling/rwkv7/__init__.py: -------------------------------------------------------------------------------- 1 | from .rwkv7 import RWKV7 2 | from .lora_mlp import LoRA 3 | __all__ = [ 4 | "RWKV7", 5 | "LoRA", 6 | ] 7 | -------------------------------------------------------------------------------- /linear_moe/sequence_modeling/rwkv7/lora_mlp.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | import torch 4 | import torch.nn as nn 5 | from typing import Optional 6 | from einops import rearrange 7 | from megatron.core.transformer.module import MegatronModule 8 | 9 | 10 | class LoRA(MegatronModule): 11 | 12 | def __init__( 13 | self, 14 | config, 15 | input_dim: int, 16 | output_dim: int, 17 | low_rank_dim: int, 18 | bias: 
Optional[bool] = True, 19 | activation: Optional[str] = 'tanh' 20 | ): 21 | super().__init__(config) 22 | 23 | self.input_dim = input_dim 24 | self.output_dim = output_dim 25 | self.low_rank_dim = low_rank_dim 26 | self.bias = bias 27 | 28 | if activation is None: 29 | self.activation = nn.Identity() 30 | elif activation == 'sigmoid': 31 | self.activation = nn.Sigmoid() 32 | elif activation == 'tanh': 33 | self.activation = nn.Tanh() 34 | elif activation == 'relu': 35 | self.activation = nn.ReLU() 36 | else: 37 | raise ValueError(f"Not supported activation `{activation}`.") 38 | 39 | self.lora = nn.Sequential( 40 | nn.Linear(input_dim, low_rank_dim, bias=False), 41 | self.activation, 42 | nn.Linear(low_rank_dim, output_dim, bias=bias) 43 | ) 44 | 45 | def __repr__(self) -> str: 46 | s = f"{self.__class__.__name__}(" 47 | s += f"input_dim={self.input_dim}, low_rank_dim={self.low_rank_dim}, output_dim={self.output_dim}" 48 | if not self.bias: 49 | s += f", bias={self.bias}" 50 | s += ")" 51 | return s 52 | 53 | def forward(self, x: torch.Tensor) -> torch.Tensor: 54 | return self.lora(x) 55 | -------------------------------------------------------------------------------- /linear_moe/sequence_modeling/rwkv7/rwkv7.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | import torch 4 | from typing import Optional 5 | from einops import rearrange 6 | import torch.nn.functional as F 7 | from megatron.core.transformer.module import MegatronModule 8 | from transformers.activations import ACT2FN 9 | 10 | from linear_moe.model.common_modules import RMSNorm, GroupNorm 11 | from fla.ops.rwkv7 import chunk_rwkv7, fused_recurrent_rwkv7 12 | 13 | 14 | class RWKV7(MegatronModule): 15 | 16 | def __init__( 17 | self, 18 | config, 19 | expand_k: float = 1.0, 20 | expand_v: float = 1.0, 21 | ): 22 | super().__init__(config) 23 | 24 | self.la_mode = config.la_mode 25 | self.hidden_size = config.hidden_size 26 | self.num_heads = config.num_attention_heads 27 | # num_kv_heads here mains num_query_groups 28 | self.num_kv_heads = config.num_query_groups if config.num_query_groups is not None else config.num_attention_heads 29 | self.num_kv_groups = self.num_heads // self.num_kv_heads 30 | 31 | # self.la_feature_map = config.la_feature_map 32 | # self.la_feature_map_fn = ACT2FN[self.la_feature_map] if self.la_feature_map is not None else None 33 | 34 | self.key_dim = int(config.hidden_size * expand_k) 35 | self.value_dim = int(config.hidden_size * expand_v) 36 | 37 | assert self.la_mode in [ 38 | 'chunk', 'fused_recurrent'], f"Not supported mode `{self.la_mode}`." 
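# Worked example with hypothetical numbers (not taken from any shipped config):
# for hidden_size=2048, num_attention_heads=32 and the default expand_k=expand_v=1.0,
# key_dim = value_dim = 2048, so head_qk_dim and head_v_dim computed below are both
# 2048 // 32 = 64; the divisibility asserts below guarantee this split is exact.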
39 | assert self.key_dim % self.num_heads == 0, f"key dim must be divisible by num_heads of {self.num_heads}" 40 | assert self.value_dim % self.num_heads == 0, f"value dim must be divisible by num_heads of {self.num_heads}" 41 | 42 | self.head_dim = self.hidden_size // self.num_heads 43 | self.head_qk_dim = self.key_dim // self.num_heads 44 | self.head_v_dim = self.value_dim // self.num_heads 45 | 46 | if config.la_output_norm == 'rmsnorm': 47 | self.la_output_norm = RMSNorm( 48 | hidden_size=self.head_v_dim, elementwise_affine=config.la_elementwise_affine, eps=config.la_norm_eps) 49 | elif config.la_output_norm == 'identity': 50 | self.la_output_norm = torch.nn.Identity() 51 | elif config.la_output_norm == 'groupnorm': 52 | self.la_output_norm = GroupNorm( 53 | self.num_heads, self.value_dim, elementwise_affine=config.la_elementwise_affine, bias=True, eps=config.la_norm_eps) 54 | else: 55 | raise NotImplementedError( 56 | f"Not supported output norm `{self.la_output_norm}`.") 57 | 58 | if self.la_mode == 'chunk': 59 | self._la_impl = chunk_rwkv7 60 | elif self.la_mode == 'fused_recurrent': 61 | self._la_impl = fused_recurrent_rwkv7 62 | 63 | self.apply(self._initialize_weights) 64 | 65 | def _initialize_weights(self, module: torch.nn.Module): 66 | if getattr(module, "_is_hf_initialized", False): 67 | return 68 | if isinstance(module, torch.nn.Linear): 69 | torch.nn.init.xavier_uniform_(module.weight, gain=2 ** -2.5) 70 | if module.bias is not None: 71 | torch.nn.init.zeros_(module.bias) 72 | module._is_hf_initialized = True 73 | 74 | def forward( 75 | self, 76 | r: torch.Tensor, 77 | w: torch.Tensor, 78 | k: torch.Tensor, 79 | v: torch.Tensor, 80 | a: torch.Tensor, 81 | b: torch.Tensor, 82 | scale: float, 83 | ) -> torch.Tensor: 84 | 85 | # expects q: b, h, n, d 86 | output, _ = self._la_impl(r, w, k, v, a, b, scale) 87 | if isinstance(self.la_output_norm, GroupNorm): 88 | output = self.la_output_norm(rearrange(output, 'b h n d -> b n (h d)')) 89 | output = rearrange(output, 'b n (h d) -> n b (h d)', h=self.head_dim) 90 | elif isinstance(self.la_output_norm, RMSNorm): 91 | output = self.la_output_norm(output) 92 | output = rearrange(output, 'b h n d -> n b (h d)') 93 | 94 | return output 95 | -------------------------------------------------------------------------------- /linear_moe/tokenizer/jiebabpe_tokenizer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Alibaba PAI Team. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
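# The tokenizer below pre-segments raw text with rjieba and then encodes the segments
# with a HuggingFace `tokenizers` BPE model as pre-tokenized input; code-like input
# (is_code=True) skips the jieba pass. Minimal usage sketch (the JSON path is hypothetical):
#
#   tok = JiebaBPETokenizer('/path/to/tokenizer.json')
#   ids = tok.tokenize('今天天气不错')   # jieba cut, then BPE encode -> token ids
#   text = tok.detokenize(ids)           # decode back to a string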
14 | 15 | from megatron.tokenizer.tokenizer import AbstractTokenizer 16 | from tokenizers import Tokenizer 17 | 18 | 19 | class JiebaBPETokenizer(AbstractTokenizer): 20 | """SentencePiece BPE tokenizer with Jieba integration""" 21 | def __init__(self, tokenizer_json_file): 22 | name = 'Jieba BPE Tokenizer' 23 | super().__init__(name) 24 | 25 | self.tokenizer = Tokenizer.from_file(tokenizer_json_file) 26 | self.eod_id = self.tokenizer.token_to_id('<|endoftext|>') 27 | 28 | try: 29 | import rjieba 30 | except ImportError: 31 | raise ImportError( 32 | 'You need to install rjieba to use JiebaBPETokenizer. ' 33 | 'See https://pypi.org/project/rjieba/ for installation.') 34 | self.jieba = rjieba 35 | self.new_line = self.vocab['\n'] 36 | self.sep_token = self.vocab[''] 37 | 38 | @property 39 | def vocab_size(self): 40 | return self.tokenizer.get_vocab_size(with_added_tokens=True) 41 | 42 | @property 43 | def vocab(self): 44 | return self.tokenizer.get_vocab(with_added_tokens=True) 45 | 46 | @property 47 | def inv_vocab(self): 48 | vocab = self.vocab 49 | inv_vocab = dict() 50 | for key, val in vocab.items(): 51 | inv_vocab[val] = key 52 | return inv_vocab 53 | 54 | def tokenize(self, text, is_code=False, only_cut=False): 55 | if only_cut: 56 | seg_list = [x for x in self.jieba.cut(text)] 57 | return seg_list 58 | if not is_code: 59 | seg_list = [x for x in self.jieba.cut(text)] 60 | return self.tokenizer.encode(seg_list, 61 | is_pretokenized=True, 62 | add_special_tokens=True).ids 63 | else: 64 | return self.tokenizer.encode(text, 65 | is_pretokenized=False, 66 | add_special_tokens=True).ids 67 | 68 | def detokenize(self, token_ids): 69 | text = self.tokenizer.decode(token_ids, skip_special_tokens=False) 70 | return text 71 | 72 | def convert_tokens_to_ids(self, tokens): 73 | return self.tokenizer.encode(tokens, 74 | is_pretokenized=True, 75 | add_special_tokens=True).ids 76 | 77 | @property 78 | def eod(self): 79 | return self.eod_id 80 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | icetk 2 | ftfy 3 | hjson 4 | ninja 5 | tokenizers 6 | transformers 7 | accelerate 8 | datasets 9 | wcwidth 10 | pyarrow 11 | jieba 12 | sentencepiece 13 | rjieba 14 | sqlitedict 15 | sacrebleu 16 | tensorboard 17 | lm_dataformat 18 | tiktoken 19 | mamba-ssm 20 | megablocks==0.5.1 # torch<3.0,>=2.3.0 21 | stanford-stk==0.7.1 22 | git+https://github.com/fanshiqing/grouped_gemm@v1.0 23 | -------------------------------------------------------------------------------- /toolkits/model_checkpoints_convertor/README.md: -------------------------------------------------------------------------------- 1 | ## hf-to-megatron 2 | hf-to-megatron is a model checkpoint conversion tool that makes it easy to convert HuggingFace-format checkpoints to the Megatron format, so that Megatron-LM's distributed training capabilities can be used to train large language models. Converted models must be used together with the PAI-Megatron-Patch codebase. The following models are currently supported: 3 | 4 | + bloom 5 | + llama/alpaca 6 | + chatglm 7 | + galactica 8 | + glm 9 | + glm130B 10 | + falcon 11 | + starcoder 12 | 13 | Converted models are stored at: oss://atp-modelzoo/release/models/pai-megatron-patch/ 14 | -------------------------------------------------------------------------------- /toolkits/model_checkpoints_convertor/baichuan/configuration_baichuan.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023, Baichuan Intelligent Technology. All rights reserved.
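# Note: the defaults below (hidden_size=5120, num_hidden_layers=40, num_attention_heads=40,
# vocab_size=64000) correspond to the Baichuan-13B scale targeted by the convertor scripts
# in this directory; other model sizes can be set through the keyword arguments.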
2 | 3 | from transformers.configuration_utils import PretrainedConfig 4 | 5 | 6 | class BaichuanConfig(PretrainedConfig): 7 | model_type = "baichuan" 8 | keys_to_ignore_at_inference = ["past_key_values"] 9 | 10 | def __init__( 11 | self, 12 | vocab_size=64000, 13 | hidden_size=5120, 14 | intermediate_size=13696, 15 | num_hidden_layers=40, 16 | num_attention_heads=40, 17 | hidden_act="silu", 18 | model_max_length=4096, 19 | initializer_range=0.02, 20 | rms_norm_eps=1e-6, 21 | use_cache=True, 22 | pad_token_id=0, 23 | bos_token_id=1, 24 | eos_token_id=2, 25 | tie_word_embeddings=False, 26 | gradient_checkpointing=False, 27 | z_loss_weight=0, 28 | **kwargs, 29 | ): 30 | self.vocab_size = vocab_size 31 | self.model_max_length = model_max_length 32 | self.hidden_size = hidden_size 33 | self.intermediate_size = intermediate_size 34 | self.num_hidden_layers = num_hidden_layers 35 | self.num_attention_heads = num_attention_heads 36 | self.hidden_act = hidden_act 37 | self.initializer_range = initializer_range 38 | self.rms_norm_eps = rms_norm_eps 39 | self.use_cache = use_cache 40 | self.z_loss_weight = z_loss_weight 41 | self.gradient_checkpointing = (gradient_checkpointing,) 42 | super().__init__( 43 | pad_token_id=pad_token_id, 44 | bos_token_id=bos_token_id, 45 | eos_token_id=eos_token_id, 46 | tie_word_embeddings=tie_word_embeddings, 47 | **kwargs, 48 | ) 49 | -------------------------------------------------------------------------------- /toolkits/model_checkpoints_convertor/baichuan/model_convertor.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # transformers to megatron 3 | # sh model_convertor.sh /root/Megatron-LM-23.04/ /mnt/baichuan-ckpts/baichuan-13b-base/ /mnt/baichuan-ckpts/baichuan-13b-base-hf-to-megatron-tp1-pp1 1 1 baichuan-13b 0 false 4 | set -e 5 | START_TIME=$SECONDS 6 | 7 | MEGATRON_PATH=$1 8 | SOURCE_CKPT_PATH=$2 9 | TARGET_CKPT_PATH=$3 10 | TP=$4 11 | PP=$5 12 | MN=$6 #baichuan-13b 13 | EXTRA_VOCAB_SIZE=$7 14 | mg2hf=$8 15 | 16 | if [ $mg2hf = true ]; then 17 | do_options=" 18 | --convert_checkpoint_from_megatron_to_transformers 19 | " 20 | elif [ $mg2hf = false ]; then 21 | do_options="" 22 | fi 23 | 24 | export PYTHONPATH=${MEGATRON_PATH}:$PYTHONPATH 25 | 26 | python checkpoint_reshaping_and_interoperability.py \ 27 | --load_path ${SOURCE_CKPT_PATH} \ 28 | --save_path ${TARGET_CKPT_PATH} \ 29 | --target_params_dtype fp16 \ 30 | --megatron-path ${MEGATRON_PATH} \ 31 | --target_tensor_model_parallel_size ${TP} \ 32 | --target_pipeline_model_parallel_size ${PP} \ 33 | --model_name ${MN} \ 34 | --extra_num_vocabs ${EXTRA_VOCAB_SIZE} \ 35 | ${do_options} 36 | 37 | ELAPSED_TIME=$(($SECONDS - $START_TIME)) 38 | echo "$(($ELAPSED_TIME/60)) min $(($ELAPSED_TIME%60)) sec" 39 | -------------------------------------------------------------------------------- /toolkits/model_checkpoints_convertor/baichuan/te_model_convertor.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | START_TIME=$SECONDS 5 | 6 | MEGATRON_PATH=$1 7 | SOURCE_CKPT_PATH=$2 8 | TARGET_CKPT_PATH=$3 9 | TP=$4 10 | PP=$5 11 | MN=$6 #baichuan-13b 12 | EXTRA_VOCAB_SIZE=$7 13 | 14 | export PYTHONPATH=${MEGATRON_PATH}:$PYTHONPATH 15 | 16 | python hf2te.py \ 17 | --load_path ${SOURCE_CKPT_PATH} \ 18 | --save_path ${TARGET_CKPT_PATH} \ 19 | --target_params_dtype fp16 \ 20 | --megatron-path ${MEGATRON_PATH} \ 21 | --target_tensor_model_parallel_size ${TP} \ 22 | --target_pipeline_model_parallel_size 
${PP} \ 23 | --model_name ${MN} \ 24 | --extra_num_vocabs ${EXTRA_VOCAB_SIZE} \ 25 | 26 | ELAPSED_TIME=$(($SECONDS - $START_TIME)) 27 | echo "$(($ELAPSED_TIME/60)) min $(($ELAPSED_TIME%60)) sec" 28 | -------------------------------------------------------------------------------- /toolkits/model_checkpoints_convertor/baichuan2/configuration_baichuan.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023, Baichuan Intelligent Technology. All rights reserved. 2 | 3 | from transformers.configuration_utils import PretrainedConfig 4 | 5 | 6 | class BaichuanConfig(PretrainedConfig): 7 | model_type = "baichuan" 8 | keys_to_ignore_at_inference = ["past_key_values"] 9 | 10 | def __init__( 11 | self, 12 | vocab_size=64000, 13 | hidden_size=5120, 14 | intermediate_size=13696, 15 | num_hidden_layers=40, 16 | num_attention_heads=40, 17 | hidden_act="silu", 18 | model_max_length=4096, 19 | initializer_range=0.02, 20 | rms_norm_eps=1e-6, 21 | use_cache=True, 22 | pad_token_id=0, 23 | bos_token_id=1, 24 | eos_token_id=2, 25 | tie_word_embeddings=False, 26 | gradient_checkpointing=False, 27 | z_loss_weight=0, 28 | **kwargs, 29 | ): 30 | self.vocab_size = vocab_size 31 | self.model_max_length = model_max_length 32 | self.hidden_size = hidden_size 33 | self.intermediate_size = intermediate_size 34 | self.num_hidden_layers = num_hidden_layers 35 | self.num_attention_heads = num_attention_heads 36 | self.hidden_act = hidden_act 37 | self.initializer_range = initializer_range 38 | self.rms_norm_eps = rms_norm_eps 39 | self.use_cache = use_cache 40 | self.z_loss_weight = z_loss_weight 41 | self.gradient_checkpointing = (gradient_checkpointing,) 42 | super().__init__( 43 | pad_token_id=pad_token_id, 44 | bos_token_id=bos_token_id, 45 | eos_token_id=eos_token_id, 46 | tie_word_embeddings=tie_word_embeddings, 47 | **kwargs, 48 | ) 49 | -------------------------------------------------------------------------------- /toolkits/model_checkpoints_convertor/baichuan2/hf2te_convertor.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | START_TIME=$SECONDS 5 | 6 | MEGATRON_PATH=$1 7 | SOURCE_CKPT_PATH=$2 8 | TARGET_CKPT_PATH=$3 9 | TP=$4 10 | PP=$5 11 | MN=$6 #baichuan2-7b 12 | EXTRA_VOCAB_SIZE=$7 13 | 14 | export PYTHONPATH=${MEGATRON_PATH}:$PYTHONPATH 15 | 16 | python hf2te.py \ 17 | --load_path ${SOURCE_CKPT_PATH} \ 18 | --save_path ${TARGET_CKPT_PATH} \ 19 | --target_params_dtype fp16 \ 20 | --megatron-path ${MEGATRON_PATH} \ 21 | --target_tensor_model_parallel_size ${TP} \ 22 | --target_pipeline_model_parallel_size ${PP} \ 23 | --model_name ${MN} \ 24 | --extra_num_vocabs ${EXTRA_VOCAB_SIZE} \ 25 | 26 | ELAPSED_TIME=$(($SECONDS - $START_TIME)) 27 | echo "$(($ELAPSED_TIME/60)) min $(($ELAPSED_TIME%60)) sec" 28 | -------------------------------------------------------------------------------- /toolkits/model_checkpoints_convertor/baichuan2/model_convertor.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # transformers to megatron 3 | # sh model_convertor.sh /root/Megatron-LM-23.04/ /mnt/baichuan-ckpts/baichuan-13b-base/ /mnt/baichuan-ckpts/baichuan-13b-base-hf-to-megatron-tp1-pp1 1 1 baichuan-13b 0 false 4 | # megatron to transformers 5 | # sh model_convertor.sh ../../../../Megatron-LM/ ../../../../baichuan/baichuan-13b-base-hf-to-megatron-tp4-pp1/release/ ../../../../baichuan/baichuan2-13b-mg2hf41 4 1 baichuan2-13b 0 true 6 | set -e 7 | 
START_TIME=$SECONDS 8 | 9 | MEGATRON_PATH=$1 10 | SOURCE_CKPT_PATH=$2 11 | TARGET_CKPT_PATH=$3 12 | TP=$4 13 | PP=$5 14 | MN=$6 #baichuan2-7b, baichuan2-13b 15 | EXTRA_VOCAB_SIZE=$7 16 | mg2hf=$8 17 | 18 | if [ $mg2hf = true ]; then 19 | do_options=" 20 | --convert_checkpoint_from_megatron_to_transformers 21 | " 22 | elif [ $mg2hf = false ]; then 23 | do_options="" 24 | fi 25 | 26 | export PYTHONPATH=${MEGATRON_PATH}:$PYTHONPATH 27 | 28 | python checkpoint_reshaping_and_interoperability.py \ 29 | --load_path ${SOURCE_CKPT_PATH} \ 30 | --save_path ${TARGET_CKPT_PATH} \ 31 | --target_params_dtype fp16 \ 32 | --megatron-path ${MEGATRON_PATH} \ 33 | --target_tensor_model_parallel_size ${TP} \ 34 | --target_pipeline_model_parallel_size ${PP} \ 35 | --model_name ${MN} \ 36 | --extra_num_vocabs ${EXTRA_VOCAB_SIZE} \ 37 | ${do_options} 38 | 39 | ELAPSED_TIME=$(($SECONDS - $START_TIME)) 40 | echo "$(($ELAPSED_TIME/60)) min $(($ELAPSED_TIME%60)) sec" 41 | -------------------------------------------------------------------------------- /toolkits/model_checkpoints_convertor/bloom/model_convertor_huggingface_megatron.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # transformers2megatron 3 | # bash model_convertor_huggingface_megatron.sh ../../Megatron-LM/ ../../bloomwcp-shrink ../../bloomwcp-shrink_mg_test 1 1 false 4 | # megatron2transformers 5 | # bash model_convertor_huggingface_megatron.sh ../../Megatron-LM/ ../../bloomwcp-shrink_mg_test/release/ ../../bloomwcp-shrink_mg2hf-wotrain 1 1 true 6 | set -e 7 | START_TIME=$SECONDS 8 | 9 | MEGATRON_PATH=$1 10 | export PYTHONPATH=${MEGATRON_PATH}:$PYTHONPATH 11 | 12 | SOURCE_CKPT_PATH=$2 13 | TARGET_CKPT_PATH=$3 14 | TP=$4 15 | PP=$5 16 | mg2hf=$6 17 | 18 | if [ $mg2hf = true ]; then 19 | do_options=" 20 | --convert_checkpoint_from_megatron_to_transformers 21 | " 22 | 23 | elif [ $mg2hf = false ]; then 24 | do_options="" 25 | fi 26 | 27 | python checkpoint_reshaping_and_interoperability.py \ 28 | --load_path ${SOURCE_CKPT_PATH} \ 29 | --save_path ${TARGET_CKPT_PATH} \ 30 | --target_params_dtype fp16 \ 31 | --megatron-path ${MEGATRON_PATH} \ 32 | --target_tensor_model_parallel_size ${TP} \ 33 | --target_pipeline_model_parallel_size ${PP}\ 34 | ${do_options} 35 | 36 | ELAPSED_TIME=$(($SECONDS - $START_TIME)) 37 | echo "$(($ELAPSED_TIME/60)) min $(($ELAPSED_TIME%60)) sec" 38 | -------------------------------------------------------------------------------- /toolkits/model_checkpoints_convertor/bloom/reward_model_convertor_megatron.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # transformers2megatron 3 | # bash reward_model_convertor_megatron.sh ../../Megatron-LM/ ../../convert_models/hf-reward/reward-bloom-7b1 ../../convert_models/megatron-reward/megatron-reward-7b1-8/ 8 1 false 4 | set -e 5 | START_TIME=$SECONDS 6 | 7 | MEGATRON_PATH=$1 8 | export PYTHONPATH=${MEGATRON_PATH}:$PYTHONPATH 9 | 10 | SOURCE_CKPT_PATH=$2 11 | TARGET_CKPT_PATH=$3 12 | TP=$4 13 | PP=$5 14 | mg2hf=$6 15 | 16 | if [ $mg2hf = true ]; then 17 | do_options=" 18 | --convert_checkpoint_from_megatron_to_transformers 19 | " 20 | 21 | elif [ $mg2hf = false ]; then 22 | do_options="" 23 | fi 24 | 25 | python reward_model_to_megatron.py \ 26 | --load_path ${SOURCE_CKPT_PATH} \ 27 | --save_path ${TARGET_CKPT_PATH} \ 28 | --target_params_dtype fp16 \ 29 | --megatron-path ${MEGATRON_PATH} \ 30 | --target_tensor_model_parallel_size ${TP} \ 31 | --target_pipeline_model_parallel_size 
${PP}\ 32 | ${do_options} 33 | 34 | ELAPSED_TIME=$(($SECONDS - $START_TIME)) 35 | echo "$(($ELAPSED_TIME/60)) min $(($ELAPSED_TIME%60)) sec" 36 | -------------------------------------------------------------------------------- /toolkits/model_checkpoints_convertor/bloom/run_convert_deepspeed_to_megatron.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export NCCL_DEBUG=WARN 3 | export LC_ALL=C.UTF-8 4 | export CUDA_VISIBLE_DEVICES=7 5 | export PYTHONPATH=/workspace/RapidformerPro/:/workspace/RapidformerPro/Megatron-LM-main/:$PYTHONPATH 6 | 7 | DS_CKPT_PATH=/mnt/bloom-ckpts/bloomz-1b7-optimizer-states 8 | MG_CKPT_PATH=/mnt/bloom-ckpts/bloomz-1b7-optimizer-states-to-megatron 9 | 10 | python deepspeed_to_megatron.py \ 11 | --input_folder ${DS_CKPT_PATH} \ 12 | --output_folder ${MG_CKPT_PATH} \ 13 | --target_tp 1 \ 14 | --target_pp 1 \ 15 | --for_release 16 | -------------------------------------------------------------------------------- /toolkits/model_checkpoints_convertor/bloom/run_convert_deepspeed_to_transformers.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export NCCL_DEBUG=WARN 3 | export LC_ALL=C.UTF-8 4 | export CUDA_VISIBLE_DEVICES=7 5 | export PYTHONPATH=/workspace/RapidformerPro/:/workspace/RapidformerPro/Megatron-LM-main/:$PYTHONPATH 6 | 7 | DS_CKPT_PATH=/mnt/bloom-ckpts/bloomz-1b7-optimizer-states 8 | HF_CKPT_PATH=/mnt/bloom-ckpts/bloomz-1b7-optimizer-states-to-transformers 9 | 10 | python convert_bloom_original_checkpoint_to_pytorch.py \ 11 | --bloom_checkpoint_path ${DS_CKPT_PATH} \ 12 | --pytorch_dump_folder_path ${HF_CKPT_PATH} \ 13 | --pretraining_tp 1 \ 14 | --bloom_config_file /mnt/bloom-ckpts/bloomz-1b7/config.json 15 | -------------------------------------------------------------------------------- /toolkits/model_checkpoints_convertor/chatglm/run_convert_huggingface_to_megatron.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | START_TIME=$SECONDS 4 | 5 | MEGATRON_PATH=$1 6 | export PYTHONPATH=${MEGATRON_PATH}:$PYTHONPATH 7 | 8 | HF_CKPT_PATH=$2 9 | MG_CKPT_PATH=$3 10 | TP=$4 11 | PP=$5 12 | 13 | python checkpoint_reshaping_and_interoperability.py \ 14 | --load_path ${HF_CKPT_PATH} \ 15 | --save_path ${MG_CKPT_PATH} \ 16 | --target_params_dtype fp32 \ 17 | --megatron-path ${MEGATRON_PATH} \ 18 | --target_tensor_model_parallel_size ${TP} \ 19 | --target_pipeline_model_parallel_size ${PP} \ 20 | 21 | ELAPSED_TIME=$(($SECONDS - $START_TIME)) 22 | echo "$(($ELAPSED_TIME/60)) min $(($ELAPSED_TIME%60)) sec" 23 | -------------------------------------------------------------------------------- /toolkits/model_checkpoints_convertor/deepseek/hf2mcore_deepseek_v2_moe_convertor.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | export CUDA_VISIBLE_DEVICES=7 4 | START_TIME=$SECONDS 5 | MASTER_ADDR=localhost 6 | MASTER_PORT=$(shuf -n 1 -i 10000-65535) 7 | 8 | MODEL_SIZE=$1 9 | SOURCE_CKPT_PATH=$2 10 | TARGET_CKPT_PATH=$3 11 | TP=$4 12 | PP=$5 13 | EP=$6 14 | mg2hf=$7 15 | HF_CKPT_PATH=$8 16 | 17 | CURRENT_DIR="$( cd "$( dirname "$0" )" && pwd )" 18 | MEGATRON_PATH=$( dirname $(dirname $( dirname ${CURRENT_DIR}))) 19 | export PYTHONPATH=$PYTHONPATH:${MEGATRON_PATH}:${MEGATRON_PATH}/Megatron-LM-240405 20 | 21 | if [ $MODEL_SIZE = A2.4B ]; then 22 | 23 | HIDDEN_SIZE=2048 24 | NUM_ATTN_HEADS=16 25 | NUM_LAYERS=27 26 | INTERMEDIATE_SIZE=10944 27 
| MOE_INTERMEDIATE_SIZE=1408 28 | MAX_POSITION_EMBEDDINGS=163840 29 | EXTRA_VOCAB_SIZE=2400 30 | KV_LORA_RANK=512 31 | QK_NOPE_HEAD_DIM=128 32 | QK_ROPE_HEAD_DIM=64 33 | V_HEAD_DIM=128 34 | ROPE_THETA=10000 35 | SCALE_FACTOR=40 36 | NUM_EXPERTS=64 37 | ROUTER_TOPK=6 38 | NUM_SHARED_EXPERTS=2 39 | MOE_LAYER_FREQ=1 40 | 41 | moe_options=" \ 42 | --moe-ffn-hidden-size ${MOE_INTERMEDIATE_SIZE} \ 43 | --enable-shared-expert \ 44 | --moe-layer-freq ${MOE_LAYER_FREQ} \ 45 | --num-shared-experts ${NUM_SHARED_EXPERTS} \ 46 | --moe-router-topk ${ROUTER_TOPK} \ 47 | --num-experts ${NUM_EXPERTS} \ 48 | --moe-aux-loss-coeff 1e-2 \ 49 | --expert-model-parallel-size 1 \ 50 | --target-expert-model-parallel-size ${EP} \ 51 | --kv-lora-rank ${KV_LORA_RANK} \ 52 | --qk-nope-head-dim ${QK_NOPE_HEAD_DIM} \ 53 | --qk-rope-head-dim ${QK_ROPE_HEAD_DIM} \ 54 | --v-head-dim ${V_HEAD_DIM} \ 55 | --moe-router-load-balancing-type aux_loss" 56 | 57 | cpu_options=" \ 58 | --use-cpu-initialization" 59 | 60 | elif [ $MODEL_SIZE = A21B ]; then 61 | 62 | HIDDEN_SIZE=5120 63 | NUM_ATTN_HEADS=128 64 | NUM_LAYERS=60 65 | INTERMEDIATE_SIZE=12288 66 | MOE_INTERMEDIATE_SIZE=1536 67 | MAX_POSITION_EMBEDDINGS=163840 68 | EXTRA_VOCAB_SIZE=2400 69 | Q_LORA_RANK=1536 70 | KV_LORA_RANK=512 71 | QK_NOPE_HEAD_DIM=128 72 | QK_ROPE_HEAD_DIM=64 73 | V_HEAD_DIM=128 74 | ROPE_THETA=10000 75 | SCALE_FACTOR=40 76 | NUM_EXPERTS=160 77 | ROUTER_TOPK=6 78 | NUM_SHARED_EXPERTS=2 79 | MOE_LAYER_FREQ=1 80 | 81 | moe_options=" \ 82 | --moe-ffn-hidden-size ${MOE_INTERMEDIATE_SIZE} \ 83 | --enable-shared-expert \ 84 | --moe-layer-freq ${MOE_LAYER_FREQ} \ 85 | --num-shared-experts ${NUM_SHARED_EXPERTS} \ 86 | --moe-router-topk ${ROUTER_TOPK} \ 87 | --num-experts ${NUM_EXPERTS} \ 88 | --moe-aux-loss-coeff 1e-2 \ 89 | --expert-model-parallel-size 1 \ 90 | --target-expert-model-parallel-size ${EP} \ 91 | --q-lora-rank ${Q_LORA_RANK} \ 92 | --kv-lora-rank ${KV_LORA_RANK} \ 93 | --qk-nope-head-dim ${QK_NOPE_HEAD_DIM} \ 94 | --qk-rope-head-dim ${QK_ROPE_HEAD_DIM} \ 95 | --v-head-dim ${V_HEAD_DIM} \ 96 | --moe-router-load-balancing-type aux_loss" 97 | 98 | cpu_options=" \ 99 | --use-cpu-initialization" 100 | 101 | fi 102 | 103 | 104 | if [ $mg2hf = true ]; then 105 | convert_options=" \ 106 | --convert-checkpoint-from-megatron-to-transformers \ 107 | --hf-ckpt-path ${HF_CKPT_PATH}" 108 | 109 | elif [ $mg2hf = false ]; then 110 | convert_options="" 111 | fi 112 | 113 | 114 | DISTRIBUTED_ARGS="--nproc_per_node 1 --nnodes 1 --node_rank 0 --master_addr $MASTER_ADDR --master_port $MASTER_PORT" 115 | 116 | torchrun ${DISTRIBUTED_ARGS} hf2mcore_deepseek_v2_moe.py \ 117 | --load ${SOURCE_CKPT_PATH} \ 118 | --save ${TARGET_CKPT_PATH} \ 119 | --target-tensor-model-parallel-size ${TP} \ 120 | --pipeline-model-parallel-size ${PP} \ 121 | --micro-batch-size 1 \ 122 | --save-interval 1 \ 123 | --bf16 \ 124 | --swiglu \ 125 | --norm-epsilon 1e-6 \ 126 | --num-layers ${NUM_LAYERS} \ 127 | --hidden-size ${HIDDEN_SIZE} \ 128 | --moe-ffn-hidden-size ${MOE_INTERMEDIATE_SIZE} \ 129 | --ffn-hidden-size ${INTERMEDIATE_SIZE} \ 130 | --num-attention-heads ${NUM_ATTN_HEADS} \ 131 | --max-position-embeddings ${MAX_POSITION_EMBEDDINGS} \ 132 | --seq-length 1 \ 133 | --no-async-tensor-model-parallel-allreduce \ 134 | --patch-tokenizer-type LLamaTokenizer \ 135 | --extra-vocab-size ${EXTRA_VOCAB_SIZE} \ 136 | --untie-embeddings-and-output-weights \ 137 | --no-bias-swiglu-fusion \ 138 | --no-rope-fusion \ 139 | --use-rotary-position-embeddings \ 140 | --transformer-impl transformer_engine \ 
141 | --disable-bias-linear \ 142 | --normalization RMSNorm \ 143 | --use-mcore-models \ 144 | --attention-dropout 0.0 \ 145 | --hidden-dropout 0.0 \ 146 | --rotary-base ${ROPE_THETA} \ 147 | --rotary-scaling-factor ${SCALE_FACTOR} \ 148 | ${convert_options} \ 149 | ${moe_options} \ 150 | ${cpu_options} 151 | 152 | 153 | ELAPSED_TIME=$(($SECONDS - $START_TIME)) 154 | echo "$(($ELAPSED_TIME/60)) min $(($ELAPSED_TIME%60)) sec" -------------------------------------------------------------------------------- /toolkits/model_checkpoints_convertor/falcon/configuration_RW.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 the Big Science Workshop and HuggingFace Inc. team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """ Bloom configuration""" 16 | from transformers.configuration_utils import PretrainedConfig 17 | from transformers.utils import logging 18 | 19 | 20 | logger = logging.get_logger(__name__) 21 | 22 | 23 | class RWConfig(PretrainedConfig): 24 | model_type = "RefinedWebModel" 25 | keys_to_ignore_at_inference = ["past_key_values"] 26 | attribute_map = { 27 | "num_hidden_layers": "n_layer", 28 | "num_attention_heads": "n_head", 29 | } 30 | 31 | def __init__( 32 | self, 33 | vocab_size=250880, 34 | hidden_size=64, 35 | n_layer=2, 36 | n_head=8, 37 | layer_norm_epsilon=1e-5, 38 | initializer_range=0.02, 39 | use_cache=True, 40 | bos_token_id=1, 41 | eos_token_id=2, 42 | apply_residual_connection_post_layernorm=False, 43 | hidden_dropout=0.0, 44 | attention_dropout=0.0, 45 | multi_query=False, 46 | alibi=False, 47 | bias=False, 48 | parallel_attn=False, 49 | **kwargs, 50 | ): 51 | self.vocab_size = vocab_size 52 | # Backward compatibility with n_embed kwarg 53 | n_embed = kwargs.pop("n_embed", None) 54 | self.hidden_size = hidden_size if n_embed is None else n_embed 55 | self.n_layer = n_layer 56 | self.n_head = n_head 57 | self.layer_norm_epsilon = layer_norm_epsilon 58 | self.initializer_range = initializer_range 59 | self.use_cache = use_cache 60 | self.apply_residual_connection_post_layernorm = apply_residual_connection_post_layernorm 61 | self.hidden_dropout = hidden_dropout 62 | self.attention_dropout = attention_dropout 63 | 64 | self.bos_token_id = bos_token_id 65 | self.eos_token_id = eos_token_id 66 | self.multi_query = multi_query 67 | self.alibi = alibi 68 | self.bias = bias 69 | self.parallel_attn = parallel_attn 70 | 71 | super().__init__(bos_token_id=bos_token_id, eos_token_id=eos_token_id, **kwargs) 72 | 73 | @property 74 | def head_dim(self): 75 | return self.hidden_size // self.n_head 76 | 77 | @property 78 | def rotary(self): 79 | return not self.alibi 80 | 81 | 82 | class RWConfig_40b(PretrainedConfig): 83 | model_type = "RefinedWeb" 84 | keys_to_ignore_at_inference = ["past_key_values"] 85 | attribute_map = { 86 | "num_hidden_layers": "n_layer", 87 | "num_attention_heads": "n_head", 88 | } 89 | 90 | def __init__( 91 | self, 92 | 
vocab_size=250880, 93 | hidden_size=64, 94 | n_layer=2, 95 | n_head=8, 96 | layer_norm_epsilon=1e-5, 97 | initializer_range=0.02, 98 | use_cache=True, 99 | bos_token_id=1, 100 | eos_token_id=2, 101 | apply_residual_connection_post_layernorm=False, 102 | hidden_dropout=0.0, 103 | attention_dropout=0.0, 104 | n_head_kv=None, 105 | alibi=False, 106 | **kwargs, 107 | ): 108 | self.vocab_size = vocab_size 109 | # Backward compatibility with n_embed kwarg 110 | n_embed = kwargs.pop("n_embed", None) 111 | self.hidden_size = hidden_size if n_embed is None else n_embed 112 | self.n_layer = n_layer 113 | self.n_head = n_head 114 | self.layer_norm_epsilon = layer_norm_epsilon 115 | self.initializer_range = initializer_range 116 | self.use_cache = use_cache 117 | self.apply_residual_connection_post_layernorm = apply_residual_connection_post_layernorm 118 | self.hidden_dropout = hidden_dropout 119 | self.attention_dropout = attention_dropout 120 | 121 | self.bos_token_id = bos_token_id 122 | self.eos_token_id = eos_token_id 123 | self.n_head_kv = n_head if n_head_kv is None else n_head_kv 124 | self.alibi = alibi 125 | 126 | super().__init__(bos_token_id=bos_token_id, eos_token_id=eos_token_id, **kwargs) 127 | 128 | @property 129 | def head_dim(self): 130 | return self.hidden_size // self.n_head 131 | 132 | @property 133 | def rotary(self): 134 | return not self.alibi 135 | -------------------------------------------------------------------------------- /toolkits/model_checkpoints_convertor/falcon/model_convertor.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # huggingface to megatron 3 | # bash model_convertor.sh /workspace/Megatron-LM/ /mnt/falcon-ckpts/falcon-7b-hf/ /mnt/falcon-ckpts/falcon-7b-hf-to-megatron-tp1-pp1 1 1 falcon-7b 0 false 4 | # megatron to huggingface: you need to copy the corresponding tokenizer files into the save dir 5 | # bash model_convertor.sh /workspace/Megatron-LM/ /mnt/falcon-ckpts/falcon-7b-hf-to-megatron-tp1-pp1/release/ /mnt/falcon-ckpts/falcon-7b-mg2hf/ 1 1 falcon-7b 0 true 6 | 7 | set -e 8 | START_TIME=$SECONDS 9 | 10 | MEGATRON_PATH=$1 11 | HF_CKPT_PATH=$2 12 | MG_CKPT_PATH=$3 13 | TP=$4 14 | PP=$5 15 | MN=$6 #falcon-7b, falcon-40b 16 | EXTRA_VOCAB_SIZE=$7 17 | mg2hf=$8 18 | 19 | if [ $mg2hf = true ]; then 20 | do_options=" 21 | --convert_checkpoint_from_megatron_to_transformers 22 | " 23 | elif [ $mg2hf = false ]; then 24 | do_options="" 25 | fi 26 | 27 | export PYTHONPATH=${MEGATRON_PATH}:$PYTHONPATH 28 | 29 | python checkpoint_reshaping_and_interoperability.py \ 30 | --load_path ${HF_CKPT_PATH} \ 31 | --save_path ${MG_CKPT_PATH} \ 32 | --target_params_dtype fp16 \ 33 | --megatron-path ${MEGATRON_PATH} \ 34 | --target_tensor_model_parallel_size ${TP} \ 35 | --target_pipeline_model_parallel_size ${PP} \ 36 | --model_name ${MN} \ 37 | --extra_num_vocabs ${EXTRA_VOCAB_SIZE} \ 38 | ${do_options} 39 | 40 | ELAPSED_TIME=$(($SECONDS - $START_TIME)) 41 | echo "$(($ELAPSED_TIME/60)) min $(($ELAPSED_TIME%60)) sec" 42 | -------------------------------------------------------------------------------- /toolkits/model_checkpoints_convertor/falcon40b/configuration_RW.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 the Big Science Workshop and HuggingFace Inc. team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """ Bloom configuration""" 16 | from transformers.configuration_utils import PretrainedConfig 17 | from transformers.utils import logging 18 | 19 | 20 | logger = logging.get_logger(__name__) 21 | 22 | 23 | class RWConfig(PretrainedConfig): 24 | model_type = "RefinedWeb" 25 | keys_to_ignore_at_inference = ["past_key_values"] 26 | attribute_map = { 27 | "num_hidden_layers": "n_layer", 28 | "num_attention_heads": "n_head", 29 | } 30 | 31 | def __init__( 32 | self, 33 | vocab_size=250880, 34 | hidden_size=64, 35 | n_layer=2, 36 | n_head=8, 37 | layer_norm_epsilon=1e-5, 38 | initializer_range=0.02, 39 | use_cache=True, 40 | bos_token_id=1, 41 | eos_token_id=2, 42 | apply_residual_connection_post_layernorm=False, 43 | hidden_dropout=0.0, 44 | attention_dropout=0.0, 45 | n_head_kv=None, 46 | alibi=False, 47 | **kwargs, 48 | ): 49 | self.vocab_size = vocab_size 50 | # Backward compatibility with n_embed kwarg 51 | n_embed = kwargs.pop("n_embed", None) 52 | self.hidden_size = hidden_size if n_embed is None else n_embed 53 | self.n_layer = n_layer 54 | self.n_head = n_head 55 | self.layer_norm_epsilon = layer_norm_epsilon 56 | self.initializer_range = initializer_range 57 | self.use_cache = use_cache 58 | self.apply_residual_connection_post_layernorm = apply_residual_connection_post_layernorm 59 | self.hidden_dropout = hidden_dropout 60 | self.attention_dropout = attention_dropout 61 | 62 | self.bos_token_id = bos_token_id 63 | self.eos_token_id = eos_token_id 64 | self.n_head_kv = n_head if n_head_kv is None else n_head_kv 65 | self.alibi = alibi 66 | 67 | super().__init__(bos_token_id=bos_token_id, eos_token_id=eos_token_id, **kwargs) 68 | 69 | @property 70 | def head_dim(self): 71 | return self.hidden_size // self.n_head 72 | 73 | @property 74 | def rotary(self): 75 | return not self.alibi 76 | -------------------------------------------------------------------------------- /toolkits/model_checkpoints_convertor/falcon40b/model_convertor.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # transformers to megatron 3 | # bash model_convertor.sh /root/Megatron-LM/ /mnt/falcon-ckpts/falcon-40b-hf /mnt/falcon-ckpts/falcon-40b-hf-to-megatron-tp2-pp1 2 1 falcon-40b 0 false 4 | # megatron to transformers: You need to copy the tokenizer files into the save_path 5 | # bash model_convertor.sh /root/Megatron-LM/ /mnt/falcon-ckpts/falcon-40b-hf-to-megatron-tp2-pp1/release /mnt/falcon-ckpts/falcon-40b-mg2hf 2 1 falcon-40b 0 true 6 | 7 | set -e 8 | START_TIME=$SECONDS 9 | 10 | MEGATRON_PATH=$1 11 | HF_CKPT_PATH=$2 12 | MG_CKPT_PATH=$3 13 | TP=$4 14 | PP=$5 15 | MN=$6 #falcon-40b 16 | EXTRA_VOCAB_SIZE=$7 17 | mg2hf=$8 18 | 19 | if [ $mg2hf = true ]; then 20 | do_options=" 21 | --convert_checkpoint_from_megatron_to_transformers 22 | " 23 | elif [ $mg2hf = false ]; then 24 | do_options="" 25 | fi 26 | 27 | export PYTHONPATH=${MEGATRON_PATH}:$PYTHONPATH 28 | 29 | python checkpoint_reshaping_and_interoperability.py \ 30 | --load_path ${HF_CKPT_PATH} \ 31 | --save_path ${MG_CKPT_PATH} \ 32 | --target_params_dtype fp16 \ 
33 | --megatron-path ${MEGATRON_PATH} \ 34 | --target_tensor_model_parallel_size ${TP} \ 35 | --target_pipeline_model_parallel_size ${PP} \ 36 | --model_name ${MN} \ 37 | --extra_num_vocabs ${EXTRA_VOCAB_SIZE} \ 38 | ${do_options} 39 | 40 | ELAPSED_TIME=$(($SECONDS - $START_TIME)) 41 | echo "$(($ELAPSED_TIME/60)) min $(($ELAPSED_TIME%60)) sec" 42 | -------------------------------------------------------------------------------- /toolkits/model_checkpoints_convertor/galactica/run_convert_huggingface_to_megatron.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | START_TIME=$SECONDS 4 | 5 | MEGATRON_PATH=$1 6 | HF_CKPT_PATH=$2 7 | MG_CKPT_PATH=$3 8 | TP=$4 9 | PP=$5 10 | MN=$6 #galactica-6.7b, galactica-30b 11 | EXTRA_VOCAB_SIZE=$7 12 | 13 | export PYTHONPATH=${MEGATRON_PATH}:$PYTHONPATH 14 | 15 | python checkpoint_reshaping_and_interoperability.py \ 16 | --load_path ${HF_CKPT_PATH} \ 17 | --save_path ${MG_CKPT_PATH} \ 18 | --target_params_dtype fp16 \ 19 | --megatron-path ${MEGATRON_PATH} \ 20 | --target_tensor_model_parallel_size ${TP} \ 21 | --target_pipeline_model_parallel_size ${PP} \ 22 | --model_name ${MN} \ 23 | --extra_num_vocabs ${EXTRA_VOCAB_SIZE} 24 | 25 | ELAPSED_TIME=$(($SECONDS - $START_TIME)) 26 | echo "$(($ELAPSED_TIME/60)) min $(($ELAPSED_TIME%60)) sec" 27 | -------------------------------------------------------------------------------- /toolkits/model_checkpoints_convertor/glm/run_convert_transformers_to_megatron.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | START_TIME=$SECONDS 4 | 5 | MEGATRON_PATH=$1 6 | export PYTHONPATH=${MEGATRON_PATH}:$PYTHONPATH 7 | 8 | HF_CKPT_PATH=$2 9 | MG_CKPT_PATH=$3 10 | TP=$4 11 | PP=$5 12 | 13 | python checkpoint_reshaping_and_interoperability.py \ 14 | --load_path ${HF_CKPT_PATH} \ 15 | --save_path ${MG_CKPT_PATH} \ 16 | --target_params_dtype fp16 \ 17 | --megatron-path ${MEGATRON_PATH} \ 18 | --target_tensor_model_parallel_size ${TP} \ 19 | --target_pipeline_model_parallel_size ${PP} \ 20 | 21 | ELAPSED_TIME=$(($SECONDS - $START_TIME)) 22 | echo "$(($ELAPSED_TIME/60)) min $(($ELAPSED_TIME%60)) sec" 23 | -------------------------------------------------------------------------------- /toolkits/model_checkpoints_convertor/glm130b/run_convert_transformers_to_megatron.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | START_TIME=$SECONDS 4 | 5 | MEGATRON_PATH=$1 6 | export PYTHONPATH=${MEGATRON_PATH}:$PYTHONPATH 7 | 8 | HF_CKPT_PATH=$2 9 | MG_CKPT_PATH=$3 10 | TP=$4 11 | PP=$5 12 | 13 | python checkpoint_reshaping_and_interoperability.py \ 14 | --load_path ${HF_CKPT_PATH} \ 15 | --save_path ${MG_CKPT_PATH} \ 16 | --target_params_dtype fp16 \ 17 | --megatron-path ${MEGATRON_PATH} \ 18 | --target_tensor_model_parallel_size ${TP} \ 19 | --target_pipeline_model_parallel_size ${PP} \ 20 | 21 | ELAPSED_TIME=$(($SECONDS - $START_TIME)) 22 | echo "$(($ELAPSED_TIME/60)) min $(($ELAPSED_TIME%60)) sec" 23 | -------------------------------------------------------------------------------- /toolkits/model_checkpoints_convertor/llama/hf2mcore_convertor.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | export CUDA_VISIBLE_DEVICES=7 5 | START_TIME=$SECONDS 6 | MASTER_ADDR=localhost 7 | MASTER_PORT=$(shuf -n 1 -i 10000-65535) 8 | 9 | MODEL_SIZE=$1 10 | HG_CKPT_PATH=$2 11 | MEGATRON_PATH=$3 12 | 
export PYTHONPATH=$PYTHONPATH:${MEGATRON_PATH}:${MEGATRON_PATH}/Megatron-LM-240126 13 | SOURCE_CKPT_PATH=$4 14 | TARGET_CKPT_PATH=$5 15 | TP=$6 16 | PP=$7 17 | EXTRA_VOCAB_SIZE=$8 18 | NUM_EXPERTS=$9 19 | EXPERTS_TOPK=${10} 20 | EP=${11} 21 | NUM_EXPERT_SPLITS=${12} 22 | mg2hf=${13} 23 | 24 | if [ $MODEL_SIZE = 7B ]; then 25 | 26 | NUM_LAYERS=32 27 | HIDDEN_SIZE=4096 28 | NUM_ATTN_HEADS=32 29 | INTERMEDIATE_SIZE=11008 30 | NUM_KV_HEADS=32 31 | VOCAB_SIZE=32000 32 | ROPE_THETA=10000 33 | 34 | gqa_options="" 35 | 36 | elif [ $MODEL_SIZE = 13B ]; then 37 | 38 | NUM_LAYERS=40 39 | HIDDEN_SIZE=5120 40 | NUM_ATTN_HEADS=40 41 | INTERMEDIATE_SIZE=13824 42 | NUM_KV_HEADS=40 43 | VOCAB_SIZE=32000 44 | ROPE_THETA=10000 45 | gqa_options="" 46 | 47 | elif [ $MODEL_SIZE = 70B ]; then 48 | 49 | NUM_LAYERS=80 50 | HIDDEN_SIZE=8192 51 | NUM_ATTN_HEADS=64 52 | INTERMEDIATE_SIZE=28672 53 | NUM_KV_HEADS=8 54 | VOCAB_SIZE=32000 55 | ROPE_THETA=10000 56 | gqa_options=" \ 57 | --group-query-attention \ 58 | --num-query-groups 8" 59 | 60 | elif [ $MODEL_SIZE = 8B ]; then 61 | 62 | NUM_LAYERS=32 63 | HIDDEN_SIZE=4096 64 | NUM_ATTN_HEADS=32 65 | INTERMEDIATE_SIZE=14336 66 | NUM_KV_HEADS=8 67 | VOCAB_SIZE=128256 68 | ROPE_THETA=500000 69 | 70 | gqa_options=" \ 71 | --group-query-attention \ 72 | --num-query-groups 8" 73 | 74 | fi 75 | 76 | if [ $NUM_EXPERT_SPLITS -gt 0 ]; then 77 | 78 | INTERMEDIATE_SIZE=$(( ${INTERMEDIATE_SIZE} / ${NUM_EXPERT_SPLITS})) 79 | 80 | fi 81 | 82 | if [ $NUM_EXPERTS -gt 0 ]; then 83 | expert_options=" 84 | --moe-router-topk ${EXPERTS_TOPK} \ 85 | --num-experts ${NUM_EXPERTS} \ 86 | --expert-model-parallel-size 1 \ 87 | --target_expert_model_parallel_size ${EP} \ 88 | --num_expert_split_size ${NUM_EXPERT_SPLITS} \ 89 | " 90 | fi 91 | 92 | if [ $mg2hf = true ]; then 93 | convert_options=" 94 | --convert_checkpoint_from_megatron_to_transformers 95 | " 96 | elif [ $mg2hf = false ]; then 97 | convert_options="" 98 | fi 99 | 100 | template_json="./hf_llama_moe/config_TEMPLATE.json" 101 | config_json="./hf_llama_moe/config.json" 102 | sed "s/CONFIG_HIDDEN_SIZE/${HIDDEN_SIZE}/" ${template_json} \ 103 | | sed "s/CONFIG_INTERMEDIATE_SIZE/${INTERMEDIATE_SIZE}/" \ 104 | | sed "s/CONFIG_ATTENTION_HEADS/${NUM_ATTN_HEADS}/" \ 105 | | sed "s/CONFIG_HIDDEN_LAYERS/${NUM_LAYERS}/" \ 106 | | sed "s/CONFIG_NUM_EXPERTS/${NUM_EXPERTS}/" \ 107 | | sed "s/CONFIG_EXPERTS_topk/${EXPERTS_TOPK}/" \ 108 | | sed "s/CONFIG_KV_HEADS/${NUM_KV_HEADS}/" \ 109 | | sed "s/CONFIG_VOCAB_SIZE/${VOCAB_SIZE}/" \ 110 | | sed "s/CONFIG_ROPE_THETA/${ROPE_THETA}/" \ 111 | > ${config_json} 112 | 113 | DISTRIBUTED_ARGS="--nproc_per_node 1 --nnodes 1 --node_rank 0 --master_addr $MASTER_ADDR --master_port $MASTER_PORT" 114 | 115 | if [ $MODEL_SIZE != 70B ]; then 116 | 117 | torchrun ${DISTRIBUTED_ARGS} hf2mcore.py \ 118 | --load_path ${SOURCE_CKPT_PATH} \ 119 | --save_path ${TARGET_CKPT_PATH} \ 120 | --load ${HG_CKPT_PATH} \ 121 | --huggingface_model_path ${HG_CKPT_PATH} \ 122 | --megatron-path ${MEGATRON_PATH} \ 123 | --target_tensor_model_parallel_size ${TP} \ 124 | --target_pipeline_model_parallel_size ${PP} \ 125 | --micro-batch-size 1 \ 126 | --fp16 \ 127 | --swiglu \ 128 | --num-layers 1 \ 129 | --hidden-size 1 \ 130 | --ffn-hidden-size 1 \ 131 | --norm-epsilon 1e-5 \ 132 | --num-attention-heads 1 \ 133 | --max-position-embeddings 1 \ 134 | --seq-length 1 \ 135 | --no-async-tensor-model-parallel-allreduce \ 136 | --patch-tokenizer-type LLamaTokenizer \ 137 | --extra-vocab-size ${EXTRA_VOCAB_SIZE} \ 138 | 
--untie-embeddings-and-output-weights \ 139 | --no-rope-fusion \ 140 | --use-rotary-position-embeddings \ 141 | --transformer-impl transformer_engine \ 142 | --disable-bias-linear \ 143 | --normalization RMSNorm \ 144 | --use-mcore-models \ 145 | --attention-dropout 0.0 \ 146 | --hidden-dropout 0.0 \ 147 | ${expert_options} \ 148 | ${convert_options} \ 149 | ${gqa_options} 150 | 151 | else 152 | python hf2mcore_70b.py \ 153 | --load ${HG_CKPT_PATH} \ 154 | --megatron-path ${MEGATRON_PATH} \ 155 | --load_path ${SOURCE_CKPT_PATH} \ 156 | --save_path ${TARGET_CKPT_PATH} \ 157 | --target_params_dtype bf16 \ 158 | --target_tensor_model_parallel_size ${TP} \ 159 | --target_pipeline_model_parallel_size ${PP} \ 160 | ${convert_options} \ 161 | 162 | fi 163 | 164 | ELAPSED_TIME=$(($SECONDS - $START_TIME)) 165 | echo "$(($ELAPSED_TIME/60)) min $(($ELAPSED_TIME%60)) sec" -------------------------------------------------------------------------------- /toolkits/model_checkpoints_convertor/llama/hf2megatron_convertor.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | START_TIME=$SECONDS 5 | 6 | MEGATRON_PATH=$1 7 | SOURCE_CKPT_PATH=$2 8 | TARGET_CKPT_PATH=$3 9 | TP=$4 10 | PP=$5 11 | MN=$6 #llama-7b, llama-13b, llama-30b, llama-65b, llama2-7b, llama2-13b, llama2-70b 12 | EXTRA_VOCAB_SIZE=$7 13 | mg2hf=$8 14 | 15 | if [ $mg2hf = true ]; then 16 | do_options=" 17 | --convert_checkpoint_from_megatron_to_transformers 18 | " 19 | elif [ $mg2hf = false ]; then 20 | do_options="" 21 | fi 22 | 23 | export PYTHONPATH=$PYTHONPATH:${MEGATRON_PATH}:${MEGATRON_PATH}/Megatron-LM-231007 24 | 25 | python hf2megatron.py \ 26 | --load_path ${SOURCE_CKPT_PATH} \ 27 | --save_path ${TARGET_CKPT_PATH} \ 28 | --target_params_dtype fp16 \ 29 | --megatron-path ${MEGATRON_PATH} \ 30 | --target_tensor_model_parallel_size ${TP} \ 31 | --target_pipeline_model_parallel_size ${PP} \ 32 | --model_name ${MN} \ 33 | --extra_num_vocabs ${EXTRA_VOCAB_SIZE} \ 34 | ${do_options} 35 | 36 | ELAPSED_TIME=$(($SECONDS - $START_TIME)) 37 | echo "$(($ELAPSED_TIME/60)) min $(($ELAPSED_TIME%60)) sec" 38 | -------------------------------------------------------------------------------- /toolkits/model_checkpoints_convertor/llama/hf_llama_moe/config_TEMPLATE.json: -------------------------------------------------------------------------------- 1 | { 2 | "architectures": [ 3 | "LlamaMoeForCausalLM" 4 | ], 5 | "auto_map": { 6 | "AutoModelForCausalLM": "llama_moe.LlamaMoeForCausalLM", 7 | "AutoConfig": "llama_moe.LlamaMoeConfig" 8 | }, 9 | "bos_token_id": 1, 10 | "eos_token_id": 2, 11 | "pad_token_id": 0, 12 | "hidden_act": "silu", 13 | "hidden_size": CONFIG_HIDDEN_SIZE, 14 | "initializer_range": 0.02, 15 | "intermediate_size": CONFIG_INTERMEDIATE_SIZE, 16 | "max_position_embeddings": 2048, 17 | "model_type": "llama", 18 | "num_attention_heads": CONFIG_ATTENTION_HEADS, 19 | "num_hidden_layers": CONFIG_HIDDEN_LAYERS, 20 | "num_key_value_heads": CONFIG_KV_HEADS, 21 | "pretraining_tp": 2, 22 | "rms_norm_eps": 1e-05, 23 | "rope_theta": CONFIG_ROPE_THETA, 24 | "rope_scaling": null, 25 | "tie_word_embeddings": false, 26 | "torch_dtype": "float16", 27 | "transformers_version": "4.36.0.dev0", 28 | "use_cache": true, 29 | "vocab_size": CONFIG_VOCAB_SIZE, 30 | "num_local_experts": CONFIG_NUM_EXPERTS, 31 | "num_experts_per_tok": CONFIG_EXPERTS_topk 32 | } -------------------------------------------------------------------------------- 
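A minimal loading sketch for the artifacts above, assuming the converted HuggingFace output directory contains the generated config.json together with the llama_moe.py module defined next (the checkpoint path is hypothetical):

    from transformers import AutoConfig, AutoModelForCausalLM

    ckpt_dir = '/path/to/converted-llama-moe-hf'  # hypothetical converter output
    config = AutoConfig.from_pretrained(ckpt_dir, trust_remote_code=True)           # auto_map -> LlamaMoeConfig
    model = AutoModelForCausalLM.from_pretrained(ckpt_dir, trust_remote_code=True)  # auto_map -> LlamaMoeForCausalLM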
/toolkits/model_checkpoints_convertor/llama/hf_llama_moe/llama_moe.py: -------------------------------------------------------------------------------- 1 | from transformers.models.llama.modeling_llama import LlamaConfig 2 | from transformers.models.llama.modeling_llama import LlamaForCausalLM 3 | from transformers.models.mixtral.modeling_mixtral import MixtralSparseMoeBlock 4 | 5 | 6 | def get_hidden_output(module, args, output): 7 | return output[0] 8 | 9 | 10 | class LlamaMoeForCausalLM(LlamaForCausalLM): 11 | def __init__(self, config): 12 | super().__init__(config) 13 | for layer in self.model.layers: 14 | mlp = MixtralSparseMoeBlock(config) 15 | mlp.register_forward_hook(get_hidden_output) 16 | layer.mlp = mlp 17 | 18 | 19 | class LlamaMoeConfig(LlamaConfig): 20 | def __init__(self, *args, **kwargs): 21 | super().__init__(*args, **kwargs) 22 | # create params used in MixtralSparseMoeBlock 23 | self.hidden_dim = self.hidden_size 24 | self.ffn_dim = self.intermediate_size 25 | self.num_local_experts = kwargs.get('num_local_experts', 0) 26 | self.top_k = kwargs.get('num_experts_per_tok', 2) 27 | 28 | -------------------------------------------------------------------------------- /toolkits/model_checkpoints_convertor/mistral/hf2mcore_convertor.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | START_TIME=$SECONDS 5 | export CUDA_VISIBLE_DEVICES=0 6 | MASTER_ADDR=localhost 7 | MASTER_PORT=$(shuf -n 1 -i 10000-65535) 8 | NNODES=1 9 | NODE_RANK=0 10 | GPUS_PER_NODE=1 11 | DISTRIBUTED_ARGS="--nproc_per_node $GPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT" 12 | 13 | MODEL_SIZE=$1 14 | HG_CKPT_PATH=$2 15 | MEGATRON_PATH=$3 16 | export PYTHONPATH=$PYTHONPATH:${MEGATRON_PATH}:${MEGATRON_PATH}/Megatron-LM-240126 17 | SOURCE_CKPT_PATH=$4 18 | TARGET_CKPT_PATH=$5 19 | TP=$6 20 | PP=$7 21 | EXTRA_VOCAB_SIZE=$8 22 | NUM_EXPERTS=$9 23 | EXPERTS_TOPK=${10} 24 | EP=${11} 25 | mg2hf=${12} 26 | 27 | if [ $MODEL_SIZE = 7B ]; then 28 | 29 | NUM_LAYERS=32 30 | HIDDEN_SIZE=4096 31 | NUM_ATTN_HEADS=32 32 | INTERMEDIATE_SIZE=14336 33 | NUM_KEY_VALUE_HEADS=8 34 | 35 | gqa_options=" \ 36 | --group-query-attention \ 37 | --num-query-groups 8" 38 | 39 | elif [ $MODEL_SIZE = 8x7B ]; then 40 | 41 | NUM_LAYERS=32 42 | HIDDEN_SIZE=4096 43 | NUM_ATTN_HEADS=32 44 | INTERMEDIATE_SIZE=14336 45 | NUM_KEY_VALUE_HEADS=8 46 | WS=${13} 47 | gqa_options=" \ 48 | --group-query-attention \ 49 | --num-query-groups 8" 50 | 51 | fi 52 | 53 | 54 | if [ $NUM_EXPERTS -gt 0 ]; then 55 | expert_options=" 56 | --moe-router-topk ${EXPERTS_TOPK} \ 57 | --num-experts ${NUM_EXPERTS} \ 58 | --target_expert_model_parallel_size ${EP} 59 | " 60 | fi 61 | 62 | if [ $mg2hf = true ]; then 63 | convert_options=" 64 | --convert_checkpoint_from_megatron_to_transformers 65 | " 66 | elif [ $mg2hf = false ]; then 67 | convert_options="" 68 | fi 69 | 70 | template_json="./hf_mistral_moe/config_TEMPLATE.json" 71 | config_json="./hf_mistral_moe/config.json" 72 | sed "s/CONFIG_HIDDEN_SIZE/${HIDDEN_SIZE}/" ${template_json} \ 73 | | sed "s/CONFIG_INTERMEDIATE_SIZE/${INTERMEDIATE_SIZE}/" \ 74 | | sed "s/CONFIG_ATTENTION_HEADS/${NUM_ATTN_HEADS}/" \ 75 | | sed "s/CONFIG_HIDDEN_LAYERS/${NUM_LAYERS}/" \ 76 | | sed "s/CONFIG_NUM_EXPERTS/${NUM_EXPERTS}/" \ 77 | | sed "s/CONFIG_EXPERTS_topk/${EXPERTS_TOPK}/" \ 78 | | sed "s/CONFIG_KV_HEADS/${NUM_KEY_VALUE_HEADS}/" \ 79 | > ${config_json} 80 | 81 | if [ $MODEL_SIZE = 7B ]; then 82 | 83 | 
torchrun ${DISTRIBUTED_ARGS} hf2mcore.py \ 84 | --load_path ${SOURCE_CKPT_PATH} \ 85 | --save_path ${TARGET_CKPT_PATH} \ 86 | --load ${HG_CKPT_PATH} \ 87 | --huggingface_model_path ${HG_CKPT_PATH} \ 88 | --megatron-path ${MEGATRON_PATH} \ 89 | --target_tensor_model_parallel_size ${TP} \ 90 | --target_pipeline_model_parallel_size ${PP} \ 91 | --micro-batch-size 1 \ 92 | --fp16 \ 93 | --swiglu \ 94 | --num-layers 1 \ 95 | --hidden-size 1 \ 96 | --ffn-hidden-size 1 \ 97 | --norm-epsilon 1e-5 \ 98 | --num-attention-heads 1 \ 99 | --max-position-embeddings 1 \ 100 | --seq-length 1 \ 101 | --no-async-tensor-model-parallel-allreduce \ 102 | --patch-tokenizer-type LLamaTokenizer \ 103 | --extra-vocab-size ${EXTRA_VOCAB_SIZE} \ 104 | --untie-embeddings-and-output-weights \ 105 | --no-rope-fusion \ 106 | --use-rotary-position-embeddings \ 107 | --transformer-impl transformer_engine \ 108 | --disable-bias-linear \ 109 | --normalization RMSNorm \ 110 | --use-mcore-models \ 111 | --attention-dropout 0.0 \ 112 | --hidden-dropout 0.0 \ 113 | ${expert_options} \ 114 | ${convert_options} \ 115 | ${gqa_options} \ 116 | 117 | elif [ $MODEL_SIZE = 8x7B ]; then 118 | 119 | python hf2mcore_mixtral.py \ 120 | --megatron-path ${MEGATRON_PATH} \ 121 | --load_path ${SOURCE_CKPT_PATH} \ 122 | --save_path ${TARGET_CKPT_PATH} \ 123 | --target_params_dtype bf16 \ 124 | --target_tensor_model_parallel_size ${TP} \ 125 | --target_pipeline_model_parallel_size ${PP} \ 126 | --target_expert_model_parallel_size ${EP} \ 127 | --world_size ${WS} \ 128 | ${convert_options} \ 129 | 130 | fi 131 | 132 | ELAPSED_TIME=$(($SECONDS - $START_TIME)) 133 | echo "$(($ELAPSED_TIME/60)) min $(($ELAPSED_TIME%60)) sec" -------------------------------------------------------------------------------- /toolkits/model_checkpoints_convertor/mistral/hf2megatron_convertor.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | START_TIME=$SECONDS 5 | 6 | MEGATRON_PATH=$1 7 | SOURCE_CKPT_PATH=$2 8 | TARGET_CKPT_PATH=$3 9 | TP=$4 10 | PP=$5 11 | MN=$6 #mistral-7b 12 | EXTRA_VOCAB_SIZE=$7 13 | mg2hf=$8 14 | 15 | if [ $mg2hf = true ]; then 16 | do_options=" 17 | --convert_checkpoint_from_megatron_to_transformers 18 | " 19 | elif [ $mg2hf = false ]; then 20 | do_options="" 21 | fi 22 | 23 | export PYTHONPATH=$PYTHONPATH:${MEGATRON_PATH}:${MEGATRON_PATH}/Megatron-LM-231007 24 | 25 | python hf2megatron.py \ 26 | --load_path ${SOURCE_CKPT_PATH} \ 27 | --save_path ${TARGET_CKPT_PATH} \ 28 | --target_params_dtype fp16 \ 29 | --megatron-path ${MEGATRON_PATH} \ 30 | --target_tensor_model_parallel_size ${TP} \ 31 | --target_pipeline_model_parallel_size ${PP} \ 32 | --model_name ${MN} \ 33 | --extra_num_vocabs ${EXTRA_VOCAB_SIZE} \ 34 | ${do_options} 35 | 36 | ELAPSED_TIME=$(($SECONDS - $START_TIME)) 37 | echo "$(($ELAPSED_TIME/60)) min $(($ELAPSED_TIME%60)) sec" 38 | -------------------------------------------------------------------------------- /toolkits/model_checkpoints_convertor/mistral/hf_mistral_moe/config_TEMPLATE.json: -------------------------------------------------------------------------------- 1 | { 2 | "architectures": [ 3 | "MixtralForCausalLM" 4 | ], 5 | "attention_dropout": 0.0, 6 | "bos_token_id": 1, 7 | "eos_token_id": 2, 8 | "hidden_act": "silu", 9 | "hidden_size": CONFIG_HIDDEN_SIZE, 10 | "initializer_range": 0.02, 11 | "intermediate_size": CONFIG_INTERMEDIATE_SIZE, 12 | "max_position_embeddings": 32768, 13 | "model_type": "mixtral", 14 | "num_attention_heads": 
CONFIG_ATTENTION_HEADS, 15 | "num_experts_per_tok": CONFIG_EXPERTS_topk, 16 | "num_hidden_layers": CONFIG_HIDDEN_LAYERS, 17 | "num_key_value_heads": CONFIG_KV_HEADS, 18 | "num_local_experts": CONFIG_NUM_EXPERTS, 19 | "rms_norm_eps": 1e-05, 20 | "rope_theta": 1000000.0, 21 | "router_aux_loss_coef": 0.02, 22 | "sliding_window": 4096, 23 | "tie_word_embeddings": false, 24 | "torch_dtype": "float16", 25 | "transformers_version": "4.36.0.dev0", 26 | "use_cache": true, 27 | "vocab_size": 32000 28 | } -------------------------------------------------------------------------------- /toolkits/model_checkpoints_convertor/qwen/hf2mcore_qwen1.5_dense_convertor.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # bash hf2mcore_qwen1.5_dense_convertor.sh 0.5B /mnt/qwen-ckpts/Qwen1.5-0.5B /mnt/qwen-ckpts/Qwen1.5-0.5B-hf-to-mcore-tp2-pp1 2 1 false 3 | 4 | set -e 5 | export CUDA_VISIBLE_DEVICES=7 6 | START_TIME=$SECONDS 7 | MASTER_ADDR=localhost 8 | MASTER_PORT=$(shuf -n 1 -i 10000-65535) 9 | 10 | MODEL_SIZE=$1 11 | SOURCE_CKPT_PATH=$2 12 | TARGET_CKPT_PATH=$3 13 | TP=$4 14 | PP=$5 15 | mg2hf=$6 16 | HF_CKPT_PATH=$7 17 | 18 | CURRENT_DIR="$( cd "$( dirname "$0" )" && pwd )" 19 | MEGATRON_PATH=$( dirname $(dirname $( dirname ${CURRENT_DIR}))) 20 | export PYTHONPATH=$PYTHONPATH:${MEGATRON_PATH}:${MEGATRON_PATH}/Megatron-LM-240405 21 | 22 | if [ $MODEL_SIZE = 0.5B ]; then 23 | 24 | NUM_LAYERS=24 25 | HIDDEN_SIZE=1024 26 | NUM_ATTN_HEADS=16 27 | INTERMEDIATE_SIZE=2816 28 | EXTRA_VOCAB_SIZE=293 29 | 30 | gqa_options="" 31 | cpu_options="" 32 | 33 | elif [ $MODEL_SIZE = 1.8B ]; then 34 | 35 | NUM_LAYERS=24 36 | HIDDEN_SIZE=2048 37 | NUM_ATTN_HEADS=16 38 | INTERMEDIATE_SIZE=5504 39 | EXTRA_VOCAB_SIZE=293 40 | 41 | gqa_options="" 42 | cpu_options="" 43 | 44 | elif [ $MODEL_SIZE = 7B ]; then 45 | 46 | NUM_LAYERS=32 47 | HIDDEN_SIZE=4096 48 | NUM_ATTN_HEADS=32 49 | INTERMEDIATE_SIZE=11008 50 | EXTRA_VOCAB_SIZE=293 51 | 52 | gqa_options="" 53 | cpu_options="" 54 | 55 | elif [ $MODEL_SIZE = 14B ]; then 56 | 57 | NUM_LAYERS=40 58 | HIDDEN_SIZE=5120 59 | NUM_ATTN_HEADS=40 60 | INTERMEDIATE_SIZE=13696 61 | EXTRA_VOCAB_SIZE=293 62 | 63 | gqa_options="" 64 | cpu_options="" 65 | 66 | elif [ $MODEL_SIZE = 32B ]; then 67 | 68 | NUM_LAYERS=64 69 | HIDDEN_SIZE=5120 70 | NUM_ATTN_HEADS=40 71 | INTERMEDIATE_SIZE=27392 72 | EXTRA_VOCAB_SIZE=293 73 | 74 | cpu_options="" 75 | gqa_options=" \ 76 | --group-query-attention \ 77 | --num-query-groups 8" 78 | 79 | elif [ $MODEL_SIZE = 72B ]; then 80 | 81 | NUM_LAYERS=80 82 | HIDDEN_SIZE=8192 83 | NUM_ATTN_HEADS=64 84 | INTERMEDIATE_SIZE=24576 85 | EXTRA_VOCAB_SIZE=421 86 | 87 | gqa_options="" 88 | cpu_options=" \ 89 | --use-cpu-initialization" 90 | 91 | fi 92 | 93 | if [ $mg2hf = true ]; then 94 | convert_options=" \ 95 | --convert-checkpoint-from-megatron-to-transformers \ 96 | --hf-ckpt-path ${HF_CKPT_PATH}" 97 | 98 | elif [ $mg2hf = false ]; then 99 | convert_options="" 100 | fi 101 | 102 | 103 | DISTRIBUTED_ARGS="--nproc_per_node 1 --nnodes 1 --node_rank 0 --master_addr $MASTER_ADDR --master_port $MASTER_PORT" 104 | 105 | if [ $MODEL_SIZE != 32B ]; then 106 | 107 | torchrun ${DISTRIBUTED_ARGS} hf2mcore_qwen1.5_dense_mha.py \ 108 | --load ${SOURCE_CKPT_PATH} \ 109 | --save ${TARGET_CKPT_PATH} \ 110 | --target-tensor-model-parallel-size ${TP} \ 111 | --pipeline-model-parallel-size ${PP} \ 112 | --micro-batch-size 1 \ 113 | --save-interval 1 \ 114 | --fp16 \ 115 | --swiglu \ 116 | --norm-epsilon 1e-6 \ 117 | --num-layers ${NUM_LAYERS} 
\ 118 | --hidden-size ${HIDDEN_SIZE} \ 119 | --ffn-hidden-size ${INTERMEDIATE_SIZE} \ 120 | --num-attention-heads ${NUM_ATTN_HEADS} \ 121 | --max-position-embeddings 1 \ 122 | --seq-length 1 \ 123 | --no-async-tensor-model-parallel-allreduce \ 124 | --patch-tokenizer-type Qwen2Tokenizer \ 125 | --extra-vocab-size ${EXTRA_VOCAB_SIZE} \ 126 | --untie-embeddings-and-output-weights \ 127 | --no-rope-fusion \ 128 | --use-rotary-position-embeddings \ 129 | --transformer-impl transformer_engine \ 130 | --disable-bias-linear \ 131 | --normalization RMSNorm \ 132 | --add-qkv-bias \ 133 | --use-mcore-models \ 134 | --attention-dropout 0.0 \ 135 | --hidden-dropout 0.0 \ 136 | ${convert_options} \ 137 | ${gqa_options} \ 138 | ${cpu_options} 139 | 140 | else 141 | python hf2mcore_qwen1.5_dense_gqa.py \ 142 | --load ${SOURCE_CKPT_PATH} \ 143 | --save ${TARGET_CKPT_PATH} \ 144 | --target-params-dtype bf16 \ 145 | --target-tensor-model-parallel-size ${TP} \ 146 | --target-pipeline-model-parallel-size ${PP} \ 147 | ${convert_options} \ 148 | 149 | fi 150 | 151 | ELAPSED_TIME=$(($SECONDS - $START_TIME)) 152 | echo "$(($ELAPSED_TIME/60)) min $(($ELAPSED_TIME%60)) sec" -------------------------------------------------------------------------------- /toolkits/model_checkpoints_convertor/qwen/hf2mcore_qwen1.5_dense_to_moe_convertor.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # bash hf2mcore_qwen1.5_dense_to_moe_convertor.sh 1.8B /mnt/qwen-ckpts/Qwen1.5-1.8B /mnt/qwen-ckpts/Qwen1.5-MoE-A2.7B-to-mcore-tp1-pp1-ep4 1 1 4 60 4 1408 3 | 4 | set -e 5 | export CUDA_VISIBLE_DEVICES=7 6 | START_TIME=$SECONDS 7 | MASTER_ADDR=localhost 8 | MASTER_PORT=$(shuf -n 1 -i 10000-65535) 9 | 10 | MODEL_SIZE=$1 11 | SOURCE_CKPT_PATH=$2 12 | TARGET_CKPT_PATH=$3 13 | TP=$4 14 | PP=$5 15 | EP=$6 16 | NUM_EXPERTS=$7 17 | NUM_SPLITS=$8 18 | MOE_INTERMEDIATE_SIZE=$9 19 | 20 | 21 | CURRENT_DIR="$( cd "$( dirname "$0" )" && pwd )" 22 | MEGATRON_PATH=$( dirname $(dirname $( dirname ${CURRENT_DIR}))) 23 | export PYTHONPATH=$PYTHONPATH:${MEGATRON_PATH}:${MEGATRON_PATH}/Megatron-LM-240405 24 | 25 | if [ $MODEL_SIZE = 1.8B ]; then 26 | 27 | NUM_LAYERS=24 28 | HIDDEN_SIZE=2048 29 | NUM_ATTN_HEADS=16 30 | INTERMEDIATE_SIZE=5504 31 | EXTRA_VOCAB_SIZE=293 32 | SHARED_EXPERT_INTERMEDIATE_SIZE=$(( ${MOE_INTERMEDIATE_SIZE} * 4 )) 33 | 34 | gqa_options="" 35 | cpu_options=" \ 36 | --use-cpu-initialization" 37 | 38 | fi 39 | 40 | DISTRIBUTED_ARGS="--nproc_per_node 1 --nnodes 1 --node_rank 0 --master_addr $MASTER_ADDR --master_port $MASTER_PORT" 41 | 42 | if [ $MODEL_SIZE != 32B ]; then 43 | 44 | torchrun ${DISTRIBUTED_ARGS} hf2mcore_qwen1.5_dense_mha_to_moe.py \ 45 | --load ${SOURCE_CKPT_PATH} \ 46 | --save ${TARGET_CKPT_PATH} \ 47 | --target-tensor-model-parallel-size ${TP} \ 48 | --pipeline-model-parallel-size ${PP} \ 49 | --micro-batch-size 1 \ 50 | --save-interval 1 \ 51 | --fp16 \ 52 | --swiglu \ 53 | --norm-epsilon 1e-6 \ 54 | --num-layers ${NUM_LAYERS} \ 55 | --hidden-size ${HIDDEN_SIZE} \ 56 | --ffn-hidden-size ${INTERMEDIATE_SIZE} \ 57 | --moe-ffn-hidden-size ${MOE_INTERMEDIATE_SIZE} \ 58 | --shared-moe-ffn-hidden-size ${SHARED_EXPERT_INTERMEDIATE_SIZE} \ 59 | --num-attention-heads ${NUM_ATTN_HEADS} \ 60 | --max-position-embeddings 1 \ 61 | --seq-length 1 \ 62 | --no-async-tensor-model-parallel-allreduce \ 63 | --patch-tokenizer-type Qwen2Tokenizer \ 64 | --extra-vocab-size ${EXTRA_VOCAB_SIZE} \ 65 | --untie-embeddings-and-output-weights \ 66 | --no-rope-fusion \ 67 | 
--use-rotary-position-embeddings \ 68 | --transformer-impl transformer_engine \ 69 | --disable-bias-linear \ 70 | --normalization RMSNorm \ 71 | --add-qkv-bias \ 72 | --use-mcore-models \ 73 | --attention-dropout 0.0 \ 74 | --hidden-dropout 0.0 \ 75 | --enable-shared-expert \ 76 | --num-experts ${NUM_EXPERTS} \ 77 | --num-splits ${NUM_SPLITS} \ 78 | --target-expert-model-parallel-size ${EP} \ 79 | ${gqa_options} \ 80 | ${cpu_options} 81 | 82 | fi 83 | 84 | ELAPSED_TIME=$(($SECONDS - $START_TIME)) 85 | echo "$(($ELAPSED_TIME/60)) min $(($ELAPSED_TIME%60)) sec" -------------------------------------------------------------------------------- /toolkits/model_checkpoints_convertor/qwen/hf2mcore_qwen1.5_moe_convertor.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # bash hf2mcore_qwen1.5_moe_convertor.sh A2.7B /mnt/qwen-ckpts/Qwen1.5-MoE-A2.7B /mnt/qwen-ckpts/Qwen1.5-MoE-A2.7B-to-mcore-tp1-pp1-ep4 1 1 4 false 3 | # bash hf2mcore_qwen1.5_moe_convertor.sh A2.7B /mnt/qwen-ckpts/Qwen1.5-MoE-A2.7B-to-mcore-tp1-pp1-ep4 /mnt/qwen-ckpts/Qwen1.5-MoE-A2.7B-to-hf 1 1 4 true /mnt/qwen-ckpts/Qwen1.5-MoE-A2.7B 4 | 5 | set -e 6 | export CUDA_VISIBLE_DEVICES=7 7 | START_TIME=$SECONDS 8 | MASTER_ADDR=localhost 9 | MASTER_PORT=$(shuf -n 1 -i 10000-65535) 10 | 11 | MODEL_SIZE=$1 12 | SOURCE_CKPT_PATH=$2 13 | TARGET_CKPT_PATH=$3 14 | TP=$4 15 | PP=$5 16 | EP=$6 17 | mg2hf=$7 18 | HF_CKPT_PATH=$8 19 | 20 | CURRENT_DIR="$( cd "$( dirname "$0" )" && pwd )" 21 | MEGATRON_PATH=$( dirname $(dirname $( dirname ${CURRENT_DIR}))) 22 | export PYTHONPATH=$PYTHONPATH:${MEGATRON_PATH}:${MEGATRON_PATH}/Megatron-LM-240405 23 | 24 | if [ $MODEL_SIZE = A2.7B ]; then 25 | 26 | HIDDEN_SIZE=2048 27 | NUM_ATTN_HEADS=16 28 | NUM_LAYERS=24 29 | INTERMEDIATE_SIZE=5632 30 | MOE_INTERMEDIATE_SIZE=1408 31 | SHARED_EXPERT_INTERMEDIATE_SIZE=5632 32 | MAX_POSITION_EMBEDDINGS=8192 33 | EXTRA_VOCAB_SIZE=293 34 | NUM_EXPERTS=60 35 | EXPERTS_TOPK=4 36 | ROPE_THETA=1000000 37 | 38 | gqa_options="" 39 | cpu_options=" \ 40 | --use-cpu-initialization" 41 | 42 | fi 43 | 44 | 45 | if [ $NUM_EXPERTS -gt 0 ]; then 46 | expert_options=" \ 47 | --moe-router-topk ${EXPERTS_TOPK} \ 48 | --num-experts ${NUM_EXPERTS} \ 49 | --target-expert-model-parallel-size ${EP}" 50 | fi 51 | 52 | if [ $mg2hf = true ]; then 53 | convert_options=" \ 54 | --convert-checkpoint-from-megatron-to-transformers \ 55 | --hf-ckpt-path ${HF_CKPT_PATH}" 56 | 57 | elif [ $mg2hf = false ]; then 58 | convert_options="" 59 | fi 60 | 61 | 62 | DISTRIBUTED_ARGS="--nproc_per_node 1 --nnodes 1 --node_rank 0 --master_addr $MASTER_ADDR --master_port $MASTER_PORT" 63 | 64 | torchrun ${DISTRIBUTED_ARGS} hf2mcore_qwen1.5_moe.py \ 65 | --load ${SOURCE_CKPT_PATH} \ 66 | --save ${TARGET_CKPT_PATH} \ 67 | --target-tensor-model-parallel-size ${TP} \ 68 | --pipeline-model-parallel-size ${PP} \ 69 | --micro-batch-size 1 \ 70 | --save-interval 1 \ 71 | --bf16 \ 72 | --swiglu \ 73 | --norm-epsilon 1e-6 \ 74 | --num-layers ${NUM_LAYERS} \ 75 | --hidden-size ${HIDDEN_SIZE} \ 76 | --moe-ffn-hidden-size ${MOE_INTERMEDIATE_SIZE} \ 77 | --shared-moe-ffn-hidden-size ${SHARED_EXPERT_INTERMEDIATE_SIZE} \ 78 | --ffn-hidden-size ${INTERMEDIATE_SIZE} \ 79 | --num-attention-heads ${NUM_ATTN_HEADS} \ 80 | --max-position-embeddings ${MAX_POSITION_EMBEDDINGS} \ 81 | --seq-length 1 \ 82 | --no-async-tensor-model-parallel-allreduce \ 83 | --patch-tokenizer-type Qwen2Tokenizer \ 84 | --extra-vocab-size ${EXTRA_VOCAB_SIZE} \ 85 | --untie-embeddings-and-output-weights \ 86 
| --no-rope-fusion \ 87 | --use-rotary-position-embeddings \ 88 | --transformer-impl transformer_engine \ 89 | --disable-bias-linear \ 90 | --normalization RMSNorm \ 91 | --add-qkv-bias \ 92 | --use-mcore-models \ 93 | --attention-dropout 0.0 \ 94 | --hidden-dropout 0.0 \ 95 | --enable-shared-expert \ 96 | --rotary-base ${ROPE_THETA} \ 97 | ${expert_options} \ 98 | ${convert_options} \ 99 | ${gqa_options} \ 100 | ${cpu_options} 101 | 102 | 103 | ELAPSED_TIME=$(($SECONDS - $START_TIME)) 104 | echo "$(($ELAPSED_TIME/60)) min $(($ELAPSED_TIME%60)) sec" -------------------------------------------------------------------------------- /toolkits/model_checkpoints_convertor/qwen/hf2megablocks_qwen1.5_convertor.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # hf2megablocks: tp1_pp1_ep8_exp8_top2 4 | 5 | # sh hf2megablocks_convertor_1.5.sh 0.5B /mnt/qwen-ckpts/Qwen1.5-0.5B ../../../ /mnt/qwen-ckpts/Qwen1.5-0.5B /mnt/qwen-ckpts/Qwen1.5-0.5B_megablocks_tp1_pp1_ep8_exp8 1 1 293 8 8 2 false 6 | 7 | set -e 8 | export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 9 | START_TIME=$SECONDS 10 | MASTER_ADDR=localhost 11 | MASTER_PORT=$(shuf -n 1 -i 10000-65535) 12 | NNODES=1 13 | NODE_RANK=0 14 | GPUS_PER_NODE=8 15 | 16 | MODEL_SIZE=$1 17 | HG_CKPT_PATH=$2 18 | MEGATRON_PATH=$3 19 | export PYTHONPATH=$PYTHONPATH:${MEGATRON_PATH}:${MEGATRON_PATH}/Megatron-LM-MegaBlocks 20 | SOURCE_CKPT_PATH=$4 21 | TARGET_CKPT_PATH=$5 22 | TP=$6 23 | PP=$7 24 | EXTRA_VOCAB_SIZE=$8 25 | NUM_EXPERTS=$9 26 | EXPERTS_TOPK=${10} 27 | EP=${11} 28 | mg2hf=${12} 29 | 30 | if [ $MODEL_SIZE = 0.5B ]; then 31 | 32 | NUM_LAYERS=24 33 | HIDDEN_SIZE=1024 34 | NUM_ATTN_HEADS=16 35 | INTERMEDIATE_SIZE=2816 36 | 37 | elif [ $MODEL_SIZE = 1.8B ]; then 38 | 39 | NUM_LAYERS=24 40 | HIDDEN_SIZE=2048 41 | NUM_ATTN_HEADS=16 42 | INTERMEDIATE_SIZE=5504 43 | 44 | elif [ $MODEL_SIZE = 7B ]; then 45 | 46 | NUM_LAYERS=32 47 | HIDDEN_SIZE=4096 48 | NUM_ATTN_HEADS=32 49 | INTERMEDIATE_SIZE=11008 50 | 51 | elif [ $MODEL_SIZE = 14B ]; then 52 | 53 | NUM_LAYERS=40 54 | HIDDEN_SIZE=5120 55 | NUM_ATTN_HEADS=40 56 | INTERMEDIATE_SIZE=13696 57 | 58 | fi 59 | 60 | 61 | if [ $NUM_EXPERTS -gt 0 ]; then 62 | expert_options=" 63 | --moe-top-k ${EXPERTS_TOPK} \ 64 | --moe-num-experts ${NUM_EXPERTS} \ 65 | --moe-expert-model-parallelism \ 66 | --target_expert_model_parallel_size ${EP} 67 | " 68 | fi 69 | 70 | if [ $mg2hf = true ]; then 71 | convert_options=" 72 | --convert_checkpoint_from_megatron_to_transformers 73 | " 74 | elif [ $mg2hf = false ]; then 75 | convert_options="" 76 | fi 77 | 78 | template_json="./hf_qwen1.5_moe/config_TEMPLATE.json" 79 | config_json="./hf_qwen1.5_moe/config.json" 80 | sed "s/CONFIG_HIDDEN_SIZE/${HIDDEN_SIZE}/" ${template_json} \ 81 | | sed "s/CONFIG_INTERMEDIATE_SIZE/${INTERMEDIATE_SIZE}/" \ 82 | | sed "s/CONFIG_ATTENTION_HEADS/${NUM_ATTN_HEADS}/" \ 83 | | sed "s/CONFIG_HIDDEN_LAYERS/${NUM_LAYERS}/" \ 84 | | sed "s/CONFIG_NUM_EXPERTS/${NUM_EXPERTS}/" \ 85 | | sed "s/CONFIG_EXPERTS_topk/${EXPERTS_TOPK}/" \ 86 | | sed "s/CONFIG_KV_HEADS/${NUM_ATTN_HEADS}/" \ 87 | > ${config_json} 88 | 89 | DISTRIBUTED_ARGS="--nproc_per_node $GPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT" 90 | torchrun ${DISTRIBUTED_ARGS} hf2megablocks_qwen1.5.py \ 91 | --load_path ${SOURCE_CKPT_PATH} \ 92 | --save_path ${TARGET_CKPT_PATH} \ 93 | --load ${HG_CKPT_PATH} \ 94 | --huggingface_model_path ${HG_CKPT_PATH} \ 95 | --megatron-path ${MEGATRON_PATH} \ 96 
| --target_tensor_model_parallel_size ${TP} \ 97 | --target_pipeline_model_parallel_size ${PP} \ 98 | --micro-batch-size 1 \ 99 | --fp16 \ 100 | --swiglu \ 101 | --num-layers 1 \ 102 | --hidden-size 1 \ 103 | --ffn-hidden-size 1 \ 104 | --norm-epsilon 1e-6 \ 105 | --num-attention-heads 1 \ 106 | --max-position-embeddings 1 \ 107 | --seq-length 1 \ 108 | --no-async-tensor-model-parallel-allreduce \ 109 | --patch-tokenizer-type Qwen2Tokenizer \ 110 | --extra-vocab-size ${EXTRA_VOCAB_SIZE} \ 111 | --untie-embeddings-and-output-weights \ 112 | --use-llama2-rotary-position-embeddings \ 113 | --disable-bias-linear \ 114 | --normalization RMSNorm \ 115 | --add-qkv-bias \ 116 | --attention-dropout 0.0 \ 117 | --hidden-dropout 0.0 \ 118 | ${expert_options} \ 119 | ${convert_options} 120 | 121 | ELAPSED_TIME=$(($SECONDS - $START_TIME)) 122 | echo "$(($ELAPSED_TIME/60)) min $(($ELAPSED_TIME%60)) sec" -------------------------------------------------------------------------------- /toolkits/model_checkpoints_convertor/qwen/hf2megatron_convertor.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # bash model_convertor.sh ../../../../Megatron-LM/ ../../../../qwen-14b-hf-to-mg-tp2-pp1/release/ ../../../../qwen-14b-mg2hf21 2 1 qwen-14b 0 true 3 | # bash model_convertor.sh ../../../Megatron-LM-231007/ ../../../../qianwen/models--Qwen--Qwen1.5-7B-Chat/ ../../../../qianwen/models--Qwen--Qwen1.5-7B-Chat-hf2mg41/ 4 1 qwen1.5 0 false 4 | # bash model_convertor.sh ../../../Megatron-LM-231007/ ../../../../qianwen/models--Qwen--Qwen1.5-7B-Chat-hf2mg41/release/ ../../../../qianwen/models--Qwen--Qwen1.5-7B-Chat-mg2hf41/ 4 1 qwen1.5 0 true 5 | set -e 6 | START_TIME=$SECONDS 7 | 8 | MEGATRON_PATH=$1 9 | SOURCE_CKPT_PATH=$2 10 | TARGET_CKPT_PATH=$3 11 | TP=$4 12 | PP=$5 13 | MN=$6 #qwen-7b,qwen-14b,qwen-72b;qwen1.5-0.5b,qwen1.5-1.8b,qwen1.5-4b,qwen1.5-7b,qwen1.5-14b,qwen1.5-72b 14 | EXTRA_VOCAB_SIZE=$7 # 0 for all models 15 | mg2hf=$8 16 | 17 | if [ $mg2hf = true ]; then 18 | do_options=" 19 | --convert_checkpoint_from_megatron_to_transformers 20 | " 21 | elif [ $mg2hf = false ]; then 22 | do_options="" 23 | fi 24 | 25 | export PYTHONPATH=$PYTHONPATH:${MEGATRON_PATH}:${MEGATRON_PATH}/Megatron-LM-231007 26 | 27 | if [[ "$MN" == *"qwen1.5"* ]]; then 28 | 29 | python hf2megatron_qwen1.5.py \ 30 | --load_path ${SOURCE_CKPT_PATH} \ 31 | --save_path ${TARGET_CKPT_PATH} \ 32 | --target_params_dtype bf16 \ 33 | --megatron-path ${MEGATRON_PATH} \ 34 | --target_tensor_model_parallel_size ${TP} \ 35 | --target_pipeline_model_parallel_size ${PP} \ 36 | --model_name ${MN} \ 37 | --extra_num_vocabs ${EXTRA_VOCAB_SIZE} \ 38 | ${do_options} 39 | 40 | else 41 | 42 | python hf2megatron_qwen1.0.py \ 43 | --load_path ${SOURCE_CKPT_PATH} \ 44 | --save_path ${TARGET_CKPT_PATH} \ 45 | --target_params_dtype fp16 \ 46 | --megatron-path ${MEGATRON_PATH} \ 47 | --target_tensor_model_parallel_size ${TP} \ 48 | --target_pipeline_model_parallel_size ${PP} \ 49 | --model_name ${MN} \ 50 | --extra_num_vocabs ${EXTRA_VOCAB_SIZE} \ 51 | ${do_options} 52 | 53 | fi 54 | 55 | ELAPSED_TIME=$(($SECONDS - $START_TIME)) 56 | echo "$(($ELAPSED_TIME/60)) min $(($ELAPSED_TIME%60)) sec" 57 | -------------------------------------------------------------------------------- /toolkits/model_checkpoints_convertor/starcoder/model_convertor.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # megatron to transformers: You need to copy the tokenizer files into the 
save_path 3 | # bash model_convertor.sh ../../Megatron-LM/ ../../starcoder-mg21/release ../../starcoder-mg2hf 2 1 true 4 | # transformers to megatron 5 | # bash model_convertor.sh ../../Megatron-LM/ ../../starcoder-16b/ ../../starcoder-mg21 2 1 false 6 | set -e 7 | START_TIME=$SECONDS 8 | 9 | MEGATRON_PATH=$1 10 | SOURCE_CKPT_PATH=$2 11 | TARGET_CKPT_PATH=$3 12 | TP=$4 13 | PP=$5 14 | mg2hf=$6 15 | 16 | if [ $mg2hf = true ]; then 17 | do_options=" 18 | --convert_checkpoint_from_megatron_to_transformers 19 | " 20 | elif [ $mg2hf = false ]; then 21 | do_options="" 22 | fi 23 | 24 | export PYTHONPATH=${MEGATRON_PATH}:$PYTHONPATH 25 | 26 | python checkpoint_reshaping_and_interoperability.py \ 27 | --load_path ${SOURCE_CKPT_PATH} \ 28 | --save_path ${TARGET_CKPT_PATH} \ 29 | --target_params_dtype fp16 \ 30 | --megatron-path ${MEGATRON_PATH} \ 31 | --target_tensor_model_parallel_size ${TP} \ 32 | --target_pipeline_model_parallel_size ${PP} \ 33 | ${do_options} 34 | 35 | ELAPSED_TIME=$(($SECONDS - $START_TIME)) 36 | echo "$(($ELAPSED_TIME/60)) min $(($ELAPSED_TIME%60)) sec" 37 | -------------------------------------------------------------------------------- /toolkits/model_checkpoints_convertor/yi/model_convertor.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | START_TIME=$SECONDS 4 | 5 | MEGATRON_PATH=$1 6 | SOURCE_CKPT_PATH=$2 7 | TARGET_CKPT_PATH=$3 8 | TP=$4 9 | PP=$5 10 | MN=$6 #yi-6b 11 | EXTRA_VOCAB_SIZE=$7 12 | mg2hf=$8 13 | 14 | if [ $mg2hf = true ]; then 15 | do_options=" 16 | --convert_checkpoint_from_megatron_to_transformers 17 | " 18 | elif [ $mg2hf = false ]; then 19 | do_options="" 20 | fi 21 | 22 | export PYTHONPATH=${MEGATRON_PATH}:$PYTHONPATH 23 | 24 | python checkpoint_reshaping_and_interoperability.py \ 25 | --load_path ${SOURCE_CKPT_PATH} \ 26 | --save_path ${TARGET_CKPT_PATH} \ 27 | --target_params_dtype fp16 \ 28 | --megatron-path ${MEGATRON_PATH} \ 29 | --target_tensor_model_parallel_size ${TP} \ 30 | --target_pipeline_model_parallel_size ${PP} \ 31 | --model_name ${MN} \ 32 | --extra_num_vocabs ${EXTRA_VOCAB_SIZE} \ 33 | ${do_options} 34 | 35 | ELAPSED_TIME=$(($SECONDS - $START_TIME)) 36 | echo "$(($ELAPSED_TIME/60)) min $(($ELAPSED_TIME%60)) sec" 37 | -------------------------------------------------------------------------------- /toolkits/pretrain_data_preprocessing/clean_raw_text.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import codecs 3 | import json 4 | import multiprocessing 5 | import os.path 6 | import re 7 | from glob import glob 8 | 9 | from tqdm import tqdm 10 | 11 | 12 | def clean_text(raw): 13 | httpcom = re.compile( 14 | r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@' 15 | r'.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+') # 匹配模式 16 | raw = httpcom.sub('', raw) 17 | 18 | space = re.compile(r' +') 19 | raw = space.sub(' ', raw) 20 | 21 | fil = re.compile( 22 | u'[^0-9a-zA-Z\u4e00-\u9fa5., ,\\-。%' 23 | u'《*》/•、&&(—)(+):?!!“”·]+', re.UNICODE) 24 | raw = fil.sub('', raw) 25 | return raw.strip() 26 | 27 | 28 | def run_preprocess(input_fp, output_fp): 29 | with codecs.open(output_fp, 'w', encoding='utf8') as json_file: 30 | with open(input_fp) as f: 31 | try: 32 | file_json = json.load(f) 33 | except ValueError: 34 | file_json = {} 35 | 36 | if 'output' in file_json[0].keys(): 37 | temp = 'output' 38 | elif 'content' in file_json[0].keys(): 39 | temp = 'content' 40 | for obj in file_json: 41 | text = obj[temp] 42 | text = clean_text(text) 43 | 
di = {'text': text} 44 | dumped_di = json.dumps(di, ensure_ascii=False) 45 | json_file.write(dumped_di + '\n') 46 | 47 | 48 | def main(): 49 | parser = argparse.ArgumentParser( 50 | formatter_class=argparse.RawTextHelpFormatter) 51 | parser.add_argument('--input-dir', 52 | '-input_dir', 53 | '-i', 54 | help='folder name of checkpoint files', 55 | required=True) 56 | 57 | parser.add_argument('--output-dir', 58 | '-output_dir', 59 | '-o', 60 | help='folder name of checkpoint files', 61 | required=True) 62 | 63 | parser.add_argument('--num-processes', 64 | '-num_processes', 65 | '-p', 66 | type=int, 67 | default=None, 68 | help='Number of processes') 69 | 70 | args = parser.parse_args() 71 | po = multiprocessing.Pool(args.num_processes) 72 | 73 | if not os.path.exists(args.output_dir): 74 | os.makedirs(args.output_dir) 75 | 76 | for input_file in tqdm(glob(args.input_dir + '/*.json')): 77 | fn = input_file.split('/')[-1] 78 | output_file = os.path.join(args.output_dir, fn) 79 | po.apply_async(func=run_preprocess, args=(input_file, output_file)) 80 | po.close() 81 | po.join() 82 | print('done') 83 | 84 | 85 | if __name__ == '__main__': 86 | main() 87 | -------------------------------------------------------------------------------- /toolkits/pretrain_data_preprocessing/convert_json_to_list.py: -------------------------------------------------------------------------------- 1 | import json 2 | json_file_path = ['wudao_train.json', 'wudao_valid.json'] 3 | for path in json_file_path: 4 | b = [] 5 | with open (path,encoding='utf-8') as json_file: 6 | for line in json_file.readlines(): 7 | dict=json.loads(line) 8 | b.append(dict) 9 | with open(path,'w',encoding='utf-8') as file_obj: 10 | json.dump(b,file_obj,ensure_ascii=False, indent=4) 11 | -------------------------------------------------------------------------------- /toolkits/pretrain_data_preprocessing/img.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSparseLLMs/Linear-MoE/b312754b9b8a2cb9eb15e373baabe263b8d409c1/toolkits/pretrain_data_preprocessing/img.png -------------------------------------------------------------------------------- /toolkits/pretrain_data_preprocessing/preprocess_wudao2.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import codecs 3 | import json 4 | import multiprocessing 5 | import os.path 6 | import re 7 | from glob import glob 8 | from tqdm import tqdm 9 | 10 | 11 | def clean_text(raw): 12 | httpcom = re.compile( 13 | r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@' 14 | r'.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+') # 匹配模式 15 | raw = httpcom.sub('', raw) 16 | 17 | space = re.compile(r' +') 18 | raw = space.sub(' ', raw) 19 | 20 | fil = re.compile( 21 | u'[^0-9a-zA-Z\u4e00-\u9fa5., ,\\-。%' 22 | u'《*》/•、&&(—)(+):?!!“”·]+', re.UNICODE) 23 | raw = fil.sub('', raw) 24 | return raw.strip() 25 | 26 | 27 | def run_preprocess(input_fp, output_fp, target_key): 28 | with codecs.open(output_fp, 'w', encoding='utf8') as json_file: 29 | with open(input_fp) as f: 30 | try: 31 | file_json = json.load(f) 32 | except ValueError: 33 | file_json = {} 34 | for obj in file_json: 35 | text = obj['content'] 36 | text = clean_text(text) 37 | di = { 38 | 'instruction': "", 39 | 'input': "", 40 | target_key : text 41 | } 42 | dumped_di = json.dumps(di, ensure_ascii=False) 43 | json_file.write(dumped_di + '\n') 44 | 45 | 46 | def main(): 47 | parser = argparse.ArgumentParser( 48 | 
formatter_class=argparse.RawTextHelpFormatter) 49 | parser.add_argument('--input-dir', 50 | '-input_dir', 51 | '-i', 52 | help='folder name of checkpoint files', 53 | required=True) 54 | 55 | parser.add_argument('--output-dir', 56 | '-output_dir', 57 | '-o', 58 | help='folder name of checkpoint files', 59 | required=True) 60 | 61 | parser.add_argument('--target-key', 62 | '-target_key', 63 | '-k', 64 | type=str, 65 | default='content', 66 | help='target_key', 67 | ) 68 | 69 | parser.add_argument('--num-processes', 70 | '-num_processes', 71 | '-p', 72 | type=int, 73 | default=None, 74 | help='Number of processes') 75 | 76 | args = parser.parse_args() 77 | po = multiprocessing.Pool(args.num_processes) 78 | 79 | if not os.path.exists(args.output_dir): 80 | os.makedirs(args.output_dir) 81 | 82 | for input_file in tqdm(glob(args.input_dir + '/*.json')): 83 | fn = input_file.split('/')[-1] 84 | output_file = os.path.join(args.output_dir, fn) 85 | po.apply_async(func=run_preprocess, args=(input_file, output_file, args.target_key)) 86 | po.close() 87 | po.join() 88 | print('done') 89 | 90 | 91 | if __name__ == '__main__': 92 | main() 93 | -------------------------------------------------------------------------------- /toolkits/pretrain_data_preprocessing/qwen_hf_preprocess_datasets.py: -------------------------------------------------------------------------------- 1 | import multiprocessing 2 | import fire 3 | import glob 4 | import transformers 5 | from datasets import load_dataset 6 | 7 | 8 | def build_dataset( 9 | data_path: str, 10 | tokenizer: transformers.PreTrainedTokenizer, 11 | sequence_length: int, 12 | cache_dir: str, 13 | ): 14 | def group_texts(examples): 15 | # Concatenate all texts. 16 | concatenated_examples = {k: sum(examples[k], []) for k in examples.keys()} 17 | total_length = len(concatenated_examples[list(examples.keys())[0]]) 18 | # We drop the small remainder, we could add padding if the model supported it instead of this drop, you can 19 | # customize this part to your needs. 20 | if total_length >= sequence_length: 21 | total_length = (total_length // sequence_length) * sequence_length 22 | # Split by chunks of block_size. 
23 | result = { 24 | k: [t[i: i + sequence_length] for i in range(0, total_length, sequence_length)] 25 | for k, t in concatenated_examples.items() 26 | } 27 | result["labels"] = result["input_ids"].copy() 28 | return result 29 | 30 | num_workers = 16 31 | 32 | raw_datasets = load_dataset( 33 | "json", 34 | data_files=data_path, 35 | split="train", 36 | cache_dir=cache_dir, 37 | ) 38 | 39 | dataset = raw_datasets.map( 40 | lambda example: tokenizer(example["text"]), 41 | batched=True, 42 | batch_size=3000, 43 | num_proc=num_workers, 44 | remove_columns=raw_datasets.column_names, 45 | load_from_cache_file=True, 46 | desc="Running tokenization", 47 | ) 48 | 49 | dataset = dataset.map( 50 | group_texts, 51 | batched=True, 52 | num_proc=num_workers, 53 | load_from_cache_file=True, 54 | desc=f"Grouping texts with sequence length {sequence_length}", 55 | ) 56 | 57 | return dataset 58 | 59 | def run_preprocess(jsonl_path, encoded_path, tokenizer, sequence_length, cache_dir): 60 | dataset = build_dataset( 61 | data_path=jsonl_path, 62 | tokenizer=tokenizer, 63 | sequence_length=sequence_length, 64 | cache_dir=cache_dir, 65 | ) 66 | dataset.save_to_disk(encoded_path) 67 | 68 | def main( 69 | data_dir: str, 70 | tokenizer_name_or_path: str, 71 | sequence_length: int = 2048, 72 | cache_dir: str = "./hf-cache", 73 | ): 74 | tokenizer = transformers.AutoTokenizer.from_pretrained( 75 | tokenizer_name_or_path, 76 | cache_dir=cache_dir, 77 | model_max_length=sequence_length, 78 | padding_side="right", 79 | use_fast=True, 80 | trust_remote_code=True 81 | ) 82 | if tokenizer.pad_token is None: 83 | tokenizer.add_special_tokens(special_tokens_dict=dict(pad_token="<|extra_0|>")) 84 | 85 | ds_paths = {} 86 | for file in glob.glob(data_dir+"/*.jsonl"): 87 | ds_paths[file] = file.replace(".jsonl", ".encode") 88 | 89 | for k, v in ds_paths.items(): 90 | print("===========================================") 91 | print(k) 92 | print(v) 93 | print("===========================================") 94 | 95 | po = multiprocessing.Pool(8) 96 | for jsonl_path, encoded_path in ds_paths.items(): 97 | po.apply_async(func=run_preprocess, args=(jsonl_path, encoded_path, tokenizer, sequence_length, cache_dir)) 98 | po.close() 99 | po.join() 100 | print('done') 101 | 102 | 103 | if __name__ == "__main__": 104 | fire.Fire(main) 105 | -------------------------------------------------------------------------------- /toolkits/pretrain_data_preprocessing/run_make_pretraining_dataset.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | START_TIME=$SECONDS 3 | LINEAR_MOE_PATH=$1 4 | MEGATRON_PATH=${LINEAR_MOE_PATH}/Megatron-LM-240126 5 | export PYTHONPATH=${MEGATRON_PATH}:${LINEAR_MOE_PATH}:$PYTHONPATH 6 | input_data_dir=$2 7 | tokenizer=$3 8 | output_data_dir=$4 9 | load_dir=$5 10 | 11 | INPUT="${input_data_dir}" 12 | 13 | 14 | 15 | if [ $tokenizer = "jiebabpe" ]; then 16 | 17 | if [ ! 
-f tokenizer.json ]; then 18 | wget https://easynlp-dev.oss-cn-zhangjiakou.aliyuncs.com/225247/RapidformerPro/tokenizer.json 19 | fi 20 | 21 | python preprocess_data.py \ 22 | --input ${INPUT} \ 23 | --output-prefix ${output_data_dir}/wudao_jiebabpe \ 24 | --dataset-impl mmap \ 25 | --vocab tokenizer.json \ 26 | --patch-tokenizer-type JiebaBPETokenizer \ 27 | --load ${load_dir} \ 28 | --workers 16 \ 29 | --append-eod 30 | 31 | elif [ $tokenizer = "bloombpe" ]; then 32 | 33 | python preprocess_data.py \ 34 | --input ${INPUT} \ 35 | --output-prefix ${output_data_dir}/wudao_bloombpe \ 36 | --dataset-impl mmap \ 37 | --patch-tokenizer-type BloomTokenizerFromHF \ 38 | --load ${load_dir} \ 39 | --workers 16 \ 40 | --append-eod 41 | 42 | elif [ $tokenizer = "glmchinesebpe" ]; then 43 | 44 | python preprocess_data.py \ 45 | --input ${INPUT} \ 46 | --output-prefix ${output_data_dir}/wudao_glmchinesebpe \ 47 | --dataset-impl mmap \ 48 | --patch-tokenizer-type GLM10BZHTokenizerFromHF \ 49 | --load ${load_dir} \ 50 | --workers 16 \ 51 | --append-eod 52 | 53 | elif [ $tokenizer = "glm130bbpe" ]; then 54 | 55 | python preprocess_data.py \ 56 | --input ${INPUT} \ 57 | --output-prefix ${output_data_dir}/wudao_glm130bbpe \ 58 | --dataset-impl mmap \ 59 | --patch-tokenizer-type IcetkGLM130BTokenizer \ 60 | --load ${load_dir} \ 61 | --workers 16 \ 62 | --append-eod 63 | 64 | elif [ $tokenizer = "llamabpe" ]; then 65 | 66 | python preprocess_data.py \ 67 | --input ${INPUT} \ 68 | --output-prefix ${output_data_dir}/wudao_llama3bpe \ 69 | --dataset-impl mmap \ 70 | --patch-tokenizer-type LLamaTokenizer \ 71 | --load ${load_dir} \ 72 | --workers 16 \ 73 | --append-eod 74 | 75 | elif [ $tokenizer = "falconbpe" ]; then 76 | 77 | python preprocess_data.py \ 78 | --input ${INPUT} \ 79 | --output-prefix ${output_data_dir}/wudao_falconbpe \ 80 | --dataset-impl mmap \ 81 | --patch-tokenizer-type FalconTokenizer \ 82 | --load ${load_dir} \ 83 | --workers 16 \ 84 | --append-eod 85 | 86 | elif [ $tokenizer = "galacticabpe" ]; then 87 | 88 | python preprocess_data.py \ 89 | --input ${INPUT} \ 90 | --output-prefix ${output_data_dir}/wudao_galacticabpe \ 91 | --dataset-impl mmap \ 92 | --patch-tokenizer-type OPTTokenizer \ 93 | --load ${load_dir} \ 94 | --workers 16 \ 95 | --append-eod 96 | 97 | elif [ $tokenizer = "starcoderbpe" ]; then 98 | python preprocess_data.py \ 99 | --input ${INPUT} \ 100 | --output-prefix ${output_data_dir}/wudao_starcoderbpe \ 101 | --dataset-impl mmap \ 102 | --patch-tokenizer-type StarcoderTokenizerFromHF \ 103 | --load ${load_dir} \ 104 | --workers 16 \ 105 | --append-eod 106 | 107 | elif [ $tokenizer = "qwenbpe" ]; then 108 | python preprocess_data.py \ 109 | --input ${INPUT} \ 110 | --output-prefix ${output_data_dir}/wudao_qwenbpe \ 111 | --dataset-impl mmap \ 112 | --patch-tokenizer-type QwenTokenizer \ 113 | --load ${load_dir} \ 114 | --workers 16 \ 115 | --append-eod 116 | 117 | elif [ $tokenizer = "mistralbpe" ]; then 118 | python preprocess_data.py \ 119 | --input ${INPUT} \ 120 | --output-prefix ${output_data_dir}/SlimPajama_mistralbpe \ 121 | --dataset-impl mmap \ 122 | --patch-tokenizer-type MistralTokenizer \ 123 | --load ${load_dir} \ 124 | --workers 16 \ 125 | --append-eod 126 | 127 | fi 128 | 129 | ELAPSED_TIME=$(($SECONDS - $START_TIME)) 130 | echo "$(($ELAPSED_TIME/60)) min $(($ELAPSED_TIME%60)) sec" 131 | -------------------------------------------------------------------------------- /toolkits/pretrain_data_preprocessing/run_make_pretraining_dataset_megatron.sh: 
-------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | export HF_ENDPOINT=https://hf-mirror.com 3 | 4 | START_TIME=$SECONDS 5 | 6 | CURRENT_DIR="$( cd "$( dirname "$0" )" && pwd )" 7 | MEGATRON_PATH=$( dirname $( dirname ${CURRENT_DIR})) 8 | export PYTHONPATH=$PYTHONPATH:${MEGATRON_PATH}:${MEGATRON_PATH}/Megatron-LM-240405 9 | 10 | input_data_dir=$1 11 | tokenizer=$2 12 | json_keys=$3 13 | output_data_dir=$4 14 | load_dir=$5 15 | 16 | INPUT="${input_data_dir}" 17 | 18 | if [ $tokenizer = "Qwen2Tokenizer" ]; then 19 | python preprocess_data_megatron.py \ 20 | --input ${INPUT} \ 21 | --output-prefix ${output_data_dir}/mmap_qwen2_datasets \ 22 | --patch-tokenizer-type Qwen2Tokenizer \ 23 | --json-keys ${json_keys} \ 24 | --load ${load_dir} \ 25 | --workers 2 \ 26 | --partitions 2 \ 27 | --keep-sequential-samples \ 28 | --append-eod 29 | 30 | elif [ $tokenizer = "DeepSeekV2Tokenizer" ]; then 31 | python preprocess_data_megatron.py \ 32 | --input ${INPUT} \ 33 | --output-prefix ${output_data_dir}/mmap_deepseekv2_datasets \ 34 | --patch-tokenizer-type DeepSeekV2Tokenizer \ 35 | --json-keys ${json_keys} \ 36 | --load ${load_dir} \ 37 | --workers 8 \ 38 | --partitions 1 \ 39 | --keep-sequential-samples \ 40 | --append-eod 41 | 42 | elif [ $tokenizer = "LLamaTokenizer" ]; then 43 | python preprocess_data_megatron.py \ 44 | --input ${INPUT} \ 45 | --output-prefix ${output_data_dir}/mmap_llama_datasets \ 46 | --patch-tokenizer-type LLamaTokenizer \ 47 | --load ${load_dir} \ 48 | --workers 16 \ 49 | --partitions 1 \ 50 | --keep-sequential-samples \ 51 | --append-eod 52 | 53 | fi 54 | 55 | ELAPSED_TIME=$(($SECONDS - $START_TIME)) 56 | echo "$(($ELAPSED_TIME/60)) min $(($ELAPSED_TIME%60)) sec" 57 | -------------------------------------------------------------------------------- /toolkits/pretrain_data_preprocessing/run_make_pretraining_dataset_megatron_slimpajama.sh: -------------------------------------------------------------------------------- 1 | #! 
/bin/bash 2 | export HF_ENDPOINT=https://hf-mirror.com 3 | 4 | START_TIME=$SECONDS 5 | 6 | CURRENT_DIR="$( cd "$( dirname "$0" )" && pwd )" 7 | MEGATRON_PATH=$( dirname $( dirname ${CURRENT_DIR})) 8 | export PYTHONPATH=$PYTHONPATH:${MEGATRON_PATH}:${MEGATRON_PATH}/Megatron-LM-240726 9 | 10 | input_data_dir=/cpfs04/shared/MOE/datasets/data-SlimPajama/SlimPajama-627B-train-split.json 11 | tokenizer=Qwen2Tokenizer 12 | json_keys=text 13 | output_data_dir=/cpfs04/shared/MOE/datasets/data-SlimPajama/slimpajama_megatron_bin_data 14 | load_dir=/cpfs04/shared/MOE/checkpoints/qwen-ckpts/Qwen2-0.5B 15 | 16 | INPUT="${input_data_dir}" 17 | 18 | if [ $tokenizer = "Qwen2Tokenizer" ]; then 19 | python preprocess_data_megatron.py \ 20 | --input ${INPUT} \ 21 | --output-prefix ${output_data_dir}/mmap_qwen2_datasets \ 22 | --patch-tokenizer-type Qwen2Tokenizer \ 23 | --json-keys ${json_keys} \ 24 | --load ${load_dir} \ 25 | --workers 32 \ 26 | --partitions 1 \ 27 | --keep-sequential-samples \ 28 | --append-eod 29 | 30 | elif [ $tokenizer = "DeepSeekV2Tokenizer" ]; then 31 | python preprocess_data_megatron.py \ 32 | --input ${INPUT} \ 33 | --output-prefix ${output_data_dir}/mmap_deepseekv2_datasets \ 34 | --patch-tokenizer-type DeepSeekV2Tokenizer \ 35 | --json-keys ${json_keys} \ 36 | --load ${load_dir} \ 37 | --workers 8 \ 38 | --partitions 1 \ 39 | --keep-sequential-samples \ 40 | --append-eod 41 | 42 | elif [ $tokenizer = "LLamaTokenizer" ]; then 43 | python preprocess_data_megatron.py \ 44 | --input ${INPUT} \ 45 | --output-prefix ${output_data_dir}/mmap_llama_datasets \ 46 | --patch-tokenizer-type LLamaTokenizer \ 47 | --load ${load_dir} \ 48 | --workers 16 \ 49 | --partitions 1 \ 50 | --keep-sequential-samples \ 51 | --append-eod 52 | 53 | fi 54 | 55 | ELAPSED_TIME=$(($SECONDS - $START_TIME)) 56 | echo "$(($ELAPSED_TIME/60)) min $(($ELAPSED_TIME%60)) sec" 57 | -------------------------------------------------------------------------------- /toolkits/pretrain_data_preprocessing/run_make_pretraining_dataset_megatron_slimpajama_chunk1_chunk2.sh: -------------------------------------------------------------------------------- 1 | #! 
/bin/bash 2 | export HF_ENDPOINT=https://hf-mirror.com 3 | 4 | START_TIME=$SECONDS 5 | 6 | CURRENT_DIR="$( cd "$( dirname "$0" )" && pwd )" 7 | MEGATRON_PATH=$( dirname $( dirname ${CURRENT_DIR})) 8 | export PYTHONPATH=$PYTHONPATH:${MEGATRON_PATH}:${MEGATRON_PATH}/Megatron-LM-240726 9 | 10 | input_data_dir=/cpfs04/shared/MOE/datasets/data-SlimPajama/SlimPajama-627B-train-split-chunk1-chunk2.json 11 | tokenizer=Qwen2Tokenizer 12 | json_keys=text 13 | output_data_dir=/cpfs04/shared/MOE/datasets/data-SlimPajama/slimpajama_chunk1_chunk2_megatron_bin_data 14 | load_dir=/cpfs04/shared/MOE/checkpoints/qwen-ckpts/Qwen2-0.5B 15 | 16 | INPUT="${input_data_dir}" 17 | 18 | if [ $tokenizer = "Qwen2Tokenizer" ]; then 19 | python preprocess_data_megatron.py \ 20 | --input ${INPUT} \ 21 | --output-prefix ${output_data_dir}/mmap_qwen2_datasets \ 22 | --patch-tokenizer-type Qwen2Tokenizer \ 23 | --json-keys ${json_keys} \ 24 | --load ${load_dir} \ 25 | --workers 32 \ 26 | --partitions 1 \ 27 | --keep-sequential-samples \ 28 | --append-eod 29 | 30 | elif [ $tokenizer = "DeepSeekV2Tokenizer" ]; then 31 | python preprocess_data_megatron.py \ 32 | --input ${INPUT} \ 33 | --output-prefix ${output_data_dir}/mmap_deepseekv2_datasets \ 34 | --patch-tokenizer-type DeepSeekV2Tokenizer \ 35 | --json-keys ${json_keys} \ 36 | --load ${load_dir} \ 37 | --workers 8 \ 38 | --partitions 1 \ 39 | --keep-sequential-samples \ 40 | --append-eod 41 | 42 | elif [ $tokenizer = "LLamaTokenizer" ]; then 43 | python preprocess_data_megatron.py \ 44 | --input ${INPUT} \ 45 | --output-prefix ${output_data_dir}/mmap_llama_datasets \ 46 | --patch-tokenizer-type LLamaTokenizer \ 47 | --load ${load_dir} \ 48 | --workers 16 \ 49 | --partitions 1 \ 50 | --keep-sequential-samples \ 51 | --append-eod 52 | 53 | fi 54 | 55 | ELAPSED_TIME=$(($SECONDS - $START_TIME)) 56 | echo "$(($ELAPSED_TIME/60)) min $(($ELAPSED_TIME%60)) sec" 57 | -------------------------------------------------------------------------------- /toolkits/pretrain_data_preprocessing/run_prepare_dataset.sh: -------------------------------------------------------------------------------- 1 | #! 
/bin/bash 2 | 3 | #clean 4 | python clean_raw_text.py -i WuDaoCorpus2.0_base_200G -o cleaned_wudao_dataset -p 32 5 | 6 | #merge 7 | find cleaned_wudao_dataset -name "*.json" -exec cat {} + > /cpfs01/user/paigpt/wudao/merged_wudao_cleaned.json 8 | 9 | #build zst 10 | split -l 6000000 --numeric-suffixes --additional-suffix=.jsonl /cpfs01/user/paigpt/wudao/merged_wudao_cleaned.json /cpfs01/user/paigpt/wudao/ 11 | zstd -z /cpfs01/user/paigpt/wudao/00.jsonl -o /cpfs01/user/paigpt/wudao/00.jsonl.zst & 12 | zstd -z /cpfs01/user/paigpt/wudao/01.jsonl -o /cpfs01/user/paigpt/wudao/01.jsonl.zst & 13 | zstd -z /cpfs01/user/paigpt/wudao/02.jsonl -o /cpfs01/user/paigpt/wudao/02.jsonl.zst & 14 | zstd -z /cpfs01/user/paigpt/wudao/03.jsonl -o /cpfs01/user/paigpt/wudao/03.jsonl.zst & 15 | zstd -z /cpfs01/user/paigpt/wudao/04.jsonl -o /cpfs01/user/paigpt/wudao/04.jsonl.zst & 16 | zstd -z /cpfs01/user/paigpt/wudao/05.jsonl -o /cpfs01/user/paigpt/wudao/05.jsonl.zst & 17 | zstd -z /cpfs01/user/paigpt/wudao/06.jsonl -o /cpfs01/user/paigpt/wudao/06.jsonl.zst & 18 | zstd -z /cpfs01/user/paigpt/wudao/07.jsonl -o /cpfs01/user/paigpt/wudao/07.jsonl.zst & 19 | zstd -z /cpfs01/user/paigpt/wudao/08.jsonl -o /cpfs01/user/paigpt/wudao/08.jsonl.zst & 20 | zstd -z /cpfs01/user/paigpt/wudao/09.jsonl -o /cpfs01/user/paigpt/wudao/09.jsonl.zst & 21 | -------------------------------------------------------------------------------- /toolkits/pretrain_data_preprocessing/run_prepare_wudao.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | set -ex 3 | # Set the directory of the raw data here 4 | data_dir=/mnt/mixtral-datasets/wudao_200g 5 | 6 | # Start the data cleaning process 7 | dataset_dir=$(dirname $data_dir) 8 | mkdir -p ${dataset_dir}/cleaned_wudao_dataset 9 | cd ${dataset_dir}/cleaned_wudao_dataset 10 | wget https://atp-modelzoo-wlcb-pai.oss-cn-wulanchabu.aliyuncs.com/release/models/pai-megatron-patch/llama2-codes/preprocess_wudao2.py 11 | # Unlike the previous section, the key argument is added here and set to text 12 | python preprocess_wudao2.py -i ${data_dir} -o ${dataset_dir}/cleaned_wudao_dataset -k text -p 32 13 | 14 | # Merge the cleaned data 15 | mkdir -p ${dataset_dir}/wudao 16 | cd ${dataset_dir}/wudao 17 | find ${dataset_dir}/cleaned_wudao_dataset -name "*.json" -exec cat {} + > ${dataset_dir}/wudao/merged_wudao_cleaned.json 18 | rm -rf ${dataset_dir}/cleaned_wudao_dataset 19 | 20 | # The number of chunks is set to 10 here; increase it if data processing is slow 21 | NUM_PIECE=10 22 | # Process the merged_wudao_cleaned.json file 23 | mkdir -p ${dataset_dir}/cleaned_zst/ 24 | # Get the total number of lines and split the data accordingly 25 | NUM=$(sed -n '$=' ${dataset_dir}/wudao/merged_wudao_cleaned.json) 26 | echo "total lines of dataset is $NUM, data will be split into $NUM_PIECE pieces for processing" 27 | NUM=`expr $NUM / $NUM_PIECE` 28 | echo "each group processes $NUM samples" 29 | split_dir=${dataset_dir}/split 30 | mkdir $split_dir 31 | split -l $NUM --numeric-suffixes --additional-suffix=.jsonl ${dataset_dir}/wudao/merged_wudao_cleaned.json $split_dir/ 32 | 33 | # Compress the data 34 | o_path=${dataset_dir}/cleaned_zst/ 35 | mkdir -p $o_path 36 | files=$(ls $split_dir/*.jsonl) 37 | for filename in $files 38 | do 39 | f=$(basename $filename) 40 | zstd -z $filename -o $o_path/$f.zst & 41 | done 42 | rm -rf $split_dir 43 | rm ${dataset_dir}/wudao/merged_wudao_cleaned.json --------------------------------------------------------------------------------
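After run_prepare_wudao.sh finishes, the compressed shards under ${dataset_dir}/cleaned_zst/ are the input that the tokenization scripts above (e.g. run_make_pretraining_dataset.sh) consume. A small sanity-check sketch, assuming the zstd CLI is on PATH; the shard path is illustrative, derived from the default data_dir above:

import json
import subprocess

shard = "/mnt/mixtral-datasets/cleaned_zst/00.jsonl.zst"  # illustrative: dirname(data_dir)/cleaned_zst/00.jsonl.zst

# Stream-decompress with the zstd CLI and read only the first JSONL record.
proc = subprocess.Popen(["zstd", "-dc", shard], stdout=subprocess.PIPE, text=True)
first_line = proc.stdout.readline()
proc.kill()

record = json.loads(first_line)
# preprocess_wudao2.py with -k text writes {'instruction': '', 'input': '', 'text': ...} records.
print(sorted(record.keys()))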