├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── alg.png ├── code_eval.patch ├── conf ├── ds_bf16_zero1.json ├── ds_bf16_zero2.json ├── ds_bf16_zero3.json ├── ds_bf16_zero3_offload.json ├── llama_fsdp.json ├── prob_map │ ├── code.json │ ├── datasize.json │ ├── gate_load.json │ ├── llama_moe_dynamic_final.json │ ├── math.json │ ├── moduleformer_dynamic_final.json │ ├── orca.json │ ├── roll.json │ ├── sent_emb.json │ ├── sharegpt.json │ ├── sharegpt_code_math_datasize.json │ ├── sharegpt_code_math_uniform.json │ ├── sharegpt_orca.json │ ├── sharegpt_orca_code.json │ ├── sharegpt_orca_math.json │ └── uniform.json ├── ref_loss │ ├── llama_moe.json │ ├── llama_moe_uniform_final.json │ ├── moduleformer.json │ └── moduleformer_uniform.json ├── wo_balance_loss.json ├── wo_gate_noise.json └── wo_gate_noise_and_balance_loss.json ├── intuition.png ├── lm_eval.patch ├── logs └── .gitkeep ├── requirements.txt ├── scripts ├── eval │ ├── code.sh │ ├── eval.sh │ ├── gen.sh │ └── multi.sh ├── llama_moe │ ├── llama_moe_dynamic.sh │ ├── llama_moe_dynamic_sent_emb_init.sh │ ├── llama_moe_final_static.sh │ ├── llama_moe_inverse_hypothesis.sh │ ├── llama_moe_random.sh │ ├── llama_moe_ref_loss.sh │ ├── llama_moe_sequential.sh │ ├── llama_moe_static_datasize.sh │ ├── llama_moe_static_gate_load.sh │ ├── llama_moe_static_sent_emb.sh │ └── llama_moe_uniform.sh └── moduleformer │ ├── moduleformer_dynamic.sh │ ├── moduleformer_final_static.sh │ ├── moduleformer_random.sh │ ├── moduleformer_ref_loss.sh │ ├── moduleformer_sequential.sh │ ├── moduleformer_static_data_size.sh │ └── moduleformer_uniform.sh ├── src ├── __init__.py ├── callbacks.py ├── core │ ├── __init__.py │ ├── cli_demo.py │ ├── clustering.py │ ├── train.py │ └── web_demo.py ├── data.py ├── eval │ ├── __init__.py │ ├── compute.py │ ├── gen_alpaca_eval_ans.py │ ├── gen_mt_ans.py │ ├── listen.py │ ├── show.py │ └── xlsx.py ├── models │ ├── __init__.py │ ├── llama_moe │ │ ├── __init__.py │ │ ├── configuration_llama_moe.py │ │ └── modeling_llama_moe.py │ └── moduleformer │ │ ├── __init__.py │ │ ├── configuration_moduleformer.py │ │ └── modeling_moduleformer.py ├── trainer.py └── utils │ ├── __init__.py │ ├── config.py │ ├── conversation.py │ ├── debugging.py │ ├── io.py │ ├── len_stats.py │ ├── notification.py │ └── vis.py ├── tests ├── __init__.py ├── models │ ├── __init__.py │ └── test_moduleformer.py └── test_data.py └── tox.ini /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/LICENSE -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/Makefile -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/README.md -------------------------------------------------------------------------------- /alg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/alg.png -------------------------------------------------------------------------------- /code_eval.patch: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/code_eval.patch -------------------------------------------------------------------------------- /conf/ds_bf16_zero1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/conf/ds_bf16_zero1.json -------------------------------------------------------------------------------- /conf/ds_bf16_zero2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/conf/ds_bf16_zero2.json -------------------------------------------------------------------------------- /conf/ds_bf16_zero3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/conf/ds_bf16_zero3.json -------------------------------------------------------------------------------- /conf/ds_bf16_zero3_offload.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/conf/ds_bf16_zero3_offload.json -------------------------------------------------------------------------------- /conf/llama_fsdp.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/conf/llama_fsdp.json -------------------------------------------------------------------------------- /conf/prob_map/code.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/conf/prob_map/code.json -------------------------------------------------------------------------------- /conf/prob_map/datasize.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/conf/prob_map/datasize.json -------------------------------------------------------------------------------- /conf/prob_map/gate_load.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/conf/prob_map/gate_load.json -------------------------------------------------------------------------------- /conf/prob_map/llama_moe_dynamic_final.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/conf/prob_map/llama_moe_dynamic_final.json -------------------------------------------------------------------------------- /conf/prob_map/math.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/conf/prob_map/math.json -------------------------------------------------------------------------------- /conf/prob_map/moduleformer_dynamic_final.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/conf/prob_map/moduleformer_dynamic_final.json -------------------------------------------------------------------------------- /conf/prob_map/orca.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/conf/prob_map/orca.json -------------------------------------------------------------------------------- /conf/prob_map/roll.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/conf/prob_map/roll.json -------------------------------------------------------------------------------- /conf/prob_map/sent_emb.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/conf/prob_map/sent_emb.json -------------------------------------------------------------------------------- /conf/prob_map/sharegpt.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/conf/prob_map/sharegpt.json -------------------------------------------------------------------------------- /conf/prob_map/sharegpt_code_math_datasize.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/conf/prob_map/sharegpt_code_math_datasize.json -------------------------------------------------------------------------------- /conf/prob_map/sharegpt_code_math_uniform.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/conf/prob_map/sharegpt_code_math_uniform.json -------------------------------------------------------------------------------- /conf/prob_map/sharegpt_orca.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/conf/prob_map/sharegpt_orca.json -------------------------------------------------------------------------------- /conf/prob_map/sharegpt_orca_code.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/conf/prob_map/sharegpt_orca_code.json -------------------------------------------------------------------------------- /conf/prob_map/sharegpt_orca_math.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/conf/prob_map/sharegpt_orca_math.json -------------------------------------------------------------------------------- /conf/prob_map/uniform.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/conf/prob_map/uniform.json -------------------------------------------------------------------------------- /conf/ref_loss/llama_moe.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/conf/ref_loss/llama_moe.json -------------------------------------------------------------------------------- /conf/ref_loss/llama_moe_uniform_final.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/conf/ref_loss/llama_moe_uniform_final.json -------------------------------------------------------------------------------- /conf/ref_loss/moduleformer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/conf/ref_loss/moduleformer.json -------------------------------------------------------------------------------- /conf/ref_loss/moduleformer_uniform.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/conf/ref_loss/moduleformer_uniform.json -------------------------------------------------------------------------------- /conf/wo_balance_loss.json: -------------------------------------------------------------------------------- 1 | { 2 | "gate_use_balance": false 3 | } -------------------------------------------------------------------------------- /conf/wo_gate_noise.json: -------------------------------------------------------------------------------- 1 | { 2 | "gate_add_noise": false 3 | } -------------------------------------------------------------------------------- /conf/wo_gate_noise_and_balance_loss.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/conf/wo_gate_noise_and_balance_loss.json -------------------------------------------------------------------------------- /intuition.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/intuition.png -------------------------------------------------------------------------------- /lm_eval.patch: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/lm_eval.patch -------------------------------------------------------------------------------- /logs/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/requirements.txt -------------------------------------------------------------------------------- /scripts/eval/code.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/scripts/eval/code.sh -------------------------------------------------------------------------------- /scripts/eval/eval.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/scripts/eval/eval.sh -------------------------------------------------------------------------------- /scripts/eval/gen.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/scripts/eval/gen.sh -------------------------------------------------------------------------------- /scripts/eval/multi.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/scripts/eval/multi.sh -------------------------------------------------------------------------------- /scripts/llama_moe/llama_moe_dynamic.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/scripts/llama_moe/llama_moe_dynamic.sh -------------------------------------------------------------------------------- /scripts/llama_moe/llama_moe_dynamic_sent_emb_init.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/scripts/llama_moe/llama_moe_dynamic_sent_emb_init.sh -------------------------------------------------------------------------------- /scripts/llama_moe/llama_moe_final_static.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/scripts/llama_moe/llama_moe_final_static.sh -------------------------------------------------------------------------------- /scripts/llama_moe/llama_moe_inverse_hypothesis.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/scripts/llama_moe/llama_moe_inverse_hypothesis.sh -------------------------------------------------------------------------------- /scripts/llama_moe/llama_moe_random.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/scripts/llama_moe/llama_moe_random.sh -------------------------------------------------------------------------------- /scripts/llama_moe/llama_moe_ref_loss.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/scripts/llama_moe/llama_moe_ref_loss.sh -------------------------------------------------------------------------------- /scripts/llama_moe/llama_moe_sequential.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/scripts/llama_moe/llama_moe_sequential.sh -------------------------------------------------------------------------------- /scripts/llama_moe/llama_moe_static_datasize.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/scripts/llama_moe/llama_moe_static_datasize.sh -------------------------------------------------------------------------------- /scripts/llama_moe/llama_moe_static_gate_load.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/scripts/llama_moe/llama_moe_static_gate_load.sh -------------------------------------------------------------------------------- /scripts/llama_moe/llama_moe_static_sent_emb.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/scripts/llama_moe/llama_moe_static_sent_emb.sh -------------------------------------------------------------------------------- /scripts/llama_moe/llama_moe_uniform.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/scripts/llama_moe/llama_moe_uniform.sh -------------------------------------------------------------------------------- /scripts/moduleformer/moduleformer_dynamic.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/scripts/moduleformer/moduleformer_dynamic.sh -------------------------------------------------------------------------------- /scripts/moduleformer/moduleformer_final_static.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/scripts/moduleformer/moduleformer_final_static.sh -------------------------------------------------------------------------------- /scripts/moduleformer/moduleformer_random.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/scripts/moduleformer/moduleformer_random.sh -------------------------------------------------------------------------------- /scripts/moduleformer/moduleformer_ref_loss.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/scripts/moduleformer/moduleformer_ref_loss.sh -------------------------------------------------------------------------------- /scripts/moduleformer/moduleformer_sequential.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/scripts/moduleformer/moduleformer_sequential.sh -------------------------------------------------------------------------------- /scripts/moduleformer/moduleformer_static_data_size.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/scripts/moduleformer/moduleformer_static_data_size.sh -------------------------------------------------------------------------------- /scripts/moduleformer/moduleformer_uniform.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/scripts/moduleformer/moduleformer_uniform.sh -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/callbacks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/src/callbacks.py -------------------------------------------------------------------------------- /src/core/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/core/cli_demo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/src/core/cli_demo.py -------------------------------------------------------------------------------- /src/core/clustering.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/src/core/clustering.py -------------------------------------------------------------------------------- /src/core/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/src/core/train.py -------------------------------------------------------------------------------- /src/core/web_demo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/src/core/web_demo.py -------------------------------------------------------------------------------- /src/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/src/data.py -------------------------------------------------------------------------------- /src/eval/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/eval/compute.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/src/eval/compute.py -------------------------------------------------------------------------------- /src/eval/gen_alpaca_eval_ans.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/src/eval/gen_alpaca_eval_ans.py -------------------------------------------------------------------------------- /src/eval/gen_mt_ans.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/src/eval/gen_mt_ans.py -------------------------------------------------------------------------------- /src/eval/listen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/src/eval/listen.py -------------------------------------------------------------------------------- /src/eval/show.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/src/eval/show.py -------------------------------------------------------------------------------- /src/eval/xlsx.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/src/eval/xlsx.py -------------------------------------------------------------------------------- /src/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/src/models/__init__.py -------------------------------------------------------------------------------- /src/models/llama_moe/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/src/models/llama_moe/__init__.py -------------------------------------------------------------------------------- /src/models/llama_moe/configuration_llama_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/src/models/llama_moe/configuration_llama_moe.py -------------------------------------------------------------------------------- /src/models/llama_moe/modeling_llama_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/src/models/llama_moe/modeling_llama_moe.py -------------------------------------------------------------------------------- /src/models/moduleformer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/src/models/moduleformer/__init__.py -------------------------------------------------------------------------------- /src/models/moduleformer/configuration_moduleformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/src/models/moduleformer/configuration_moduleformer.py -------------------------------------------------------------------------------- /src/models/moduleformer/modeling_moduleformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/src/models/moduleformer/modeling_moduleformer.py -------------------------------------------------------------------------------- /src/trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/src/trainer.py -------------------------------------------------------------------------------- /src/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/utils/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/src/utils/config.py -------------------------------------------------------------------------------- /src/utils/conversation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/src/utils/conversation.py -------------------------------------------------------------------------------- /src/utils/debugging.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/src/utils/debugging.py -------------------------------------------------------------------------------- /src/utils/io.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/src/utils/io.py -------------------------------------------------------------------------------- /src/utils/len_stats.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/src/utils/len_stats.py -------------------------------------------------------------------------------- /src/utils/notification.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/src/utils/notification.py -------------------------------------------------------------------------------- /src/utils/vis.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/src/utils/vis.py -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/models/test_moduleformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/tests/models/test_moduleformer.py -------------------------------------------------------------------------------- /tests/test_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/tests/test_data.py -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spico197/MoE-SFT/HEAD/tox.ini --------------------------------------------------------------------------------