├── .gitignore ├── LICENSE ├── README.md ├── mcsmoe ├── __init__.py ├── calflops │ ├── __init__.py │ ├── calculate_pipline.py │ ├── flops_counter.py │ ├── pytorch_ops.py │ └── utils.py ├── data │ ├── __init__.py │ ├── data_collator.py │ ├── evaluation.py │ └── preprocessors.py ├── evaluate-expert-usage.py ├── evaluate-fsgpt-expert-usage.py ├── evaluate-fsgpt-zero-shot.py ├── evaluation │ ├── __init__.py │ ├── lm_eval.py │ └── minipile.py ├── finetune-switch-transformers.py ├── fsgpt-zero-shot-losparse.py ├── fsgpt-zero-shot-with-task-specific-pruning.py ├── losparse-downstream.py ├── merge-and-kd-for-recover-downstream.py ├── merge-by-averaging.py ├── merge-fsgpt-by-usage-frequency-weighted.py ├── merging │ ├── __init__.py │ ├── fisher.py │ ├── grouping.py │ ├── grouping_fsgpt.py │ ├── grouping_mixtral.py │ ├── permutation.py │ └── utils.py ├── models │ ├── __init__.py │ ├── fsgpt.py │ ├── fsgpt_moe.py │ ├── modeling_outputs.py │ ├── sparse.py │ └── switch_transformers.py ├── msmoe-merging-mixtral.py ├── msmoe-merging.py ├── permute-fsgpt-model.py ├── permute-model.py ├── profile-flops.py ├── profile-fsgpt-flops.py ├── profile-latency.py ├── prune-fsgpt-non-core-experts.py ├── prune-non-core-experts-and-kd-downstream.py ├── pruning │ ├── __init__.py │ ├── losparse.py │ ├── task_specific.py │ └── task_specific_fsgpt.py ├── random-merge-fsgpt.py ├── random-merge.py ├── switch-finetune-with-task-specific-pruning.py ├── t5-finetune-on-downstream.py └── utils │ ├── __init__.py │ ├── constants.py │ ├── dummy_test.py │ ├── sparsity.py │ └── training_utils.py ├── requirements.txt ├── scripts ├── gpt │ ├── evaluate-dense-zero-shot.sh │ ├── evaluate-merged-moe-zero-shot.sh │ ├── evaluate-moe-zero-shot.sh │ ├── merge-by-usage-frequency-weighted.sh │ ├── permute-moe.sh │ ├── post-merging-losparse.sh │ ├── profile-flops.sh │ ├── prune-non-core.sh │ ├── random-merge.sh │ └── task-specific.sh └── t5 │ ├── finetune-switch.sh │ ├── finetune-t5.sh │ ├── losparse-only.sh │ ├── merge-by-averaging.sh │ ├── merge-by-usage-frequency-weighted.sh │ ├── permute-switch.sh │ ├── post-merging-losparse.sh │ ├── profile-flops.sh │ ├── profile-latency.sh │ ├── prune-non-core.sh │ ├── random-merge.sh │ └── task-specific.sh └── static ├── evaluation_config.yaml ├── finetune_config.yaml └── pipeline.jpg /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/README.md -------------------------------------------------------------------------------- /mcsmoe/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | os.environ['TRANSFORMERS_NO_ADVISORY_WARNINGS'] = 'true' 4 | -------------------------------------------------------------------------------- /mcsmoe/calflops/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/mcsmoe/calflops/__init__.py -------------------------------------------------------------------------------- /mcsmoe/calflops/calculate_pipline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/mcsmoe/calflops/calculate_pipline.py -------------------------------------------------------------------------------- /mcsmoe/calflops/flops_counter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/mcsmoe/calflops/flops_counter.py -------------------------------------------------------------------------------- /mcsmoe/calflops/pytorch_ops.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/mcsmoe/calflops/pytorch_ops.py -------------------------------------------------------------------------------- /mcsmoe/calflops/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/mcsmoe/calflops/utils.py -------------------------------------------------------------------------------- /mcsmoe/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/mcsmoe/data/__init__.py -------------------------------------------------------------------------------- /mcsmoe/data/data_collator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/mcsmoe/data/data_collator.py -------------------------------------------------------------------------------- /mcsmoe/data/evaluation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/mcsmoe/data/evaluation.py -------------------------------------------------------------------------------- /mcsmoe/data/preprocessors.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/mcsmoe/data/preprocessors.py -------------------------------------------------------------------------------- /mcsmoe/evaluate-expert-usage.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/mcsmoe/evaluate-expert-usage.py -------------------------------------------------------------------------------- /mcsmoe/evaluate-fsgpt-expert-usage.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/mcsmoe/evaluate-fsgpt-expert-usage.py -------------------------------------------------------------------------------- /mcsmoe/evaluate-fsgpt-zero-shot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/mcsmoe/evaluate-fsgpt-zero-shot.py -------------------------------------------------------------------------------- /mcsmoe/evaluation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/mcsmoe/evaluation/__init__.py -------------------------------------------------------------------------------- /mcsmoe/evaluation/lm_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/mcsmoe/evaluation/lm_eval.py -------------------------------------------------------------------------------- /mcsmoe/evaluation/minipile.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/mcsmoe/evaluation/minipile.py -------------------------------------------------------------------------------- /mcsmoe/finetune-switch-transformers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/mcsmoe/finetune-switch-transformers.py -------------------------------------------------------------------------------- /mcsmoe/fsgpt-zero-shot-losparse.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/mcsmoe/fsgpt-zero-shot-losparse.py -------------------------------------------------------------------------------- /mcsmoe/fsgpt-zero-shot-with-task-specific-pruning.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/mcsmoe/fsgpt-zero-shot-with-task-specific-pruning.py -------------------------------------------------------------------------------- /mcsmoe/losparse-downstream.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/mcsmoe/losparse-downstream.py -------------------------------------------------------------------------------- /mcsmoe/merge-and-kd-for-recover-downstream.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/mcsmoe/merge-and-kd-for-recover-downstream.py -------------------------------------------------------------------------------- /mcsmoe/merge-by-averaging.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/mcsmoe/merge-by-averaging.py -------------------------------------------------------------------------------- /mcsmoe/merge-fsgpt-by-usage-frequency-weighted.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/mcsmoe/merge-fsgpt-by-usage-frequency-weighted.py -------------------------------------------------------------------------------- /mcsmoe/merging/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/mcsmoe/merging/__init__.py -------------------------------------------------------------------------------- /mcsmoe/merging/fisher.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/mcsmoe/merging/fisher.py -------------------------------------------------------------------------------- /mcsmoe/merging/grouping.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/mcsmoe/merging/grouping.py -------------------------------------------------------------------------------- /mcsmoe/merging/grouping_fsgpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/mcsmoe/merging/grouping_fsgpt.py -------------------------------------------------------------------------------- /mcsmoe/merging/grouping_mixtral.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/mcsmoe/merging/grouping_mixtral.py -------------------------------------------------------------------------------- /mcsmoe/merging/permutation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/mcsmoe/merging/permutation.py -------------------------------------------------------------------------------- /mcsmoe/merging/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/mcsmoe/merging/utils.py -------------------------------------------------------------------------------- /mcsmoe/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/mcsmoe/models/__init__.py -------------------------------------------------------------------------------- /mcsmoe/models/fsgpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/mcsmoe/models/fsgpt.py -------------------------------------------------------------------------------- /mcsmoe/models/fsgpt_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/mcsmoe/models/fsgpt_moe.py -------------------------------------------------------------------------------- /mcsmoe/models/modeling_outputs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/mcsmoe/models/modeling_outputs.py -------------------------------------------------------------------------------- /mcsmoe/models/sparse.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/mcsmoe/models/sparse.py -------------------------------------------------------------------------------- /mcsmoe/models/switch_transformers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/mcsmoe/models/switch_transformers.py -------------------------------------------------------------------------------- /mcsmoe/msmoe-merging-mixtral.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/mcsmoe/msmoe-merging-mixtral.py -------------------------------------------------------------------------------- /mcsmoe/msmoe-merging.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/mcsmoe/msmoe-merging.py -------------------------------------------------------------------------------- /mcsmoe/permute-fsgpt-model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/mcsmoe/permute-fsgpt-model.py -------------------------------------------------------------------------------- /mcsmoe/permute-model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/mcsmoe/permute-model.py -------------------------------------------------------------------------------- /mcsmoe/profile-flops.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/mcsmoe/profile-flops.py -------------------------------------------------------------------------------- /mcsmoe/profile-fsgpt-flops.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/mcsmoe/profile-fsgpt-flops.py -------------------------------------------------------------------------------- /mcsmoe/profile-latency.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/mcsmoe/profile-latency.py -------------------------------------------------------------------------------- /mcsmoe/prune-fsgpt-non-core-experts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/mcsmoe/prune-fsgpt-non-core-experts.py -------------------------------------------------------------------------------- /mcsmoe/prune-non-core-experts-and-kd-downstream.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/mcsmoe/prune-non-core-experts-and-kd-downstream.py -------------------------------------------------------------------------------- /mcsmoe/pruning/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/mcsmoe/pruning/__init__.py -------------------------------------------------------------------------------- /mcsmoe/pruning/losparse.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/mcsmoe/pruning/losparse.py -------------------------------------------------------------------------------- /mcsmoe/pruning/task_specific.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/mcsmoe/pruning/task_specific.py -------------------------------------------------------------------------------- /mcsmoe/pruning/task_specific_fsgpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/mcsmoe/pruning/task_specific_fsgpt.py -------------------------------------------------------------------------------- /mcsmoe/random-merge-fsgpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/mcsmoe/random-merge-fsgpt.py -------------------------------------------------------------------------------- /mcsmoe/random-merge.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/mcsmoe/random-merge.py -------------------------------------------------------------------------------- /mcsmoe/switch-finetune-with-task-specific-pruning.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/mcsmoe/switch-finetune-with-task-specific-pruning.py -------------------------------------------------------------------------------- /mcsmoe/t5-finetune-on-downstream.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/mcsmoe/t5-finetune-on-downstream.py -------------------------------------------------------------------------------- /mcsmoe/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/mcsmoe/utils/__init__.py -------------------------------------------------------------------------------- /mcsmoe/utils/constants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/mcsmoe/utils/constants.py -------------------------------------------------------------------------------- /mcsmoe/utils/dummy_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/mcsmoe/utils/dummy_test.py -------------------------------------------------------------------------------- /mcsmoe/utils/sparsity.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/mcsmoe/utils/sparsity.py -------------------------------------------------------------------------------- /mcsmoe/utils/training_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/mcsmoe/utils/training_utils.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/requirements.txt -------------------------------------------------------------------------------- /scripts/gpt/evaluate-dense-zero-shot.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/scripts/gpt/evaluate-dense-zero-shot.sh -------------------------------------------------------------------------------- /scripts/gpt/evaluate-merged-moe-zero-shot.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/scripts/gpt/evaluate-merged-moe-zero-shot.sh -------------------------------------------------------------------------------- /scripts/gpt/evaluate-moe-zero-shot.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/scripts/gpt/evaluate-moe-zero-shot.sh -------------------------------------------------------------------------------- /scripts/gpt/merge-by-usage-frequency-weighted.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/scripts/gpt/merge-by-usage-frequency-weighted.sh -------------------------------------------------------------------------------- /scripts/gpt/permute-moe.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/scripts/gpt/permute-moe.sh -------------------------------------------------------------------------------- /scripts/gpt/post-merging-losparse.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/scripts/gpt/post-merging-losparse.sh -------------------------------------------------------------------------------- /scripts/gpt/profile-flops.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/scripts/gpt/profile-flops.sh -------------------------------------------------------------------------------- /scripts/gpt/prune-non-core.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/scripts/gpt/prune-non-core.sh -------------------------------------------------------------------------------- /scripts/gpt/random-merge.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/scripts/gpt/random-merge.sh -------------------------------------------------------------------------------- /scripts/gpt/task-specific.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/scripts/gpt/task-specific.sh -------------------------------------------------------------------------------- /scripts/t5/finetune-switch.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/scripts/t5/finetune-switch.sh -------------------------------------------------------------------------------- /scripts/t5/finetune-t5.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/scripts/t5/finetune-t5.sh -------------------------------------------------------------------------------- /scripts/t5/losparse-only.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/scripts/t5/losparse-only.sh -------------------------------------------------------------------------------- /scripts/t5/merge-by-averaging.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/scripts/t5/merge-by-averaging.sh -------------------------------------------------------------------------------- /scripts/t5/merge-by-usage-frequency-weighted.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/scripts/t5/merge-by-usage-frequency-weighted.sh -------------------------------------------------------------------------------- /scripts/t5/permute-switch.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/scripts/t5/permute-switch.sh -------------------------------------------------------------------------------- /scripts/t5/post-merging-losparse.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/scripts/t5/post-merging-losparse.sh -------------------------------------------------------------------------------- /scripts/t5/profile-flops.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/scripts/t5/profile-flops.sh -------------------------------------------------------------------------------- /scripts/t5/profile-latency.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/scripts/t5/profile-latency.sh -------------------------------------------------------------------------------- /scripts/t5/prune-non-core.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/scripts/t5/prune-non-core.sh -------------------------------------------------------------------------------- /scripts/t5/random-merge.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/scripts/t5/random-merge.sh -------------------------------------------------------------------------------- /scripts/t5/task-specific.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/scripts/t5/task-specific.sh -------------------------------------------------------------------------------- /static/evaluation_config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/static/evaluation_config.yaml -------------------------------------------------------------------------------- /static/finetune_config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/static/finetune_config.yaml -------------------------------------------------------------------------------- /static/pipeline.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UNITES-Lab/MC-SMoE/HEAD/static/pipeline.jpg --------------------------------------------------------------------------------