├── .gitignore ├── .gitmodules ├── README.MD ├── coe ├── __init__.py ├── trainer │ ├── __init__.py │ ├── base_trainer.py │ └── fsdp_sft_trainer.py └── utils │ ├── __init__.py │ ├── dataset │ ├── __init__.py │ └── base_dataset.py │ └── debug │ ├── __init__.py │ └── performance.py ├── config ├── base.yaml ├── models │ └── coe_deepseekv2 │ │ ├── config.json │ │ ├── configuration_coe.py │ │ ├── generation_config.json │ │ ├── modeling_coe.py │ │ ├── special_tokens_map.json │ │ ├── tokenizer.json │ │ └── tokenizer_config.json └── train.yaml ├── data └── gsm8k │ ├── test.parquet │ └── train.parquet ├── eval └── eval.sh ├── main.py ├── old ├── config │ ├── models │ │ ├── coe-tiny-hf-v2 │ │ │ ├── config.json │ │ │ ├── configuration_comoe.py │ │ │ ├── generation_config.json │ │ │ ├── modeling_comoe.py │ │ │ ├── modeling_comoe.py.old │ │ │ ├── tokenizer.json │ │ │ └── tokenizer_config.json │ │ ├── dsmoe │ │ │ ├── config.json │ │ │ ├── configuration_deepseek.py │ │ │ ├── modeling_deepseek.py │ │ │ ├── special_tokens_map.json │ │ │ ├── tokenizer.json │ │ │ └── tokenizer_config.json │ │ ├── dsv2 │ │ │ ├── config.json │ │ │ ├── configuration_deepseek.py │ │ │ ├── generation_config.json │ │ │ ├── modeling_deepseek.py │ │ │ ├── new.txt │ │ │ ├── special_tokens_map.json │ │ │ ├── tokenizer.json │ │ │ └── tokenizer_config.json │ │ ├── olmoe │ │ │ ├── config.json │ │ │ ├── configuration_olmoe.py │ │ │ ├── modeling_olmoe.py │ │ │ ├── special_tokens_map.json │ │ │ ├── tokenizer.json │ │ │ └── tokenizer_config.json │ │ ├── olmoe_coe │ │ │ ├── config.json │ │ │ ├── configuration_olmoe.py │ │ │ ├── modeling_olmoe.py │ │ │ ├── special_tokens_map.json │ │ │ ├── tokenizer.json │ │ │ └── tokenizer_config.json │ │ ├── olmoe_coe_backup │ │ │ ├── config.json │ │ │ ├── configuration_olmoe.py │ │ │ ├── modeling_olmoe.py │ │ │ ├── special_tokens_map.json │ │ │ ├── tokenizer.json │ │ │ └── tokenizer_config.json │ │ ├── olmoe_coe_v2 │ │ │ ├── config.json │ │ │ ├── configuration_olmoe.py │ │ │ ├── modeling_olmoe.py │ │ │ ├── special_tokens_map.json │ │ │ ├── tokenizer.json │ │ │ └── tokenizer_config.json │ │ ├── pythia-160m │ │ │ ├── config.json │ │ │ ├── configuration_gpt_neox_moe.py │ │ │ ├── generation_config.json │ │ │ ├── modeling_gpt_neox_moe.py │ │ │ ├── special_tokens_map.json │ │ │ ├── tokenizer.json │ │ │ └── tokenizer_config.json │ │ ├── pythia-dsmoe-160m │ │ │ ├── config.json │ │ │ ├── configuration_gpt_neox_dsmoe.py │ │ │ ├── generation_config.json │ │ │ ├── modeling_gpt_neox_dsmoe.py │ │ │ ├── special_tokens_map.json │ │ │ ├── tokenizer.json │ │ │ └── tokenizer_config.json │ │ ├── qwen-moe │ │ │ ├── added_tokens.json │ │ │ ├── config.json │ │ │ ├── configuration_qwen2_moe.py │ │ │ ├── merges.txt │ │ │ ├── modeling_qwen2_moe.py │ │ │ ├── special_tokens_map.json │ │ │ ├── tokenizer.json │ │ │ ├── tokenizer_config.json │ │ │ └── vocab.json │ │ └── train_olmoe.sh │ └── training │ │ ├── coe-tiny-hf-v2 │ │ ├── base.yaml │ │ └── math.yaml │ │ ├── olmoe_coe │ │ ├── g0.sh │ │ ├── g1.sh │ │ ├── g2.sh │ │ └── g3.sh │ │ └── pythia-14m │ │ ├── math.yaml │ │ └── or.yaml └── main.py ├── report.MD ├── requirements.txt ├── runs ├── _run.sh ├── run.sh └── run_latest.sh └── scripts ├── analyze_routing.py ├── download_config.py ├── download_dataset.py ├── plot ├── data.tsv ├── data3.tsv ├── data4.tsv ├── data5.tsv ├── data6.tsv ├── main.png ├── plot.png ├── plot.py ├── plot_abl.png ├── plot_abl.py ├── plot_dense.png ├── plot_dense.py ├── plot_eff.png ├── plot_eff.py ├── plot_lyr.png ├── plot_lyr.py ├── plot_scal.png ├── plot_scal.py └── plot_scal.py.old └── setup.sh /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/.gitignore -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/.gitmodules -------------------------------------------------------------------------------- /README.MD: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/README.MD -------------------------------------------------------------------------------- /coe/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /coe/trainer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /coe/trainer/base_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/coe/trainer/base_trainer.py -------------------------------------------------------------------------------- /coe/trainer/fsdp_sft_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/coe/trainer/fsdp_sft_trainer.py -------------------------------------------------------------------------------- /coe/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /coe/utils/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /coe/utils/dataset/base_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/coe/utils/dataset/base_dataset.py -------------------------------------------------------------------------------- /coe/utils/debug/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/coe/utils/debug/__init__.py -------------------------------------------------------------------------------- /coe/utils/debug/performance.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/coe/utils/debug/performance.py -------------------------------------------------------------------------------- /config/base.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/config/base.yaml -------------------------------------------------------------------------------- /config/models/coe_deepseekv2/config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/config/models/coe_deepseekv2/config.json -------------------------------------------------------------------------------- /config/models/coe_deepseekv2/configuration_coe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/config/models/coe_deepseekv2/configuration_coe.py -------------------------------------------------------------------------------- /config/models/coe_deepseekv2/generation_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/config/models/coe_deepseekv2/generation_config.json -------------------------------------------------------------------------------- /config/models/coe_deepseekv2/modeling_coe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/config/models/coe_deepseekv2/modeling_coe.py -------------------------------------------------------------------------------- /config/models/coe_deepseekv2/special_tokens_map.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/config/models/coe_deepseekv2/special_tokens_map.json -------------------------------------------------------------------------------- /config/models/coe_deepseekv2/tokenizer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/config/models/coe_deepseekv2/tokenizer.json -------------------------------------------------------------------------------- /config/models/coe_deepseekv2/tokenizer_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/config/models/coe_deepseekv2/tokenizer_config.json -------------------------------------------------------------------------------- /config/train.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/config/train.yaml -------------------------------------------------------------------------------- /data/gsm8k/test.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/data/gsm8k/test.parquet -------------------------------------------------------------------------------- /data/gsm8k/train.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/data/gsm8k/train.parquet -------------------------------------------------------------------------------- /eval/eval.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/eval/eval.sh -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/main.py -------------------------------------------------------------------------------- /old/config/models/coe-tiny-hf-v2/config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/models/coe-tiny-hf-v2/config.json -------------------------------------------------------------------------------- /old/config/models/coe-tiny-hf-v2/configuration_comoe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/models/coe-tiny-hf-v2/configuration_comoe.py -------------------------------------------------------------------------------- /old/config/models/coe-tiny-hf-v2/generation_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/models/coe-tiny-hf-v2/generation_config.json -------------------------------------------------------------------------------- /old/config/models/coe-tiny-hf-v2/modeling_comoe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/models/coe-tiny-hf-v2/modeling_comoe.py -------------------------------------------------------------------------------- /old/config/models/coe-tiny-hf-v2/modeling_comoe.py.old: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/models/coe-tiny-hf-v2/modeling_comoe.py.old -------------------------------------------------------------------------------- /old/config/models/coe-tiny-hf-v2/tokenizer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/models/coe-tiny-hf-v2/tokenizer.json -------------------------------------------------------------------------------- /old/config/models/coe-tiny-hf-v2/tokenizer_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/models/coe-tiny-hf-v2/tokenizer_config.json -------------------------------------------------------------------------------- /old/config/models/dsmoe/config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/models/dsmoe/config.json -------------------------------------------------------------------------------- /old/config/models/dsmoe/configuration_deepseek.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/models/dsmoe/configuration_deepseek.py -------------------------------------------------------------------------------- /old/config/models/dsmoe/modeling_deepseek.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/models/dsmoe/modeling_deepseek.py -------------------------------------------------------------------------------- /old/config/models/dsmoe/special_tokens_map.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/models/dsmoe/special_tokens_map.json -------------------------------------------------------------------------------- /old/config/models/dsmoe/tokenizer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/models/dsmoe/tokenizer.json -------------------------------------------------------------------------------- /old/config/models/dsmoe/tokenizer_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/models/dsmoe/tokenizer_config.json -------------------------------------------------------------------------------- /old/config/models/dsv2/config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/models/dsv2/config.json -------------------------------------------------------------------------------- /old/config/models/dsv2/configuration_deepseek.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/models/dsv2/configuration_deepseek.py -------------------------------------------------------------------------------- /old/config/models/dsv2/generation_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/models/dsv2/generation_config.json -------------------------------------------------------------------------------- /old/config/models/dsv2/modeling_deepseek.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/models/dsv2/modeling_deepseek.py -------------------------------------------------------------------------------- /old/config/models/dsv2/new.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/models/dsv2/new.txt -------------------------------------------------------------------------------- /old/config/models/dsv2/special_tokens_map.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/models/dsv2/special_tokens_map.json -------------------------------------------------------------------------------- /old/config/models/dsv2/tokenizer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/models/dsv2/tokenizer.json -------------------------------------------------------------------------------- /old/config/models/dsv2/tokenizer_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/models/dsv2/tokenizer_config.json -------------------------------------------------------------------------------- /old/config/models/olmoe/config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/models/olmoe/config.json -------------------------------------------------------------------------------- /old/config/models/olmoe/configuration_olmoe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/models/olmoe/configuration_olmoe.py -------------------------------------------------------------------------------- /old/config/models/olmoe/modeling_olmoe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/models/olmoe/modeling_olmoe.py -------------------------------------------------------------------------------- /old/config/models/olmoe/special_tokens_map.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/models/olmoe/special_tokens_map.json -------------------------------------------------------------------------------- /old/config/models/olmoe/tokenizer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/models/olmoe/tokenizer.json -------------------------------------------------------------------------------- /old/config/models/olmoe/tokenizer_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/models/olmoe/tokenizer_config.json -------------------------------------------------------------------------------- /old/config/models/olmoe_coe/config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/models/olmoe_coe/config.json -------------------------------------------------------------------------------- /old/config/models/olmoe_coe/configuration_olmoe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/models/olmoe_coe/configuration_olmoe.py -------------------------------------------------------------------------------- /old/config/models/olmoe_coe/modeling_olmoe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/models/olmoe_coe/modeling_olmoe.py -------------------------------------------------------------------------------- /old/config/models/olmoe_coe/special_tokens_map.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/models/olmoe_coe/special_tokens_map.json -------------------------------------------------------------------------------- /old/config/models/olmoe_coe/tokenizer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/models/olmoe_coe/tokenizer.json -------------------------------------------------------------------------------- /old/config/models/olmoe_coe/tokenizer_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/models/olmoe_coe/tokenizer_config.json -------------------------------------------------------------------------------- /old/config/models/olmoe_coe_backup/config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/models/olmoe_coe_backup/config.json -------------------------------------------------------------------------------- /old/config/models/olmoe_coe_backup/configuration_olmoe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/models/olmoe_coe_backup/configuration_olmoe.py -------------------------------------------------------------------------------- /old/config/models/olmoe_coe_backup/modeling_olmoe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/models/olmoe_coe_backup/modeling_olmoe.py -------------------------------------------------------------------------------- /old/config/models/olmoe_coe_backup/special_tokens_map.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/models/olmoe_coe_backup/special_tokens_map.json -------------------------------------------------------------------------------- /old/config/models/olmoe_coe_backup/tokenizer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/models/olmoe_coe_backup/tokenizer.json -------------------------------------------------------------------------------- /old/config/models/olmoe_coe_backup/tokenizer_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/models/olmoe_coe_backup/tokenizer_config.json -------------------------------------------------------------------------------- /old/config/models/olmoe_coe_v2/config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/models/olmoe_coe_v2/config.json -------------------------------------------------------------------------------- /old/config/models/olmoe_coe_v2/configuration_olmoe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/models/olmoe_coe_v2/configuration_olmoe.py -------------------------------------------------------------------------------- /old/config/models/olmoe_coe_v2/modeling_olmoe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/models/olmoe_coe_v2/modeling_olmoe.py -------------------------------------------------------------------------------- /old/config/models/olmoe_coe_v2/special_tokens_map.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/models/olmoe_coe_v2/special_tokens_map.json -------------------------------------------------------------------------------- /old/config/models/olmoe_coe_v2/tokenizer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/models/olmoe_coe_v2/tokenizer.json -------------------------------------------------------------------------------- /old/config/models/olmoe_coe_v2/tokenizer_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/models/olmoe_coe_v2/tokenizer_config.json -------------------------------------------------------------------------------- /old/config/models/pythia-160m/config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/models/pythia-160m/config.json -------------------------------------------------------------------------------- /old/config/models/pythia-160m/configuration_gpt_neox_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/models/pythia-160m/configuration_gpt_neox_moe.py -------------------------------------------------------------------------------- /old/config/models/pythia-160m/generation_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/models/pythia-160m/generation_config.json -------------------------------------------------------------------------------- /old/config/models/pythia-160m/modeling_gpt_neox_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/models/pythia-160m/modeling_gpt_neox_moe.py -------------------------------------------------------------------------------- /old/config/models/pythia-160m/special_tokens_map.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/models/pythia-160m/special_tokens_map.json -------------------------------------------------------------------------------- /old/config/models/pythia-160m/tokenizer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/models/pythia-160m/tokenizer.json -------------------------------------------------------------------------------- /old/config/models/pythia-160m/tokenizer_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/models/pythia-160m/tokenizer_config.json -------------------------------------------------------------------------------- /old/config/models/pythia-dsmoe-160m/config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/models/pythia-dsmoe-160m/config.json -------------------------------------------------------------------------------- /old/config/models/pythia-dsmoe-160m/configuration_gpt_neox_dsmoe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/models/pythia-dsmoe-160m/configuration_gpt_neox_dsmoe.py -------------------------------------------------------------------------------- /old/config/models/pythia-dsmoe-160m/generation_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/models/pythia-dsmoe-160m/generation_config.json -------------------------------------------------------------------------------- /old/config/models/pythia-dsmoe-160m/modeling_gpt_neox_dsmoe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/models/pythia-dsmoe-160m/modeling_gpt_neox_dsmoe.py -------------------------------------------------------------------------------- /old/config/models/pythia-dsmoe-160m/special_tokens_map.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/models/pythia-dsmoe-160m/special_tokens_map.json -------------------------------------------------------------------------------- /old/config/models/pythia-dsmoe-160m/tokenizer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/models/pythia-dsmoe-160m/tokenizer.json -------------------------------------------------------------------------------- /old/config/models/pythia-dsmoe-160m/tokenizer_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/models/pythia-dsmoe-160m/tokenizer_config.json -------------------------------------------------------------------------------- /old/config/models/qwen-moe/added_tokens.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/models/qwen-moe/added_tokens.json -------------------------------------------------------------------------------- /old/config/models/qwen-moe/config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/models/qwen-moe/config.json -------------------------------------------------------------------------------- /old/config/models/qwen-moe/configuration_qwen2_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/models/qwen-moe/configuration_qwen2_moe.py -------------------------------------------------------------------------------- /old/config/models/qwen-moe/merges.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/models/qwen-moe/merges.txt -------------------------------------------------------------------------------- /old/config/models/qwen-moe/modeling_qwen2_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/models/qwen-moe/modeling_qwen2_moe.py -------------------------------------------------------------------------------- /old/config/models/qwen-moe/special_tokens_map.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/models/qwen-moe/special_tokens_map.json -------------------------------------------------------------------------------- /old/config/models/qwen-moe/tokenizer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/models/qwen-moe/tokenizer.json -------------------------------------------------------------------------------- /old/config/models/qwen-moe/tokenizer_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/models/qwen-moe/tokenizer_config.json -------------------------------------------------------------------------------- /old/config/models/qwen-moe/vocab.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/models/qwen-moe/vocab.json -------------------------------------------------------------------------------- /old/config/models/train_olmoe.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/models/train_olmoe.sh -------------------------------------------------------------------------------- /old/config/training/coe-tiny-hf-v2/base.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/training/coe-tiny-hf-v2/base.yaml -------------------------------------------------------------------------------- /old/config/training/coe-tiny-hf-v2/math.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/training/coe-tiny-hf-v2/math.yaml -------------------------------------------------------------------------------- /old/config/training/olmoe_coe/g0.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/training/olmoe_coe/g0.sh -------------------------------------------------------------------------------- /old/config/training/olmoe_coe/g1.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/training/olmoe_coe/g1.sh -------------------------------------------------------------------------------- /old/config/training/olmoe_coe/g2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/training/olmoe_coe/g2.sh -------------------------------------------------------------------------------- /old/config/training/olmoe_coe/g3.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/training/olmoe_coe/g3.sh -------------------------------------------------------------------------------- /old/config/training/pythia-14m/math.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/training/pythia-14m/math.yaml -------------------------------------------------------------------------------- /old/config/training/pythia-14m/or.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/config/training/pythia-14m/or.yaml -------------------------------------------------------------------------------- /old/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/old/main.py -------------------------------------------------------------------------------- /report.MD: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/report.MD -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/requirements.txt -------------------------------------------------------------------------------- /runs/_run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/runs/_run.sh -------------------------------------------------------------------------------- /runs/run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/runs/run.sh -------------------------------------------------------------------------------- /runs/run_latest.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/runs/run_latest.sh -------------------------------------------------------------------------------- /scripts/analyze_routing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/scripts/analyze_routing.py -------------------------------------------------------------------------------- /scripts/download_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/scripts/download_config.py -------------------------------------------------------------------------------- /scripts/download_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/scripts/download_dataset.py -------------------------------------------------------------------------------- /scripts/plot/data.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/scripts/plot/data.tsv -------------------------------------------------------------------------------- /scripts/plot/data3.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/scripts/plot/data3.tsv -------------------------------------------------------------------------------- /scripts/plot/data4.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/scripts/plot/data4.tsv -------------------------------------------------------------------------------- /scripts/plot/data5.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/scripts/plot/data5.tsv -------------------------------------------------------------------------------- /scripts/plot/data6.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/scripts/plot/data6.tsv -------------------------------------------------------------------------------- /scripts/plot/main.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/scripts/plot/main.png -------------------------------------------------------------------------------- /scripts/plot/plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/scripts/plot/plot.png -------------------------------------------------------------------------------- /scripts/plot/plot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/scripts/plot/plot.py -------------------------------------------------------------------------------- /scripts/plot/plot_abl.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/scripts/plot/plot_abl.png -------------------------------------------------------------------------------- /scripts/plot/plot_abl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/scripts/plot/plot_abl.py -------------------------------------------------------------------------------- /scripts/plot/plot_dense.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/scripts/plot/plot_dense.png -------------------------------------------------------------------------------- /scripts/plot/plot_dense.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/scripts/plot/plot_dense.py -------------------------------------------------------------------------------- /scripts/plot/plot_eff.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/scripts/plot/plot_eff.png -------------------------------------------------------------------------------- /scripts/plot/plot_eff.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/scripts/plot/plot_eff.py -------------------------------------------------------------------------------- /scripts/plot/plot_lyr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/scripts/plot/plot_lyr.png -------------------------------------------------------------------------------- /scripts/plot/plot_lyr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/scripts/plot/plot_lyr.py -------------------------------------------------------------------------------- /scripts/plot/plot_scal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/scripts/plot/plot_scal.png -------------------------------------------------------------------------------- /scripts/plot/plot_scal.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/scripts/plot/plot_scal.py -------------------------------------------------------------------------------- /scripts/plot/plot_scal.py.old: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/scripts/plot/plot_scal.py.old -------------------------------------------------------------------------------- /scripts/setup.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZihanWang314/CoE/HEAD/scripts/setup.sh --------------------------------------------------------------------------------