├── .gitignore ├── LICENSE ├── README.md ├── configs ├── OLMoE-1B-7B-0924.yml └── ablations │ ├── olmo-1b-newhp-newds-cx5-datafix.yml │ ├── olmo-1b-newhp-newds-cx5-flan.yml │ ├── olmo-1b-newhp-newds-cx5-reddit.yml │ ├── olmo-1b-newhp-newds-cx5.yml │ ├── olmo-1b-newhp-newds-s3.yml │ ├── olmo-1b-newhp-newds.yml │ ├── olmo-1b-newhp-oldds-cx5.yml │ ├── olmo-1b-newhp-oldds-s3.yml │ ├── olmo-1b-newhp-oldds.yml │ ├── olmoe-8x1b-newhp-newds-cx5-fine-shared-s3.yml │ ├── olmoe-8x1b-newhp-newds-cx5-fine-shared.yml │ ├── olmoe-8x1b-newhp-newds-cx5-fine.yml │ ├── olmoe-8x1b-newhp-newds-cx5-fine05.yml │ ├── olmoe-8x1b-newhp-newds-cx5-fine1-datafix.yml │ ├── olmoe-8x1b-newhp-newds-cx5-fine1-docmask-8k.yml │ ├── olmoe-8x1b-newhp-newds-cx5-fine1-docmask.yml │ ├── olmoe-8x1b-newhp-newds-cx5-fine1-newtok.yml │ ├── olmoe-8x1b-newhp-newds-cx5-fine1-normreorder.yml │ ├── olmoe-8x1b-newhp-newds-cx5-fine1.yml │ ├── olmoe-8x1b-newhp-newds-cx5-k2-fine-s3.yml │ ├── olmoe-8x1b-newhp-newds-cx5-k2-fine.yml │ ├── olmoe-8x1b-newhp-newds-cx5-k2.yml │ ├── olmoe-8x1b-newhp-newds-cx5.yml │ ├── olmoe-8x1b-newhp-newds-final-anneal.yml │ ├── olmoe-8x1b-newhp-newds-final-densecomp.yml │ ├── olmoe-8x1b-newhp-newds-final-double-alt.yml │ ├── olmoe-8x1b-newhp-newds-final-double.yml │ ├── olmoe-8x1b-newhp-newds-final-s3.yml │ ├── olmoe-8x1b-newhp-newds-final-v2.yml │ ├── olmoe-8x1b-newhp-newds-final.yml │ ├── olmoe-8x1b-newhp-newds-k2qk.yml │ ├── olmoe-8x1b-newhp-newds-s3-cx5.yml │ ├── olmoe-8x1b-newhp-newds-s3.yml │ ├── olmoe-8x1b-newhp-newds.yml │ ├── olmoe-8x1b-newhp-oldds.yml │ ├── olmoe-8x2b-newhp-newds-final.yml │ ├── olmoe-8x7b-A7B.yml │ ├── olmoe-8x7b.yml │ ├── olmoe17-16x1b-fullshard-swiglu-wrapb-s1k1.yml │ ├── olmoe17-8x1b-final-decemb.yml │ ├── olmoe17-8x1b-final-decln.yml │ ├── olmoe17-8x1b-final-eps-fine.yml │ ├── olmoe17-8x1b-final-eps-noqk.yml │ ├── olmoe17-8x1b-final-eps.yml │ ├── olmoe17-8x1b-final-fine.yml │ ├── olmoe17-8x1b-final-nodecln.yml │ ├── olmoe17-8x1b-final-normdc.yml │ ├── olmoe17-8x1b-final-weka.yaml │ ├── olmoe17-8x1b-final.yml │ ├── olmoe17-8x1b-fullshard-swiglu-wrapb-k2-qknorm-zloss.yml │ └── olmoe17-8x7b-final.yml ├── logs ├── olmoe-dpo-logs.txt └── olmoe-sft-logs.txt ├── scripts ├── adapteval.sh ├── batchjob.sh ├── eval_openlm_ckpt.py ├── humaneval.yaml ├── llm1b.sh ├── make_table.py ├── megatron.sh ├── megatron_dense_46m_8gpu.sh ├── megatron_dmoe_46m_8gpu.sh ├── olmoe-gantry.sh ├── olmoe_visuals.ipynb ├── plot_routing_analysis.ipynb ├── plot_routing_analysis_v2.ipynb ├── plot_routing_analysis_v2_cross_layer.ipynb ├── plot_routing_analysis_v2_top1.ipynb ├── routing_mixtral_v2.jpg ├── routing_olmoe_v2.jpg ├── routing_output.zip ├── routing_output │ ├── mistral │ │ ├── eid2token │ │ │ ├── arxiv.pkl │ │ │ ├── book.pkl │ │ │ ├── c4.pkl │ │ │ ├── github.pkl │ │ │ └── wikipedia.pkl │ │ ├── expert_counts │ │ │ ├── arxiv.pkl │ │ │ ├── book.pkl │ │ │ ├── c4.pkl │ │ │ ├── github.pkl │ │ │ └── wikipedia.pkl │ │ ├── expert_counts_crosslayer │ │ │ ├── arxiv.pkl │ │ │ ├── book.pkl │ │ │ ├── c4.pkl │ │ │ ├── github.pkl │ │ │ └── wikipedia.pkl │ │ ├── expert_counts_crosslayer_top1 │ │ │ ├── arxiv.pkl │ │ │ ├── book.pkl │ │ │ ├── c4.pkl │ │ │ ├── github.pkl │ │ │ └── wikipedia.pkl │ │ └── expert_counts_top1 │ │ │ ├── arxiv.pkl │ │ │ ├── book.pkl │ │ │ ├── c4.pkl │ │ │ ├── github.pkl │ │ │ └── wikipedia.pkl │ ├── olmoe-dpo │ │ └── expert_counts │ │ │ └── tulu.pkl │ ├── olmoe-sft │ │ └── expert_counts │ │ │ └── tulu.pkl │ ├── olmoe │ │ ├── eid2token │ │ │ ├── arxiv.pkl │ │ │ ├── book.pkl │ │ │ ├── c4.pkl │ │ │ ├── github.pkl │ │ │ └── wikipedia.pkl │ │ ├── expert_counts │ │ │ ├── arxiv.pkl │ │ │ ├── book.pkl │ │ │ ├── c4.pkl │ │ │ ├── github.pkl │ │ │ ├── tulu.pkl │ │ │ └── wikipedia.pkl │ │ ├── expert_counts_crosslayer │ │ │ ├── arxiv.pkl │ │ │ ├── book.pkl │ │ │ ├── c4.pkl │ │ │ ├── github.pkl │ │ │ └── wikipedia.pkl │ │ ├── expert_counts_crosslayer_top1 │ │ │ ├── arxiv.pkl │ │ │ ├── book.pkl │ │ │ ├── c4.pkl │ │ │ ├── github.pkl │ │ │ └── wikipedia.pkl │ │ └── expert_counts_top1 │ │ │ ├── arxiv.pkl │ │ │ ├── book.pkl │ │ │ ├── c4.pkl │ │ │ ├── github.pkl │ │ │ └── wikipedia.pkl │ ├── routing.jpg │ ├── routing_prob_distribution.png │ └── text │ │ ├── arxiv_texts.txt │ │ ├── b3g_texts.txt │ │ ├── c4_texts.txt │ │ ├── github_oss_with_stack_texts.txt │ │ └── wikipedia_texts.txt ├── run_dclm_evals_heavy.sh ├── run_dclm_evals_heavy_olmo.sh ├── run_dclm_evals_humaneval.sh ├── run_moe_analysis.py ├── run_routing_analysis.py ├── sparsify_ckpt_unsharded.py └── wekatransfer │ ├── s3weka.sh │ ├── s3weka.yml │ └── wekas3.yaml └── visuals ├── emojis ├── olmoe_checkmark.png ├── olmoe_checkmark_yellow.png ├── olmoe_cross.png └── olmoe_warning.png ├── figures ├── adamweps.pdf ├── dataset.pdf ├── datasetredditflan.pdf ├── embdecay.pdf ├── expertchoice.pdf ├── granularity.pdf ├── init.pdf ├── layer_0_heatmap.pdf ├── layer_15_heatmap.pdf ├── layer_7_heatmap.pdf ├── layersharing.pdf ├── lbl.pdf ├── lblprecision.pdf ├── lbltoks.pdf ├── ln.pdf ├── lndecay.pdf ├── lngradnorm.pdf ├── loss.pdf ├── moevsdense.pdf ├── noise.pdf ├── olmoe.pdf ├── overview.jpg ├── overview.pdf ├── qknorm.pdf ├── routing_mixtral.pdf ├── routing_olmoe.pdf ├── routing_prob_distribution_mixtral.pdf ├── routing_prob_distribution_olmoe.pdf ├── shared.pdf ├── token_specialization_top1_olmoe.pdf ├── token_specialization_top2_mixtral.pdf ├── token_specialization_top8_olmoe.pdf ├── top18_changes_over_checkpoints.pdf ├── trainingevalflops.pdf ├── trainingevaltokens.pdf ├── upcycle.pdf └── zloss.pdf ├── logos ├── OLMoE_logo.png ├── OLMoE_logo.svg ├── OLMoE_logo_alt1.png ├── OLMoE_logo_alt1.svg ├── OLMoE_logo_alt2.png ├── OLMoE_logo_alt2.svg ├── OLMoE_logo_alt3.png └── OLMoE_logo_alt3.svg ├── poster_iclr2025.pdf ├── poster_iclr2025.pptx ├── poster_neurips2024.pdf └── twitterblog_images ├── domainspec.png ├── experiments.png ├── logo_transparent.png ├── logo_twitter.png ├── overview_base.png ├── overview_left.png ├── overview_long.png ├── overview_right.png ├── perf_adapt.png ├── perf_during.png ├── perf_pretr.png ├── perf_pretr_adapt.png └── tokenidspec.png /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/README.md -------------------------------------------------------------------------------- /configs/OLMoE-1B-7B-0924.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/configs/OLMoE-1B-7B-0924.yml -------------------------------------------------------------------------------- /configs/ablations/olmo-1b-newhp-newds-cx5-datafix.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/configs/ablations/olmo-1b-newhp-newds-cx5-datafix.yml -------------------------------------------------------------------------------- /configs/ablations/olmo-1b-newhp-newds-cx5-flan.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/configs/ablations/olmo-1b-newhp-newds-cx5-flan.yml -------------------------------------------------------------------------------- /configs/ablations/olmo-1b-newhp-newds-cx5-reddit.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/configs/ablations/olmo-1b-newhp-newds-cx5-reddit.yml -------------------------------------------------------------------------------- /configs/ablations/olmo-1b-newhp-newds-cx5.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/configs/ablations/olmo-1b-newhp-newds-cx5.yml -------------------------------------------------------------------------------- /configs/ablations/olmo-1b-newhp-newds-s3.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/configs/ablations/olmo-1b-newhp-newds-s3.yml -------------------------------------------------------------------------------- /configs/ablations/olmo-1b-newhp-newds.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/configs/ablations/olmo-1b-newhp-newds.yml -------------------------------------------------------------------------------- /configs/ablations/olmo-1b-newhp-oldds-cx5.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/configs/ablations/olmo-1b-newhp-oldds-cx5.yml -------------------------------------------------------------------------------- /configs/ablations/olmo-1b-newhp-oldds-s3.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/configs/ablations/olmo-1b-newhp-oldds-s3.yml -------------------------------------------------------------------------------- /configs/ablations/olmo-1b-newhp-oldds.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/configs/ablations/olmo-1b-newhp-oldds.yml -------------------------------------------------------------------------------- /configs/ablations/olmoe-8x1b-newhp-newds-cx5-fine-shared-s3.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/configs/ablations/olmoe-8x1b-newhp-newds-cx5-fine-shared-s3.yml -------------------------------------------------------------------------------- /configs/ablations/olmoe-8x1b-newhp-newds-cx5-fine-shared.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/configs/ablations/olmoe-8x1b-newhp-newds-cx5-fine-shared.yml -------------------------------------------------------------------------------- /configs/ablations/olmoe-8x1b-newhp-newds-cx5-fine.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/configs/ablations/olmoe-8x1b-newhp-newds-cx5-fine.yml -------------------------------------------------------------------------------- /configs/ablations/olmoe-8x1b-newhp-newds-cx5-fine05.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/configs/ablations/olmoe-8x1b-newhp-newds-cx5-fine05.yml -------------------------------------------------------------------------------- /configs/ablations/olmoe-8x1b-newhp-newds-cx5-fine1-datafix.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/configs/ablations/olmoe-8x1b-newhp-newds-cx5-fine1-datafix.yml -------------------------------------------------------------------------------- /configs/ablations/olmoe-8x1b-newhp-newds-cx5-fine1-docmask-8k.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/configs/ablations/olmoe-8x1b-newhp-newds-cx5-fine1-docmask-8k.yml -------------------------------------------------------------------------------- /configs/ablations/olmoe-8x1b-newhp-newds-cx5-fine1-docmask.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/configs/ablations/olmoe-8x1b-newhp-newds-cx5-fine1-docmask.yml -------------------------------------------------------------------------------- /configs/ablations/olmoe-8x1b-newhp-newds-cx5-fine1-newtok.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/configs/ablations/olmoe-8x1b-newhp-newds-cx5-fine1-newtok.yml -------------------------------------------------------------------------------- /configs/ablations/olmoe-8x1b-newhp-newds-cx5-fine1-normreorder.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/configs/ablations/olmoe-8x1b-newhp-newds-cx5-fine1-normreorder.yml -------------------------------------------------------------------------------- /configs/ablations/olmoe-8x1b-newhp-newds-cx5-fine1.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/configs/ablations/olmoe-8x1b-newhp-newds-cx5-fine1.yml -------------------------------------------------------------------------------- /configs/ablations/olmoe-8x1b-newhp-newds-cx5-k2-fine-s3.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/configs/ablations/olmoe-8x1b-newhp-newds-cx5-k2-fine-s3.yml -------------------------------------------------------------------------------- /configs/ablations/olmoe-8x1b-newhp-newds-cx5-k2-fine.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/configs/ablations/olmoe-8x1b-newhp-newds-cx5-k2-fine.yml -------------------------------------------------------------------------------- /configs/ablations/olmoe-8x1b-newhp-newds-cx5-k2.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/configs/ablations/olmoe-8x1b-newhp-newds-cx5-k2.yml -------------------------------------------------------------------------------- /configs/ablations/olmoe-8x1b-newhp-newds-cx5.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/configs/ablations/olmoe-8x1b-newhp-newds-cx5.yml -------------------------------------------------------------------------------- /configs/ablations/olmoe-8x1b-newhp-newds-final-anneal.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/configs/ablations/olmoe-8x1b-newhp-newds-final-anneal.yml -------------------------------------------------------------------------------- /configs/ablations/olmoe-8x1b-newhp-newds-final-densecomp.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/configs/ablations/olmoe-8x1b-newhp-newds-final-densecomp.yml -------------------------------------------------------------------------------- /configs/ablations/olmoe-8x1b-newhp-newds-final-double-alt.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/configs/ablations/olmoe-8x1b-newhp-newds-final-double-alt.yml -------------------------------------------------------------------------------- /configs/ablations/olmoe-8x1b-newhp-newds-final-double.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/configs/ablations/olmoe-8x1b-newhp-newds-final-double.yml -------------------------------------------------------------------------------- /configs/ablations/olmoe-8x1b-newhp-newds-final-s3.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/configs/ablations/olmoe-8x1b-newhp-newds-final-s3.yml -------------------------------------------------------------------------------- /configs/ablations/olmoe-8x1b-newhp-newds-final-v2.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/configs/ablations/olmoe-8x1b-newhp-newds-final-v2.yml -------------------------------------------------------------------------------- /configs/ablations/olmoe-8x1b-newhp-newds-final.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/configs/ablations/olmoe-8x1b-newhp-newds-final.yml -------------------------------------------------------------------------------- /configs/ablations/olmoe-8x1b-newhp-newds-k2qk.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/configs/ablations/olmoe-8x1b-newhp-newds-k2qk.yml -------------------------------------------------------------------------------- /configs/ablations/olmoe-8x1b-newhp-newds-s3-cx5.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/configs/ablations/olmoe-8x1b-newhp-newds-s3-cx5.yml -------------------------------------------------------------------------------- /configs/ablations/olmoe-8x1b-newhp-newds-s3.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/configs/ablations/olmoe-8x1b-newhp-newds-s3.yml -------------------------------------------------------------------------------- /configs/ablations/olmoe-8x1b-newhp-newds.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/configs/ablations/olmoe-8x1b-newhp-newds.yml -------------------------------------------------------------------------------- /configs/ablations/olmoe-8x1b-newhp-oldds.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/configs/ablations/olmoe-8x1b-newhp-oldds.yml -------------------------------------------------------------------------------- /configs/ablations/olmoe-8x2b-newhp-newds-final.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/configs/ablations/olmoe-8x2b-newhp-newds-final.yml -------------------------------------------------------------------------------- /configs/ablations/olmoe-8x7b-A7B.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/configs/ablations/olmoe-8x7b-A7B.yml -------------------------------------------------------------------------------- /configs/ablations/olmoe-8x7b.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/configs/ablations/olmoe-8x7b.yml -------------------------------------------------------------------------------- /configs/ablations/olmoe17-16x1b-fullshard-swiglu-wrapb-s1k1.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/configs/ablations/olmoe17-16x1b-fullshard-swiglu-wrapb-s1k1.yml -------------------------------------------------------------------------------- /configs/ablations/olmoe17-8x1b-final-decemb.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/configs/ablations/olmoe17-8x1b-final-decemb.yml -------------------------------------------------------------------------------- /configs/ablations/olmoe17-8x1b-final-decln.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/configs/ablations/olmoe17-8x1b-final-decln.yml -------------------------------------------------------------------------------- /configs/ablations/olmoe17-8x1b-final-eps-fine.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/configs/ablations/olmoe17-8x1b-final-eps-fine.yml -------------------------------------------------------------------------------- /configs/ablations/olmoe17-8x1b-final-eps-noqk.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/configs/ablations/olmoe17-8x1b-final-eps-noqk.yml -------------------------------------------------------------------------------- /configs/ablations/olmoe17-8x1b-final-eps.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/configs/ablations/olmoe17-8x1b-final-eps.yml -------------------------------------------------------------------------------- /configs/ablations/olmoe17-8x1b-final-fine.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/configs/ablations/olmoe17-8x1b-final-fine.yml -------------------------------------------------------------------------------- /configs/ablations/olmoe17-8x1b-final-nodecln.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/configs/ablations/olmoe17-8x1b-final-nodecln.yml -------------------------------------------------------------------------------- /configs/ablations/olmoe17-8x1b-final-normdc.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/configs/ablations/olmoe17-8x1b-final-normdc.yml -------------------------------------------------------------------------------- /configs/ablations/olmoe17-8x1b-final-weka.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/configs/ablations/olmoe17-8x1b-final-weka.yaml -------------------------------------------------------------------------------- /configs/ablations/olmoe17-8x1b-final.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/configs/ablations/olmoe17-8x1b-final.yml -------------------------------------------------------------------------------- /configs/ablations/olmoe17-8x1b-fullshard-swiglu-wrapb-k2-qknorm-zloss.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/configs/ablations/olmoe17-8x1b-fullshard-swiglu-wrapb-k2-qknorm-zloss.yml -------------------------------------------------------------------------------- /configs/ablations/olmoe17-8x7b-final.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/configs/ablations/olmoe17-8x7b-final.yml -------------------------------------------------------------------------------- /logs/olmoe-dpo-logs.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/logs/olmoe-dpo-logs.txt -------------------------------------------------------------------------------- /logs/olmoe-sft-logs.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/logs/olmoe-sft-logs.txt -------------------------------------------------------------------------------- /scripts/adapteval.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/adapteval.sh -------------------------------------------------------------------------------- /scripts/batchjob.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/batchjob.sh -------------------------------------------------------------------------------- /scripts/eval_openlm_ckpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/eval_openlm_ckpt.py -------------------------------------------------------------------------------- /scripts/humaneval.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/humaneval.yaml -------------------------------------------------------------------------------- /scripts/llm1b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/llm1b.sh -------------------------------------------------------------------------------- /scripts/make_table.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/make_table.py -------------------------------------------------------------------------------- /scripts/megatron.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/megatron.sh -------------------------------------------------------------------------------- /scripts/megatron_dense_46m_8gpu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/megatron_dense_46m_8gpu.sh -------------------------------------------------------------------------------- /scripts/megatron_dmoe_46m_8gpu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/megatron_dmoe_46m_8gpu.sh -------------------------------------------------------------------------------- /scripts/olmoe-gantry.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/olmoe-gantry.sh -------------------------------------------------------------------------------- /scripts/olmoe_visuals.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/olmoe_visuals.ipynb -------------------------------------------------------------------------------- /scripts/plot_routing_analysis.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/plot_routing_analysis.ipynb -------------------------------------------------------------------------------- /scripts/plot_routing_analysis_v2.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/plot_routing_analysis_v2.ipynb -------------------------------------------------------------------------------- /scripts/plot_routing_analysis_v2_cross_layer.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/plot_routing_analysis_v2_cross_layer.ipynb -------------------------------------------------------------------------------- /scripts/plot_routing_analysis_v2_top1.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/plot_routing_analysis_v2_top1.ipynb -------------------------------------------------------------------------------- /scripts/routing_mixtral_v2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/routing_mixtral_v2.jpg -------------------------------------------------------------------------------- /scripts/routing_olmoe_v2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/routing_olmoe_v2.jpg -------------------------------------------------------------------------------- /scripts/routing_output.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/routing_output.zip -------------------------------------------------------------------------------- /scripts/routing_output/mistral/eid2token/arxiv.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/routing_output/mistral/eid2token/arxiv.pkl -------------------------------------------------------------------------------- /scripts/routing_output/mistral/eid2token/book.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/routing_output/mistral/eid2token/book.pkl -------------------------------------------------------------------------------- /scripts/routing_output/mistral/eid2token/c4.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/routing_output/mistral/eid2token/c4.pkl -------------------------------------------------------------------------------- /scripts/routing_output/mistral/eid2token/github.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/routing_output/mistral/eid2token/github.pkl -------------------------------------------------------------------------------- /scripts/routing_output/mistral/eid2token/wikipedia.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/routing_output/mistral/eid2token/wikipedia.pkl -------------------------------------------------------------------------------- /scripts/routing_output/mistral/expert_counts/arxiv.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/routing_output/mistral/expert_counts/arxiv.pkl -------------------------------------------------------------------------------- /scripts/routing_output/mistral/expert_counts/book.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/routing_output/mistral/expert_counts/book.pkl -------------------------------------------------------------------------------- /scripts/routing_output/mistral/expert_counts/c4.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/routing_output/mistral/expert_counts/c4.pkl -------------------------------------------------------------------------------- /scripts/routing_output/mistral/expert_counts/github.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/routing_output/mistral/expert_counts/github.pkl -------------------------------------------------------------------------------- /scripts/routing_output/mistral/expert_counts/wikipedia.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/routing_output/mistral/expert_counts/wikipedia.pkl -------------------------------------------------------------------------------- /scripts/routing_output/mistral/expert_counts_crosslayer/arxiv.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/routing_output/mistral/expert_counts_crosslayer/arxiv.pkl -------------------------------------------------------------------------------- /scripts/routing_output/mistral/expert_counts_crosslayer/book.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/routing_output/mistral/expert_counts_crosslayer/book.pkl -------------------------------------------------------------------------------- /scripts/routing_output/mistral/expert_counts_crosslayer/c4.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/routing_output/mistral/expert_counts_crosslayer/c4.pkl -------------------------------------------------------------------------------- /scripts/routing_output/mistral/expert_counts_crosslayer/github.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/routing_output/mistral/expert_counts_crosslayer/github.pkl -------------------------------------------------------------------------------- /scripts/routing_output/mistral/expert_counts_crosslayer/wikipedia.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/routing_output/mistral/expert_counts_crosslayer/wikipedia.pkl -------------------------------------------------------------------------------- /scripts/routing_output/mistral/expert_counts_crosslayer_top1/arxiv.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/routing_output/mistral/expert_counts_crosslayer_top1/arxiv.pkl -------------------------------------------------------------------------------- /scripts/routing_output/mistral/expert_counts_crosslayer_top1/book.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/routing_output/mistral/expert_counts_crosslayer_top1/book.pkl -------------------------------------------------------------------------------- /scripts/routing_output/mistral/expert_counts_crosslayer_top1/c4.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/routing_output/mistral/expert_counts_crosslayer_top1/c4.pkl -------------------------------------------------------------------------------- /scripts/routing_output/mistral/expert_counts_crosslayer_top1/github.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/routing_output/mistral/expert_counts_crosslayer_top1/github.pkl -------------------------------------------------------------------------------- /scripts/routing_output/mistral/expert_counts_crosslayer_top1/wikipedia.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/routing_output/mistral/expert_counts_crosslayer_top1/wikipedia.pkl -------------------------------------------------------------------------------- /scripts/routing_output/mistral/expert_counts_top1/arxiv.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/routing_output/mistral/expert_counts_top1/arxiv.pkl -------------------------------------------------------------------------------- /scripts/routing_output/mistral/expert_counts_top1/book.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/routing_output/mistral/expert_counts_top1/book.pkl -------------------------------------------------------------------------------- /scripts/routing_output/mistral/expert_counts_top1/c4.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/routing_output/mistral/expert_counts_top1/c4.pkl -------------------------------------------------------------------------------- /scripts/routing_output/mistral/expert_counts_top1/github.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/routing_output/mistral/expert_counts_top1/github.pkl -------------------------------------------------------------------------------- /scripts/routing_output/mistral/expert_counts_top1/wikipedia.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/routing_output/mistral/expert_counts_top1/wikipedia.pkl -------------------------------------------------------------------------------- /scripts/routing_output/olmoe-dpo/expert_counts/tulu.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/routing_output/olmoe-dpo/expert_counts/tulu.pkl -------------------------------------------------------------------------------- /scripts/routing_output/olmoe-sft/expert_counts/tulu.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/routing_output/olmoe-sft/expert_counts/tulu.pkl -------------------------------------------------------------------------------- /scripts/routing_output/olmoe/eid2token/arxiv.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/routing_output/olmoe/eid2token/arxiv.pkl -------------------------------------------------------------------------------- /scripts/routing_output/olmoe/eid2token/book.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/routing_output/olmoe/eid2token/book.pkl -------------------------------------------------------------------------------- /scripts/routing_output/olmoe/eid2token/c4.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/routing_output/olmoe/eid2token/c4.pkl -------------------------------------------------------------------------------- /scripts/routing_output/olmoe/eid2token/github.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/routing_output/olmoe/eid2token/github.pkl -------------------------------------------------------------------------------- /scripts/routing_output/olmoe/eid2token/wikipedia.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/routing_output/olmoe/eid2token/wikipedia.pkl -------------------------------------------------------------------------------- /scripts/routing_output/olmoe/expert_counts/arxiv.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/routing_output/olmoe/expert_counts/arxiv.pkl -------------------------------------------------------------------------------- /scripts/routing_output/olmoe/expert_counts/book.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/routing_output/olmoe/expert_counts/book.pkl -------------------------------------------------------------------------------- /scripts/routing_output/olmoe/expert_counts/c4.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/routing_output/olmoe/expert_counts/c4.pkl -------------------------------------------------------------------------------- /scripts/routing_output/olmoe/expert_counts/github.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/routing_output/olmoe/expert_counts/github.pkl -------------------------------------------------------------------------------- /scripts/routing_output/olmoe/expert_counts/tulu.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/routing_output/olmoe/expert_counts/tulu.pkl -------------------------------------------------------------------------------- /scripts/routing_output/olmoe/expert_counts/wikipedia.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/routing_output/olmoe/expert_counts/wikipedia.pkl -------------------------------------------------------------------------------- /scripts/routing_output/olmoe/expert_counts_crosslayer/arxiv.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/routing_output/olmoe/expert_counts_crosslayer/arxiv.pkl -------------------------------------------------------------------------------- /scripts/routing_output/olmoe/expert_counts_crosslayer/book.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/routing_output/olmoe/expert_counts_crosslayer/book.pkl -------------------------------------------------------------------------------- /scripts/routing_output/olmoe/expert_counts_crosslayer/c4.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/routing_output/olmoe/expert_counts_crosslayer/c4.pkl -------------------------------------------------------------------------------- /scripts/routing_output/olmoe/expert_counts_crosslayer/github.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/routing_output/olmoe/expert_counts_crosslayer/github.pkl -------------------------------------------------------------------------------- /scripts/routing_output/olmoe/expert_counts_crosslayer/wikipedia.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/routing_output/olmoe/expert_counts_crosslayer/wikipedia.pkl -------------------------------------------------------------------------------- /scripts/routing_output/olmoe/expert_counts_crosslayer_top1/arxiv.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/routing_output/olmoe/expert_counts_crosslayer_top1/arxiv.pkl -------------------------------------------------------------------------------- /scripts/routing_output/olmoe/expert_counts_crosslayer_top1/book.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/routing_output/olmoe/expert_counts_crosslayer_top1/book.pkl -------------------------------------------------------------------------------- /scripts/routing_output/olmoe/expert_counts_crosslayer_top1/c4.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/routing_output/olmoe/expert_counts_crosslayer_top1/c4.pkl -------------------------------------------------------------------------------- /scripts/routing_output/olmoe/expert_counts_crosslayer_top1/github.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/routing_output/olmoe/expert_counts_crosslayer_top1/github.pkl -------------------------------------------------------------------------------- /scripts/routing_output/olmoe/expert_counts_crosslayer_top1/wikipedia.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/routing_output/olmoe/expert_counts_crosslayer_top1/wikipedia.pkl -------------------------------------------------------------------------------- /scripts/routing_output/olmoe/expert_counts_top1/arxiv.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/routing_output/olmoe/expert_counts_top1/arxiv.pkl -------------------------------------------------------------------------------- /scripts/routing_output/olmoe/expert_counts_top1/book.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/routing_output/olmoe/expert_counts_top1/book.pkl -------------------------------------------------------------------------------- /scripts/routing_output/olmoe/expert_counts_top1/c4.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/routing_output/olmoe/expert_counts_top1/c4.pkl -------------------------------------------------------------------------------- /scripts/routing_output/olmoe/expert_counts_top1/github.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/routing_output/olmoe/expert_counts_top1/github.pkl -------------------------------------------------------------------------------- /scripts/routing_output/olmoe/expert_counts_top1/wikipedia.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/routing_output/olmoe/expert_counts_top1/wikipedia.pkl -------------------------------------------------------------------------------- /scripts/routing_output/routing.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/routing_output/routing.jpg -------------------------------------------------------------------------------- /scripts/routing_output/routing_prob_distribution.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/routing_output/routing_prob_distribution.png -------------------------------------------------------------------------------- /scripts/routing_output/text/arxiv_texts.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/routing_output/text/arxiv_texts.txt -------------------------------------------------------------------------------- /scripts/routing_output/text/b3g_texts.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/routing_output/text/b3g_texts.txt -------------------------------------------------------------------------------- /scripts/routing_output/text/c4_texts.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/routing_output/text/c4_texts.txt -------------------------------------------------------------------------------- /scripts/routing_output/text/github_oss_with_stack_texts.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/routing_output/text/github_oss_with_stack_texts.txt -------------------------------------------------------------------------------- /scripts/routing_output/text/wikipedia_texts.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/routing_output/text/wikipedia_texts.txt -------------------------------------------------------------------------------- /scripts/run_dclm_evals_heavy.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/run_dclm_evals_heavy.sh -------------------------------------------------------------------------------- /scripts/run_dclm_evals_heavy_olmo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/run_dclm_evals_heavy_olmo.sh -------------------------------------------------------------------------------- /scripts/run_dclm_evals_humaneval.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/run_dclm_evals_humaneval.sh -------------------------------------------------------------------------------- /scripts/run_moe_analysis.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/run_moe_analysis.py -------------------------------------------------------------------------------- /scripts/run_routing_analysis.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/run_routing_analysis.py -------------------------------------------------------------------------------- /scripts/sparsify_ckpt_unsharded.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/sparsify_ckpt_unsharded.py -------------------------------------------------------------------------------- /scripts/wekatransfer/s3weka.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/wekatransfer/s3weka.sh -------------------------------------------------------------------------------- /scripts/wekatransfer/s3weka.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/wekatransfer/s3weka.yml -------------------------------------------------------------------------------- /scripts/wekatransfer/wekas3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/scripts/wekatransfer/wekas3.yaml -------------------------------------------------------------------------------- /visuals/emojis/olmoe_checkmark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/visuals/emojis/olmoe_checkmark.png -------------------------------------------------------------------------------- /visuals/emojis/olmoe_checkmark_yellow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/visuals/emojis/olmoe_checkmark_yellow.png -------------------------------------------------------------------------------- /visuals/emojis/olmoe_cross.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/visuals/emojis/olmoe_cross.png -------------------------------------------------------------------------------- /visuals/emojis/olmoe_warning.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/visuals/emojis/olmoe_warning.png -------------------------------------------------------------------------------- /visuals/figures/adamweps.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/visuals/figures/adamweps.pdf -------------------------------------------------------------------------------- /visuals/figures/dataset.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/visuals/figures/dataset.pdf -------------------------------------------------------------------------------- /visuals/figures/datasetredditflan.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/visuals/figures/datasetredditflan.pdf -------------------------------------------------------------------------------- /visuals/figures/embdecay.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/visuals/figures/embdecay.pdf -------------------------------------------------------------------------------- /visuals/figures/expertchoice.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/visuals/figures/expertchoice.pdf -------------------------------------------------------------------------------- /visuals/figures/granularity.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/visuals/figures/granularity.pdf -------------------------------------------------------------------------------- /visuals/figures/init.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/visuals/figures/init.pdf -------------------------------------------------------------------------------- /visuals/figures/layer_0_heatmap.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/visuals/figures/layer_0_heatmap.pdf -------------------------------------------------------------------------------- /visuals/figures/layer_15_heatmap.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/visuals/figures/layer_15_heatmap.pdf -------------------------------------------------------------------------------- /visuals/figures/layer_7_heatmap.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/visuals/figures/layer_7_heatmap.pdf -------------------------------------------------------------------------------- /visuals/figures/layersharing.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/visuals/figures/layersharing.pdf -------------------------------------------------------------------------------- /visuals/figures/lbl.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/visuals/figures/lbl.pdf -------------------------------------------------------------------------------- /visuals/figures/lblprecision.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/visuals/figures/lblprecision.pdf -------------------------------------------------------------------------------- /visuals/figures/lbltoks.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/visuals/figures/lbltoks.pdf -------------------------------------------------------------------------------- /visuals/figures/ln.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/visuals/figures/ln.pdf -------------------------------------------------------------------------------- /visuals/figures/lndecay.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/visuals/figures/lndecay.pdf -------------------------------------------------------------------------------- /visuals/figures/lngradnorm.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/visuals/figures/lngradnorm.pdf -------------------------------------------------------------------------------- /visuals/figures/loss.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/visuals/figures/loss.pdf -------------------------------------------------------------------------------- /visuals/figures/moevsdense.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/visuals/figures/moevsdense.pdf -------------------------------------------------------------------------------- /visuals/figures/noise.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/visuals/figures/noise.pdf -------------------------------------------------------------------------------- /visuals/figures/olmoe.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/visuals/figures/olmoe.pdf -------------------------------------------------------------------------------- /visuals/figures/overview.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/visuals/figures/overview.jpg -------------------------------------------------------------------------------- /visuals/figures/overview.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/visuals/figures/overview.pdf -------------------------------------------------------------------------------- /visuals/figures/qknorm.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/visuals/figures/qknorm.pdf -------------------------------------------------------------------------------- /visuals/figures/routing_mixtral.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/visuals/figures/routing_mixtral.pdf -------------------------------------------------------------------------------- /visuals/figures/routing_olmoe.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/visuals/figures/routing_olmoe.pdf -------------------------------------------------------------------------------- /visuals/figures/routing_prob_distribution_mixtral.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/visuals/figures/routing_prob_distribution_mixtral.pdf -------------------------------------------------------------------------------- /visuals/figures/routing_prob_distribution_olmoe.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/visuals/figures/routing_prob_distribution_olmoe.pdf -------------------------------------------------------------------------------- /visuals/figures/shared.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/visuals/figures/shared.pdf -------------------------------------------------------------------------------- /visuals/figures/token_specialization_top1_olmoe.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/visuals/figures/token_specialization_top1_olmoe.pdf -------------------------------------------------------------------------------- /visuals/figures/token_specialization_top2_mixtral.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/visuals/figures/token_specialization_top2_mixtral.pdf -------------------------------------------------------------------------------- /visuals/figures/token_specialization_top8_olmoe.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/visuals/figures/token_specialization_top8_olmoe.pdf -------------------------------------------------------------------------------- /visuals/figures/top18_changes_over_checkpoints.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/visuals/figures/top18_changes_over_checkpoints.pdf -------------------------------------------------------------------------------- /visuals/figures/trainingevalflops.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/visuals/figures/trainingevalflops.pdf -------------------------------------------------------------------------------- /visuals/figures/trainingevaltokens.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/visuals/figures/trainingevaltokens.pdf -------------------------------------------------------------------------------- /visuals/figures/upcycle.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/visuals/figures/upcycle.pdf -------------------------------------------------------------------------------- /visuals/figures/zloss.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/visuals/figures/zloss.pdf -------------------------------------------------------------------------------- /visuals/logos/OLMoE_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/visuals/logos/OLMoE_logo.png -------------------------------------------------------------------------------- /visuals/logos/OLMoE_logo.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/visuals/logos/OLMoE_logo.svg -------------------------------------------------------------------------------- /visuals/logos/OLMoE_logo_alt1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/visuals/logos/OLMoE_logo_alt1.png -------------------------------------------------------------------------------- /visuals/logos/OLMoE_logo_alt1.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/visuals/logos/OLMoE_logo_alt1.svg -------------------------------------------------------------------------------- /visuals/logos/OLMoE_logo_alt2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/visuals/logos/OLMoE_logo_alt2.png -------------------------------------------------------------------------------- /visuals/logos/OLMoE_logo_alt2.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/visuals/logos/OLMoE_logo_alt2.svg -------------------------------------------------------------------------------- /visuals/logos/OLMoE_logo_alt3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/visuals/logos/OLMoE_logo_alt3.png -------------------------------------------------------------------------------- /visuals/logos/OLMoE_logo_alt3.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/visuals/logos/OLMoE_logo_alt3.svg -------------------------------------------------------------------------------- /visuals/poster_iclr2025.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/visuals/poster_iclr2025.pdf -------------------------------------------------------------------------------- /visuals/poster_iclr2025.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/visuals/poster_iclr2025.pptx -------------------------------------------------------------------------------- /visuals/poster_neurips2024.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/visuals/poster_neurips2024.pdf -------------------------------------------------------------------------------- /visuals/twitterblog_images/domainspec.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/visuals/twitterblog_images/domainspec.png -------------------------------------------------------------------------------- /visuals/twitterblog_images/experiments.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/visuals/twitterblog_images/experiments.png -------------------------------------------------------------------------------- /visuals/twitterblog_images/logo_transparent.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/visuals/twitterblog_images/logo_transparent.png -------------------------------------------------------------------------------- /visuals/twitterblog_images/logo_twitter.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/visuals/twitterblog_images/logo_twitter.png -------------------------------------------------------------------------------- /visuals/twitterblog_images/overview_base.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/visuals/twitterblog_images/overview_base.png -------------------------------------------------------------------------------- /visuals/twitterblog_images/overview_left.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/visuals/twitterblog_images/overview_left.png -------------------------------------------------------------------------------- /visuals/twitterblog_images/overview_long.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/visuals/twitterblog_images/overview_long.png -------------------------------------------------------------------------------- /visuals/twitterblog_images/overview_right.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/visuals/twitterblog_images/overview_right.png -------------------------------------------------------------------------------- /visuals/twitterblog_images/perf_adapt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/visuals/twitterblog_images/perf_adapt.png -------------------------------------------------------------------------------- /visuals/twitterblog_images/perf_during.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/visuals/twitterblog_images/perf_during.png -------------------------------------------------------------------------------- /visuals/twitterblog_images/perf_pretr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/visuals/twitterblog_images/perf_pretr.png -------------------------------------------------------------------------------- /visuals/twitterblog_images/perf_pretr_adapt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/visuals/twitterblog_images/perf_pretr_adapt.png -------------------------------------------------------------------------------- /visuals/twitterblog_images/tokenidspec.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/OLMoE/HEAD/visuals/twitterblog_images/tokenidspec.png --------------------------------------------------------------------------------