├── .gitignore ├── LICENSE ├── README.md ├── benchmark_cpu ├── ff.cpp ├── ff.h ├── fff.cpp ├── fff.h └── main.cpp ├── benchmark_cuda ├── .gitignore ├── benchmark.py ├── ff_bmm │ ├── __init__.py │ └── ff_bmm.py ├── ff_cuda │ ├── __init__.py │ ├── ff.py │ ├── ff_cuda.cpp │ ├── ff_cuda_kernel.cu │ ├── jit.py │ └── setup.py ├── ff_native │ ├── __init__.py │ └── ff_native.py ├── fff_bmm │ ├── __init__.py │ └── fff_bmm.py ├── fff_cuda │ ├── __init__.py │ ├── fff.py │ ├── fff_cuda.cpp │ ├── fff_cuda_kernel.cu │ ├── jit.py │ └── setup.py ├── fff_native │ ├── __init__.py │ └── fff_native.py └── requirements.txt ├── benchmark_pytorch ├── README.md ├── fff │ ├── __init__.py │ ├── ff_bmm.py │ ├── fff_bmm.py │ └── fff_sparse.py ├── main.py └── moe.py └── training ├── .gitignore ├── .pre-commit-config.yaml ├── CITATION.cff ├── LICENSE.md ├── MANIFEST.in ├── README.md ├── cramming ├── __init__.py ├── architectures │ ├── __init__.py │ ├── attention.py │ ├── components.py │ ├── construction.py │ ├── crammed_bert.py │ ├── embeddings.py │ ├── fff.py │ ├── funnel_transformers.py │ ├── huggingface_interface.py │ ├── losses.py │ ├── recurrent_transformers.py │ └── sanity_check.py ├── backend │ ├── __init__.py │ ├── deepspeed_integration.py │ ├── optimizers │ │ ├── __init__.py │ │ ├── adahessian.py │ │ ├── agd.py │ │ ├── nanoT5_optimizer.py │ │ ├── optimizer_modifiers.py │ │ ├── progressive_batching.py │ │ ├── schedulers.py │ │ ├── shampoo │ │ │ ├── CODE_OF_CONDUCT.md │ │ │ ├── CONTRIBUTING.md │ │ │ ├── LICENSE │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── matrix_functions.py │ │ │ ├── shampoo.py │ │ │ └── shampoo_utils.py │ │ └── sophia.py │ ├── prepare_backend.py │ ├── torch_default.py │ └── utils.py ├── config │ ├── __init__.py │ ├── arch │ │ ├── __init__.py │ │ ├── crammed-bert-fff.yaml │ │ ├── crammed-bert-simple.yaml │ │ ├── crammed-bert.yaml │ │ ├── crammed-large-izsak.yaml │ │ ├── hf-bert-base.yaml │ │ ├── hf-bert-tiny.yaml │ │ └── v1 │ │ │ ├── bert-base.yaml │ │ │ ├── bert-c2.yaml │ │ │ ├── bert-c3.yaml │ │ │ ├── bert-c4.yaml │ │ │ ├── bert-c5.yaml │ │ │ ├── bert-i4.yaml │ │ │ ├── bert-large-izsak.yaml │ │ │ ├── bert-original.yaml │ │ │ ├── bert-tiny.yaml │ │ │ ├── crammed-bamboo.yaml │ │ │ ├── funnel-c2.yaml │ │ │ ├── recurrent-c2.yaml │ │ │ └── sanitycheck.yaml │ ├── cfg_eval.yaml │ ├── cfg_pretrain.yaml │ ├── data │ │ ├── __init__.py │ │ ├── bert-default.yaml │ │ ├── bookcorpus-wikipedia.yaml │ │ ├── c4-subset-processed.yaml │ │ ├── openweb.yaml │ │ ├── oscar.yaml │ │ ├── pile-readymade.yaml │ │ ├── raw-wikipedia-dumps.yaml │ │ ├── roots-mini.yaml │ │ ├── sanity-check-1.yaml │ │ ├── sanity-check-2.yaml │ │ ├── sources │ │ │ ├── ag_news.yaml │ │ │ ├── bookcorpus.yaml │ │ │ ├── c4.yaml │ │ │ ├── dash_books.yaml │ │ │ ├── fake.yaml │ │ │ ├── iwslt.yaml │ │ │ ├── no_code_stackexchange.yaml │ │ │ ├── openwebtext.yaml │ │ │ ├── oscar.yaml │ │ │ ├── raw_wiki_dump.yaml │ │ │ ├── the_pile.yaml │ │ │ ├── the_pileCC.yaml │ │ │ ├── the_pile_dedup.yaml │ │ │ ├── the_pile_natural.yaml │ │ │ ├── the_pile_stream.yaml │ │ │ ├── uncorpus.yaml │ │ │ ├── uspto.yaml │ │ │ ├── wikibooks.yaml │ │ │ ├── wikinews.yaml │ │ │ ├── wikipedia.yaml │ │ │ ├── wikiquote.yaml │ │ │ ├── wikiversity.yaml │ │ │ └── wikivoyage.yaml │ │ ├── the-pile-dedup.yaml │ │ ├── the-pile-natural.yaml │ │ ├── the-pile-stream.yaml │ │ └── the-pile.yaml │ ├── eval │ │ ├── GLUE.yaml │ │ ├── GLUE_sane.yaml │ │ ├── GLUE_sane_nocola.yaml │ │ ├── RACE.yaml │ │ ├── SWAG.yaml │ │ ├── __init__.py │ │ ├── cola.yaml │ │ ├── mnli.yaml │ │ ├── mrpc.yaml │ │ ├── optim │ │ │ └── adam.yaml │ │ ├── qnli.yaml │ │ ├── qqp.yaml │ │ ├── quatro.yaml │ │ ├── rte.yaml │ │ ├── superGLUE.yaml │ │ └── tasks │ │ │ ├── axb.yaml │ │ │ ├── axg.yaml │ │ │ ├── boolq.yaml │ │ │ ├── cb.yaml │ │ │ ├── cola.yaml │ │ │ ├── copa.yaml │ │ │ ├── mnli.yaml │ │ │ ├── mrpc.yaml │ │ │ ├── multirc.yaml │ │ │ ├── qnli.yaml │ │ │ ├── qqp.yaml │ │ │ ├── race.yaml │ │ │ ├── record.yaml │ │ │ ├── rte.yaml │ │ │ ├── sst2.yaml │ │ │ ├── stsb.yaml │ │ │ ├── swag.yaml │ │ │ ├── wic.yaml │ │ │ ├── wnli.yaml │ │ │ └── wsc_fixed.yaml │ ├── hydra │ │ ├── __init__.py │ │ └── job_logging │ │ │ └── custom.yaml │ ├── impl │ │ ├── __init__.py │ │ ├── _default.yaml │ │ ├── deepspeed-hf.yaml │ │ ├── deepspeed.yaml │ │ ├── onnx.yaml │ │ └── torch-default.yaml │ ├── train │ │ ├── __init__.py │ │ ├── bert-base.yaml │ │ ├── bert-izsak.yaml │ │ ├── bert-o4.yaml │ │ ├── bert-original.yaml │ │ ├── optim │ │ │ ├── adafactor.yaml │ │ │ ├── adahessian.yaml │ │ │ ├── adam.yaml │ │ │ ├── adam8bit.yaml │ │ │ ├── adam_classic.yaml │ │ │ ├── adamscale.yaml │ │ │ ├── agd.yaml │ │ │ ├── lion.yaml │ │ │ ├── radam.yaml │ │ │ ├── sgd.yaml │ │ │ ├── shampoo.yaml │ │ │ └── sophia.yaml │ │ ├── optim_mod │ │ │ ├── disabled.yaml │ │ │ ├── larc.yaml │ │ │ ├── lars.yaml │ │ │ ├── progressive.yaml │ │ │ └── sam.yaml │ │ └── v1 │ │ │ ├── bert-o1.yaml │ │ │ ├── bert-o2.yaml │ │ │ └── bert-o3.yaml │ └── wandb │ │ ├── default.yaml │ │ └── none.yaml ├── data │ ├── __init__.py │ ├── curriculum_sorting.py │ ├── deduplicate.py │ ├── downstream_task_preparation.py │ ├── pretraining_preparation.py │ ├── tokenizer_preparation.py │ └── utils.py └── utils.py ├── data_card.md ├── eval.py ├── eval_GLUE.sh ├── load_local_model.py ├── model_card.md ├── pretrain.py ├── pyproject.toml ├── scripts ├── architecture_ablations_c5_o3.sh ├── data_ablations_a4000.sh ├── data_ablations_a6000.sh ├── eval_baselines.sh ├── final_story_cb_o4_2080ti.sh ├── final_story_cb_o4_a4000.sh ├── final_story_cb_o4_a6000.sh ├── preprocessing.sh ├── reproducing_bert.sh ├── scaling_law_cb_o4_a4000.sh ├── scaling_law_cb_o4_a5000.sh ├── scaling_law_cb_o4_a6000.sh └── training_ablations_c5_o3.sh ├── setup.cfg └── upload_processed_dataset.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/README.md -------------------------------------------------------------------------------- /benchmark_cpu/ff.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/benchmark_cpu/ff.cpp -------------------------------------------------------------------------------- /benchmark_cpu/ff.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/benchmark_cpu/ff.h -------------------------------------------------------------------------------- /benchmark_cpu/fff.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/benchmark_cpu/fff.cpp -------------------------------------------------------------------------------- /benchmark_cpu/fff.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/benchmark_cpu/fff.h -------------------------------------------------------------------------------- /benchmark_cpu/main.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/benchmark_cpu/main.cpp -------------------------------------------------------------------------------- /benchmark_cuda/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/benchmark_cuda/.gitignore -------------------------------------------------------------------------------- /benchmark_cuda/benchmark.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/benchmark_cuda/benchmark.py -------------------------------------------------------------------------------- /benchmark_cuda/ff_bmm/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /benchmark_cuda/ff_bmm/ff_bmm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/benchmark_cuda/ff_bmm/ff_bmm.py -------------------------------------------------------------------------------- /benchmark_cuda/ff_cuda/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /benchmark_cuda/ff_cuda/ff.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/benchmark_cuda/ff_cuda/ff.py -------------------------------------------------------------------------------- /benchmark_cuda/ff_cuda/ff_cuda.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/benchmark_cuda/ff_cuda/ff_cuda.cpp -------------------------------------------------------------------------------- /benchmark_cuda/ff_cuda/ff_cuda_kernel.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/benchmark_cuda/ff_cuda/ff_cuda_kernel.cu -------------------------------------------------------------------------------- /benchmark_cuda/ff_cuda/jit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/benchmark_cuda/ff_cuda/jit.py -------------------------------------------------------------------------------- /benchmark_cuda/ff_cuda/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/benchmark_cuda/ff_cuda/setup.py -------------------------------------------------------------------------------- /benchmark_cuda/ff_native/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /benchmark_cuda/ff_native/ff_native.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/benchmark_cuda/ff_native/ff_native.py -------------------------------------------------------------------------------- /benchmark_cuda/fff_bmm/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /benchmark_cuda/fff_bmm/fff_bmm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/benchmark_cuda/fff_bmm/fff_bmm.py -------------------------------------------------------------------------------- /benchmark_cuda/fff_cuda/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /benchmark_cuda/fff_cuda/fff.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/benchmark_cuda/fff_cuda/fff.py -------------------------------------------------------------------------------- /benchmark_cuda/fff_cuda/fff_cuda.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/benchmark_cuda/fff_cuda/fff_cuda.cpp -------------------------------------------------------------------------------- /benchmark_cuda/fff_cuda/fff_cuda_kernel.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/benchmark_cuda/fff_cuda/fff_cuda_kernel.cu -------------------------------------------------------------------------------- /benchmark_cuda/fff_cuda/jit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/benchmark_cuda/fff_cuda/jit.py -------------------------------------------------------------------------------- /benchmark_cuda/fff_cuda/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/benchmark_cuda/fff_cuda/setup.py -------------------------------------------------------------------------------- /benchmark_cuda/fff_native/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /benchmark_cuda/fff_native/fff_native.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/benchmark_cuda/fff_native/fff_native.py -------------------------------------------------------------------------------- /benchmark_cuda/requirements.txt: -------------------------------------------------------------------------------- 1 | torch 2 | numpy 3 | -------------------------------------------------------------------------------- /benchmark_pytorch/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/benchmark_pytorch/README.md -------------------------------------------------------------------------------- /benchmark_pytorch/fff/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/benchmark_pytorch/fff/__init__.py -------------------------------------------------------------------------------- /benchmark_pytorch/fff/ff_bmm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/benchmark_pytorch/fff/ff_bmm.py -------------------------------------------------------------------------------- /benchmark_pytorch/fff/fff_bmm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/benchmark_pytorch/fff/fff_bmm.py -------------------------------------------------------------------------------- /benchmark_pytorch/fff/fff_sparse.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/benchmark_pytorch/fff/fff_sparse.py -------------------------------------------------------------------------------- /benchmark_pytorch/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/benchmark_pytorch/main.py -------------------------------------------------------------------------------- /benchmark_pytorch/moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/benchmark_pytorch/moe.py -------------------------------------------------------------------------------- /training/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/.gitignore -------------------------------------------------------------------------------- /training/.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/.pre-commit-config.yaml -------------------------------------------------------------------------------- /training/CITATION.cff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/CITATION.cff -------------------------------------------------------------------------------- /training/LICENSE.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/LICENSE.md -------------------------------------------------------------------------------- /training/MANIFEST.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/MANIFEST.in -------------------------------------------------------------------------------- /training/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/README.md -------------------------------------------------------------------------------- /training/cramming/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/__init__.py -------------------------------------------------------------------------------- /training/cramming/architectures/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/architectures/__init__.py -------------------------------------------------------------------------------- /training/cramming/architectures/attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/architectures/attention.py -------------------------------------------------------------------------------- /training/cramming/architectures/components.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/architectures/components.py -------------------------------------------------------------------------------- /training/cramming/architectures/construction.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/architectures/construction.py -------------------------------------------------------------------------------- /training/cramming/architectures/crammed_bert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/architectures/crammed_bert.py -------------------------------------------------------------------------------- /training/cramming/architectures/embeddings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/architectures/embeddings.py -------------------------------------------------------------------------------- /training/cramming/architectures/fff.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/architectures/fff.py -------------------------------------------------------------------------------- /training/cramming/architectures/funnel_transformers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/architectures/funnel_transformers.py -------------------------------------------------------------------------------- /training/cramming/architectures/huggingface_interface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/architectures/huggingface_interface.py -------------------------------------------------------------------------------- /training/cramming/architectures/losses.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/architectures/losses.py -------------------------------------------------------------------------------- /training/cramming/architectures/recurrent_transformers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/architectures/recurrent_transformers.py -------------------------------------------------------------------------------- /training/cramming/architectures/sanity_check.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/architectures/sanity_check.py -------------------------------------------------------------------------------- /training/cramming/backend/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/backend/__init__.py -------------------------------------------------------------------------------- /training/cramming/backend/deepspeed_integration.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/backend/deepspeed_integration.py -------------------------------------------------------------------------------- /training/cramming/backend/optimizers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/backend/optimizers/__init__.py -------------------------------------------------------------------------------- /training/cramming/backend/optimizers/adahessian.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/backend/optimizers/adahessian.py -------------------------------------------------------------------------------- /training/cramming/backend/optimizers/agd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/backend/optimizers/agd.py -------------------------------------------------------------------------------- /training/cramming/backend/optimizers/nanoT5_optimizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/backend/optimizers/nanoT5_optimizer.py -------------------------------------------------------------------------------- /training/cramming/backend/optimizers/optimizer_modifiers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/backend/optimizers/optimizer_modifiers.py -------------------------------------------------------------------------------- /training/cramming/backend/optimizers/progressive_batching.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/backend/optimizers/progressive_batching.py -------------------------------------------------------------------------------- /training/cramming/backend/optimizers/schedulers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/backend/optimizers/schedulers.py -------------------------------------------------------------------------------- /training/cramming/backend/optimizers/shampoo/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/backend/optimizers/shampoo/CODE_OF_CONDUCT.md -------------------------------------------------------------------------------- /training/cramming/backend/optimizers/shampoo/CONTRIBUTING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/backend/optimizers/shampoo/CONTRIBUTING.md -------------------------------------------------------------------------------- /training/cramming/backend/optimizers/shampoo/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/backend/optimizers/shampoo/LICENSE -------------------------------------------------------------------------------- /training/cramming/backend/optimizers/shampoo/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/backend/optimizers/shampoo/README.md -------------------------------------------------------------------------------- /training/cramming/backend/optimizers/shampoo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/backend/optimizers/shampoo/__init__.py -------------------------------------------------------------------------------- /training/cramming/backend/optimizers/shampoo/matrix_functions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/backend/optimizers/shampoo/matrix_functions.py -------------------------------------------------------------------------------- /training/cramming/backend/optimizers/shampoo/shampoo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/backend/optimizers/shampoo/shampoo.py -------------------------------------------------------------------------------- /training/cramming/backend/optimizers/shampoo/shampoo_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/backend/optimizers/shampoo/shampoo_utils.py -------------------------------------------------------------------------------- /training/cramming/backend/optimizers/sophia.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/backend/optimizers/sophia.py -------------------------------------------------------------------------------- /training/cramming/backend/prepare_backend.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/backend/prepare_backend.py -------------------------------------------------------------------------------- /training/cramming/backend/torch_default.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/backend/torch_default.py -------------------------------------------------------------------------------- /training/cramming/backend/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/backend/utils.py -------------------------------------------------------------------------------- /training/cramming/config/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /training/cramming/config/arch/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /training/cramming/config/arch/crammed-bert-fff.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/arch/crammed-bert-fff.yaml -------------------------------------------------------------------------------- /training/cramming/config/arch/crammed-bert-simple.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/arch/crammed-bert-simple.yaml -------------------------------------------------------------------------------- /training/cramming/config/arch/crammed-bert.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/arch/crammed-bert.yaml -------------------------------------------------------------------------------- /training/cramming/config/arch/crammed-large-izsak.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/arch/crammed-large-izsak.yaml -------------------------------------------------------------------------------- /training/cramming/config/arch/hf-bert-base.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/arch/hf-bert-base.yaml -------------------------------------------------------------------------------- /training/cramming/config/arch/hf-bert-tiny.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/arch/hf-bert-tiny.yaml -------------------------------------------------------------------------------- /training/cramming/config/arch/v1/bert-base.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/arch/v1/bert-base.yaml -------------------------------------------------------------------------------- /training/cramming/config/arch/v1/bert-c2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/arch/v1/bert-c2.yaml -------------------------------------------------------------------------------- /training/cramming/config/arch/v1/bert-c3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/arch/v1/bert-c3.yaml -------------------------------------------------------------------------------- /training/cramming/config/arch/v1/bert-c4.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/arch/v1/bert-c4.yaml -------------------------------------------------------------------------------- /training/cramming/config/arch/v1/bert-c5.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/arch/v1/bert-c5.yaml -------------------------------------------------------------------------------- /training/cramming/config/arch/v1/bert-i4.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/arch/v1/bert-i4.yaml -------------------------------------------------------------------------------- /training/cramming/config/arch/v1/bert-large-izsak.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/arch/v1/bert-large-izsak.yaml -------------------------------------------------------------------------------- /training/cramming/config/arch/v1/bert-original.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/arch/v1/bert-original.yaml -------------------------------------------------------------------------------- /training/cramming/config/arch/v1/bert-tiny.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/arch/v1/bert-tiny.yaml -------------------------------------------------------------------------------- /training/cramming/config/arch/v1/crammed-bamboo.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/arch/v1/crammed-bamboo.yaml -------------------------------------------------------------------------------- /training/cramming/config/arch/v1/funnel-c2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/arch/v1/funnel-c2.yaml -------------------------------------------------------------------------------- /training/cramming/config/arch/v1/recurrent-c2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/arch/v1/recurrent-c2.yaml -------------------------------------------------------------------------------- /training/cramming/config/arch/v1/sanitycheck.yaml: -------------------------------------------------------------------------------- 1 | architectures: 2 | - SanityCheckLM 3 | 4 | width: 1024 # 8352 5 | -------------------------------------------------------------------------------- /training/cramming/config/cfg_eval.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/cfg_eval.yaml -------------------------------------------------------------------------------- /training/cramming/config/cfg_pretrain.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/cfg_pretrain.yaml -------------------------------------------------------------------------------- /training/cramming/config/data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /training/cramming/config/data/bert-default.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/data/bert-default.yaml -------------------------------------------------------------------------------- /training/cramming/config/data/bookcorpus-wikipedia.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/data/bookcorpus-wikipedia.yaml -------------------------------------------------------------------------------- /training/cramming/config/data/c4-subset-processed.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/data/c4-subset-processed.yaml -------------------------------------------------------------------------------- /training/cramming/config/data/openweb.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/data/openweb.yaml -------------------------------------------------------------------------------- /training/cramming/config/data/oscar.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/data/oscar.yaml -------------------------------------------------------------------------------- /training/cramming/config/data/pile-readymade.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/data/pile-readymade.yaml -------------------------------------------------------------------------------- /training/cramming/config/data/raw-wikipedia-dumps.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/data/raw-wikipedia-dumps.yaml -------------------------------------------------------------------------------- /training/cramming/config/data/roots-mini.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/data/roots-mini.yaml -------------------------------------------------------------------------------- /training/cramming/config/data/sanity-check-1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/data/sanity-check-1.yaml -------------------------------------------------------------------------------- /training/cramming/config/data/sanity-check-2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/data/sanity-check-2.yaml -------------------------------------------------------------------------------- /training/cramming/config/data/sources/ag_news.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/data/sources/ag_news.yaml -------------------------------------------------------------------------------- /training/cramming/config/data/sources/bookcorpus.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/data/sources/bookcorpus.yaml -------------------------------------------------------------------------------- /training/cramming/config/data/sources/c4.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/data/sources/c4.yaml -------------------------------------------------------------------------------- /training/cramming/config/data/sources/dash_books.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/data/sources/dash_books.yaml -------------------------------------------------------------------------------- /training/cramming/config/data/sources/fake.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/data/sources/fake.yaml -------------------------------------------------------------------------------- /training/cramming/config/data/sources/iwslt.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/data/sources/iwslt.yaml -------------------------------------------------------------------------------- /training/cramming/config/data/sources/no_code_stackexchange.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/data/sources/no_code_stackexchange.yaml -------------------------------------------------------------------------------- /training/cramming/config/data/sources/openwebtext.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/data/sources/openwebtext.yaml -------------------------------------------------------------------------------- /training/cramming/config/data/sources/oscar.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/data/sources/oscar.yaml -------------------------------------------------------------------------------- /training/cramming/config/data/sources/raw_wiki_dump.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/data/sources/raw_wiki_dump.yaml -------------------------------------------------------------------------------- /training/cramming/config/data/sources/the_pile.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/data/sources/the_pile.yaml -------------------------------------------------------------------------------- /training/cramming/config/data/sources/the_pileCC.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/data/sources/the_pileCC.yaml -------------------------------------------------------------------------------- /training/cramming/config/data/sources/the_pile_dedup.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/data/sources/the_pile_dedup.yaml -------------------------------------------------------------------------------- /training/cramming/config/data/sources/the_pile_natural.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/data/sources/the_pile_natural.yaml -------------------------------------------------------------------------------- /training/cramming/config/data/sources/the_pile_stream.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/data/sources/the_pile_stream.yaml -------------------------------------------------------------------------------- /training/cramming/config/data/sources/uncorpus.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/data/sources/uncorpus.yaml -------------------------------------------------------------------------------- /training/cramming/config/data/sources/uspto.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/data/sources/uspto.yaml -------------------------------------------------------------------------------- /training/cramming/config/data/sources/wikibooks.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/data/sources/wikibooks.yaml -------------------------------------------------------------------------------- /training/cramming/config/data/sources/wikinews.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/data/sources/wikinews.yaml -------------------------------------------------------------------------------- /training/cramming/config/data/sources/wikipedia.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/data/sources/wikipedia.yaml -------------------------------------------------------------------------------- /training/cramming/config/data/sources/wikiquote.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/data/sources/wikiquote.yaml -------------------------------------------------------------------------------- /training/cramming/config/data/sources/wikiversity.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/data/sources/wikiversity.yaml -------------------------------------------------------------------------------- /training/cramming/config/data/sources/wikivoyage.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/data/sources/wikivoyage.yaml -------------------------------------------------------------------------------- /training/cramming/config/data/the-pile-dedup.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/data/the-pile-dedup.yaml -------------------------------------------------------------------------------- /training/cramming/config/data/the-pile-natural.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/data/the-pile-natural.yaml -------------------------------------------------------------------------------- /training/cramming/config/data/the-pile-stream.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/data/the-pile-stream.yaml -------------------------------------------------------------------------------- /training/cramming/config/data/the-pile.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/data/the-pile.yaml -------------------------------------------------------------------------------- /training/cramming/config/eval/GLUE.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/eval/GLUE.yaml -------------------------------------------------------------------------------- /training/cramming/config/eval/GLUE_sane.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/eval/GLUE_sane.yaml -------------------------------------------------------------------------------- /training/cramming/config/eval/GLUE_sane_nocola.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/eval/GLUE_sane_nocola.yaml -------------------------------------------------------------------------------- /training/cramming/config/eval/RACE.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/eval/RACE.yaml -------------------------------------------------------------------------------- /training/cramming/config/eval/SWAG.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/eval/SWAG.yaml -------------------------------------------------------------------------------- /training/cramming/config/eval/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /training/cramming/config/eval/cola.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/eval/cola.yaml -------------------------------------------------------------------------------- /training/cramming/config/eval/mnli.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/eval/mnli.yaml -------------------------------------------------------------------------------- /training/cramming/config/eval/mrpc.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/eval/mrpc.yaml -------------------------------------------------------------------------------- /training/cramming/config/eval/optim/adam.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/eval/optim/adam.yaml -------------------------------------------------------------------------------- /training/cramming/config/eval/qnli.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/eval/qnli.yaml -------------------------------------------------------------------------------- /training/cramming/config/eval/qqp.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/eval/qqp.yaml -------------------------------------------------------------------------------- /training/cramming/config/eval/quatro.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/eval/quatro.yaml -------------------------------------------------------------------------------- /training/cramming/config/eval/rte.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/eval/rte.yaml -------------------------------------------------------------------------------- /training/cramming/config/eval/superGLUE.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/eval/superGLUE.yaml -------------------------------------------------------------------------------- /training/cramming/config/eval/tasks/axb.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/eval/tasks/axb.yaml -------------------------------------------------------------------------------- /training/cramming/config/eval/tasks/axg.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/eval/tasks/axg.yaml -------------------------------------------------------------------------------- /training/cramming/config/eval/tasks/boolq.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/eval/tasks/boolq.yaml -------------------------------------------------------------------------------- /training/cramming/config/eval/tasks/cb.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/eval/tasks/cb.yaml -------------------------------------------------------------------------------- /training/cramming/config/eval/tasks/cola.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/eval/tasks/cola.yaml -------------------------------------------------------------------------------- /training/cramming/config/eval/tasks/copa.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/eval/tasks/copa.yaml -------------------------------------------------------------------------------- /training/cramming/config/eval/tasks/mnli.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/eval/tasks/mnli.yaml -------------------------------------------------------------------------------- /training/cramming/config/eval/tasks/mrpc.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/eval/tasks/mrpc.yaml -------------------------------------------------------------------------------- /training/cramming/config/eval/tasks/multirc.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/eval/tasks/multirc.yaml -------------------------------------------------------------------------------- /training/cramming/config/eval/tasks/qnli.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/eval/tasks/qnli.yaml -------------------------------------------------------------------------------- /training/cramming/config/eval/tasks/qqp.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/eval/tasks/qqp.yaml -------------------------------------------------------------------------------- /training/cramming/config/eval/tasks/race.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/eval/tasks/race.yaml -------------------------------------------------------------------------------- /training/cramming/config/eval/tasks/record.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/eval/tasks/record.yaml -------------------------------------------------------------------------------- /training/cramming/config/eval/tasks/rte.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/eval/tasks/rte.yaml -------------------------------------------------------------------------------- /training/cramming/config/eval/tasks/sst2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/eval/tasks/sst2.yaml -------------------------------------------------------------------------------- /training/cramming/config/eval/tasks/stsb.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/eval/tasks/stsb.yaml -------------------------------------------------------------------------------- /training/cramming/config/eval/tasks/swag.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/eval/tasks/swag.yaml -------------------------------------------------------------------------------- /training/cramming/config/eval/tasks/wic.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/eval/tasks/wic.yaml -------------------------------------------------------------------------------- /training/cramming/config/eval/tasks/wnli.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/eval/tasks/wnli.yaml -------------------------------------------------------------------------------- /training/cramming/config/eval/tasks/wsc_fixed.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/eval/tasks/wsc_fixed.yaml -------------------------------------------------------------------------------- /training/cramming/config/hydra/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /training/cramming/config/hydra/job_logging/custom.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/hydra/job_logging/custom.yaml -------------------------------------------------------------------------------- /training/cramming/config/impl/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /training/cramming/config/impl/_default.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/impl/_default.yaml -------------------------------------------------------------------------------- /training/cramming/config/impl/deepspeed-hf.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/impl/deepspeed-hf.yaml -------------------------------------------------------------------------------- /training/cramming/config/impl/deepspeed.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/impl/deepspeed.yaml -------------------------------------------------------------------------------- /training/cramming/config/impl/onnx.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/impl/onnx.yaml -------------------------------------------------------------------------------- /training/cramming/config/impl/torch-default.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/impl/torch-default.yaml -------------------------------------------------------------------------------- /training/cramming/config/train/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /training/cramming/config/train/bert-base.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/train/bert-base.yaml -------------------------------------------------------------------------------- /training/cramming/config/train/bert-izsak.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/train/bert-izsak.yaml -------------------------------------------------------------------------------- /training/cramming/config/train/bert-o4.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/train/bert-o4.yaml -------------------------------------------------------------------------------- /training/cramming/config/train/bert-original.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/train/bert-original.yaml -------------------------------------------------------------------------------- /training/cramming/config/train/optim/adafactor.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/train/optim/adafactor.yaml -------------------------------------------------------------------------------- /training/cramming/config/train/optim/adahessian.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/train/optim/adahessian.yaml -------------------------------------------------------------------------------- /training/cramming/config/train/optim/adam.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/train/optim/adam.yaml -------------------------------------------------------------------------------- /training/cramming/config/train/optim/adam8bit.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/train/optim/adam8bit.yaml -------------------------------------------------------------------------------- /training/cramming/config/train/optim/adam_classic.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/train/optim/adam_classic.yaml -------------------------------------------------------------------------------- /training/cramming/config/train/optim/adamscale.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/train/optim/adamscale.yaml -------------------------------------------------------------------------------- /training/cramming/config/train/optim/agd.yaml: -------------------------------------------------------------------------------- 1 | type: AGD 2 | 3 | gain: 1.0 4 | -------------------------------------------------------------------------------- /training/cramming/config/train/optim/lion.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/train/optim/lion.yaml -------------------------------------------------------------------------------- /training/cramming/config/train/optim/radam.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/train/optim/radam.yaml -------------------------------------------------------------------------------- /training/cramming/config/train/optim/sgd.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/train/optim/sgd.yaml -------------------------------------------------------------------------------- /training/cramming/config/train/optim/shampoo.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/train/optim/shampoo.yaml -------------------------------------------------------------------------------- /training/cramming/config/train/optim/sophia.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/train/optim/sophia.yaml -------------------------------------------------------------------------------- /training/cramming/config/train/optim_mod/disabled.yaml: -------------------------------------------------------------------------------- 1 | name: none 2 | -------------------------------------------------------------------------------- /training/cramming/config/train/optim_mod/larc.yaml: -------------------------------------------------------------------------------- 1 | name: LARC 2 | 3 | trust_coefficient: 0.02 4 | clip: True 5 | eps: 1e-8 6 | -------------------------------------------------------------------------------- /training/cramming/config/train/optim_mod/lars.yaml: -------------------------------------------------------------------------------- 1 | name: LARS 2 | 3 | trust_coefficient: 0.02 4 | clip: False 5 | eps: 1e-8 6 | -------------------------------------------------------------------------------- /training/cramming/config/train/optim_mod/progressive.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/train/optim_mod/progressive.yaml -------------------------------------------------------------------------------- /training/cramming/config/train/optim_mod/sam.yaml: -------------------------------------------------------------------------------- 1 | name: SAM 2 | rho: 0.05 3 | -------------------------------------------------------------------------------- /training/cramming/config/train/v1/bert-o1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/train/v1/bert-o1.yaml -------------------------------------------------------------------------------- /training/cramming/config/train/v1/bert-o2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/train/v1/bert-o2.yaml -------------------------------------------------------------------------------- /training/cramming/config/train/v1/bert-o3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/config/train/v1/bert-o3.yaml -------------------------------------------------------------------------------- /training/cramming/config/wandb/default.yaml: -------------------------------------------------------------------------------- 1 | enabled: True 2 | entity: YOURNAMEHERE 3 | project: cramming-pretrain 4 | tags: [] 5 | -------------------------------------------------------------------------------- /training/cramming/config/wandb/none.yaml: -------------------------------------------------------------------------------- 1 | enabled: False 2 | entity: 3 | project: 4 | tags: [] 5 | -------------------------------------------------------------------------------- /training/cramming/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/data/__init__.py -------------------------------------------------------------------------------- /training/cramming/data/curriculum_sorting.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/data/curriculum_sorting.py -------------------------------------------------------------------------------- /training/cramming/data/deduplicate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/data/deduplicate.py -------------------------------------------------------------------------------- /training/cramming/data/downstream_task_preparation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/data/downstream_task_preparation.py -------------------------------------------------------------------------------- /training/cramming/data/pretraining_preparation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/data/pretraining_preparation.py -------------------------------------------------------------------------------- /training/cramming/data/tokenizer_preparation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/data/tokenizer_preparation.py -------------------------------------------------------------------------------- /training/cramming/data/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/data/utils.py -------------------------------------------------------------------------------- /training/cramming/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/cramming/utils.py -------------------------------------------------------------------------------- /training/data_card.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/data_card.md -------------------------------------------------------------------------------- /training/eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/eval.py -------------------------------------------------------------------------------- /training/eval_GLUE.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/eval_GLUE.sh -------------------------------------------------------------------------------- /training/load_local_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/load_local_model.py -------------------------------------------------------------------------------- /training/model_card.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/model_card.md -------------------------------------------------------------------------------- /training/pretrain.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/pretrain.py -------------------------------------------------------------------------------- /training/pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/pyproject.toml -------------------------------------------------------------------------------- /training/scripts/architecture_ablations_c5_o3.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/scripts/architecture_ablations_c5_o3.sh -------------------------------------------------------------------------------- /training/scripts/data_ablations_a4000.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/scripts/data_ablations_a4000.sh -------------------------------------------------------------------------------- /training/scripts/data_ablations_a6000.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/scripts/data_ablations_a6000.sh -------------------------------------------------------------------------------- /training/scripts/eval_baselines.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/scripts/eval_baselines.sh -------------------------------------------------------------------------------- /training/scripts/final_story_cb_o4_2080ti.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/scripts/final_story_cb_o4_2080ti.sh -------------------------------------------------------------------------------- /training/scripts/final_story_cb_o4_a4000.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/scripts/final_story_cb_o4_a4000.sh -------------------------------------------------------------------------------- /training/scripts/final_story_cb_o4_a6000.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/scripts/final_story_cb_o4_a6000.sh -------------------------------------------------------------------------------- /training/scripts/preprocessing.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/scripts/preprocessing.sh -------------------------------------------------------------------------------- /training/scripts/reproducing_bert.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/scripts/reproducing_bert.sh -------------------------------------------------------------------------------- /training/scripts/scaling_law_cb_o4_a4000.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/scripts/scaling_law_cb_o4_a4000.sh -------------------------------------------------------------------------------- /training/scripts/scaling_law_cb_o4_a5000.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/scripts/scaling_law_cb_o4_a5000.sh -------------------------------------------------------------------------------- /training/scripts/scaling_law_cb_o4_a6000.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/scripts/scaling_law_cb_o4_a6000.sh -------------------------------------------------------------------------------- /training/scripts/training_ablations_c5_o3.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/scripts/training_ablations_c5_o3.sh -------------------------------------------------------------------------------- /training/setup.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/setup.cfg -------------------------------------------------------------------------------- /training/upload_processed_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbelcak/UltraFastBERT/HEAD/training/upload_processed_dataset.py --------------------------------------------------------------------------------