├── CITATION.cff ├── LICENSE ├── README.md ├── docker ├── Dockerfile ├── build.sh ├── pull.sh └── run.sh ├── download └── download.sh ├── examples ├── example_cond_input.json ├── generate_conditional_greedy.sh ├── generate_conditional_sampling.sh ├── generate_interactive.sh └── generate_unconditional.sh └── megatron_lm ├── LICENSE ├── MANIFEST.in ├── README.md ├── changes.md ├── curriculum_learning ├── README.md ├── ds_pretrain_gpt2.sh ├── ds_train.sh ├── ds_zero_stage_2_config_baseline.json └── ds_zero_stage_2_config_curriculum_fixed_linear.json ├── examples ├── ds_pretrain_gpt2-zero2.sh ├── ds_pretrain_gpt2-zero3.sh ├── ds_pretrain_gpt2.sh ├── ds_zero_stage_2_config.json ├── ds_zero_stage_3_config.json ├── ds_zero_stage_3_config_release.json ├── ds_zero_stage_infinity_config.json ├── evaluate_zeroshot_gpt2.sh ├── finetune_mnli_distributed.sh ├── finetune_race_distributed.sh ├── generate_text.sh ├── merge_mp_bert.sh ├── pretrain_bert.sh ├── pretrain_bert_distributed.sh ├── pretrain_gpt2.sh └── pretrain_gpt2_distributed.sh ├── images ├── Makefile ├── cases.png ├── scaling-dp.png ├── scaling-mp.png └── tables.tex ├── megatron ├── __init__.py ├── arguments.py ├── checkpointing.py ├── data │ ├── Makefile │ ├── __init__.py │ ├── bert_dataset.py │ ├── dataset_utils.py │ ├── gpt2_dataset.py │ ├── helpers.cpp │ ├── ict_dataset.py │ ├── indexed_dataset.py │ ├── realm_dataset_utils.py │ ├── realm_index.py │ ├── samplers.py │ └── test │ │ ├── test_indexed_dataset.py │ │ └── test_preprocess_data.sh ├── deprecated_data_utils │ ├── __init__.py │ ├── configure_data.py │ ├── corpora.py │ ├── datasets.py │ ├── file_utils.py │ ├── lazy_loader.py │ ├── samplers.py │ ├── scripts │ │ ├── presplit_sentences_json.py │ │ ├── split_gpt2_json.py │ │ └── split_json.py │ ├── tf_dl.py │ ├── tokenization.py │ ├── tokenization_gpt2.py │ └── wordpiece.py ├── fp16 │ ├── __init__.py │ ├── fp16.py │ ├── fp16util.py │ └── loss_scaler.py ├── fused_kernels │ ├── __init__.py │ ├── scaled_masked_softmax.cpp │ ├── scaled_masked_softmax.h │ ├── scaled_masked_softmax_cuda.cu │ ├── scaled_upper_triang_masked_softmax.cpp │ ├── scaled_upper_triang_masked_softmax.h │ └── scaled_upper_triang_masked_softmax_cuda.cu ├── global_vars.py ├── indexer.py ├── initialize.py ├── learning_rates.py ├── memory.py ├── model │ ├── __init__.py │ ├── bert_model.py │ ├── classification.py │ ├── distributed.py │ ├── fused_bias_gelu.py │ ├── fused_softmax.py │ ├── gpt2_model.py │ ├── language_model.py │ ├── multiple_choice.py │ ├── realm_model.py │ ├── transformer.py │ └── utils.py ├── module.py ├── mpu │ ├── __init__.py │ ├── cross_entropy.py │ ├── data.py │ ├── grads.py │ ├── initialize.py │ ├── layers.py │ ├── mappings.py │ ├── random.py │ ├── tests │ │ ├── __init__.py │ │ ├── commons.py │ │ ├── test_cross_entropy.py │ │ ├── test_data.py │ │ ├── test_initialize.py │ │ ├── test_layers.py │ │ └── test_random.py │ └── utils.py ├── package_info.py ├── text_generation_utils.py ├── tokenizer │ ├── __init__.py │ ├── bert_tokenization.py │ ├── gpt2_tokenization.py │ ├── sp_tokenization.py │ └── tokenizer.py ├── training.py └── utils.py ├── pretrain_bert.py ├── pretrain_gpt2.py ├── pretrain_ict.py ├── requirements.txt ├── setup.py ├── tasks ├── data_utils.py ├── ensemble_classifier.py ├── eval_utils.py ├── finetune_utils.py ├── glue │ ├── data.py │ ├── finetune.py │ ├── mnli.py │ └── qqp.py ├── main.py ├── race │ ├── data.py │ └── finetune.py └── zeroshot_gpt2 │ ├── datasets.py │ ├── detokenizer.py │ └── evaluate.py └── tools ├── create_doc_index.py ├── generate_samples_gpt2.py ├── linter.py ├── merge_mp_partitions.py ├── openwebtext ├── README.md ├── blacklist_urls.py ├── cleanup_dataset.py ├── find_duplicates.py ├── group_duplicates_url.py ├── merge_jsons.py └── remove_group_duplicates.py └── preprocess_data.py /CITATION.cff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/CITATION.cff -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/README.md -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/docker/Dockerfile -------------------------------------------------------------------------------- /docker/build.sh: -------------------------------------------------------------------------------- 1 | docker build -t yalm-cuda11-ds:1.0 --network host . 2 | -------------------------------------------------------------------------------- /docker/pull.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/docker/pull.sh -------------------------------------------------------------------------------- /docker/run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/docker/run.sh -------------------------------------------------------------------------------- /download/download.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/download/download.sh -------------------------------------------------------------------------------- /examples/example_cond_input.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/examples/example_cond_input.json -------------------------------------------------------------------------------- /examples/generate_conditional_greedy.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/examples/generate_conditional_greedy.sh -------------------------------------------------------------------------------- /examples/generate_conditional_sampling.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/examples/generate_conditional_sampling.sh -------------------------------------------------------------------------------- /examples/generate_interactive.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/examples/generate_interactive.sh -------------------------------------------------------------------------------- /examples/generate_unconditional.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/examples/generate_unconditional.sh -------------------------------------------------------------------------------- /megatron_lm/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/LICENSE -------------------------------------------------------------------------------- /megatron_lm/MANIFEST.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/MANIFEST.in -------------------------------------------------------------------------------- /megatron_lm/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/README.md -------------------------------------------------------------------------------- /megatron_lm/changes.md: -------------------------------------------------------------------------------- 1 | PRETEND THESE ARE CODE CHANGES 2 | -------------------------------------------------------------------------------- /megatron_lm/curriculum_learning/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/curriculum_learning/README.md -------------------------------------------------------------------------------- /megatron_lm/curriculum_learning/ds_pretrain_gpt2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/curriculum_learning/ds_pretrain_gpt2.sh -------------------------------------------------------------------------------- /megatron_lm/curriculum_learning/ds_train.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/curriculum_learning/ds_train.sh -------------------------------------------------------------------------------- /megatron_lm/curriculum_learning/ds_zero_stage_2_config_baseline.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/curriculum_learning/ds_zero_stage_2_config_baseline.json -------------------------------------------------------------------------------- /megatron_lm/curriculum_learning/ds_zero_stage_2_config_curriculum_fixed_linear.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/curriculum_learning/ds_zero_stage_2_config_curriculum_fixed_linear.json -------------------------------------------------------------------------------- /megatron_lm/examples/ds_pretrain_gpt2-zero2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/examples/ds_pretrain_gpt2-zero2.sh -------------------------------------------------------------------------------- /megatron_lm/examples/ds_pretrain_gpt2-zero3.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/examples/ds_pretrain_gpt2-zero3.sh -------------------------------------------------------------------------------- /megatron_lm/examples/ds_pretrain_gpt2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/examples/ds_pretrain_gpt2.sh -------------------------------------------------------------------------------- /megatron_lm/examples/ds_zero_stage_2_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/examples/ds_zero_stage_2_config.json -------------------------------------------------------------------------------- /megatron_lm/examples/ds_zero_stage_3_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/examples/ds_zero_stage_3_config.json -------------------------------------------------------------------------------- /megatron_lm/examples/ds_zero_stage_3_config_release.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/examples/ds_zero_stage_3_config_release.json -------------------------------------------------------------------------------- /megatron_lm/examples/ds_zero_stage_infinity_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/examples/ds_zero_stage_infinity_config.json -------------------------------------------------------------------------------- /megatron_lm/examples/evaluate_zeroshot_gpt2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/examples/evaluate_zeroshot_gpt2.sh -------------------------------------------------------------------------------- /megatron_lm/examples/finetune_mnli_distributed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/examples/finetune_mnli_distributed.sh -------------------------------------------------------------------------------- /megatron_lm/examples/finetune_race_distributed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/examples/finetune_race_distributed.sh -------------------------------------------------------------------------------- /megatron_lm/examples/generate_text.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/examples/generate_text.sh -------------------------------------------------------------------------------- /megatron_lm/examples/merge_mp_bert.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/examples/merge_mp_bert.sh -------------------------------------------------------------------------------- /megatron_lm/examples/pretrain_bert.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/examples/pretrain_bert.sh -------------------------------------------------------------------------------- /megatron_lm/examples/pretrain_bert_distributed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/examples/pretrain_bert_distributed.sh -------------------------------------------------------------------------------- /megatron_lm/examples/pretrain_gpt2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/examples/pretrain_gpt2.sh -------------------------------------------------------------------------------- /megatron_lm/examples/pretrain_gpt2_distributed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/examples/pretrain_gpt2_distributed.sh -------------------------------------------------------------------------------- /megatron_lm/images/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/images/Makefile -------------------------------------------------------------------------------- /megatron_lm/images/cases.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/images/cases.png -------------------------------------------------------------------------------- /megatron_lm/images/scaling-dp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/images/scaling-dp.png -------------------------------------------------------------------------------- /megatron_lm/images/scaling-mp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/images/scaling-mp.png -------------------------------------------------------------------------------- /megatron_lm/images/tables.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/images/tables.tex -------------------------------------------------------------------------------- /megatron_lm/megatron/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/__init__.py -------------------------------------------------------------------------------- /megatron_lm/megatron/arguments.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/arguments.py -------------------------------------------------------------------------------- /megatron_lm/megatron/checkpointing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/checkpointing.py -------------------------------------------------------------------------------- /megatron_lm/megatron/data/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/data/Makefile -------------------------------------------------------------------------------- /megatron_lm/megatron/data/__init__.py: -------------------------------------------------------------------------------- 1 | from . import indexed_dataset 2 | -------------------------------------------------------------------------------- /megatron_lm/megatron/data/bert_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/data/bert_dataset.py -------------------------------------------------------------------------------- /megatron_lm/megatron/data/dataset_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/data/dataset_utils.py -------------------------------------------------------------------------------- /megatron_lm/megatron/data/gpt2_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/data/gpt2_dataset.py -------------------------------------------------------------------------------- /megatron_lm/megatron/data/helpers.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/data/helpers.cpp -------------------------------------------------------------------------------- /megatron_lm/megatron/data/ict_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/data/ict_dataset.py -------------------------------------------------------------------------------- /megatron_lm/megatron/data/indexed_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/data/indexed_dataset.py -------------------------------------------------------------------------------- /megatron_lm/megatron/data/realm_dataset_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/data/realm_dataset_utils.py -------------------------------------------------------------------------------- /megatron_lm/megatron/data/realm_index.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/data/realm_index.py -------------------------------------------------------------------------------- /megatron_lm/megatron/data/samplers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/data/samplers.py -------------------------------------------------------------------------------- /megatron_lm/megatron/data/test/test_indexed_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/data/test/test_indexed_dataset.py -------------------------------------------------------------------------------- /megatron_lm/megatron/data/test/test_preprocess_data.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/data/test/test_preprocess_data.sh -------------------------------------------------------------------------------- /megatron_lm/megatron/deprecated_data_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/deprecated_data_utils/__init__.py -------------------------------------------------------------------------------- /megatron_lm/megatron/deprecated_data_utils/configure_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/deprecated_data_utils/configure_data.py -------------------------------------------------------------------------------- /megatron_lm/megatron/deprecated_data_utils/corpora.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/deprecated_data_utils/corpora.py -------------------------------------------------------------------------------- /megatron_lm/megatron/deprecated_data_utils/datasets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/deprecated_data_utils/datasets.py -------------------------------------------------------------------------------- /megatron_lm/megatron/deprecated_data_utils/file_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/deprecated_data_utils/file_utils.py -------------------------------------------------------------------------------- /megatron_lm/megatron/deprecated_data_utils/lazy_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/deprecated_data_utils/lazy_loader.py -------------------------------------------------------------------------------- /megatron_lm/megatron/deprecated_data_utils/samplers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/deprecated_data_utils/samplers.py -------------------------------------------------------------------------------- /megatron_lm/megatron/deprecated_data_utils/scripts/presplit_sentences_json.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/deprecated_data_utils/scripts/presplit_sentences_json.py -------------------------------------------------------------------------------- /megatron_lm/megatron/deprecated_data_utils/scripts/split_gpt2_json.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/deprecated_data_utils/scripts/split_gpt2_json.py -------------------------------------------------------------------------------- /megatron_lm/megatron/deprecated_data_utils/scripts/split_json.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/deprecated_data_utils/scripts/split_json.py -------------------------------------------------------------------------------- /megatron_lm/megatron/deprecated_data_utils/tf_dl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/deprecated_data_utils/tf_dl.py -------------------------------------------------------------------------------- /megatron_lm/megatron/deprecated_data_utils/tokenization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/deprecated_data_utils/tokenization.py -------------------------------------------------------------------------------- /megatron_lm/megatron/deprecated_data_utils/tokenization_gpt2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/deprecated_data_utils/tokenization_gpt2.py -------------------------------------------------------------------------------- /megatron_lm/megatron/deprecated_data_utils/wordpiece.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/deprecated_data_utils/wordpiece.py -------------------------------------------------------------------------------- /megatron_lm/megatron/fp16/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/fp16/__init__.py -------------------------------------------------------------------------------- /megatron_lm/megatron/fp16/fp16.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/fp16/fp16.py -------------------------------------------------------------------------------- /megatron_lm/megatron/fp16/fp16util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/fp16/fp16util.py -------------------------------------------------------------------------------- /megatron_lm/megatron/fp16/loss_scaler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/fp16/loss_scaler.py -------------------------------------------------------------------------------- /megatron_lm/megatron/fused_kernels/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/fused_kernels/__init__.py -------------------------------------------------------------------------------- /megatron_lm/megatron/fused_kernels/scaled_masked_softmax.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/fused_kernels/scaled_masked_softmax.cpp -------------------------------------------------------------------------------- /megatron_lm/megatron/fused_kernels/scaled_masked_softmax.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/fused_kernels/scaled_masked_softmax.h -------------------------------------------------------------------------------- /megatron_lm/megatron/fused_kernels/scaled_masked_softmax_cuda.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/fused_kernels/scaled_masked_softmax_cuda.cu -------------------------------------------------------------------------------- /megatron_lm/megatron/fused_kernels/scaled_upper_triang_masked_softmax.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/fused_kernels/scaled_upper_triang_masked_softmax.cpp -------------------------------------------------------------------------------- /megatron_lm/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h -------------------------------------------------------------------------------- /megatron_lm/megatron/fused_kernels/scaled_upper_triang_masked_softmax_cuda.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/fused_kernels/scaled_upper_triang_masked_softmax_cuda.cu -------------------------------------------------------------------------------- /megatron_lm/megatron/global_vars.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/global_vars.py -------------------------------------------------------------------------------- /megatron_lm/megatron/indexer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/indexer.py -------------------------------------------------------------------------------- /megatron_lm/megatron/initialize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/initialize.py -------------------------------------------------------------------------------- /megatron_lm/megatron/learning_rates.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/learning_rates.py -------------------------------------------------------------------------------- /megatron_lm/megatron/memory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/memory.py -------------------------------------------------------------------------------- /megatron_lm/megatron/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/model/__init__.py -------------------------------------------------------------------------------- /megatron_lm/megatron/model/bert_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/model/bert_model.py -------------------------------------------------------------------------------- /megatron_lm/megatron/model/classification.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/model/classification.py -------------------------------------------------------------------------------- /megatron_lm/megatron/model/distributed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/model/distributed.py -------------------------------------------------------------------------------- /megatron_lm/megatron/model/fused_bias_gelu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/model/fused_bias_gelu.py -------------------------------------------------------------------------------- /megatron_lm/megatron/model/fused_softmax.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/model/fused_softmax.py -------------------------------------------------------------------------------- /megatron_lm/megatron/model/gpt2_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/model/gpt2_model.py -------------------------------------------------------------------------------- /megatron_lm/megatron/model/language_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/model/language_model.py -------------------------------------------------------------------------------- /megatron_lm/megatron/model/multiple_choice.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/model/multiple_choice.py -------------------------------------------------------------------------------- /megatron_lm/megatron/model/realm_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/model/realm_model.py -------------------------------------------------------------------------------- /megatron_lm/megatron/model/transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/model/transformer.py -------------------------------------------------------------------------------- /megatron_lm/megatron/model/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/model/utils.py -------------------------------------------------------------------------------- /megatron_lm/megatron/module.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/module.py -------------------------------------------------------------------------------- /megatron_lm/megatron/mpu/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/mpu/__init__.py -------------------------------------------------------------------------------- /megatron_lm/megatron/mpu/cross_entropy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/mpu/cross_entropy.py -------------------------------------------------------------------------------- /megatron_lm/megatron/mpu/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/mpu/data.py -------------------------------------------------------------------------------- /megatron_lm/megatron/mpu/grads.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/mpu/grads.py -------------------------------------------------------------------------------- /megatron_lm/megatron/mpu/initialize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/mpu/initialize.py -------------------------------------------------------------------------------- /megatron_lm/megatron/mpu/layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/mpu/layers.py -------------------------------------------------------------------------------- /megatron_lm/megatron/mpu/mappings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/mpu/mappings.py -------------------------------------------------------------------------------- /megatron_lm/megatron/mpu/random.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/mpu/random.py -------------------------------------------------------------------------------- /megatron_lm/megatron/mpu/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /megatron_lm/megatron/mpu/tests/commons.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/mpu/tests/commons.py -------------------------------------------------------------------------------- /megatron_lm/megatron/mpu/tests/test_cross_entropy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/mpu/tests/test_cross_entropy.py -------------------------------------------------------------------------------- /megatron_lm/megatron/mpu/tests/test_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/mpu/tests/test_data.py -------------------------------------------------------------------------------- /megatron_lm/megatron/mpu/tests/test_initialize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/mpu/tests/test_initialize.py -------------------------------------------------------------------------------- /megatron_lm/megatron/mpu/tests/test_layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/mpu/tests/test_layers.py -------------------------------------------------------------------------------- /megatron_lm/megatron/mpu/tests/test_random.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/mpu/tests/test_random.py -------------------------------------------------------------------------------- /megatron_lm/megatron/mpu/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/mpu/utils.py -------------------------------------------------------------------------------- /megatron_lm/megatron/package_info.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/package_info.py -------------------------------------------------------------------------------- /megatron_lm/megatron/text_generation_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/text_generation_utils.py -------------------------------------------------------------------------------- /megatron_lm/megatron/tokenizer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/tokenizer/__init__.py -------------------------------------------------------------------------------- /megatron_lm/megatron/tokenizer/bert_tokenization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/tokenizer/bert_tokenization.py -------------------------------------------------------------------------------- /megatron_lm/megatron/tokenizer/gpt2_tokenization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/tokenizer/gpt2_tokenization.py -------------------------------------------------------------------------------- /megatron_lm/megatron/tokenizer/sp_tokenization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/tokenizer/sp_tokenization.py -------------------------------------------------------------------------------- /megatron_lm/megatron/tokenizer/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/tokenizer/tokenizer.py -------------------------------------------------------------------------------- /megatron_lm/megatron/training.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/training.py -------------------------------------------------------------------------------- /megatron_lm/megatron/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/utils.py -------------------------------------------------------------------------------- /megatron_lm/pretrain_bert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/pretrain_bert.py -------------------------------------------------------------------------------- /megatron_lm/pretrain_gpt2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/pretrain_gpt2.py -------------------------------------------------------------------------------- /megatron_lm/pretrain_ict.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/pretrain_ict.py -------------------------------------------------------------------------------- /megatron_lm/requirements.txt: -------------------------------------------------------------------------------- 1 | pybind11 2 | torch 3 | six 4 | regex 5 | numpy 6 | -------------------------------------------------------------------------------- /megatron_lm/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/setup.py -------------------------------------------------------------------------------- /megatron_lm/tasks/data_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/tasks/data_utils.py -------------------------------------------------------------------------------- /megatron_lm/tasks/ensemble_classifier.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/tasks/ensemble_classifier.py -------------------------------------------------------------------------------- /megatron_lm/tasks/eval_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/tasks/eval_utils.py -------------------------------------------------------------------------------- /megatron_lm/tasks/finetune_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/tasks/finetune_utils.py -------------------------------------------------------------------------------- /megatron_lm/tasks/glue/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/tasks/glue/data.py -------------------------------------------------------------------------------- /megatron_lm/tasks/glue/finetune.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/tasks/glue/finetune.py -------------------------------------------------------------------------------- /megatron_lm/tasks/glue/mnli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/tasks/glue/mnli.py -------------------------------------------------------------------------------- /megatron_lm/tasks/glue/qqp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/tasks/glue/qqp.py -------------------------------------------------------------------------------- /megatron_lm/tasks/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/tasks/main.py -------------------------------------------------------------------------------- /megatron_lm/tasks/race/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/tasks/race/data.py -------------------------------------------------------------------------------- /megatron_lm/tasks/race/finetune.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/tasks/race/finetune.py -------------------------------------------------------------------------------- /megatron_lm/tasks/zeroshot_gpt2/datasets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/tasks/zeroshot_gpt2/datasets.py -------------------------------------------------------------------------------- /megatron_lm/tasks/zeroshot_gpt2/detokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/tasks/zeroshot_gpt2/detokenizer.py -------------------------------------------------------------------------------- /megatron_lm/tasks/zeroshot_gpt2/evaluate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/tasks/zeroshot_gpt2/evaluate.py -------------------------------------------------------------------------------- /megatron_lm/tools/create_doc_index.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/tools/create_doc_index.py -------------------------------------------------------------------------------- /megatron_lm/tools/generate_samples_gpt2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/tools/generate_samples_gpt2.py -------------------------------------------------------------------------------- /megatron_lm/tools/linter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/tools/linter.py -------------------------------------------------------------------------------- /megatron_lm/tools/merge_mp_partitions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/tools/merge_mp_partitions.py -------------------------------------------------------------------------------- /megatron_lm/tools/openwebtext/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/tools/openwebtext/README.md -------------------------------------------------------------------------------- /megatron_lm/tools/openwebtext/blacklist_urls.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/tools/openwebtext/blacklist_urls.py -------------------------------------------------------------------------------- /megatron_lm/tools/openwebtext/cleanup_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/tools/openwebtext/cleanup_dataset.py -------------------------------------------------------------------------------- /megatron_lm/tools/openwebtext/find_duplicates.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/tools/openwebtext/find_duplicates.py -------------------------------------------------------------------------------- /megatron_lm/tools/openwebtext/group_duplicates_url.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/tools/openwebtext/group_duplicates_url.py -------------------------------------------------------------------------------- /megatron_lm/tools/openwebtext/merge_jsons.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/tools/openwebtext/merge_jsons.py -------------------------------------------------------------------------------- /megatron_lm/tools/openwebtext/remove_group_duplicates.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/tools/openwebtext/remove_group_duplicates.py -------------------------------------------------------------------------------- /megatron_lm/tools/preprocess_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/tools/preprocess_data.py --------------------------------------------------------------------------------