├── CITATION.cff
├── LICENSE
├── README.md
├── docker
    ├── Dockerfile
    ├── build.sh
    ├── pull.sh
    └── run.sh
├── download
    └── download.sh
├── examples
    ├── example_cond_input.json
    ├── generate_conditional_greedy.sh
    ├── generate_conditional_sampling.sh
    ├── generate_interactive.sh
    └── generate_unconditional.sh
└── megatron_lm
    ├── LICENSE
    ├── MANIFEST.in
    ├── README.md
    ├── changes.md
    ├── curriculum_learning
        ├── README.md
        ├── ds_pretrain_gpt2.sh
        ├── ds_train.sh
        ├── ds_zero_stage_2_config_baseline.json
        └── ds_zero_stage_2_config_curriculum_fixed_linear.json
    ├── examples
        ├── ds_pretrain_gpt2-zero2.sh
        ├── ds_pretrain_gpt2-zero3.sh
        ├── ds_pretrain_gpt2.sh
        ├── ds_zero_stage_2_config.json
        ├── ds_zero_stage_3_config.json
        ├── ds_zero_stage_3_config_release.json
        ├── ds_zero_stage_infinity_config.json
        ├── evaluate_zeroshot_gpt2.sh
        ├── finetune_mnli_distributed.sh
        ├── finetune_race_distributed.sh
        ├── generate_text.sh
        ├── merge_mp_bert.sh
        ├── pretrain_bert.sh
        ├── pretrain_bert_distributed.sh
        ├── pretrain_gpt2.sh
        └── pretrain_gpt2_distributed.sh
    ├── images
        ├── Makefile
        ├── cases.png
        ├── scaling-dp.png
        ├── scaling-mp.png
        └── tables.tex
    ├── megatron
        ├── __init__.py
        ├── arguments.py
        ├── checkpointing.py
        ├── data
        │   ├── Makefile
        │   ├── __init__.py
        │   ├── bert_dataset.py
        │   ├── dataset_utils.py
        │   ├── gpt2_dataset.py
        │   ├── helpers.cpp
        │   ├── ict_dataset.py
        │   ├── indexed_dataset.py
        │   ├── realm_dataset_utils.py
        │   ├── realm_index.py
        │   ├── samplers.py
        │   └── test
        │   │   ├── test_indexed_dataset.py
        │   │   └── test_preprocess_data.sh
        ├── deprecated_data_utils
        │   ├── __init__.py
        │   ├── configure_data.py
        │   ├── corpora.py
        │   ├── datasets.py
        │   ├── file_utils.py
        │   ├── lazy_loader.py
        │   ├── samplers.py
        │   ├── scripts
        │   │   ├── presplit_sentences_json.py
        │   │   ├── split_gpt2_json.py
        │   │   └── split_json.py
        │   ├── tf_dl.py
        │   ├── tokenization.py
        │   ├── tokenization_gpt2.py
        │   └── wordpiece.py
        ├── fp16
        │   ├── __init__.py
        │   ├── fp16.py
        │   ├── fp16util.py
        │   └── loss_scaler.py
        ├── fused_kernels
        │   ├── __init__.py
        │   ├── scaled_masked_softmax.cpp
        │   ├── scaled_masked_softmax.h
        │   ├── scaled_masked_softmax_cuda.cu
        │   ├── scaled_upper_triang_masked_softmax.cpp
        │   ├── scaled_upper_triang_masked_softmax.h
        │   └── scaled_upper_triang_masked_softmax_cuda.cu
        ├── global_vars.py
        ├── indexer.py
        ├── initialize.py
        ├── learning_rates.py
        ├── memory.py
        ├── model
        │   ├── __init__.py
        │   ├── bert_model.py
        │   ├── classification.py
        │   ├── distributed.py
        │   ├── fused_bias_gelu.py
        │   ├── fused_softmax.py
        │   ├── gpt2_model.py
        │   ├── language_model.py
        │   ├── multiple_choice.py
        │   ├── realm_model.py
        │   ├── transformer.py
        │   └── utils.py
        ├── module.py
        ├── mpu
        │   ├── __init__.py
        │   ├── cross_entropy.py
        │   ├── data.py
        │   ├── grads.py
        │   ├── initialize.py
        │   ├── layers.py
        │   ├── mappings.py
        │   ├── random.py
        │   ├── tests
        │   │   ├── __init__.py
        │   │   ├── commons.py
        │   │   ├── test_cross_entropy.py
        │   │   ├── test_data.py
        │   │   ├── test_initialize.py
        │   │   ├── test_layers.py
        │   │   └── test_random.py
        │   └── utils.py
        ├── package_info.py
        ├── text_generation_utils.py
        ├── tokenizer
        │   ├── __init__.py
        │   ├── bert_tokenization.py
        │   ├── gpt2_tokenization.py
        │   ├── sp_tokenization.py
        │   └── tokenizer.py
        ├── training.py
        └── utils.py
    ├── pretrain_bert.py
    ├── pretrain_gpt2.py
    ├── pretrain_ict.py
    ├── requirements.txt
    ├── setup.py
    ├── tasks
        ├── data_utils.py
        ├── ensemble_classifier.py
        ├── eval_utils.py
        ├── finetune_utils.py
        ├── glue
        │   ├── data.py
        │   ├── finetune.py
        │   ├── mnli.py
        │   └── qqp.py
        ├── main.py
        ├── race
        │   ├── data.py
        │   └── finetune.py
        └── zeroshot_gpt2
        │   ├── datasets.py
        │   ├── detokenizer.py
        │   └── evaluate.py
    └── tools
        ├── create_doc_index.py
        ├── generate_samples_gpt2.py
        ├── linter.py
        ├── merge_mp_partitions.py
        ├── openwebtext
            ├── README.md
            ├── blacklist_urls.py
            ├── cleanup_dataset.py
            ├── find_duplicates.py
            ├── group_duplicates_url.py
            ├── merge_jsons.py
            └── remove_group_duplicates.py
        └── preprocess_data.py


/CITATION.cff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/CITATION.cff


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/LICENSE


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/README.md


--------------------------------------------------------------------------------
/docker/Dockerfile:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/docker/Dockerfile


--------------------------------------------------------------------------------
/docker/build.sh:
--------------------------------------------------------------------------------
1 | docker build -t yalm-cuda11-ds:1.0 --network host .
2 | 


--------------------------------------------------------------------------------
/docker/pull.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/docker/pull.sh


--------------------------------------------------------------------------------
/docker/run.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/docker/run.sh


--------------------------------------------------------------------------------
/download/download.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/download/download.sh


--------------------------------------------------------------------------------
/examples/example_cond_input.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/examples/example_cond_input.json


--------------------------------------------------------------------------------
/examples/generate_conditional_greedy.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/examples/generate_conditional_greedy.sh


--------------------------------------------------------------------------------
/examples/generate_conditional_sampling.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/examples/generate_conditional_sampling.sh


--------------------------------------------------------------------------------
/examples/generate_interactive.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/examples/generate_interactive.sh


--------------------------------------------------------------------------------
/examples/generate_unconditional.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/examples/generate_unconditional.sh


--------------------------------------------------------------------------------
/megatron_lm/LICENSE:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/LICENSE


--------------------------------------------------------------------------------
/megatron_lm/MANIFEST.in:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/MANIFEST.in


--------------------------------------------------------------------------------
/megatron_lm/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/README.md


--------------------------------------------------------------------------------
/megatron_lm/changes.md:
--------------------------------------------------------------------------------
1 | PRETEND THESE ARE CODE CHANGES
2 | 


--------------------------------------------------------------------------------
/megatron_lm/curriculum_learning/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/curriculum_learning/README.md


--------------------------------------------------------------------------------
/megatron_lm/curriculum_learning/ds_pretrain_gpt2.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/curriculum_learning/ds_pretrain_gpt2.sh


--------------------------------------------------------------------------------
/megatron_lm/curriculum_learning/ds_train.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/curriculum_learning/ds_train.sh


--------------------------------------------------------------------------------
/megatron_lm/curriculum_learning/ds_zero_stage_2_config_baseline.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/curriculum_learning/ds_zero_stage_2_config_baseline.json


--------------------------------------------------------------------------------
/megatron_lm/curriculum_learning/ds_zero_stage_2_config_curriculum_fixed_linear.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/curriculum_learning/ds_zero_stage_2_config_curriculum_fixed_linear.json


--------------------------------------------------------------------------------
/megatron_lm/examples/ds_pretrain_gpt2-zero2.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/examples/ds_pretrain_gpt2-zero2.sh


--------------------------------------------------------------------------------
/megatron_lm/examples/ds_pretrain_gpt2-zero3.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/examples/ds_pretrain_gpt2-zero3.sh


--------------------------------------------------------------------------------
/megatron_lm/examples/ds_pretrain_gpt2.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/examples/ds_pretrain_gpt2.sh


--------------------------------------------------------------------------------
/megatron_lm/examples/ds_zero_stage_2_config.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/examples/ds_zero_stage_2_config.json


--------------------------------------------------------------------------------
/megatron_lm/examples/ds_zero_stage_3_config.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/examples/ds_zero_stage_3_config.json


--------------------------------------------------------------------------------
/megatron_lm/examples/ds_zero_stage_3_config_release.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/examples/ds_zero_stage_3_config_release.json


--------------------------------------------------------------------------------
/megatron_lm/examples/ds_zero_stage_infinity_config.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/examples/ds_zero_stage_infinity_config.json


--------------------------------------------------------------------------------
/megatron_lm/examples/evaluate_zeroshot_gpt2.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/examples/evaluate_zeroshot_gpt2.sh


--------------------------------------------------------------------------------
/megatron_lm/examples/finetune_mnli_distributed.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/examples/finetune_mnli_distributed.sh


--------------------------------------------------------------------------------
/megatron_lm/examples/finetune_race_distributed.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/examples/finetune_race_distributed.sh


--------------------------------------------------------------------------------
/megatron_lm/examples/generate_text.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/examples/generate_text.sh


--------------------------------------------------------------------------------
/megatron_lm/examples/merge_mp_bert.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/examples/merge_mp_bert.sh


--------------------------------------------------------------------------------
/megatron_lm/examples/pretrain_bert.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/examples/pretrain_bert.sh


--------------------------------------------------------------------------------
/megatron_lm/examples/pretrain_bert_distributed.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/examples/pretrain_bert_distributed.sh


--------------------------------------------------------------------------------
/megatron_lm/examples/pretrain_gpt2.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/examples/pretrain_gpt2.sh


--------------------------------------------------------------------------------
/megatron_lm/examples/pretrain_gpt2_distributed.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/examples/pretrain_gpt2_distributed.sh


--------------------------------------------------------------------------------
/megatron_lm/images/Makefile:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/images/Makefile


--------------------------------------------------------------------------------
/megatron_lm/images/cases.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/images/cases.png


--------------------------------------------------------------------------------
/megatron_lm/images/scaling-dp.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/images/scaling-dp.png


--------------------------------------------------------------------------------
/megatron_lm/images/scaling-mp.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/images/scaling-mp.png


--------------------------------------------------------------------------------
/megatron_lm/images/tables.tex:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/images/tables.tex


--------------------------------------------------------------------------------
/megatron_lm/megatron/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/__init__.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/arguments.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/arguments.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/checkpointing.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/checkpointing.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/data/Makefile:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/data/Makefile


--------------------------------------------------------------------------------
/megatron_lm/megatron/data/__init__.py:
--------------------------------------------------------------------------------
1 | from . import indexed_dataset
2 | 


--------------------------------------------------------------------------------
/megatron_lm/megatron/data/bert_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/data/bert_dataset.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/data/dataset_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/data/dataset_utils.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/data/gpt2_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/data/gpt2_dataset.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/data/helpers.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/data/helpers.cpp


--------------------------------------------------------------------------------
/megatron_lm/megatron/data/ict_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/data/ict_dataset.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/data/indexed_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/data/indexed_dataset.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/data/realm_dataset_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/data/realm_dataset_utils.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/data/realm_index.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/data/realm_index.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/data/samplers.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/data/samplers.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/data/test/test_indexed_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/data/test/test_indexed_dataset.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/data/test/test_preprocess_data.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/data/test/test_preprocess_data.sh


--------------------------------------------------------------------------------
/megatron_lm/megatron/deprecated_data_utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/deprecated_data_utils/__init__.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/deprecated_data_utils/configure_data.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/deprecated_data_utils/configure_data.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/deprecated_data_utils/corpora.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/deprecated_data_utils/corpora.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/deprecated_data_utils/datasets.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/deprecated_data_utils/datasets.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/deprecated_data_utils/file_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/deprecated_data_utils/file_utils.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/deprecated_data_utils/lazy_loader.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/deprecated_data_utils/lazy_loader.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/deprecated_data_utils/samplers.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/deprecated_data_utils/samplers.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/deprecated_data_utils/scripts/presplit_sentences_json.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/deprecated_data_utils/scripts/presplit_sentences_json.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/deprecated_data_utils/scripts/split_gpt2_json.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/deprecated_data_utils/scripts/split_gpt2_json.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/deprecated_data_utils/scripts/split_json.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/deprecated_data_utils/scripts/split_json.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/deprecated_data_utils/tf_dl.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/deprecated_data_utils/tf_dl.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/deprecated_data_utils/tokenization.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/deprecated_data_utils/tokenization.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/deprecated_data_utils/tokenization_gpt2.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/deprecated_data_utils/tokenization_gpt2.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/deprecated_data_utils/wordpiece.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/deprecated_data_utils/wordpiece.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/fp16/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/fp16/__init__.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/fp16/fp16.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/fp16/fp16.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/fp16/fp16util.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/fp16/fp16util.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/fp16/loss_scaler.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/fp16/loss_scaler.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/fused_kernels/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/fused_kernels/__init__.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/fused_kernels/scaled_masked_softmax.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/fused_kernels/scaled_masked_softmax.cpp


--------------------------------------------------------------------------------
/megatron_lm/megatron/fused_kernels/scaled_masked_softmax.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/fused_kernels/scaled_masked_softmax.h


--------------------------------------------------------------------------------
/megatron_lm/megatron/fused_kernels/scaled_masked_softmax_cuda.cu:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/fused_kernels/scaled_masked_softmax_cuda.cu


--------------------------------------------------------------------------------
/megatron_lm/megatron/fused_kernels/scaled_upper_triang_masked_softmax.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/fused_kernels/scaled_upper_triang_masked_softmax.cpp


--------------------------------------------------------------------------------
/megatron_lm/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h


--------------------------------------------------------------------------------
/megatron_lm/megatron/fused_kernels/scaled_upper_triang_masked_softmax_cuda.cu:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/fused_kernels/scaled_upper_triang_masked_softmax_cuda.cu


--------------------------------------------------------------------------------
/megatron_lm/megatron/global_vars.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/global_vars.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/indexer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/indexer.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/initialize.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/initialize.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/learning_rates.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/learning_rates.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/memory.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/memory.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/model/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/model/__init__.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/model/bert_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/model/bert_model.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/model/classification.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/model/classification.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/model/distributed.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/model/distributed.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/model/fused_bias_gelu.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/model/fused_bias_gelu.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/model/fused_softmax.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/model/fused_softmax.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/model/gpt2_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/model/gpt2_model.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/model/language_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/model/language_model.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/model/multiple_choice.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/model/multiple_choice.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/model/realm_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/model/realm_model.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/model/transformer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/model/transformer.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/model/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/model/utils.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/module.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/module.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/mpu/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/mpu/__init__.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/mpu/cross_entropy.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/mpu/cross_entropy.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/mpu/data.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/mpu/data.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/mpu/grads.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/mpu/grads.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/mpu/initialize.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/mpu/initialize.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/mpu/layers.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/mpu/layers.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/mpu/mappings.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/mpu/mappings.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/mpu/random.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/mpu/random.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/mpu/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/megatron_lm/megatron/mpu/tests/commons.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/mpu/tests/commons.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/mpu/tests/test_cross_entropy.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/mpu/tests/test_cross_entropy.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/mpu/tests/test_data.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/mpu/tests/test_data.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/mpu/tests/test_initialize.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/mpu/tests/test_initialize.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/mpu/tests/test_layers.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/mpu/tests/test_layers.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/mpu/tests/test_random.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/mpu/tests/test_random.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/mpu/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/mpu/utils.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/package_info.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/package_info.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/text_generation_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/text_generation_utils.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/tokenizer/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/tokenizer/__init__.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/tokenizer/bert_tokenization.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/tokenizer/bert_tokenization.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/tokenizer/gpt2_tokenization.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/tokenizer/gpt2_tokenization.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/tokenizer/sp_tokenization.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/tokenizer/sp_tokenization.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/tokenizer/tokenizer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/tokenizer/tokenizer.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/training.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/training.py


--------------------------------------------------------------------------------
/megatron_lm/megatron/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/megatron/utils.py


--------------------------------------------------------------------------------
/megatron_lm/pretrain_bert.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/pretrain_bert.py


--------------------------------------------------------------------------------
/megatron_lm/pretrain_gpt2.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/pretrain_gpt2.py


--------------------------------------------------------------------------------
/megatron_lm/pretrain_ict.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/pretrain_ict.py


--------------------------------------------------------------------------------
/megatron_lm/requirements.txt:
--------------------------------------------------------------------------------
1 | pybind11
2 | torch
3 | six
4 | regex
5 | numpy
6 | 


--------------------------------------------------------------------------------
/megatron_lm/setup.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/setup.py


--------------------------------------------------------------------------------
/megatron_lm/tasks/data_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/tasks/data_utils.py


--------------------------------------------------------------------------------
/megatron_lm/tasks/ensemble_classifier.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/tasks/ensemble_classifier.py


--------------------------------------------------------------------------------
/megatron_lm/tasks/eval_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/tasks/eval_utils.py


--------------------------------------------------------------------------------
/megatron_lm/tasks/finetune_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/tasks/finetune_utils.py


--------------------------------------------------------------------------------
/megatron_lm/tasks/glue/data.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/tasks/glue/data.py


--------------------------------------------------------------------------------
/megatron_lm/tasks/glue/finetune.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/tasks/glue/finetune.py


--------------------------------------------------------------------------------
/megatron_lm/tasks/glue/mnli.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/tasks/glue/mnli.py


--------------------------------------------------------------------------------
/megatron_lm/tasks/glue/qqp.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/tasks/glue/qqp.py


--------------------------------------------------------------------------------
/megatron_lm/tasks/main.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/tasks/main.py


--------------------------------------------------------------------------------
/megatron_lm/tasks/race/data.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/tasks/race/data.py


--------------------------------------------------------------------------------
/megatron_lm/tasks/race/finetune.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/tasks/race/finetune.py


--------------------------------------------------------------------------------
/megatron_lm/tasks/zeroshot_gpt2/datasets.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/tasks/zeroshot_gpt2/datasets.py


--------------------------------------------------------------------------------
/megatron_lm/tasks/zeroshot_gpt2/detokenizer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/tasks/zeroshot_gpt2/detokenizer.py


--------------------------------------------------------------------------------
/megatron_lm/tasks/zeroshot_gpt2/evaluate.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/tasks/zeroshot_gpt2/evaluate.py


--------------------------------------------------------------------------------
/megatron_lm/tools/create_doc_index.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/tools/create_doc_index.py


--------------------------------------------------------------------------------
/megatron_lm/tools/generate_samples_gpt2.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/tools/generate_samples_gpt2.py


--------------------------------------------------------------------------------
/megatron_lm/tools/linter.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/tools/linter.py


--------------------------------------------------------------------------------
/megatron_lm/tools/merge_mp_partitions.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/tools/merge_mp_partitions.py


--------------------------------------------------------------------------------
/megatron_lm/tools/openwebtext/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/tools/openwebtext/README.md


--------------------------------------------------------------------------------
/megatron_lm/tools/openwebtext/blacklist_urls.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/tools/openwebtext/blacklist_urls.py


--------------------------------------------------------------------------------
/megatron_lm/tools/openwebtext/cleanup_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/tools/openwebtext/cleanup_dataset.py


--------------------------------------------------------------------------------
/megatron_lm/tools/openwebtext/find_duplicates.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/tools/openwebtext/find_duplicates.py


--------------------------------------------------------------------------------
/megatron_lm/tools/openwebtext/group_duplicates_url.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/tools/openwebtext/group_duplicates_url.py


--------------------------------------------------------------------------------
/megatron_lm/tools/openwebtext/merge_jsons.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/tools/openwebtext/merge_jsons.py


--------------------------------------------------------------------------------
/megatron_lm/tools/openwebtext/remove_group_duplicates.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/tools/openwebtext/remove_group_duplicates.py


--------------------------------------------------------------------------------
/megatron_lm/tools/preprocess_data.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yandex/YaLM-100B/HEAD/megatron_lm/tools/preprocess_data.py


--------------------------------------------------------------------------------