├── .gitignore ├── .gitlab-ci.yml ├── CODEOWNERS ├── LICENSE ├── MANIFEST.in ├── README.md ├── SECURITY.md ├── dataset ├── README.md ├── download_books.sh ├── download_ckpt.sh └── download_vocab.sh ├── examples ├── MoE │ ├── ds_config_gpt_TEMPLATE.json │ ├── ds_config_gpt_Zero2_TEMPLATE.json │ ├── ds_evalharness.sh │ ├── ds_pretrain_gpt_1.3B_MoE128.sh │ ├── ds_pretrain_gpt_1.3B_PR-MoE64or128.sh │ ├── ds_pretrain_gpt_1.3B_PR-MoE64or128_MoS.sh │ ├── ds_pretrain_gpt_1.3B_dense.sh │ ├── ds_pretrain_gpt_1.3B_dense_cl.sh │ ├── ds_pretrain_gpt_125M_MoE64.sh │ ├── ds_pretrain_gpt_125M_dense_cl.sh │ ├── ds_pretrain_gpt_350M_MoE128.sh │ ├── ds_pretrain_gpt_350M_PR-MoE32or64.sh │ ├── ds_pretrain_gpt_350M_PR-MoE32or64_MoS.sh │ ├── ds_pretrain_gpt_350M_dense.sh │ ├── ds_pretrain_gpt_6.7B_dense.sh │ └── readme_evalharness.md ├── README.md ├── azure │ ├── README.md │ ├── run-175b.sh │ ├── run-1t.sh │ └── run-benchmark-model.sh ├── azureml │ ├── Dockerfile.dockerfile │ ├── README.md │ ├── aml_submit.py │ └── prepare_dataset.py ├── bert_with_pile │ ├── README.md │ ├── ds_config_bert_TEMPLATE.json │ ├── ds_finetune_bert_mnli.sh │ ├── ds_finetune_bert_qqp.sh │ ├── ds_finetune_bert_race.sh │ ├── ds_pretrain_bert.sh │ └── prepare_pile_data.py ├── compression │ ├── 125M-Int8-test-64gpu-distilled-group48.sh │ ├── 125M-L10-Int8-test-64gpu-distilled-group48.sh │ ├── 125M-L12-Int8-test-64gpu-distilled-group48.sh │ ├── ds_config_gpt_TEMPLATE.json │ ├── ds_config_gpt_TEMPLATE_compression.json │ ├── ds_evalharness.sh │ ├── ds_pretrain_gpt_1.3B_dense_cl_kd.sh │ ├── ds_pretrain_gpt_125M_dense_cl_kd.sh │ ├── ds_pretrain_gpt_125M_dense_kd.sh │ └── ds_pretrain_gpt_350M_dense_kd.sh ├── create_embeddings.sh ├── curriculum_learning │ ├── README.md │ ├── ds_pretrain_gpt2.sh │ ├── ds_train.sh │ ├── ds_zero_stage_1_config_baseline.json │ └── ds_zero_stage_1_config_curriculum_fixed_linear.json ├── data_efficiency │ ├── README.md │ ├── analyze_data.py │ ├── bert │ │ ├── ds_analyze_bert_data_map.sh │ │ ├── ds_analyze_bert_data_reduce.sh │ │ ├── finetune │ │ │ ├── ds_config_bert_TEMPLATE.json │ │ │ ├── ds_finetune_bert_mnli.sh │ │ │ ├── ds_finetune_bert_qqp.sh │ │ │ ├── ds_finetune_bert_race.sh │ │ │ └── ds_finetune_gather_result.py │ │ ├── finetune_glue │ │ │ ├── ds_config_bert_TEMPLATE.json │ │ │ ├── ds_finetune_bert_glue.sh │ │ │ ├── ds_finetune_bert_glue_run.sh │ │ │ └── ds_finetune_gather_result.py │ │ ├── pile_data_download_preprocess.py │ │ └── pretrain │ │ │ ├── ds_config_bert_1clmetric_TEMPLATE.json │ │ │ ├── ds_config_bert_2clmetrics_TEMPLATE.json │ │ │ ├── ds_pretrain_bert_336M_base_script.sh │ │ │ └── ds_pretrain_bert_336M_run.sh │ └── gpt │ │ ├── ds_analyze_gpt_data_map.sh │ │ ├── ds_analyze_gpt_data_reduce.sh │ │ ├── eval │ │ ├── ds_config_eval_dummy.json │ │ ├── ds_evalharness_1gpu.sh │ │ ├── ds_evalharness_gather_result.py │ │ ├── ds_evalharness_parallel_run.sh │ │ └── ds_evalharness_parallel_run_10shot.sh │ │ └── pretrain │ │ ├── ds_config_gpt_1clmetric_TEMPLATE.json │ │ ├── ds_config_gpt_2clmetrics_TEMPLATE.json │ │ ├── ds_pretrain_gpt_1.3B_dense_base_script.sh │ │ └── ds_pretrain_gpt_1.3B_dense_run.sh ├── evaluate_ict_zeroshot_nq.sh ├── evaluate_zeroshot_gpt.sh ├── finetune_mnli_distributed.sh ├── finetune_race_distributed.sh ├── generate_text.sh ├── merge_mp_bert.sh ├── pretrain_bert.sh ├── pretrain_bert_distributed.sh ├── pretrain_bert_distributed_with_mp.sh ├── pretrain_gpt.sh ├── pretrain_gpt3_175B.sh ├── pretrain_gpt_distributed.sh ├── pretrain_gpt_distributed_with_mp.sh ├── pretrain_ict.sh ├── pretrain_llama_distributed.sh ├── pretrain_t5.sh ├── pretrain_t5_distributed.sh ├── pretrain_t5_distributed_with_mp.sh └── run_deepspeed_example.sh ├── images └── cases_april2021.png ├── megatron ├── __init__.py ├── arguments.py ├── checkpointing.py ├── data │ ├── Makefile │ ├── __init__.py │ ├── autoaugment.py │ ├── bert_dataset.py │ ├── biencoder_dataset_utils.py │ ├── blendable_dataset.py │ ├── data_samplers.py │ ├── dataset_utils.py │ ├── gpt_dataset.py │ ├── helpers.cpp │ ├── ict_dataset.py │ ├── indexed_dataset.py │ ├── orqa_wiki_dataset.py │ ├── realm_dataset_utils.py │ ├── realm_index.py │ ├── t5_dataset.py │ ├── test │ │ ├── test_indexed_dataset.py │ │ └── test_preprocess_data.sh │ └── vit_dataset.py ├── enums.py ├── fp16_deprecated │ └── loss_scaler.py ├── fused_kernels │ ├── __init__.py │ ├── compat.h │ ├── layer_norm_cuda.cpp │ ├── layer_norm_cuda_kernel.cu │ ├── scaled_masked_softmax.cpp │ ├── scaled_masked_softmax.h │ ├── scaled_masked_softmax_cuda.cu │ ├── scaled_upper_triang_masked_softmax.cpp │ ├── scaled_upper_triang_masked_softmax.h │ ├── scaled_upper_triang_masked_softmax_cuda.cu │ └── type_shim.h ├── global_vars.py ├── indexer.py ├── initialize.py ├── learning_rates.py ├── memory.py ├── microbatches.py ├── model │ ├── __init__.py │ ├── bert_model.py │ ├── biencoder_model.py │ ├── classification.py │ ├── distributed.py │ ├── enums.py │ ├── fused_bias_gelu.py │ ├── fused_layer_norm.py │ ├── fused_softmax.py │ ├── gpt_model.py │ ├── language_model.py │ ├── module.py │ ├── multiple_choice.py │ ├── realm_model.py │ ├── rotary_pos_embedding.py │ ├── t5_model.py │ ├── transformer.py │ ├── utils.py │ └── vit_model.py ├── mpu │ ├── __init__.py │ ├── cross_entropy.py │ ├── data.py │ ├── initialize.py │ ├── layers.py │ ├── mappings.py │ ├── random.py │ ├── tests │ │ ├── __init__.py │ │ ├── commons.py │ │ ├── test_cross_entropy.py │ │ ├── test_data.py │ │ ├── test_initialize.py │ │ ├── test_layers.py │ │ └── test_random.py │ └── utils.py ├── optimizer │ ├── __init__.py │ ├── clip_grads.py │ ├── grad_scaler.py │ └── optimizer.py ├── p2p_communication.py ├── package_info.py ├── schedules.py ├── text_generation_utils.py ├── tokenizer │ ├── __init__.py │ ├── bert_tokenization.py │ ├── gpt2_tokenization.py │ ├── sp_tokenization.py │ └── tokenizer.py ├── training.py └── utils.py ├── pretrain_bert.py ├── pretrain_gpt.py ├── pretrain_ict.py ├── pretrain_t5.py ├── pretrain_vit.py ├── requirements.txt ├── setup.py ├── tasks ├── data_utils.py ├── ensemble_classifier.py ├── eval_harness │ ├── download.py │ ├── evaluate.py │ └── report-to-csv.py ├── eval_utils.py ├── finetune_utils.py ├── glue │ ├── cola.py │ ├── data.py │ ├── finetune.py │ ├── mnli.py │ ├── mrpc.py │ ├── qnli.py │ ├── qqp.py │ ├── rte.py │ ├── sst2.py │ └── stsb.py ├── main.py ├── orqa │ ├── evaluate_orqa.py │ ├── evaluate_utils.py │ └── natural_questions │ │ ├── nq.py │ │ ├── qa_utils.py │ │ └── tokenizers.py ├── race │ ├── data.py │ └── finetune.py ├── vision │ ├── classification.py │ ├── eval_utils.py │ ├── finetune_utils.py │ └── main.py └── zeroshot_gpt │ ├── datasets.py │ ├── detokenizer.py │ └── evaluate.py ├── tests ├── run_megatron.py ├── test_basic.py └── test_megatron.py └── tools ├── convert_checkpoint ├── README.md ├── deepspeed_checkpoint.py ├── deepspeed_to_megatron.py ├── deepspeed_to_transformers.py ├── inspect_checkpoint.py └── inspect_deepspeed_checkpoint.py ├── create_doc_index.py ├── generate_samples_gpt.py ├── linter.py ├── merge_mp_partitions.py ├── openwebtext ├── README.md ├── add_id.py ├── blacklist_urls.py ├── cleanup_dataset.py ├── cleanup_fix_dataset.py ├── filter_ngrams.py ├── find_duplicates.py ├── group_duplicate_url.py ├── merge_jsons.py └── remove_group_duplicates.py └── preprocess_data.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/.gitignore -------------------------------------------------------------------------------- /.gitlab-ci.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/.gitlab-ci.yml -------------------------------------------------------------------------------- /CODEOWNERS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/CODEOWNERS -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/LICENSE -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/MANIFEST.in -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/README.md -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/SECURITY.md -------------------------------------------------------------------------------- /dataset/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/dataset/README.md -------------------------------------------------------------------------------- /dataset/download_books.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/dataset/download_books.sh -------------------------------------------------------------------------------- /dataset/download_ckpt.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/dataset/download_ckpt.sh -------------------------------------------------------------------------------- /dataset/download_vocab.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/dataset/download_vocab.sh -------------------------------------------------------------------------------- /examples/MoE/ds_config_gpt_TEMPLATE.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/MoE/ds_config_gpt_TEMPLATE.json -------------------------------------------------------------------------------- /examples/MoE/ds_config_gpt_Zero2_TEMPLATE.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/MoE/ds_config_gpt_Zero2_TEMPLATE.json -------------------------------------------------------------------------------- /examples/MoE/ds_evalharness.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/MoE/ds_evalharness.sh -------------------------------------------------------------------------------- /examples/MoE/ds_pretrain_gpt_1.3B_MoE128.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/MoE/ds_pretrain_gpt_1.3B_MoE128.sh -------------------------------------------------------------------------------- /examples/MoE/ds_pretrain_gpt_1.3B_PR-MoE64or128.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/MoE/ds_pretrain_gpt_1.3B_PR-MoE64or128.sh -------------------------------------------------------------------------------- /examples/MoE/ds_pretrain_gpt_1.3B_PR-MoE64or128_MoS.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/MoE/ds_pretrain_gpt_1.3B_PR-MoE64or128_MoS.sh -------------------------------------------------------------------------------- /examples/MoE/ds_pretrain_gpt_1.3B_dense.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/MoE/ds_pretrain_gpt_1.3B_dense.sh -------------------------------------------------------------------------------- /examples/MoE/ds_pretrain_gpt_1.3B_dense_cl.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/MoE/ds_pretrain_gpt_1.3B_dense_cl.sh -------------------------------------------------------------------------------- /examples/MoE/ds_pretrain_gpt_125M_MoE64.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/MoE/ds_pretrain_gpt_125M_MoE64.sh -------------------------------------------------------------------------------- /examples/MoE/ds_pretrain_gpt_125M_dense_cl.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/MoE/ds_pretrain_gpt_125M_dense_cl.sh -------------------------------------------------------------------------------- /examples/MoE/ds_pretrain_gpt_350M_MoE128.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/MoE/ds_pretrain_gpt_350M_MoE128.sh -------------------------------------------------------------------------------- /examples/MoE/ds_pretrain_gpt_350M_PR-MoE32or64.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/MoE/ds_pretrain_gpt_350M_PR-MoE32or64.sh -------------------------------------------------------------------------------- /examples/MoE/ds_pretrain_gpt_350M_PR-MoE32or64_MoS.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/MoE/ds_pretrain_gpt_350M_PR-MoE32or64_MoS.sh -------------------------------------------------------------------------------- /examples/MoE/ds_pretrain_gpt_350M_dense.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/MoE/ds_pretrain_gpt_350M_dense.sh -------------------------------------------------------------------------------- /examples/MoE/ds_pretrain_gpt_6.7B_dense.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/MoE/ds_pretrain_gpt_6.7B_dense.sh -------------------------------------------------------------------------------- /examples/MoE/readme_evalharness.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/MoE/readme_evalharness.md -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/README.md -------------------------------------------------------------------------------- /examples/azure/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/azure/README.md -------------------------------------------------------------------------------- /examples/azure/run-175b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/azure/run-175b.sh -------------------------------------------------------------------------------- /examples/azure/run-1t.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/azure/run-1t.sh -------------------------------------------------------------------------------- /examples/azure/run-benchmark-model.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/azure/run-benchmark-model.sh -------------------------------------------------------------------------------- /examples/azureml/Dockerfile.dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/azureml/Dockerfile.dockerfile -------------------------------------------------------------------------------- /examples/azureml/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/azureml/README.md -------------------------------------------------------------------------------- /examples/azureml/aml_submit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/azureml/aml_submit.py -------------------------------------------------------------------------------- /examples/azureml/prepare_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/azureml/prepare_dataset.py -------------------------------------------------------------------------------- /examples/bert_with_pile/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/bert_with_pile/README.md -------------------------------------------------------------------------------- /examples/bert_with_pile/ds_config_bert_TEMPLATE.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/bert_with_pile/ds_config_bert_TEMPLATE.json -------------------------------------------------------------------------------- /examples/bert_with_pile/ds_finetune_bert_mnli.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/bert_with_pile/ds_finetune_bert_mnli.sh -------------------------------------------------------------------------------- /examples/bert_with_pile/ds_finetune_bert_qqp.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/bert_with_pile/ds_finetune_bert_qqp.sh -------------------------------------------------------------------------------- /examples/bert_with_pile/ds_finetune_bert_race.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/bert_with_pile/ds_finetune_bert_race.sh -------------------------------------------------------------------------------- /examples/bert_with_pile/ds_pretrain_bert.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/bert_with_pile/ds_pretrain_bert.sh -------------------------------------------------------------------------------- /examples/bert_with_pile/prepare_pile_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/bert_with_pile/prepare_pile_data.py -------------------------------------------------------------------------------- /examples/compression/125M-Int8-test-64gpu-distilled-group48.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/compression/125M-Int8-test-64gpu-distilled-group48.sh -------------------------------------------------------------------------------- /examples/compression/125M-L10-Int8-test-64gpu-distilled-group48.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/compression/125M-L10-Int8-test-64gpu-distilled-group48.sh -------------------------------------------------------------------------------- /examples/compression/125M-L12-Int8-test-64gpu-distilled-group48.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/compression/125M-L12-Int8-test-64gpu-distilled-group48.sh -------------------------------------------------------------------------------- /examples/compression/ds_config_gpt_TEMPLATE.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/compression/ds_config_gpt_TEMPLATE.json -------------------------------------------------------------------------------- /examples/compression/ds_config_gpt_TEMPLATE_compression.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/compression/ds_config_gpt_TEMPLATE_compression.json -------------------------------------------------------------------------------- /examples/compression/ds_evalharness.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/compression/ds_evalharness.sh -------------------------------------------------------------------------------- /examples/compression/ds_pretrain_gpt_1.3B_dense_cl_kd.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/compression/ds_pretrain_gpt_1.3B_dense_cl_kd.sh -------------------------------------------------------------------------------- /examples/compression/ds_pretrain_gpt_125M_dense_cl_kd.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/compression/ds_pretrain_gpt_125M_dense_cl_kd.sh -------------------------------------------------------------------------------- /examples/compression/ds_pretrain_gpt_125M_dense_kd.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/compression/ds_pretrain_gpt_125M_dense_kd.sh -------------------------------------------------------------------------------- /examples/compression/ds_pretrain_gpt_350M_dense_kd.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/compression/ds_pretrain_gpt_350M_dense_kd.sh -------------------------------------------------------------------------------- /examples/create_embeddings.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/create_embeddings.sh -------------------------------------------------------------------------------- /examples/curriculum_learning/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/curriculum_learning/README.md -------------------------------------------------------------------------------- /examples/curriculum_learning/ds_pretrain_gpt2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/curriculum_learning/ds_pretrain_gpt2.sh -------------------------------------------------------------------------------- /examples/curriculum_learning/ds_train.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/curriculum_learning/ds_train.sh -------------------------------------------------------------------------------- /examples/curriculum_learning/ds_zero_stage_1_config_baseline.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/curriculum_learning/ds_zero_stage_1_config_baseline.json -------------------------------------------------------------------------------- /examples/curriculum_learning/ds_zero_stage_1_config_curriculum_fixed_linear.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/curriculum_learning/ds_zero_stage_1_config_curriculum_fixed_linear.json -------------------------------------------------------------------------------- /examples/data_efficiency/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/data_efficiency/README.md -------------------------------------------------------------------------------- /examples/data_efficiency/analyze_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/data_efficiency/analyze_data.py -------------------------------------------------------------------------------- /examples/data_efficiency/bert/ds_analyze_bert_data_map.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/data_efficiency/bert/ds_analyze_bert_data_map.sh -------------------------------------------------------------------------------- /examples/data_efficiency/bert/ds_analyze_bert_data_reduce.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/data_efficiency/bert/ds_analyze_bert_data_reduce.sh -------------------------------------------------------------------------------- /examples/data_efficiency/bert/finetune/ds_config_bert_TEMPLATE.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/data_efficiency/bert/finetune/ds_config_bert_TEMPLATE.json -------------------------------------------------------------------------------- /examples/data_efficiency/bert/finetune/ds_finetune_bert_mnli.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/data_efficiency/bert/finetune/ds_finetune_bert_mnli.sh -------------------------------------------------------------------------------- /examples/data_efficiency/bert/finetune/ds_finetune_bert_qqp.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/data_efficiency/bert/finetune/ds_finetune_bert_qqp.sh -------------------------------------------------------------------------------- /examples/data_efficiency/bert/finetune/ds_finetune_bert_race.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/data_efficiency/bert/finetune/ds_finetune_bert_race.sh -------------------------------------------------------------------------------- /examples/data_efficiency/bert/finetune/ds_finetune_gather_result.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/data_efficiency/bert/finetune/ds_finetune_gather_result.py -------------------------------------------------------------------------------- /examples/data_efficiency/bert/finetune_glue/ds_config_bert_TEMPLATE.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/data_efficiency/bert/finetune_glue/ds_config_bert_TEMPLATE.json -------------------------------------------------------------------------------- /examples/data_efficiency/bert/finetune_glue/ds_finetune_bert_glue.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/data_efficiency/bert/finetune_glue/ds_finetune_bert_glue.sh -------------------------------------------------------------------------------- /examples/data_efficiency/bert/finetune_glue/ds_finetune_bert_glue_run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/data_efficiency/bert/finetune_glue/ds_finetune_bert_glue_run.sh -------------------------------------------------------------------------------- /examples/data_efficiency/bert/finetune_glue/ds_finetune_gather_result.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/data_efficiency/bert/finetune_glue/ds_finetune_gather_result.py -------------------------------------------------------------------------------- /examples/data_efficiency/bert/pile_data_download_preprocess.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/data_efficiency/bert/pile_data_download_preprocess.py -------------------------------------------------------------------------------- /examples/data_efficiency/bert/pretrain/ds_config_bert_1clmetric_TEMPLATE.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/data_efficiency/bert/pretrain/ds_config_bert_1clmetric_TEMPLATE.json -------------------------------------------------------------------------------- /examples/data_efficiency/bert/pretrain/ds_config_bert_2clmetrics_TEMPLATE.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/data_efficiency/bert/pretrain/ds_config_bert_2clmetrics_TEMPLATE.json -------------------------------------------------------------------------------- /examples/data_efficiency/bert/pretrain/ds_pretrain_bert_336M_base_script.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/data_efficiency/bert/pretrain/ds_pretrain_bert_336M_base_script.sh -------------------------------------------------------------------------------- /examples/data_efficiency/bert/pretrain/ds_pretrain_bert_336M_run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/data_efficiency/bert/pretrain/ds_pretrain_bert_336M_run.sh -------------------------------------------------------------------------------- /examples/data_efficiency/gpt/ds_analyze_gpt_data_map.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/data_efficiency/gpt/ds_analyze_gpt_data_map.sh -------------------------------------------------------------------------------- /examples/data_efficiency/gpt/ds_analyze_gpt_data_reduce.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/data_efficiency/gpt/ds_analyze_gpt_data_reduce.sh -------------------------------------------------------------------------------- /examples/data_efficiency/gpt/eval/ds_config_eval_dummy.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/data_efficiency/gpt/eval/ds_config_eval_dummy.json -------------------------------------------------------------------------------- /examples/data_efficiency/gpt/eval/ds_evalharness_1gpu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/data_efficiency/gpt/eval/ds_evalharness_1gpu.sh -------------------------------------------------------------------------------- /examples/data_efficiency/gpt/eval/ds_evalharness_gather_result.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/data_efficiency/gpt/eval/ds_evalharness_gather_result.py -------------------------------------------------------------------------------- /examples/data_efficiency/gpt/eval/ds_evalharness_parallel_run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/data_efficiency/gpt/eval/ds_evalharness_parallel_run.sh -------------------------------------------------------------------------------- /examples/data_efficiency/gpt/eval/ds_evalharness_parallel_run_10shot.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/data_efficiency/gpt/eval/ds_evalharness_parallel_run_10shot.sh -------------------------------------------------------------------------------- /examples/data_efficiency/gpt/pretrain/ds_config_gpt_1clmetric_TEMPLATE.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/data_efficiency/gpt/pretrain/ds_config_gpt_1clmetric_TEMPLATE.json -------------------------------------------------------------------------------- /examples/data_efficiency/gpt/pretrain/ds_config_gpt_2clmetrics_TEMPLATE.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/data_efficiency/gpt/pretrain/ds_config_gpt_2clmetrics_TEMPLATE.json -------------------------------------------------------------------------------- /examples/data_efficiency/gpt/pretrain/ds_pretrain_gpt_1.3B_dense_base_script.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/data_efficiency/gpt/pretrain/ds_pretrain_gpt_1.3B_dense_base_script.sh -------------------------------------------------------------------------------- /examples/data_efficiency/gpt/pretrain/ds_pretrain_gpt_1.3B_dense_run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/data_efficiency/gpt/pretrain/ds_pretrain_gpt_1.3B_dense_run.sh -------------------------------------------------------------------------------- /examples/evaluate_ict_zeroshot_nq.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/evaluate_ict_zeroshot_nq.sh -------------------------------------------------------------------------------- /examples/evaluate_zeroshot_gpt.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/evaluate_zeroshot_gpt.sh -------------------------------------------------------------------------------- /examples/finetune_mnli_distributed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/finetune_mnli_distributed.sh -------------------------------------------------------------------------------- /examples/finetune_race_distributed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/finetune_race_distributed.sh -------------------------------------------------------------------------------- /examples/generate_text.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/generate_text.sh -------------------------------------------------------------------------------- /examples/merge_mp_bert.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/merge_mp_bert.sh -------------------------------------------------------------------------------- /examples/pretrain_bert.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/pretrain_bert.sh -------------------------------------------------------------------------------- /examples/pretrain_bert_distributed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/pretrain_bert_distributed.sh -------------------------------------------------------------------------------- /examples/pretrain_bert_distributed_with_mp.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/pretrain_bert_distributed_with_mp.sh -------------------------------------------------------------------------------- /examples/pretrain_gpt.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/pretrain_gpt.sh -------------------------------------------------------------------------------- /examples/pretrain_gpt3_175B.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/pretrain_gpt3_175B.sh -------------------------------------------------------------------------------- /examples/pretrain_gpt_distributed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/pretrain_gpt_distributed.sh -------------------------------------------------------------------------------- /examples/pretrain_gpt_distributed_with_mp.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/pretrain_gpt_distributed_with_mp.sh -------------------------------------------------------------------------------- /examples/pretrain_ict.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/pretrain_ict.sh -------------------------------------------------------------------------------- /examples/pretrain_llama_distributed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/pretrain_llama_distributed.sh -------------------------------------------------------------------------------- /examples/pretrain_t5.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/pretrain_t5.sh -------------------------------------------------------------------------------- /examples/pretrain_t5_distributed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/pretrain_t5_distributed.sh -------------------------------------------------------------------------------- /examples/pretrain_t5_distributed_with_mp.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/pretrain_t5_distributed_with_mp.sh -------------------------------------------------------------------------------- /examples/run_deepspeed_example.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/examples/run_deepspeed_example.sh -------------------------------------------------------------------------------- /images/cases_april2021.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/images/cases_april2021.png -------------------------------------------------------------------------------- /megatron/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/__init__.py -------------------------------------------------------------------------------- /megatron/arguments.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/arguments.py -------------------------------------------------------------------------------- /megatron/checkpointing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/checkpointing.py -------------------------------------------------------------------------------- /megatron/data/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/data/Makefile -------------------------------------------------------------------------------- /megatron/data/__init__.py: -------------------------------------------------------------------------------- 1 | from . import indexed_dataset 2 | -------------------------------------------------------------------------------- /megatron/data/autoaugment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/data/autoaugment.py -------------------------------------------------------------------------------- /megatron/data/bert_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/data/bert_dataset.py -------------------------------------------------------------------------------- /megatron/data/biencoder_dataset_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/data/biencoder_dataset_utils.py -------------------------------------------------------------------------------- /megatron/data/blendable_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/data/blendable_dataset.py -------------------------------------------------------------------------------- /megatron/data/data_samplers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/data/data_samplers.py -------------------------------------------------------------------------------- /megatron/data/dataset_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/data/dataset_utils.py -------------------------------------------------------------------------------- /megatron/data/gpt_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/data/gpt_dataset.py -------------------------------------------------------------------------------- /megatron/data/helpers.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/data/helpers.cpp -------------------------------------------------------------------------------- /megatron/data/ict_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/data/ict_dataset.py -------------------------------------------------------------------------------- /megatron/data/indexed_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/data/indexed_dataset.py -------------------------------------------------------------------------------- /megatron/data/orqa_wiki_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/data/orqa_wiki_dataset.py -------------------------------------------------------------------------------- /megatron/data/realm_dataset_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/data/realm_dataset_utils.py -------------------------------------------------------------------------------- /megatron/data/realm_index.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/data/realm_index.py -------------------------------------------------------------------------------- /megatron/data/t5_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/data/t5_dataset.py -------------------------------------------------------------------------------- /megatron/data/test/test_indexed_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/data/test/test_indexed_dataset.py -------------------------------------------------------------------------------- /megatron/data/test/test_preprocess_data.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/data/test/test_preprocess_data.sh -------------------------------------------------------------------------------- /megatron/data/vit_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/data/vit_dataset.py -------------------------------------------------------------------------------- /megatron/enums.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/enums.py -------------------------------------------------------------------------------- /megatron/fp16_deprecated/loss_scaler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/fp16_deprecated/loss_scaler.py -------------------------------------------------------------------------------- /megatron/fused_kernels/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/fused_kernels/__init__.py -------------------------------------------------------------------------------- /megatron/fused_kernels/compat.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/fused_kernels/compat.h -------------------------------------------------------------------------------- /megatron/fused_kernels/layer_norm_cuda.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/fused_kernels/layer_norm_cuda.cpp -------------------------------------------------------------------------------- /megatron/fused_kernels/layer_norm_cuda_kernel.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/fused_kernels/layer_norm_cuda_kernel.cu -------------------------------------------------------------------------------- /megatron/fused_kernels/scaled_masked_softmax.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/fused_kernels/scaled_masked_softmax.cpp -------------------------------------------------------------------------------- /megatron/fused_kernels/scaled_masked_softmax.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/fused_kernels/scaled_masked_softmax.h -------------------------------------------------------------------------------- /megatron/fused_kernels/scaled_masked_softmax_cuda.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/fused_kernels/scaled_masked_softmax_cuda.cu -------------------------------------------------------------------------------- /megatron/fused_kernels/scaled_upper_triang_masked_softmax.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/fused_kernels/scaled_upper_triang_masked_softmax.cpp -------------------------------------------------------------------------------- /megatron/fused_kernels/scaled_upper_triang_masked_softmax.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h -------------------------------------------------------------------------------- /megatron/fused_kernels/scaled_upper_triang_masked_softmax_cuda.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/fused_kernels/scaled_upper_triang_masked_softmax_cuda.cu -------------------------------------------------------------------------------- /megatron/fused_kernels/type_shim.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/fused_kernels/type_shim.h -------------------------------------------------------------------------------- /megatron/global_vars.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/global_vars.py -------------------------------------------------------------------------------- /megatron/indexer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/indexer.py -------------------------------------------------------------------------------- /megatron/initialize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/initialize.py -------------------------------------------------------------------------------- /megatron/learning_rates.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/learning_rates.py -------------------------------------------------------------------------------- /megatron/memory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/memory.py -------------------------------------------------------------------------------- /megatron/microbatches.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/microbatches.py -------------------------------------------------------------------------------- /megatron/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/model/__init__.py -------------------------------------------------------------------------------- /megatron/model/bert_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/model/bert_model.py -------------------------------------------------------------------------------- /megatron/model/biencoder_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/model/biencoder_model.py -------------------------------------------------------------------------------- /megatron/model/classification.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/model/classification.py -------------------------------------------------------------------------------- /megatron/model/distributed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/model/distributed.py -------------------------------------------------------------------------------- /megatron/model/enums.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/model/enums.py -------------------------------------------------------------------------------- /megatron/model/fused_bias_gelu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/model/fused_bias_gelu.py -------------------------------------------------------------------------------- /megatron/model/fused_layer_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/model/fused_layer_norm.py -------------------------------------------------------------------------------- /megatron/model/fused_softmax.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/model/fused_softmax.py -------------------------------------------------------------------------------- /megatron/model/gpt_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/model/gpt_model.py -------------------------------------------------------------------------------- /megatron/model/language_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/model/language_model.py -------------------------------------------------------------------------------- /megatron/model/module.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/model/module.py -------------------------------------------------------------------------------- /megatron/model/multiple_choice.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/model/multiple_choice.py -------------------------------------------------------------------------------- /megatron/model/realm_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/model/realm_model.py -------------------------------------------------------------------------------- /megatron/model/rotary_pos_embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/model/rotary_pos_embedding.py -------------------------------------------------------------------------------- /megatron/model/t5_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/model/t5_model.py -------------------------------------------------------------------------------- /megatron/model/transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/model/transformer.py -------------------------------------------------------------------------------- /megatron/model/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/model/utils.py -------------------------------------------------------------------------------- /megatron/model/vit_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/model/vit_model.py -------------------------------------------------------------------------------- /megatron/mpu/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/mpu/__init__.py -------------------------------------------------------------------------------- /megatron/mpu/cross_entropy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/mpu/cross_entropy.py -------------------------------------------------------------------------------- /megatron/mpu/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/mpu/data.py -------------------------------------------------------------------------------- /megatron/mpu/initialize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/mpu/initialize.py -------------------------------------------------------------------------------- /megatron/mpu/layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/mpu/layers.py -------------------------------------------------------------------------------- /megatron/mpu/mappings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/mpu/mappings.py -------------------------------------------------------------------------------- /megatron/mpu/random.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/mpu/random.py -------------------------------------------------------------------------------- /megatron/mpu/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /megatron/mpu/tests/commons.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/mpu/tests/commons.py -------------------------------------------------------------------------------- /megatron/mpu/tests/test_cross_entropy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/mpu/tests/test_cross_entropy.py -------------------------------------------------------------------------------- /megatron/mpu/tests/test_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/mpu/tests/test_data.py -------------------------------------------------------------------------------- /megatron/mpu/tests/test_initialize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/mpu/tests/test_initialize.py -------------------------------------------------------------------------------- /megatron/mpu/tests/test_layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/mpu/tests/test_layers.py -------------------------------------------------------------------------------- /megatron/mpu/tests/test_random.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/mpu/tests/test_random.py -------------------------------------------------------------------------------- /megatron/mpu/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/mpu/utils.py -------------------------------------------------------------------------------- /megatron/optimizer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/optimizer/__init__.py -------------------------------------------------------------------------------- /megatron/optimizer/clip_grads.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/optimizer/clip_grads.py -------------------------------------------------------------------------------- /megatron/optimizer/grad_scaler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/optimizer/grad_scaler.py -------------------------------------------------------------------------------- /megatron/optimizer/optimizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/optimizer/optimizer.py -------------------------------------------------------------------------------- /megatron/p2p_communication.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/p2p_communication.py -------------------------------------------------------------------------------- /megatron/package_info.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/package_info.py -------------------------------------------------------------------------------- /megatron/schedules.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/schedules.py -------------------------------------------------------------------------------- /megatron/text_generation_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/text_generation_utils.py -------------------------------------------------------------------------------- /megatron/tokenizer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/tokenizer/__init__.py -------------------------------------------------------------------------------- /megatron/tokenizer/bert_tokenization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/tokenizer/bert_tokenization.py -------------------------------------------------------------------------------- /megatron/tokenizer/gpt2_tokenization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/tokenizer/gpt2_tokenization.py -------------------------------------------------------------------------------- /megatron/tokenizer/sp_tokenization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/tokenizer/sp_tokenization.py -------------------------------------------------------------------------------- /megatron/tokenizer/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/tokenizer/tokenizer.py -------------------------------------------------------------------------------- /megatron/training.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/training.py -------------------------------------------------------------------------------- /megatron/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/megatron/utils.py -------------------------------------------------------------------------------- /pretrain_bert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/pretrain_bert.py -------------------------------------------------------------------------------- /pretrain_gpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/pretrain_gpt.py -------------------------------------------------------------------------------- /pretrain_ict.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/pretrain_ict.py -------------------------------------------------------------------------------- /pretrain_t5.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/pretrain_t5.py -------------------------------------------------------------------------------- /pretrain_vit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/pretrain_vit.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | pybind11 2 | torch 3 | six 4 | regex 5 | numpy 6 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/setup.py -------------------------------------------------------------------------------- /tasks/data_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/tasks/data_utils.py -------------------------------------------------------------------------------- /tasks/ensemble_classifier.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/tasks/ensemble_classifier.py -------------------------------------------------------------------------------- /tasks/eval_harness/download.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/tasks/eval_harness/download.py -------------------------------------------------------------------------------- /tasks/eval_harness/evaluate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/tasks/eval_harness/evaluate.py -------------------------------------------------------------------------------- /tasks/eval_harness/report-to-csv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/tasks/eval_harness/report-to-csv.py -------------------------------------------------------------------------------- /tasks/eval_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/tasks/eval_utils.py -------------------------------------------------------------------------------- /tasks/finetune_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/tasks/finetune_utils.py -------------------------------------------------------------------------------- /tasks/glue/cola.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/tasks/glue/cola.py -------------------------------------------------------------------------------- /tasks/glue/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/tasks/glue/data.py -------------------------------------------------------------------------------- /tasks/glue/finetune.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/tasks/glue/finetune.py -------------------------------------------------------------------------------- /tasks/glue/mnli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/tasks/glue/mnli.py -------------------------------------------------------------------------------- /tasks/glue/mrpc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/tasks/glue/mrpc.py -------------------------------------------------------------------------------- /tasks/glue/qnli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/tasks/glue/qnli.py -------------------------------------------------------------------------------- /tasks/glue/qqp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/tasks/glue/qqp.py -------------------------------------------------------------------------------- /tasks/glue/rte.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/tasks/glue/rte.py -------------------------------------------------------------------------------- /tasks/glue/sst2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/tasks/glue/sst2.py -------------------------------------------------------------------------------- /tasks/glue/stsb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/tasks/glue/stsb.py -------------------------------------------------------------------------------- /tasks/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/tasks/main.py -------------------------------------------------------------------------------- /tasks/orqa/evaluate_orqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/tasks/orqa/evaluate_orqa.py -------------------------------------------------------------------------------- /tasks/orqa/evaluate_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/tasks/orqa/evaluate_utils.py -------------------------------------------------------------------------------- /tasks/orqa/natural_questions/nq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/tasks/orqa/natural_questions/nq.py -------------------------------------------------------------------------------- /tasks/orqa/natural_questions/qa_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/tasks/orqa/natural_questions/qa_utils.py -------------------------------------------------------------------------------- /tasks/orqa/natural_questions/tokenizers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/tasks/orqa/natural_questions/tokenizers.py -------------------------------------------------------------------------------- /tasks/race/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/tasks/race/data.py -------------------------------------------------------------------------------- /tasks/race/finetune.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/tasks/race/finetune.py -------------------------------------------------------------------------------- /tasks/vision/classification.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/tasks/vision/classification.py -------------------------------------------------------------------------------- /tasks/vision/eval_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/tasks/vision/eval_utils.py -------------------------------------------------------------------------------- /tasks/vision/finetune_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/tasks/vision/finetune_utils.py -------------------------------------------------------------------------------- /tasks/vision/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/tasks/vision/main.py -------------------------------------------------------------------------------- /tasks/zeroshot_gpt/datasets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/tasks/zeroshot_gpt/datasets.py -------------------------------------------------------------------------------- /tasks/zeroshot_gpt/detokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/tasks/zeroshot_gpt/detokenizer.py -------------------------------------------------------------------------------- /tasks/zeroshot_gpt/evaluate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/tasks/zeroshot_gpt/evaluate.py -------------------------------------------------------------------------------- /tests/run_megatron.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/tests/run_megatron.py -------------------------------------------------------------------------------- /tests/test_basic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/tests/test_basic.py -------------------------------------------------------------------------------- /tests/test_megatron.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/tests/test_megatron.py -------------------------------------------------------------------------------- /tools/convert_checkpoint/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/tools/convert_checkpoint/README.md -------------------------------------------------------------------------------- /tools/convert_checkpoint/deepspeed_checkpoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/tools/convert_checkpoint/deepspeed_checkpoint.py -------------------------------------------------------------------------------- /tools/convert_checkpoint/deepspeed_to_megatron.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/tools/convert_checkpoint/deepspeed_to_megatron.py -------------------------------------------------------------------------------- /tools/convert_checkpoint/deepspeed_to_transformers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/tools/convert_checkpoint/deepspeed_to_transformers.py -------------------------------------------------------------------------------- /tools/convert_checkpoint/inspect_checkpoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/tools/convert_checkpoint/inspect_checkpoint.py -------------------------------------------------------------------------------- /tools/convert_checkpoint/inspect_deepspeed_checkpoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/tools/convert_checkpoint/inspect_deepspeed_checkpoint.py -------------------------------------------------------------------------------- /tools/create_doc_index.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/tools/create_doc_index.py -------------------------------------------------------------------------------- /tools/generate_samples_gpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/tools/generate_samples_gpt.py -------------------------------------------------------------------------------- /tools/linter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/tools/linter.py -------------------------------------------------------------------------------- /tools/merge_mp_partitions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/tools/merge_mp_partitions.py -------------------------------------------------------------------------------- /tools/openwebtext/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/tools/openwebtext/README.md -------------------------------------------------------------------------------- /tools/openwebtext/add_id.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/tools/openwebtext/add_id.py -------------------------------------------------------------------------------- /tools/openwebtext/blacklist_urls.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/tools/openwebtext/blacklist_urls.py -------------------------------------------------------------------------------- /tools/openwebtext/cleanup_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/tools/openwebtext/cleanup_dataset.py -------------------------------------------------------------------------------- /tools/openwebtext/cleanup_fix_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/tools/openwebtext/cleanup_fix_dataset.py -------------------------------------------------------------------------------- /tools/openwebtext/filter_ngrams.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/tools/openwebtext/filter_ngrams.py -------------------------------------------------------------------------------- /tools/openwebtext/find_duplicates.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/tools/openwebtext/find_duplicates.py -------------------------------------------------------------------------------- /tools/openwebtext/group_duplicate_url.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/tools/openwebtext/group_duplicate_url.py -------------------------------------------------------------------------------- /tools/openwebtext/merge_jsons.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/tools/openwebtext/merge_jsons.py -------------------------------------------------------------------------------- /tools/openwebtext/remove_group_duplicates.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/tools/openwebtext/remove_group_duplicates.py -------------------------------------------------------------------------------- /tools/preprocess_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LydiaXiaohongLi/Megatron-DeepSpeed/HEAD/tools/preprocess_data.py --------------------------------------------------------------------------------