├── .gitignore ├── DeepSpeedExample ├── LICENSE ├── MANIFEST.in ├── README.md ├── changes.md ├── curriculum_learning │ ├── README.md │ ├── ds_pretrain_gpt2.sh │ ├── ds_train.sh │ ├── ds_zero_stage_2_config_baseline.json │ └── ds_zero_stage_2_config_curriculum_fixed_linear.json ├── examples │ ├── ds_pretrain_gpt2-infinity-cpu.sh │ ├── ds_pretrain_gpt2-infinity-nvme.sh │ ├── ds_pretrain_gpt2-offload.sh │ ├── ds_pretrain_gpt2-zero2.sh │ ├── ds_pretrain_gpt2-zero3.sh │ ├── ds_pretrain_gpt2.sh │ ├── ds_zero_stage_0_config.json │ ├── ds_zero_stage_2_config.json │ ├── ds_zero_stage_3_config.json │ ├── ds_zero_stage_3_config_release.json │ ├── ds_zero_stage_infinity-cpu.json │ ├── ds_zero_stage_infinity-nvme.json │ ├── ds_zero_stage_infinity_config.json │ ├── evaluate_zeroshot_gpt2.sh │ ├── finetune_mnli_distributed.sh │ ├── finetune_race_distributed.sh │ ├── generate_text.sh │ ├── merge_mp_bert.sh │ ├── pretrain_bert.sh │ ├── pretrain_bert_distributed.sh │ ├── pretrain_gpt2.sh │ ├── pretrain_gpt2_distributed.sh │ ├── sc22-gpt-zero-infinity-cpu.sh │ └── sc22-gpt-zero-offloading.sh ├── images │ ├── Makefile │ ├── cases.png │ ├── scaling-dp.png │ ├── scaling-mp.png │ └── tables.tex ├── megatron │ ├── __init__.py │ ├── arguments.py │ ├── checkpointing.py │ ├── data │ │ ├── Makefile │ │ ├── __init__.py │ │ ├── bert_dataset.py │ │ ├── dataset_utils.py │ │ ├── gpt2_dataset.py │ │ ├── helpers.cpp │ │ ├── ict_dataset.py │ │ ├── indexed_dataset.py │ │ ├── realm_dataset_utils.py │ │ ├── realm_index.py │ │ ├── samplers.py │ │ └── test │ │ │ ├── test_indexed_dataset.py │ │ │ └── test_preprocess_data.sh │ ├── deprecated_data_utils │ │ ├── __init__.py │ │ ├── configure_data.py │ │ ├── corpora.py │ │ ├── datasets.py │ │ ├── file_utils.py │ │ ├── lazy_loader.py │ │ ├── samplers.py │ │ ├── scripts │ │ │ ├── presplit_sentences_json.py │ │ │ ├── split_gpt2_json.py │ │ │ └── split_json.py │ │ ├── tf_dl.py │ │ ├── tokenization.py │ │ ├── tokenization_gpt2.py │ │ └── wordpiece.py │ ├── fp16 │ │ ├── __init__.py │ │ ├── fp16.py │ │ ├── fp16util.py │ │ └── loss_scaler.py │ ├── fused_kernels │ │ ├── __init__.py │ │ ├── scaled_masked_softmax.cpp │ │ ├── scaled_masked_softmax.h │ │ ├── scaled_masked_softmax_cuda.cu │ │ ├── scaled_upper_triang_masked_softmax.cpp │ │ ├── scaled_upper_triang_masked_softmax.h │ │ └── scaled_upper_triang_masked_softmax_cuda.cu │ ├── global_vars.py │ ├── indexer.py │ ├── initialize.py │ ├── learning_rates.py │ ├── memory.py │ ├── model │ │ ├── __init__.py │ │ ├── bert_model.py │ │ ├── classification.py │ │ ├── distributed.py │ │ ├── fused_bias_gelu.py │ │ ├── fused_softmax.py │ │ ├── gpt2_model.py │ │ ├── language_model.py │ │ ├── multiple_choice.py │ │ ├── realm_model.py │ │ ├── transformer.py │ │ └── utils.py │ ├── module.py │ ├── mpu │ │ ├── __init__.py │ │ ├── cross_entropy.py │ │ ├── data.py │ │ ├── grads.py │ │ ├── initialize.py │ │ ├── layers.py │ │ ├── mappings.py │ │ ├── random.py │ │ ├── tests │ │ │ ├── __init__.py │ │ │ ├── commons.py │ │ │ ├── test_cross_entropy.py │ │ │ ├── test_data.py │ │ │ ├── test_initialize.py │ │ │ ├── test_layers.py │ │ │ └── test_random.py │ │ └── utils.py │ ├── package_info.py │ ├── text_generation_utils.py │ ├── tokenizer │ │ ├── __init__.py │ │ ├── bert_tokenization.py │ │ ├── gpt2_tokenization.py │ │ └── tokenizer.py │ ├── training.py │ └── utils.py ├── pretrain_bert.py ├── pretrain_gpt2.py ├── pretrain_ict.py ├── requirements.txt ├── run.sh ├── setup.py ├── tasks │ ├── data_utils.py │ ├── ensemble_classifier.py │ ├── eval_utils.py │ ├── finetune_utils.py │ ├── glue │ │ ├── data.py │ │ ├── finetune.py │ │ ├── mnli.py │ │ └── qqp.py │ ├── main.py │ ├── race │ │ ├── data.py │ │ └── finetune.py │ └── zeroshot_gpt2 │ │ ├── datasets.py │ │ ├── detokenizer.py │ │ └── evaluate.py └── tools │ ├── create_doc_index.py │ ├── generate_samples_gpt2.py │ ├── linter.py │ ├── merge_mp_partitions.py │ ├── openwebtext │ ├── README.md │ ├── blacklist_urls.py │ ├── cleanup_dataset.py │ ├── find_duplicates.py │ ├── group_duplicates_url.py │ ├── merge_jsons.py │ └── remove_group_duplicates.py │ └── preprocess_data.py ├── L2L ├── .gitignore ├── .gitlab-ci.yml ├── LICENSE ├── MANIFEST.in ├── README.md ├── docker │ ├── Dockerfile │ ├── attach_container.sh │ ├── build_image.sh │ ├── nogil.torch.csrc.autograd.python_variable.cpp │ └── start_container.sh ├── examples │ ├── evaluate_retriever_nq.sh │ ├── evaluate_zeroshot_gpt.sh │ ├── finetune_mnli_distributed.sh │ ├── finetune_race_distributed.sh │ ├── finetune_retriever_distributed.sh │ ├── generate_text.sh │ ├── merge_mp_bert.sh │ ├── pretrain_bert.sh │ ├── pretrain_bert_distributed.sh │ ├── pretrain_bert_distributed_with_mp.sh │ ├── pretrain_gpt-l2l.sh │ ├── pretrain_gpt.sh │ ├── pretrain_gpt3_175B.sh │ ├── pretrain_gpt_distributed.sh │ ├── pretrain_gpt_distributed_with_mp.sh │ ├── pretrain_ict.sh │ ├── pretrain_t5.sh │ ├── pretrain_t5_distributed.sh │ ├── pretrain_t5_distributed_with_mp.sh │ ├── run_text_generation_server_345M.sh │ ├── run_text_generation_server_345M_8_tensor_parallel.sh │ ├── sc21 │ │ ├── CONFIG.sh │ │ ├── README.md │ │ ├── SBATCH.sh │ │ ├── SRUN.sh │ │ ├── run_figure_11.sh │ │ ├── run_figure_12.sh │ │ ├── run_figure_13.sh │ │ ├── run_figure_14.sh │ │ ├── run_figure_15.sh │ │ ├── run_figure_16.sh │ │ ├── run_figure_17.sh │ │ ├── run_figure_18.sh │ │ └── run_table_1.sh │ └── sc22-gpt-l2l.sh ├── images │ └── cases_april2021.png ├── megatron │ ├── __init__.py │ ├── arguments.py │ ├── checkpointing.py │ ├── data │ │ ├── Makefile │ │ ├── __init__.py │ │ ├── autoaugment.py │ │ ├── bert_dataset.py │ │ ├── biencoder_dataset_utils.py │ │ ├── blendable_dataset.py │ │ ├── data_samplers.py │ │ ├── dataset_utils.py │ │ ├── gpt_dataset.py │ │ ├── helpers.cpp │ │ ├── ict_dataset.py │ │ ├── indexed_dataset.py │ │ ├── orqa_wiki_dataset.py │ │ ├── realm_dataset_utils.py │ │ ├── realm_index.py │ │ ├── t5_dataset.py │ │ ├── test │ │ │ ├── test_indexed_dataset.py │ │ │ └── test_preprocess_data.sh │ │ └── vit_dataset.py │ ├── fp16_deprecated │ │ └── loss_scaler.py │ ├── fused_kernels │ │ ├── __init__.py │ │ ├── compat.h │ │ ├── layer_norm_cuda.cpp │ │ ├── layer_norm_cuda_kernel.cu │ │ ├── scaled_masked_softmax.cpp │ │ ├── scaled_masked_softmax.h │ │ ├── scaled_masked_softmax_cuda.cu │ │ ├── scaled_upper_triang_masked_softmax.cpp │ │ ├── scaled_upper_triang_masked_softmax.h │ │ ├── scaled_upper_triang_masked_softmax_cuda.cu │ │ ├── tests │ │ │ ├── __init__.py │ │ │ └── test_fused_kernels.py │ │ └── type_shim.h │ ├── global_vars.py │ ├── indexer.py │ ├── initialize.py │ ├── learning_rates.py │ ├── memory.py │ ├── microbatches.py │ ├── model │ │ ├── __init__.py │ │ ├── bert_model.py │ │ ├── biencoder_model.py │ │ ├── classification.py │ │ ├── distributed.py │ │ ├── enums.py │ │ ├── fused_bias_gelu.py │ │ ├── fused_layer_norm.py │ │ ├── fused_softmax.py │ │ ├── gpt_model.py │ │ ├── language_model.py │ │ ├── module.py │ │ ├── multiple_choice.py │ │ ├── realm_model.py │ │ ├── t5_model.py │ │ ├── transformer.py │ │ ├── utils.py │ │ └── vit_model.py │ ├── mpu │ │ ├── __init__.py │ │ ├── cross_entropy.py │ │ ├── data.py │ │ ├── initialize.py │ │ ├── layers.py │ │ ├── mappings.py │ │ ├── random.py │ │ ├── tests │ │ │ ├── __init__.py │ │ │ ├── commons.py │ │ │ ├── test_cross_entropy.py │ │ │ ├── test_data.py │ │ │ ├── test_initialize.py │ │ │ ├── test_layers.py │ │ │ └── test_random.py │ │ └── utils.py │ ├── optimizer │ │ ├── __init__.py │ │ ├── clip_grads.py │ │ ├── cpp │ │ │ ├── cpu_ops.cpp │ │ │ ├── cpu_ops.h │ │ │ └── ds │ │ │ │ ├── StopWatch.h │ │ │ │ ├── context.h │ │ │ │ ├── cpu_adam.cpp │ │ │ │ ├── cpu_adam.h │ │ │ │ ├── cublas_wrappers.h │ │ │ │ ├── custom_cuda_kernel.cu │ │ │ │ ├── custom_cuda_layers.h │ │ │ │ ├── gemm_test.h │ │ │ │ └── simd.h │ │ ├── grad_scaler.py │ │ └── optimizer.py │ ├── p2p_communication.py │ ├── package_info.py │ ├── schedules.py │ ├── strongh │ │ ├── __init__.py │ │ └── cpp │ │ │ └── utils.cpp │ ├── text_generation_server.py │ ├── text_generation_utils.py │ ├── tokenizer │ │ ├── __init__.py │ │ ├── bert_tokenization.py │ │ ├── gpt2_tokenization.py │ │ └── tokenizer.py │ ├── training.py │ └── utils.py ├── pretrain_bert.py ├── pretrain_gpt.py ├── pretrain_ict.py ├── pretrain_t5.py ├── pretrain_vit.py ├── requirements.txt ├── setup.py ├── tasks │ ├── data_utils.py │ ├── ensemble_classifier.py │ ├── eval_utils.py │ ├── finetune_utils.py │ ├── glue │ │ ├── data.py │ │ ├── finetune.py │ │ ├── mnli.py │ │ └── qqp.py │ ├── main.py │ ├── orqa │ │ ├── README.md │ │ ├── evaluate_orqa.py │ │ ├── evaluate_utils.py │ │ ├── supervised │ │ │ ├── data.py │ │ │ ├── eval_utils.py │ │ │ └── finetune.py │ │ └── unsupervised │ │ │ ├── nq.py │ │ │ ├── qa_utils.py │ │ │ └── tokenizers.py │ ├── race │ │ ├── data.py │ │ └── finetune.py │ ├── vision │ │ ├── classification.py │ │ ├── eval_utils.py │ │ ├── finetune_utils.py │ │ └── main.py │ └── zeroshot_gpt │ │ ├── datasets.py │ │ ├── detokenizer.py │ │ └── evaluate.py ├── tests │ └── test_basic.py └── tools │ ├── generate_samples_gpt.py │ ├── linter.py │ ├── merge_mp_partitions.py │ ├── openwebtext │ ├── README.md │ ├── add_id.py │ ├── blacklist_urls.py │ ├── cleanup_dataset.py │ ├── cleanup_fix_dataset.py │ ├── filter_ngrams.py │ ├── find_duplicates.py │ ├── group_duplicate_url.py │ ├── merge_jsons.py │ └── remove_group_duplicates.py │ ├── preprocess_data.py │ ├── run_text_generation_server.py │ └── text_generation_cli.py ├── LICENSE ├── Megatron-LM ├── .gitignore ├── .gitlab-ci.yml ├── LICENSE ├── MANIFEST.in ├── README.md ├── examples │ ├── evaluate_retriever_nq.sh │ ├── evaluate_zeroshot_gpt.sh │ ├── finetune_mnli_distributed.sh │ ├── finetune_race_distributed.sh │ ├── finetune_retriever_distributed.sh │ ├── generate_text.sh │ ├── merge_mp_bert.sh │ ├── pretrain_bert.sh │ ├── pretrain_bert_distributed.sh │ ├── pretrain_bert_distributed_with_mp.sh │ ├── pretrain_gpt.sh │ ├── pretrain_gpt3_175B.sh │ ├── pretrain_gpt_distributed.sh │ ├── pretrain_gpt_distributed_with_mp.sh │ ├── pretrain_ict.sh │ ├── pretrain_t5.sh │ ├── pretrain_t5_distributed.sh │ ├── pretrain_t5_distributed_with_mp.sh │ ├── run_text_generation_server_345M.sh │ ├── run_text_generation_server_345M_8_tensor_parallel.sh │ ├── sc21 │ │ ├── CONFIG.sh │ │ ├── README.md │ │ ├── SBATCH.sh │ │ ├── SRUN.sh │ │ ├── run_figure_11.sh │ │ ├── run_figure_12.sh │ │ ├── run_figure_13.sh │ │ ├── run_figure_14.sh │ │ ├── run_figure_15.sh │ │ ├── run_figure_16.sh │ │ ├── run_figure_17.sh │ │ ├── run_figure_18.sh │ │ └── run_table_1.sh │ └── sc22-gpt-megatron.sh ├── images │ └── cases_april2021.png ├── megatron │ ├── __init__.py │ ├── arguments.py │ ├── checkpointing.py │ ├── data │ │ ├── Makefile │ │ ├── __init__.py │ │ ├── autoaugment.py │ │ ├── bert_dataset.py │ │ ├── biencoder_dataset_utils.py │ │ ├── blendable_dataset.py │ │ ├── data_samplers.py │ │ ├── dataset_utils.py │ │ ├── gpt_dataset.py │ │ ├── helpers.cpp │ │ ├── ict_dataset.py │ │ ├── indexed_dataset.py │ │ ├── orqa_wiki_dataset.py │ │ ├── realm_dataset_utils.py │ │ ├── realm_index.py │ │ ├── t5_dataset.py │ │ ├── test │ │ │ ├── test_indexed_dataset.py │ │ │ └── test_preprocess_data.sh │ │ └── vit_dataset.py │ ├── fp16_deprecated │ │ └── loss_scaler.py │ ├── fused_kernels │ │ ├── __init__.py │ │ ├── compat.h │ │ ├── layer_norm_cuda.cpp │ │ ├── layer_norm_cuda_kernel.cu │ │ ├── scaled_masked_softmax.cpp │ │ ├── scaled_masked_softmax.h │ │ ├── scaled_masked_softmax_cuda.cu │ │ ├── scaled_upper_triang_masked_softmax.cpp │ │ ├── scaled_upper_triang_masked_softmax.h │ │ ├── scaled_upper_triang_masked_softmax_cuda.cu │ │ ├── tests │ │ │ ├── __init__.py │ │ │ └── test_fused_kernels.py │ │ └── type_shim.h │ ├── global_vars.py │ ├── indexer.py │ ├── initialize.py │ ├── learning_rates.py │ ├── memory.py │ ├── microbatches.py │ ├── model │ │ ├── __init__.py │ │ ├── bert_model.py │ │ ├── biencoder_model.py │ │ ├── classification.py │ │ ├── distributed.py │ │ ├── enums.py │ │ ├── fused_bias_gelu.py │ │ ├── fused_layer_norm.py │ │ ├── fused_softmax.py │ │ ├── gpt_model.py │ │ ├── language_model.py │ │ ├── module.py │ │ ├── multiple_choice.py │ │ ├── realm_model.py │ │ ├── t5_model.py │ │ ├── transformer.py │ │ ├── utils.py │ │ └── vit_model.py │ ├── mpu │ │ ├── __init__.py │ │ ├── cross_entropy.py │ │ ├── data.py │ │ ├── initialize.py │ │ ├── layers.py │ │ ├── mappings.py │ │ ├── random.py │ │ ├── tests │ │ │ ├── __init__.py │ │ │ ├── commons.py │ │ │ ├── test_cross_entropy.py │ │ │ ├── test_data.py │ │ │ ├── test_initialize.py │ │ │ ├── test_layers.py │ │ │ └── test_random.py │ │ └── utils.py │ ├── optimizer │ │ ├── __init__.py │ │ ├── clip_grads.py │ │ ├── grad_scaler.py │ │ └── optimizer.py │ ├── p2p_communication.py │ ├── package_info.py │ ├── schedules.py │ ├── text_generation_server.py │ ├── text_generation_utils.py │ ├── tokenizer │ │ ├── __init__.py │ │ ├── bert_tokenization.py │ │ ├── gpt2_tokenization.py │ │ └── tokenizer.py │ ├── training.py │ └── utils.py ├── pretrain_bert.py ├── pretrain_gpt.py ├── pretrain_ict.py ├── pretrain_t5.py ├── pretrain_vit.py ├── requirements.txt ├── setup.py ├── tasks │ ├── data_utils.py │ ├── ensemble_classifier.py │ ├── eval_utils.py │ ├── finetune_utils.py │ ├── glue │ │ ├── data.py │ │ ├── finetune.py │ │ ├── mnli.py │ │ └── qqp.py │ ├── main.py │ ├── orqa │ │ ├── README.md │ │ ├── evaluate_orqa.py │ │ ├── evaluate_utils.py │ │ ├── supervised │ │ │ ├── data.py │ │ │ ├── eval_utils.py │ │ │ └── finetune.py │ │ └── unsupervised │ │ │ ├── nq.py │ │ │ ├── qa_utils.py │ │ │ └── tokenizers.py │ ├── race │ │ ├── data.py │ │ └── finetune.py │ ├── vision │ │ ├── classification.py │ │ ├── eval_utils.py │ │ ├── finetune_utils.py │ │ └── main.py │ └── zeroshot_gpt │ │ ├── datasets.py │ │ ├── detokenizer.py │ │ └── evaluate.py ├── tests │ └── test_basic.py └── tools │ ├── generate_samples_gpt.py │ ├── linter.py │ ├── merge_mp_partitions.py │ ├── openwebtext │ ├── README.md │ ├── add_id.py │ ├── blacklist_urls.py │ ├── cleanup_dataset.py │ ├── cleanup_fix_dataset.py │ ├── filter_ngrams.py │ ├── find_duplicates.py │ ├── group_duplicate_url.py │ ├── merge_jsons.py │ └── remove_group_duplicates.py │ ├── preprocess_data.py │ ├── run_text_generation_server.py │ └── text_generation_cli.py ├── README.md ├── SHv0 ├── .gitignore ├── .gitlab-ci.yml ├── LICENSE ├── README.md ├── examples │ ├── evaluate_retriever_nq.sh │ ├── evaluate_zeroshot_gpt.sh │ ├── finetune_mnli_distributed.sh │ ├── finetune_race_distributed.sh │ ├── finetune_retriever_distributed.sh │ ├── generate_text.sh │ ├── merge_mp_bert.sh │ ├── pretrain_bert.sh │ ├── pretrain_bert_1gpu_baseline.sh │ ├── pretrain_bert_1gpu_offloading.sh │ ├── pretrain_bert_8gpus_tp_baseline.sh │ ├── pretrain_bert_8gpus_tp_offloading.sh │ ├── pretrain_bert_distributed.sh │ ├── pretrain_bert_distributed_with_mp.sh │ ├── pretrain_gpt.sh │ ├── pretrain_gpt3_175B.sh │ ├── pretrain_gpt_1gpu_baseline.sh │ ├── pretrain_gpt_1gpu_offloading.sh │ ├── pretrain_gpt_8gpus_tp_baseline.sh │ ├── pretrain_gpt_8gpus_tp_offloading.sh │ ├── pretrain_gpt_distributed.sh │ ├── pretrain_gpt_distributed_with_mp.sh │ ├── pretrain_ict.sh │ ├── pretrain_t5.sh │ ├── pretrain_t5_distributed.sh │ ├── pretrain_t5_distributed_with_mp.sh │ ├── run_text_generation_server_345M.sh │ ├── run_text_generation_server_345M_8_tensor_parallel.sh │ ├── sc21 │ │ ├── CONFIG.sh │ │ ├── README.md │ │ ├── SBATCH.sh │ │ ├── SRUN.sh │ │ ├── run_figure_11.sh │ │ ├── run_figure_12.sh │ │ ├── run_figure_13.sh │ │ ├── run_figure_14.sh │ │ ├── run_figure_15.sh │ │ ├── run_figure_16.sh │ │ ├── run_figure_17.sh │ │ ├── run_figure_18.sh │ │ └── run_table_1.sh │ └── sc22-gpt-sh.sh ├── images │ └── cases_april2021.png ├── megatron │ ├── __init__.py │ ├── arguments.py │ ├── checkpointing.py │ ├── data │ │ ├── Makefile │ │ ├── __init__.py │ │ ├── autoaugment.py │ │ ├── bert_dataset.py │ │ ├── biencoder_dataset_utils.py │ │ ├── blendable_dataset.py │ │ ├── data_samplers.py │ │ ├── dataset_utils.py │ │ ├── gpt_dataset.py │ │ ├── helpers.cpp │ │ ├── ict_dataset.py │ │ ├── indexed_dataset.py │ │ ├── orqa_wiki_dataset.py │ │ ├── realm_dataset_utils.py │ │ ├── realm_index.py │ │ ├── t5_dataset.py │ │ ├── test │ │ │ ├── test_indexed_dataset.py │ │ │ └── test_preprocess_data.sh │ │ └── vit_dataset.py │ ├── fp16_deprecated │ │ └── loss_scaler.py │ ├── fused_kernels │ │ ├── __init__.py │ │ ├── compat.h │ │ ├── layer_norm_cuda.cpp │ │ ├── layer_norm_cuda_kernel.cu │ │ ├── scaled_masked_softmax.cpp │ │ ├── scaled_masked_softmax.h │ │ ├── scaled_masked_softmax_cuda.cu │ │ ├── scaled_upper_triang_masked_softmax.cpp │ │ ├── scaled_upper_triang_masked_softmax.h │ │ ├── scaled_upper_triang_masked_softmax_cuda.cu │ │ ├── tests │ │ │ ├── __init__.py │ │ │ └── test_fused_kernels.py │ │ └── type_shim.h │ ├── global_vars.py │ ├── indexer.py │ ├── initialize.py │ ├── learning_rates.py │ ├── memory.py │ ├── microbatches.py │ ├── model │ │ ├── __init__.py │ │ ├── bert_model.py │ │ ├── biencoder_model.py │ │ ├── classification.py │ │ ├── distributed.py │ │ ├── enums.py │ │ ├── fused_bias_gelu.py │ │ ├── fused_layer_norm.py │ │ ├── fused_softmax.py │ │ ├── gpt_model.py │ │ ├── language_model.py │ │ ├── module.py │ │ ├── multiple_choice.py │ │ ├── realm_model.py │ │ ├── t5_model.py │ │ ├── transformer.py │ │ ├── utils.py │ │ └── vit_model.py │ ├── mpu │ │ ├── __init__.py │ │ ├── cross_entropy.py │ │ ├── data.py │ │ ├── initialize.py │ │ ├── layers.py │ │ ├── mappings.py │ │ ├── random.py │ │ ├── tests │ │ │ ├── __init__.py │ │ │ ├── commons.py │ │ │ ├── test_cross_entropy.py │ │ │ ├── test_data.py │ │ │ ├── test_initialize.py │ │ │ ├── test_layers.py │ │ │ └── test_random.py │ │ └── utils.py │ ├── optimizer │ │ ├── __init__.py │ │ ├── clip_grads.py │ │ ├── cpp │ │ │ ├── GL_CPU_Float16OptimizerWithFloat16Params.cpp │ │ │ ├── GL_CPU_Float16OptimizerWithFloat16Params.h │ │ │ ├── deepspeed │ │ │ │ ├── StopWatch.h │ │ │ │ ├── context.h │ │ │ │ ├── cpu_adam.cpp │ │ │ │ ├── cpu_adam.h │ │ │ │ ├── cublas_wrappers.h │ │ │ │ ├── custom_cuda_kernel.cu │ │ │ │ ├── custom_cuda_layers.h │ │ │ │ ├── gemm_test.h │ │ │ │ └── simd.h │ │ │ ├── offloading_utils.cpp │ │ │ └── torch_csrc_export.h │ │ ├── grad_scaler.py │ │ └── optimizer.py │ ├── p2p_communication.py │ ├── schedules.py │ ├── text_generation_server.py │ ├── text_generation_utils.py │ ├── tokenizer │ │ ├── __init__.py │ │ ├── bert_tokenization.py │ │ ├── gpt2_tokenization.py │ │ └── tokenizer.py │ ├── training.py │ └── utils.py ├── pretrain_bert.py ├── pretrain_gpt.py ├── pretrain_ict.py ├── pretrain_t5.py ├── pretrain_vit.py ├── scripts │ ├── clean-cache.sh │ ├── deepspeed_cpu_adam._gl_.py │ ├── deepspeed_cpu_adam._v0.5.8_.py │ ├── distributed_c10d._gl_.py │ ├── distributed_c10d._v1.10.0_.py │ ├── function._gl_.py │ ├── function._v1.10.0_.py │ ├── init-mps.sh │ ├── kill-all-python-procs.sh │ └── stop-mps.sh ├── tasks │ ├── data_utils.py │ ├── ensemble_classifier.py │ ├── eval_utils.py │ ├── finetune_utils.py │ ├── glue │ │ ├── data.py │ │ ├── finetune.py │ │ ├── mnli.py │ │ └── qqp.py │ ├── main.py │ ├── orqa │ │ ├── README.md │ │ ├── evaluate_orqa.py │ │ ├── evaluate_utils.py │ │ ├── supervised │ │ │ ├── data.py │ │ │ ├── eval_utils.py │ │ │ └── finetune.py │ │ └── unsupervised │ │ │ ├── nq.py │ │ │ ├── qa_utils.py │ │ │ └── tokenizers.py │ ├── race │ │ ├── data.py │ │ └── finetune.py │ ├── vision │ │ ├── classification.py │ │ ├── eval_utils.py │ │ ├── finetune_utils.py │ │ └── main.py │ └── zeroshot_gpt │ │ ├── datasets.py │ │ ├── detokenizer.py │ │ └── evaluate.py ├── tests │ ├── cpp │ │ ├── foreach_non_finite_check_and_unscale.cpp │ │ ├── foreach_non_finite_check_and_unscale.h │ │ ├── tensor_to_cpu_or_cuda.cpp │ │ └── tensor_to_cpu_or_cuda.h │ ├── test_bandwidth.py │ ├── test_basic.py │ ├── test_cpp_extention_parallel_in_ray.py │ ├── test_cuda_stream_in_ray_1.py │ ├── test_cuda_stream_in_ray_2.py │ ├── test_cuda_stream_in_ray_3.py │ ├── test_cuda_stream_in_ray_4.py │ ├── test_distil_roberta.py │ ├── test_distributed.py │ ├── test_ray.py │ └── test_tensor_to_function_parallel_in_ray_via_cpp_extention.py └── tools │ ├── linter.py │ ├── merge_mp_partitions.py │ ├── openwebtext │ ├── README.md │ ├── add_id.py │ ├── blacklist_urls.py │ ├── cleanup_dataset.py │ ├── cleanup_fix_dataset.py │ ├── filter_ngrams.py │ ├── find_duplicates.py │ ├── group_duplicate_url.py │ ├── merge_jsons.py │ └── remove_group_duplicates.py │ ├── preprocess_data.py │ ├── run_text_generation_server.py │ └── text_generation_cli.py ├── examples ├── case1.sh ├── case1_draw.py ├── case1_extract.sh ├── case2.sh ├── case2_draw.py ├── case2_extract.sh ├── case3.sh ├── case3_draw.py ├── case3_extract.sh ├── case4.sh ├── case4_draw.py ├── case4_extract.sh ├── case5.sh ├── case5_draw.py ├── case5_extract.sh └── run.sh ├── notebook └── sc22ae.ipynb └── results ├── case1.csv ├── case2.csv ├── case3.csv ├── case4.csv ├── case5.csv ├── log_l2l_l-32_hs-2048_bs-4_ws-4_2022-07-02.1656736709.txt ├── log_l2l_l-78_hs-2048_bs-4_ws-4_2022-07-02.1656730580.txt ├── log_megatron-lm_l-32_hs-2048_bs-4_ws-4_2022-07-02.1656730240.txt ├── log_stronghold_l-100_hs-2048_bs-4_ws-4_2022-07-02.1656733406.txt ├── log_stronghold_l-16_hs-2048_bs-4_ws-15_2022-07-02.1656742529.txt ├── log_stronghold_l-24_hs-2048_bs-4_ws-15_2022-07-02.1656742286.txt ├── log_stronghold_l-32_hs-2048_bs-4_ws-10_2022-07-02.1656744387.txt ├── log_stronghold_l-32_hs-2048_bs-4_ws-12_2022-07-02.1656744774.txt ├── log_stronghold_l-32_hs-2048_bs-4_ws-14_2022-07-02.1656745146.txt ├── log_stronghold_l-32_hs-2048_bs-4_ws-15_2022-07-02.1656734808.txt ├── log_stronghold_l-32_hs-2048_bs-4_ws-2_2022-07-02.1656742692.txt ├── log_stronghold_l-32_hs-2048_bs-4_ws-4_2022-07-02.1656743137.txt ├── log_stronghold_l-32_hs-2048_bs-4_ws-6_2022-07-02.1656743570.txt ├── log_stronghold_l-32_hs-2048_bs-4_ws-8_2022-07-02.1656743979.txt ├── log_stronghold_l-40_hs-2048_bs-4_ws-15_2022-07-02.1656741833.txt ├── log_stronghold_l-48_hs-2048_bs-4_ws-15_2022-07-02.1656735152.txt ├── log_stronghold_l-56_hs-2048_bs-4_ws-15_2022-07-02.1656741167.txt ├── log_stronghold_l-64_hs-2048_bs-4_ws-15_2022-07-02.1656740376.txt ├── log_stronghold_l-78_hs-2048_bs-4_ws-15_2022-07-02.1656735719.txt ├── log_stronghold_l-92_hs-2048_bs-4_ws-15_2022-07-02.1656739206.txt ├── log_zero-infinity_l-32_hs-2048_bs-4_ws-4_2022-07-02.1656738599.txt ├── log_zero-infinity_l-48_hs-2048_bs-4_ws-4_2022-07-02.1656732519.txt ├── log_zero-offload_l-32_hs-2048_bs-4_ws-4_2022-07-02.1656738007.txt └── log_zero-offload_l-48_hs-2048_bs-4_ws-4_2022-07-02.1656731649.txt /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/.gitignore -------------------------------------------------------------------------------- /DeepSpeedExample/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/LICENSE -------------------------------------------------------------------------------- /DeepSpeedExample/MANIFEST.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/MANIFEST.in -------------------------------------------------------------------------------- /DeepSpeedExample/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/README.md -------------------------------------------------------------------------------- /DeepSpeedExample/changes.md: -------------------------------------------------------------------------------- 1 | PRETEND THESE ARE CODE CHANGES 2 | -------------------------------------------------------------------------------- /DeepSpeedExample/curriculum_learning/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/curriculum_learning/README.md -------------------------------------------------------------------------------- /DeepSpeedExample/curriculum_learning/ds_pretrain_gpt2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/curriculum_learning/ds_pretrain_gpt2.sh -------------------------------------------------------------------------------- /DeepSpeedExample/curriculum_learning/ds_train.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/curriculum_learning/ds_train.sh -------------------------------------------------------------------------------- /DeepSpeedExample/curriculum_learning/ds_zero_stage_2_config_baseline.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/curriculum_learning/ds_zero_stage_2_config_baseline.json -------------------------------------------------------------------------------- /DeepSpeedExample/curriculum_learning/ds_zero_stage_2_config_curriculum_fixed_linear.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/curriculum_learning/ds_zero_stage_2_config_curriculum_fixed_linear.json -------------------------------------------------------------------------------- /DeepSpeedExample/examples/ds_pretrain_gpt2-infinity-cpu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/examples/ds_pretrain_gpt2-infinity-cpu.sh -------------------------------------------------------------------------------- /DeepSpeedExample/examples/ds_pretrain_gpt2-infinity-nvme.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/examples/ds_pretrain_gpt2-infinity-nvme.sh -------------------------------------------------------------------------------- /DeepSpeedExample/examples/ds_pretrain_gpt2-offload.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/examples/ds_pretrain_gpt2-offload.sh -------------------------------------------------------------------------------- /DeepSpeedExample/examples/ds_pretrain_gpt2-zero2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/examples/ds_pretrain_gpt2-zero2.sh -------------------------------------------------------------------------------- /DeepSpeedExample/examples/ds_pretrain_gpt2-zero3.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/examples/ds_pretrain_gpt2-zero3.sh -------------------------------------------------------------------------------- /DeepSpeedExample/examples/ds_pretrain_gpt2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/examples/ds_pretrain_gpt2.sh -------------------------------------------------------------------------------- /DeepSpeedExample/examples/ds_zero_stage_0_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/examples/ds_zero_stage_0_config.json -------------------------------------------------------------------------------- /DeepSpeedExample/examples/ds_zero_stage_2_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/examples/ds_zero_stage_2_config.json -------------------------------------------------------------------------------- /DeepSpeedExample/examples/ds_zero_stage_3_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/examples/ds_zero_stage_3_config.json -------------------------------------------------------------------------------- /DeepSpeedExample/examples/ds_zero_stage_3_config_release.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/examples/ds_zero_stage_3_config_release.json -------------------------------------------------------------------------------- /DeepSpeedExample/examples/ds_zero_stage_infinity-cpu.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/examples/ds_zero_stage_infinity-cpu.json -------------------------------------------------------------------------------- /DeepSpeedExample/examples/ds_zero_stage_infinity-nvme.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/examples/ds_zero_stage_infinity-nvme.json -------------------------------------------------------------------------------- /DeepSpeedExample/examples/ds_zero_stage_infinity_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/examples/ds_zero_stage_infinity_config.json -------------------------------------------------------------------------------- /DeepSpeedExample/examples/evaluate_zeroshot_gpt2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/examples/evaluate_zeroshot_gpt2.sh -------------------------------------------------------------------------------- /DeepSpeedExample/examples/finetune_mnli_distributed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/examples/finetune_mnli_distributed.sh -------------------------------------------------------------------------------- /DeepSpeedExample/examples/finetune_race_distributed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/examples/finetune_race_distributed.sh -------------------------------------------------------------------------------- /DeepSpeedExample/examples/generate_text.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/examples/generate_text.sh -------------------------------------------------------------------------------- /DeepSpeedExample/examples/merge_mp_bert.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/examples/merge_mp_bert.sh -------------------------------------------------------------------------------- /DeepSpeedExample/examples/pretrain_bert.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/examples/pretrain_bert.sh -------------------------------------------------------------------------------- /DeepSpeedExample/examples/pretrain_bert_distributed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/examples/pretrain_bert_distributed.sh -------------------------------------------------------------------------------- /DeepSpeedExample/examples/pretrain_gpt2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/examples/pretrain_gpt2.sh -------------------------------------------------------------------------------- /DeepSpeedExample/examples/pretrain_gpt2_distributed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/examples/pretrain_gpt2_distributed.sh -------------------------------------------------------------------------------- /DeepSpeedExample/examples/sc22-gpt-zero-infinity-cpu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/examples/sc22-gpt-zero-infinity-cpu.sh -------------------------------------------------------------------------------- /DeepSpeedExample/examples/sc22-gpt-zero-offloading.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/examples/sc22-gpt-zero-offloading.sh -------------------------------------------------------------------------------- /DeepSpeedExample/images/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/images/Makefile -------------------------------------------------------------------------------- /DeepSpeedExample/images/cases.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/images/cases.png -------------------------------------------------------------------------------- /DeepSpeedExample/images/scaling-dp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/images/scaling-dp.png -------------------------------------------------------------------------------- /DeepSpeedExample/images/scaling-mp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/images/scaling-mp.png -------------------------------------------------------------------------------- /DeepSpeedExample/images/tables.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/images/tables.tex -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/__init__.py -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/arguments.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/arguments.py -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/checkpointing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/checkpointing.py -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/data/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/data/Makefile -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/data/__init__.py: -------------------------------------------------------------------------------- 1 | from . import indexed_dataset 2 | -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/data/bert_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/data/bert_dataset.py -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/data/dataset_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/data/dataset_utils.py -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/data/gpt2_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/data/gpt2_dataset.py -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/data/helpers.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/data/helpers.cpp -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/data/ict_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/data/ict_dataset.py -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/data/indexed_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/data/indexed_dataset.py -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/data/realm_dataset_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/data/realm_dataset_utils.py -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/data/realm_index.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/data/realm_index.py -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/data/samplers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/data/samplers.py -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/data/test/test_indexed_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/data/test/test_indexed_dataset.py -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/data/test/test_preprocess_data.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/data/test/test_preprocess_data.sh -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/deprecated_data_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/deprecated_data_utils/__init__.py -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/deprecated_data_utils/configure_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/deprecated_data_utils/configure_data.py -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/deprecated_data_utils/corpora.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/deprecated_data_utils/corpora.py -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/deprecated_data_utils/datasets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/deprecated_data_utils/datasets.py -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/deprecated_data_utils/file_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/deprecated_data_utils/file_utils.py -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/deprecated_data_utils/lazy_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/deprecated_data_utils/lazy_loader.py -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/deprecated_data_utils/samplers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/deprecated_data_utils/samplers.py -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/deprecated_data_utils/scripts/presplit_sentences_json.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/deprecated_data_utils/scripts/presplit_sentences_json.py -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/deprecated_data_utils/scripts/split_gpt2_json.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/deprecated_data_utils/scripts/split_gpt2_json.py -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/deprecated_data_utils/scripts/split_json.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/deprecated_data_utils/scripts/split_json.py -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/deprecated_data_utils/tf_dl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/deprecated_data_utils/tf_dl.py -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/deprecated_data_utils/tokenization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/deprecated_data_utils/tokenization.py -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/deprecated_data_utils/tokenization_gpt2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/deprecated_data_utils/tokenization_gpt2.py -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/deprecated_data_utils/wordpiece.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/deprecated_data_utils/wordpiece.py -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/fp16/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/fp16/__init__.py -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/fp16/fp16.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/fp16/fp16.py -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/fp16/fp16util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/fp16/fp16util.py -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/fp16/loss_scaler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/fp16/loss_scaler.py -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/fused_kernels/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/fused_kernels/__init__.py -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/fused_kernels/scaled_masked_softmax.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/fused_kernels/scaled_masked_softmax.cpp -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/fused_kernels/scaled_masked_softmax.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/fused_kernels/scaled_masked_softmax.h -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/fused_kernels/scaled_masked_softmax_cuda.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/fused_kernels/scaled_masked_softmax_cuda.cu -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/fused_kernels/scaled_upper_triang_masked_softmax.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/fused_kernels/scaled_upper_triang_masked_softmax.cpp -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/fused_kernels/scaled_upper_triang_masked_softmax_cuda.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/fused_kernels/scaled_upper_triang_masked_softmax_cuda.cu -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/global_vars.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/global_vars.py -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/indexer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/indexer.py -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/initialize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/initialize.py -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/learning_rates.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/learning_rates.py -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/memory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/memory.py -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/model/__init__.py -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/model/bert_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/model/bert_model.py -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/model/classification.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/model/classification.py -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/model/distributed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/model/distributed.py -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/model/fused_bias_gelu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/model/fused_bias_gelu.py -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/model/fused_softmax.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/model/fused_softmax.py -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/model/gpt2_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/model/gpt2_model.py -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/model/language_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/model/language_model.py -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/model/multiple_choice.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/model/multiple_choice.py -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/model/realm_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/model/realm_model.py -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/model/transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/model/transformer.py -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/model/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/model/utils.py -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/module.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/module.py -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/mpu/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/mpu/__init__.py -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/mpu/cross_entropy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/mpu/cross_entropy.py -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/mpu/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/mpu/data.py -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/mpu/grads.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/mpu/grads.py -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/mpu/initialize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/mpu/initialize.py -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/mpu/layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/mpu/layers.py -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/mpu/mappings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/mpu/mappings.py -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/mpu/random.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/mpu/random.py -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/mpu/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/mpu/tests/commons.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/mpu/tests/commons.py -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/mpu/tests/test_cross_entropy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/mpu/tests/test_cross_entropy.py -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/mpu/tests/test_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/mpu/tests/test_data.py -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/mpu/tests/test_initialize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/mpu/tests/test_initialize.py -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/mpu/tests/test_layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/mpu/tests/test_layers.py -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/mpu/tests/test_random.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/mpu/tests/test_random.py -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/mpu/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/mpu/utils.py -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/package_info.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/package_info.py -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/text_generation_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/text_generation_utils.py -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/tokenizer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/tokenizer/__init__.py -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/tokenizer/bert_tokenization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/tokenizer/bert_tokenization.py -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/tokenizer/gpt2_tokenization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/tokenizer/gpt2_tokenization.py -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/tokenizer/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/tokenizer/tokenizer.py -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/training.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/training.py -------------------------------------------------------------------------------- /DeepSpeedExample/megatron/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/megatron/utils.py -------------------------------------------------------------------------------- /DeepSpeedExample/pretrain_bert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/pretrain_bert.py -------------------------------------------------------------------------------- /DeepSpeedExample/pretrain_gpt2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/pretrain_gpt2.py -------------------------------------------------------------------------------- /DeepSpeedExample/pretrain_ict.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/pretrain_ict.py -------------------------------------------------------------------------------- /DeepSpeedExample/requirements.txt: -------------------------------------------------------------------------------- 1 | pybind11 2 | torch 3 | six 4 | regex 5 | numpy 6 | -------------------------------------------------------------------------------- /DeepSpeedExample/run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/run.sh -------------------------------------------------------------------------------- /DeepSpeedExample/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/setup.py -------------------------------------------------------------------------------- /DeepSpeedExample/tasks/data_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/tasks/data_utils.py -------------------------------------------------------------------------------- /DeepSpeedExample/tasks/ensemble_classifier.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/tasks/ensemble_classifier.py -------------------------------------------------------------------------------- /DeepSpeedExample/tasks/eval_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/tasks/eval_utils.py -------------------------------------------------------------------------------- /DeepSpeedExample/tasks/finetune_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/tasks/finetune_utils.py -------------------------------------------------------------------------------- /DeepSpeedExample/tasks/glue/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/tasks/glue/data.py -------------------------------------------------------------------------------- /DeepSpeedExample/tasks/glue/finetune.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/tasks/glue/finetune.py -------------------------------------------------------------------------------- /DeepSpeedExample/tasks/glue/mnli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/tasks/glue/mnli.py -------------------------------------------------------------------------------- /DeepSpeedExample/tasks/glue/qqp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/tasks/glue/qqp.py -------------------------------------------------------------------------------- /DeepSpeedExample/tasks/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/tasks/main.py -------------------------------------------------------------------------------- /DeepSpeedExample/tasks/race/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/tasks/race/data.py -------------------------------------------------------------------------------- /DeepSpeedExample/tasks/race/finetune.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/tasks/race/finetune.py -------------------------------------------------------------------------------- /DeepSpeedExample/tasks/zeroshot_gpt2/datasets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/tasks/zeroshot_gpt2/datasets.py -------------------------------------------------------------------------------- /DeepSpeedExample/tasks/zeroshot_gpt2/detokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/tasks/zeroshot_gpt2/detokenizer.py -------------------------------------------------------------------------------- /DeepSpeedExample/tasks/zeroshot_gpt2/evaluate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/tasks/zeroshot_gpt2/evaluate.py -------------------------------------------------------------------------------- /DeepSpeedExample/tools/create_doc_index.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/tools/create_doc_index.py -------------------------------------------------------------------------------- /DeepSpeedExample/tools/generate_samples_gpt2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/tools/generate_samples_gpt2.py -------------------------------------------------------------------------------- /DeepSpeedExample/tools/linter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/tools/linter.py -------------------------------------------------------------------------------- /DeepSpeedExample/tools/merge_mp_partitions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/tools/merge_mp_partitions.py -------------------------------------------------------------------------------- /DeepSpeedExample/tools/openwebtext/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/tools/openwebtext/README.md -------------------------------------------------------------------------------- /DeepSpeedExample/tools/openwebtext/blacklist_urls.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/tools/openwebtext/blacklist_urls.py -------------------------------------------------------------------------------- /DeepSpeedExample/tools/openwebtext/cleanup_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/tools/openwebtext/cleanup_dataset.py -------------------------------------------------------------------------------- /DeepSpeedExample/tools/openwebtext/find_duplicates.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/tools/openwebtext/find_duplicates.py -------------------------------------------------------------------------------- /DeepSpeedExample/tools/openwebtext/group_duplicates_url.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/tools/openwebtext/group_duplicates_url.py -------------------------------------------------------------------------------- /DeepSpeedExample/tools/openwebtext/merge_jsons.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/tools/openwebtext/merge_jsons.py -------------------------------------------------------------------------------- /DeepSpeedExample/tools/openwebtext/remove_group_duplicates.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/tools/openwebtext/remove_group_duplicates.py -------------------------------------------------------------------------------- /DeepSpeedExample/tools/preprocess_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/DeepSpeedExample/tools/preprocess_data.py -------------------------------------------------------------------------------- /L2L/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/.gitignore -------------------------------------------------------------------------------- /L2L/.gitlab-ci.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/.gitlab-ci.yml -------------------------------------------------------------------------------- /L2L/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/LICENSE -------------------------------------------------------------------------------- /L2L/MANIFEST.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/MANIFEST.in -------------------------------------------------------------------------------- /L2L/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/README.md -------------------------------------------------------------------------------- /L2L/docker/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/docker/Dockerfile -------------------------------------------------------------------------------- /L2L/docker/attach_container.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/docker/attach_container.sh -------------------------------------------------------------------------------- /L2L/docker/build_image.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/docker/build_image.sh -------------------------------------------------------------------------------- /L2L/docker/nogil.torch.csrc.autograd.python_variable.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/docker/nogil.torch.csrc.autograd.python_variable.cpp -------------------------------------------------------------------------------- /L2L/docker/start_container.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/docker/start_container.sh -------------------------------------------------------------------------------- /L2L/examples/evaluate_retriever_nq.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/examples/evaluate_retriever_nq.sh -------------------------------------------------------------------------------- /L2L/examples/evaluate_zeroshot_gpt.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/examples/evaluate_zeroshot_gpt.sh -------------------------------------------------------------------------------- /L2L/examples/finetune_mnli_distributed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/examples/finetune_mnli_distributed.sh -------------------------------------------------------------------------------- /L2L/examples/finetune_race_distributed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/examples/finetune_race_distributed.sh -------------------------------------------------------------------------------- /L2L/examples/finetune_retriever_distributed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/examples/finetune_retriever_distributed.sh -------------------------------------------------------------------------------- /L2L/examples/generate_text.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/examples/generate_text.sh -------------------------------------------------------------------------------- /L2L/examples/merge_mp_bert.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/examples/merge_mp_bert.sh -------------------------------------------------------------------------------- /L2L/examples/pretrain_bert.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/examples/pretrain_bert.sh -------------------------------------------------------------------------------- /L2L/examples/pretrain_bert_distributed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/examples/pretrain_bert_distributed.sh -------------------------------------------------------------------------------- /L2L/examples/pretrain_bert_distributed_with_mp.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/examples/pretrain_bert_distributed_with_mp.sh -------------------------------------------------------------------------------- /L2L/examples/pretrain_gpt-l2l.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/examples/pretrain_gpt-l2l.sh -------------------------------------------------------------------------------- /L2L/examples/pretrain_gpt.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/examples/pretrain_gpt.sh -------------------------------------------------------------------------------- /L2L/examples/pretrain_gpt3_175B.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/examples/pretrain_gpt3_175B.sh -------------------------------------------------------------------------------- /L2L/examples/pretrain_gpt_distributed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/examples/pretrain_gpt_distributed.sh -------------------------------------------------------------------------------- /L2L/examples/pretrain_gpt_distributed_with_mp.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/examples/pretrain_gpt_distributed_with_mp.sh -------------------------------------------------------------------------------- /L2L/examples/pretrain_ict.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/examples/pretrain_ict.sh -------------------------------------------------------------------------------- /L2L/examples/pretrain_t5.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/examples/pretrain_t5.sh -------------------------------------------------------------------------------- /L2L/examples/pretrain_t5_distributed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/examples/pretrain_t5_distributed.sh -------------------------------------------------------------------------------- /L2L/examples/pretrain_t5_distributed_with_mp.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/examples/pretrain_t5_distributed_with_mp.sh -------------------------------------------------------------------------------- /L2L/examples/run_text_generation_server_345M.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/examples/run_text_generation_server_345M.sh -------------------------------------------------------------------------------- /L2L/examples/run_text_generation_server_345M_8_tensor_parallel.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/examples/run_text_generation_server_345M_8_tensor_parallel.sh -------------------------------------------------------------------------------- /L2L/examples/sc21/CONFIG.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/examples/sc21/CONFIG.sh -------------------------------------------------------------------------------- /L2L/examples/sc21/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/examples/sc21/README.md -------------------------------------------------------------------------------- /L2L/examples/sc21/SBATCH.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/examples/sc21/SBATCH.sh -------------------------------------------------------------------------------- /L2L/examples/sc21/SRUN.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/examples/sc21/SRUN.sh -------------------------------------------------------------------------------- /L2L/examples/sc21/run_figure_11.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/examples/sc21/run_figure_11.sh -------------------------------------------------------------------------------- /L2L/examples/sc21/run_figure_12.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/examples/sc21/run_figure_12.sh -------------------------------------------------------------------------------- /L2L/examples/sc21/run_figure_13.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/examples/sc21/run_figure_13.sh -------------------------------------------------------------------------------- /L2L/examples/sc21/run_figure_14.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/examples/sc21/run_figure_14.sh -------------------------------------------------------------------------------- /L2L/examples/sc21/run_figure_15.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/examples/sc21/run_figure_15.sh -------------------------------------------------------------------------------- /L2L/examples/sc21/run_figure_16.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/examples/sc21/run_figure_16.sh -------------------------------------------------------------------------------- /L2L/examples/sc21/run_figure_17.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/examples/sc21/run_figure_17.sh -------------------------------------------------------------------------------- /L2L/examples/sc21/run_figure_18.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/examples/sc21/run_figure_18.sh -------------------------------------------------------------------------------- /L2L/examples/sc21/run_table_1.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/examples/sc21/run_table_1.sh -------------------------------------------------------------------------------- /L2L/examples/sc22-gpt-l2l.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/examples/sc22-gpt-l2l.sh -------------------------------------------------------------------------------- /L2L/images/cases_april2021.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/images/cases_april2021.png -------------------------------------------------------------------------------- /L2L/megatron/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/__init__.py -------------------------------------------------------------------------------- /L2L/megatron/arguments.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/arguments.py -------------------------------------------------------------------------------- /L2L/megatron/checkpointing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/checkpointing.py -------------------------------------------------------------------------------- /L2L/megatron/data/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/data/Makefile -------------------------------------------------------------------------------- /L2L/megatron/data/__init__.py: -------------------------------------------------------------------------------- 1 | from . import indexed_dataset 2 | -------------------------------------------------------------------------------- /L2L/megatron/data/autoaugment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/data/autoaugment.py -------------------------------------------------------------------------------- /L2L/megatron/data/bert_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/data/bert_dataset.py -------------------------------------------------------------------------------- /L2L/megatron/data/biencoder_dataset_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/data/biencoder_dataset_utils.py -------------------------------------------------------------------------------- /L2L/megatron/data/blendable_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/data/blendable_dataset.py -------------------------------------------------------------------------------- /L2L/megatron/data/data_samplers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/data/data_samplers.py -------------------------------------------------------------------------------- /L2L/megatron/data/dataset_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/data/dataset_utils.py -------------------------------------------------------------------------------- /L2L/megatron/data/gpt_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/data/gpt_dataset.py -------------------------------------------------------------------------------- /L2L/megatron/data/helpers.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/data/helpers.cpp -------------------------------------------------------------------------------- /L2L/megatron/data/ict_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/data/ict_dataset.py -------------------------------------------------------------------------------- /L2L/megatron/data/indexed_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/data/indexed_dataset.py -------------------------------------------------------------------------------- /L2L/megatron/data/orqa_wiki_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/data/orqa_wiki_dataset.py -------------------------------------------------------------------------------- /L2L/megatron/data/realm_dataset_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/data/realm_dataset_utils.py -------------------------------------------------------------------------------- /L2L/megatron/data/realm_index.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/data/realm_index.py -------------------------------------------------------------------------------- /L2L/megatron/data/t5_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/data/t5_dataset.py -------------------------------------------------------------------------------- /L2L/megatron/data/test/test_indexed_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/data/test/test_indexed_dataset.py -------------------------------------------------------------------------------- /L2L/megatron/data/test/test_preprocess_data.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/data/test/test_preprocess_data.sh -------------------------------------------------------------------------------- /L2L/megatron/data/vit_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/data/vit_dataset.py -------------------------------------------------------------------------------- /L2L/megatron/fp16_deprecated/loss_scaler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/fp16_deprecated/loss_scaler.py -------------------------------------------------------------------------------- /L2L/megatron/fused_kernels/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/fused_kernels/__init__.py -------------------------------------------------------------------------------- /L2L/megatron/fused_kernels/compat.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/fused_kernels/compat.h -------------------------------------------------------------------------------- /L2L/megatron/fused_kernels/layer_norm_cuda.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/fused_kernels/layer_norm_cuda.cpp -------------------------------------------------------------------------------- /L2L/megatron/fused_kernels/layer_norm_cuda_kernel.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/fused_kernels/layer_norm_cuda_kernel.cu -------------------------------------------------------------------------------- /L2L/megatron/fused_kernels/scaled_masked_softmax.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/fused_kernels/scaled_masked_softmax.cpp -------------------------------------------------------------------------------- /L2L/megatron/fused_kernels/scaled_masked_softmax.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/fused_kernels/scaled_masked_softmax.h -------------------------------------------------------------------------------- /L2L/megatron/fused_kernels/scaled_masked_softmax_cuda.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/fused_kernels/scaled_masked_softmax_cuda.cu -------------------------------------------------------------------------------- /L2L/megatron/fused_kernels/scaled_upper_triang_masked_softmax.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/fused_kernels/scaled_upper_triang_masked_softmax.cpp -------------------------------------------------------------------------------- /L2L/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h -------------------------------------------------------------------------------- /L2L/megatron/fused_kernels/scaled_upper_triang_masked_softmax_cuda.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/fused_kernels/scaled_upper_triang_masked_softmax_cuda.cu -------------------------------------------------------------------------------- /L2L/megatron/fused_kernels/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /L2L/megatron/fused_kernels/tests/test_fused_kernels.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/fused_kernels/tests/test_fused_kernels.py -------------------------------------------------------------------------------- /L2L/megatron/fused_kernels/type_shim.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/fused_kernels/type_shim.h -------------------------------------------------------------------------------- /L2L/megatron/global_vars.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/global_vars.py -------------------------------------------------------------------------------- /L2L/megatron/indexer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/indexer.py -------------------------------------------------------------------------------- /L2L/megatron/initialize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/initialize.py -------------------------------------------------------------------------------- /L2L/megatron/learning_rates.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/learning_rates.py -------------------------------------------------------------------------------- /L2L/megatron/memory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/memory.py -------------------------------------------------------------------------------- /L2L/megatron/microbatches.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/microbatches.py -------------------------------------------------------------------------------- /L2L/megatron/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/model/__init__.py -------------------------------------------------------------------------------- /L2L/megatron/model/bert_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/model/bert_model.py -------------------------------------------------------------------------------- /L2L/megatron/model/biencoder_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/model/biencoder_model.py -------------------------------------------------------------------------------- /L2L/megatron/model/classification.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/model/classification.py -------------------------------------------------------------------------------- /L2L/megatron/model/distributed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/model/distributed.py -------------------------------------------------------------------------------- /L2L/megatron/model/enums.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/model/enums.py -------------------------------------------------------------------------------- /L2L/megatron/model/fused_bias_gelu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/model/fused_bias_gelu.py -------------------------------------------------------------------------------- /L2L/megatron/model/fused_layer_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/model/fused_layer_norm.py -------------------------------------------------------------------------------- /L2L/megatron/model/fused_softmax.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/model/fused_softmax.py -------------------------------------------------------------------------------- /L2L/megatron/model/gpt_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/model/gpt_model.py -------------------------------------------------------------------------------- /L2L/megatron/model/language_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/model/language_model.py -------------------------------------------------------------------------------- /L2L/megatron/model/module.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/model/module.py -------------------------------------------------------------------------------- /L2L/megatron/model/multiple_choice.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/model/multiple_choice.py -------------------------------------------------------------------------------- /L2L/megatron/model/realm_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/model/realm_model.py -------------------------------------------------------------------------------- /L2L/megatron/model/t5_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/model/t5_model.py -------------------------------------------------------------------------------- /L2L/megatron/model/transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/model/transformer.py -------------------------------------------------------------------------------- /L2L/megatron/model/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/model/utils.py -------------------------------------------------------------------------------- /L2L/megatron/model/vit_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/model/vit_model.py -------------------------------------------------------------------------------- /L2L/megatron/mpu/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/mpu/__init__.py -------------------------------------------------------------------------------- /L2L/megatron/mpu/cross_entropy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/mpu/cross_entropy.py -------------------------------------------------------------------------------- /L2L/megatron/mpu/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/mpu/data.py -------------------------------------------------------------------------------- /L2L/megatron/mpu/initialize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/mpu/initialize.py -------------------------------------------------------------------------------- /L2L/megatron/mpu/layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/mpu/layers.py -------------------------------------------------------------------------------- /L2L/megatron/mpu/mappings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/mpu/mappings.py -------------------------------------------------------------------------------- /L2L/megatron/mpu/random.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/mpu/random.py -------------------------------------------------------------------------------- /L2L/megatron/mpu/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /L2L/megatron/mpu/tests/commons.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/mpu/tests/commons.py -------------------------------------------------------------------------------- /L2L/megatron/mpu/tests/test_cross_entropy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/mpu/tests/test_cross_entropy.py -------------------------------------------------------------------------------- /L2L/megatron/mpu/tests/test_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/mpu/tests/test_data.py -------------------------------------------------------------------------------- /L2L/megatron/mpu/tests/test_initialize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/mpu/tests/test_initialize.py -------------------------------------------------------------------------------- /L2L/megatron/mpu/tests/test_layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/mpu/tests/test_layers.py -------------------------------------------------------------------------------- /L2L/megatron/mpu/tests/test_random.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/mpu/tests/test_random.py -------------------------------------------------------------------------------- /L2L/megatron/mpu/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/mpu/utils.py -------------------------------------------------------------------------------- /L2L/megatron/optimizer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/optimizer/__init__.py -------------------------------------------------------------------------------- /L2L/megatron/optimizer/clip_grads.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/optimizer/clip_grads.py -------------------------------------------------------------------------------- /L2L/megatron/optimizer/cpp/cpu_ops.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/optimizer/cpp/cpu_ops.cpp -------------------------------------------------------------------------------- /L2L/megatron/optimizer/cpp/cpu_ops.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/optimizer/cpp/cpu_ops.h -------------------------------------------------------------------------------- /L2L/megatron/optimizer/cpp/ds/StopWatch.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/optimizer/cpp/ds/StopWatch.h -------------------------------------------------------------------------------- /L2L/megatron/optimizer/cpp/ds/context.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/optimizer/cpp/ds/context.h -------------------------------------------------------------------------------- /L2L/megatron/optimizer/cpp/ds/cpu_adam.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/optimizer/cpp/ds/cpu_adam.cpp -------------------------------------------------------------------------------- /L2L/megatron/optimizer/cpp/ds/cpu_adam.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/optimizer/cpp/ds/cpu_adam.h -------------------------------------------------------------------------------- /L2L/megatron/optimizer/cpp/ds/cublas_wrappers.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/optimizer/cpp/ds/cublas_wrappers.h -------------------------------------------------------------------------------- /L2L/megatron/optimizer/cpp/ds/custom_cuda_kernel.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/optimizer/cpp/ds/custom_cuda_kernel.cu -------------------------------------------------------------------------------- /L2L/megatron/optimizer/cpp/ds/custom_cuda_layers.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/optimizer/cpp/ds/custom_cuda_layers.h -------------------------------------------------------------------------------- /L2L/megatron/optimizer/cpp/ds/gemm_test.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/optimizer/cpp/ds/gemm_test.h -------------------------------------------------------------------------------- /L2L/megatron/optimizer/cpp/ds/simd.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/optimizer/cpp/ds/simd.h -------------------------------------------------------------------------------- /L2L/megatron/optimizer/grad_scaler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/optimizer/grad_scaler.py -------------------------------------------------------------------------------- /L2L/megatron/optimizer/optimizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/optimizer/optimizer.py -------------------------------------------------------------------------------- /L2L/megatron/p2p_communication.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/p2p_communication.py -------------------------------------------------------------------------------- /L2L/megatron/package_info.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/package_info.py -------------------------------------------------------------------------------- /L2L/megatron/schedules.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/schedules.py -------------------------------------------------------------------------------- /L2L/megatron/strongh/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/strongh/__init__.py -------------------------------------------------------------------------------- /L2L/megatron/strongh/cpp/utils.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/strongh/cpp/utils.cpp -------------------------------------------------------------------------------- /L2L/megatron/text_generation_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/text_generation_server.py -------------------------------------------------------------------------------- /L2L/megatron/text_generation_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/text_generation_utils.py -------------------------------------------------------------------------------- /L2L/megatron/tokenizer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/tokenizer/__init__.py -------------------------------------------------------------------------------- /L2L/megatron/tokenizer/bert_tokenization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/tokenizer/bert_tokenization.py -------------------------------------------------------------------------------- /L2L/megatron/tokenizer/gpt2_tokenization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/tokenizer/gpt2_tokenization.py -------------------------------------------------------------------------------- /L2L/megatron/tokenizer/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/tokenizer/tokenizer.py -------------------------------------------------------------------------------- /L2L/megatron/training.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/training.py -------------------------------------------------------------------------------- /L2L/megatron/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/megatron/utils.py -------------------------------------------------------------------------------- /L2L/pretrain_bert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/pretrain_bert.py -------------------------------------------------------------------------------- /L2L/pretrain_gpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/pretrain_gpt.py -------------------------------------------------------------------------------- /L2L/pretrain_ict.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/pretrain_ict.py -------------------------------------------------------------------------------- /L2L/pretrain_t5.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/pretrain_t5.py -------------------------------------------------------------------------------- /L2L/pretrain_vit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/pretrain_vit.py -------------------------------------------------------------------------------- /L2L/requirements.txt: -------------------------------------------------------------------------------- 1 | pybind11 2 | torch 3 | six 4 | regex 5 | numpy 6 | -------------------------------------------------------------------------------- /L2L/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/setup.py -------------------------------------------------------------------------------- /L2L/tasks/data_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/tasks/data_utils.py -------------------------------------------------------------------------------- /L2L/tasks/ensemble_classifier.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/tasks/ensemble_classifier.py -------------------------------------------------------------------------------- /L2L/tasks/eval_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/tasks/eval_utils.py -------------------------------------------------------------------------------- /L2L/tasks/finetune_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/tasks/finetune_utils.py -------------------------------------------------------------------------------- /L2L/tasks/glue/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/tasks/glue/data.py -------------------------------------------------------------------------------- /L2L/tasks/glue/finetune.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/tasks/glue/finetune.py -------------------------------------------------------------------------------- /L2L/tasks/glue/mnli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/tasks/glue/mnli.py -------------------------------------------------------------------------------- /L2L/tasks/glue/qqp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/tasks/glue/qqp.py -------------------------------------------------------------------------------- /L2L/tasks/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/tasks/main.py -------------------------------------------------------------------------------- /L2L/tasks/orqa/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/tasks/orqa/README.md -------------------------------------------------------------------------------- /L2L/tasks/orqa/evaluate_orqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/tasks/orqa/evaluate_orqa.py -------------------------------------------------------------------------------- /L2L/tasks/orqa/evaluate_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/tasks/orqa/evaluate_utils.py -------------------------------------------------------------------------------- /L2L/tasks/orqa/supervised/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/tasks/orqa/supervised/data.py -------------------------------------------------------------------------------- /L2L/tasks/orqa/supervised/eval_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/tasks/orqa/supervised/eval_utils.py -------------------------------------------------------------------------------- /L2L/tasks/orqa/supervised/finetune.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/tasks/orqa/supervised/finetune.py -------------------------------------------------------------------------------- /L2L/tasks/orqa/unsupervised/nq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/tasks/orqa/unsupervised/nq.py -------------------------------------------------------------------------------- /L2L/tasks/orqa/unsupervised/qa_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/tasks/orqa/unsupervised/qa_utils.py -------------------------------------------------------------------------------- /L2L/tasks/orqa/unsupervised/tokenizers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/tasks/orqa/unsupervised/tokenizers.py -------------------------------------------------------------------------------- /L2L/tasks/race/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/tasks/race/data.py -------------------------------------------------------------------------------- /L2L/tasks/race/finetune.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/tasks/race/finetune.py -------------------------------------------------------------------------------- /L2L/tasks/vision/classification.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/tasks/vision/classification.py -------------------------------------------------------------------------------- /L2L/tasks/vision/eval_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/tasks/vision/eval_utils.py -------------------------------------------------------------------------------- /L2L/tasks/vision/finetune_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/tasks/vision/finetune_utils.py -------------------------------------------------------------------------------- /L2L/tasks/vision/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/tasks/vision/main.py -------------------------------------------------------------------------------- /L2L/tasks/zeroshot_gpt/datasets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/tasks/zeroshot_gpt/datasets.py -------------------------------------------------------------------------------- /L2L/tasks/zeroshot_gpt/detokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/tasks/zeroshot_gpt/detokenizer.py -------------------------------------------------------------------------------- /L2L/tasks/zeroshot_gpt/evaluate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/tasks/zeroshot_gpt/evaluate.py -------------------------------------------------------------------------------- /L2L/tests/test_basic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/tests/test_basic.py -------------------------------------------------------------------------------- /L2L/tools/generate_samples_gpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/tools/generate_samples_gpt.py -------------------------------------------------------------------------------- /L2L/tools/linter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/tools/linter.py -------------------------------------------------------------------------------- /L2L/tools/merge_mp_partitions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/tools/merge_mp_partitions.py -------------------------------------------------------------------------------- /L2L/tools/openwebtext/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/tools/openwebtext/README.md -------------------------------------------------------------------------------- /L2L/tools/openwebtext/add_id.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/tools/openwebtext/add_id.py -------------------------------------------------------------------------------- /L2L/tools/openwebtext/blacklist_urls.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/tools/openwebtext/blacklist_urls.py -------------------------------------------------------------------------------- /L2L/tools/openwebtext/cleanup_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/tools/openwebtext/cleanup_dataset.py -------------------------------------------------------------------------------- /L2L/tools/openwebtext/cleanup_fix_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/tools/openwebtext/cleanup_fix_dataset.py -------------------------------------------------------------------------------- /L2L/tools/openwebtext/filter_ngrams.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/tools/openwebtext/filter_ngrams.py -------------------------------------------------------------------------------- /L2L/tools/openwebtext/find_duplicates.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/tools/openwebtext/find_duplicates.py -------------------------------------------------------------------------------- /L2L/tools/openwebtext/group_duplicate_url.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/tools/openwebtext/group_duplicate_url.py -------------------------------------------------------------------------------- /L2L/tools/openwebtext/merge_jsons.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/tools/openwebtext/merge_jsons.py -------------------------------------------------------------------------------- /L2L/tools/openwebtext/remove_group_duplicates.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/tools/openwebtext/remove_group_duplicates.py -------------------------------------------------------------------------------- /L2L/tools/preprocess_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/tools/preprocess_data.py -------------------------------------------------------------------------------- /L2L/tools/run_text_generation_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/tools/run_text_generation_server.py -------------------------------------------------------------------------------- /L2L/tools/text_generation_cli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/L2L/tools/text_generation_cli.py -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/LICENSE -------------------------------------------------------------------------------- /Megatron-LM/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | 3 | # Distribution / packaging 4 | build/ 5 | dist/ 6 | *.egg-info/ -------------------------------------------------------------------------------- /Megatron-LM/.gitlab-ci.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/.gitlab-ci.yml -------------------------------------------------------------------------------- /Megatron-LM/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/LICENSE -------------------------------------------------------------------------------- /Megatron-LM/MANIFEST.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/MANIFEST.in -------------------------------------------------------------------------------- /Megatron-LM/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/README.md -------------------------------------------------------------------------------- /Megatron-LM/examples/evaluate_retriever_nq.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/examples/evaluate_retriever_nq.sh -------------------------------------------------------------------------------- /Megatron-LM/examples/evaluate_zeroshot_gpt.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/examples/evaluate_zeroshot_gpt.sh -------------------------------------------------------------------------------- /Megatron-LM/examples/finetune_mnli_distributed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/examples/finetune_mnli_distributed.sh -------------------------------------------------------------------------------- /Megatron-LM/examples/finetune_race_distributed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/examples/finetune_race_distributed.sh -------------------------------------------------------------------------------- /Megatron-LM/examples/finetune_retriever_distributed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/examples/finetune_retriever_distributed.sh -------------------------------------------------------------------------------- /Megatron-LM/examples/generate_text.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/examples/generate_text.sh -------------------------------------------------------------------------------- /Megatron-LM/examples/merge_mp_bert.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/examples/merge_mp_bert.sh -------------------------------------------------------------------------------- /Megatron-LM/examples/pretrain_bert.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/examples/pretrain_bert.sh -------------------------------------------------------------------------------- /Megatron-LM/examples/pretrain_bert_distributed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/examples/pretrain_bert_distributed.sh -------------------------------------------------------------------------------- /Megatron-LM/examples/pretrain_bert_distributed_with_mp.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/examples/pretrain_bert_distributed_with_mp.sh -------------------------------------------------------------------------------- /Megatron-LM/examples/pretrain_gpt.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/examples/pretrain_gpt.sh -------------------------------------------------------------------------------- /Megatron-LM/examples/pretrain_gpt3_175B.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/examples/pretrain_gpt3_175B.sh -------------------------------------------------------------------------------- /Megatron-LM/examples/pretrain_gpt_distributed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/examples/pretrain_gpt_distributed.sh -------------------------------------------------------------------------------- /Megatron-LM/examples/pretrain_gpt_distributed_with_mp.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/examples/pretrain_gpt_distributed_with_mp.sh -------------------------------------------------------------------------------- /Megatron-LM/examples/pretrain_ict.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/examples/pretrain_ict.sh -------------------------------------------------------------------------------- /Megatron-LM/examples/pretrain_t5.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/examples/pretrain_t5.sh -------------------------------------------------------------------------------- /Megatron-LM/examples/pretrain_t5_distributed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/examples/pretrain_t5_distributed.sh -------------------------------------------------------------------------------- /Megatron-LM/examples/pretrain_t5_distributed_with_mp.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/examples/pretrain_t5_distributed_with_mp.sh -------------------------------------------------------------------------------- /Megatron-LM/examples/run_text_generation_server_345M.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/examples/run_text_generation_server_345M.sh -------------------------------------------------------------------------------- /Megatron-LM/examples/run_text_generation_server_345M_8_tensor_parallel.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/examples/run_text_generation_server_345M_8_tensor_parallel.sh -------------------------------------------------------------------------------- /Megatron-LM/examples/sc21/CONFIG.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/examples/sc21/CONFIG.sh -------------------------------------------------------------------------------- /Megatron-LM/examples/sc21/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/examples/sc21/README.md -------------------------------------------------------------------------------- /Megatron-LM/examples/sc21/SBATCH.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/examples/sc21/SBATCH.sh -------------------------------------------------------------------------------- /Megatron-LM/examples/sc21/SRUN.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/examples/sc21/SRUN.sh -------------------------------------------------------------------------------- /Megatron-LM/examples/sc21/run_figure_11.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/examples/sc21/run_figure_11.sh -------------------------------------------------------------------------------- /Megatron-LM/examples/sc21/run_figure_12.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/examples/sc21/run_figure_12.sh -------------------------------------------------------------------------------- /Megatron-LM/examples/sc21/run_figure_13.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/examples/sc21/run_figure_13.sh -------------------------------------------------------------------------------- /Megatron-LM/examples/sc21/run_figure_14.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/examples/sc21/run_figure_14.sh -------------------------------------------------------------------------------- /Megatron-LM/examples/sc21/run_figure_15.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/examples/sc21/run_figure_15.sh -------------------------------------------------------------------------------- /Megatron-LM/examples/sc21/run_figure_16.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/examples/sc21/run_figure_16.sh -------------------------------------------------------------------------------- /Megatron-LM/examples/sc21/run_figure_17.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/examples/sc21/run_figure_17.sh -------------------------------------------------------------------------------- /Megatron-LM/examples/sc21/run_figure_18.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/examples/sc21/run_figure_18.sh -------------------------------------------------------------------------------- /Megatron-LM/examples/sc21/run_table_1.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/examples/sc21/run_table_1.sh -------------------------------------------------------------------------------- /Megatron-LM/examples/sc22-gpt-megatron.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/examples/sc22-gpt-megatron.sh -------------------------------------------------------------------------------- /Megatron-LM/images/cases_april2021.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/images/cases_april2021.png -------------------------------------------------------------------------------- /Megatron-LM/megatron/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/__init__.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/arguments.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/arguments.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/checkpointing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/checkpointing.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/data/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/data/Makefile -------------------------------------------------------------------------------- /Megatron-LM/megatron/data/__init__.py: -------------------------------------------------------------------------------- 1 | from . import indexed_dataset 2 | -------------------------------------------------------------------------------- /Megatron-LM/megatron/data/autoaugment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/data/autoaugment.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/data/bert_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/data/bert_dataset.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/data/biencoder_dataset_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/data/biencoder_dataset_utils.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/data/blendable_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/data/blendable_dataset.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/data/data_samplers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/data/data_samplers.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/data/dataset_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/data/dataset_utils.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/data/gpt_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/data/gpt_dataset.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/data/helpers.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/data/helpers.cpp -------------------------------------------------------------------------------- /Megatron-LM/megatron/data/ict_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/data/ict_dataset.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/data/indexed_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/data/indexed_dataset.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/data/orqa_wiki_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/data/orqa_wiki_dataset.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/data/realm_dataset_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/data/realm_dataset_utils.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/data/realm_index.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/data/realm_index.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/data/t5_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/data/t5_dataset.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/data/test/test_indexed_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/data/test/test_indexed_dataset.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/data/test/test_preprocess_data.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/data/test/test_preprocess_data.sh -------------------------------------------------------------------------------- /Megatron-LM/megatron/data/vit_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/data/vit_dataset.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/fp16_deprecated/loss_scaler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/fp16_deprecated/loss_scaler.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/fused_kernels/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/fused_kernels/__init__.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/fused_kernels/compat.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/fused_kernels/compat.h -------------------------------------------------------------------------------- /Megatron-LM/megatron/fused_kernels/layer_norm_cuda.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/fused_kernels/layer_norm_cuda.cpp -------------------------------------------------------------------------------- /Megatron-LM/megatron/fused_kernels/layer_norm_cuda_kernel.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/fused_kernels/layer_norm_cuda_kernel.cu -------------------------------------------------------------------------------- /Megatron-LM/megatron/fused_kernels/scaled_masked_softmax.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/fused_kernels/scaled_masked_softmax.cpp -------------------------------------------------------------------------------- /Megatron-LM/megatron/fused_kernels/scaled_masked_softmax.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/fused_kernels/scaled_masked_softmax.h -------------------------------------------------------------------------------- /Megatron-LM/megatron/fused_kernels/scaled_masked_softmax_cuda.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/fused_kernels/scaled_masked_softmax_cuda.cu -------------------------------------------------------------------------------- /Megatron-LM/megatron/fused_kernels/scaled_upper_triang_masked_softmax.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/fused_kernels/scaled_upper_triang_masked_softmax.cpp -------------------------------------------------------------------------------- /Megatron-LM/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h -------------------------------------------------------------------------------- /Megatron-LM/megatron/fused_kernels/scaled_upper_triang_masked_softmax_cuda.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/fused_kernels/scaled_upper_triang_masked_softmax_cuda.cu -------------------------------------------------------------------------------- /Megatron-LM/megatron/fused_kernels/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Megatron-LM/megatron/fused_kernels/tests/test_fused_kernels.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/fused_kernels/tests/test_fused_kernels.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/fused_kernels/type_shim.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/fused_kernels/type_shim.h -------------------------------------------------------------------------------- /Megatron-LM/megatron/global_vars.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/global_vars.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/indexer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/indexer.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/initialize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/initialize.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/learning_rates.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/learning_rates.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/memory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/memory.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/microbatches.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/microbatches.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/model/__init__.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/model/bert_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/model/bert_model.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/model/biencoder_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/model/biencoder_model.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/model/classification.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/model/classification.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/model/distributed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/model/distributed.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/model/enums.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/model/enums.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/model/fused_bias_gelu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/model/fused_bias_gelu.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/model/fused_layer_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/model/fused_layer_norm.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/model/fused_softmax.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/model/fused_softmax.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/model/gpt_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/model/gpt_model.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/model/language_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/model/language_model.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/model/module.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/model/module.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/model/multiple_choice.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/model/multiple_choice.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/model/realm_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/model/realm_model.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/model/t5_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/model/t5_model.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/model/transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/model/transformer.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/model/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/model/utils.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/model/vit_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/model/vit_model.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/mpu/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/mpu/__init__.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/mpu/cross_entropy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/mpu/cross_entropy.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/mpu/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/mpu/data.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/mpu/initialize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/mpu/initialize.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/mpu/layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/mpu/layers.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/mpu/mappings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/mpu/mappings.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/mpu/random.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/mpu/random.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/mpu/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Megatron-LM/megatron/mpu/tests/commons.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/mpu/tests/commons.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/mpu/tests/test_cross_entropy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/mpu/tests/test_cross_entropy.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/mpu/tests/test_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/mpu/tests/test_data.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/mpu/tests/test_initialize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/mpu/tests/test_initialize.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/mpu/tests/test_layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/mpu/tests/test_layers.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/mpu/tests/test_random.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/mpu/tests/test_random.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/mpu/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/mpu/utils.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/optimizer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/optimizer/__init__.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/optimizer/clip_grads.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/optimizer/clip_grads.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/optimizer/grad_scaler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/optimizer/grad_scaler.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/optimizer/optimizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/optimizer/optimizer.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/p2p_communication.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/p2p_communication.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/package_info.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/package_info.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/schedules.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/schedules.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/text_generation_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/text_generation_server.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/text_generation_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/text_generation_utils.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/tokenizer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/tokenizer/__init__.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/tokenizer/bert_tokenization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/tokenizer/bert_tokenization.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/tokenizer/gpt2_tokenization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/tokenizer/gpt2_tokenization.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/tokenizer/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/tokenizer/tokenizer.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/training.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/training.py -------------------------------------------------------------------------------- /Megatron-LM/megatron/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/megatron/utils.py -------------------------------------------------------------------------------- /Megatron-LM/pretrain_bert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/pretrain_bert.py -------------------------------------------------------------------------------- /Megatron-LM/pretrain_gpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/pretrain_gpt.py -------------------------------------------------------------------------------- /Megatron-LM/pretrain_ict.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/pretrain_ict.py -------------------------------------------------------------------------------- /Megatron-LM/pretrain_t5.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/pretrain_t5.py -------------------------------------------------------------------------------- /Megatron-LM/pretrain_vit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/pretrain_vit.py -------------------------------------------------------------------------------- /Megatron-LM/requirements.txt: -------------------------------------------------------------------------------- 1 | pybind11 2 | torch 3 | six 4 | regex 5 | numpy 6 | -------------------------------------------------------------------------------- /Megatron-LM/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/setup.py -------------------------------------------------------------------------------- /Megatron-LM/tasks/data_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/tasks/data_utils.py -------------------------------------------------------------------------------- /Megatron-LM/tasks/ensemble_classifier.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/tasks/ensemble_classifier.py -------------------------------------------------------------------------------- /Megatron-LM/tasks/eval_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/tasks/eval_utils.py -------------------------------------------------------------------------------- /Megatron-LM/tasks/finetune_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/tasks/finetune_utils.py -------------------------------------------------------------------------------- /Megatron-LM/tasks/glue/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/tasks/glue/data.py -------------------------------------------------------------------------------- /Megatron-LM/tasks/glue/finetune.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/tasks/glue/finetune.py -------------------------------------------------------------------------------- /Megatron-LM/tasks/glue/mnli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/tasks/glue/mnli.py -------------------------------------------------------------------------------- /Megatron-LM/tasks/glue/qqp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/tasks/glue/qqp.py -------------------------------------------------------------------------------- /Megatron-LM/tasks/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/tasks/main.py -------------------------------------------------------------------------------- /Megatron-LM/tasks/orqa/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/tasks/orqa/README.md -------------------------------------------------------------------------------- /Megatron-LM/tasks/orqa/evaluate_orqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/tasks/orqa/evaluate_orqa.py -------------------------------------------------------------------------------- /Megatron-LM/tasks/orqa/evaluate_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/tasks/orqa/evaluate_utils.py -------------------------------------------------------------------------------- /Megatron-LM/tasks/orqa/supervised/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/tasks/orqa/supervised/data.py -------------------------------------------------------------------------------- /Megatron-LM/tasks/orqa/supervised/eval_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/tasks/orqa/supervised/eval_utils.py -------------------------------------------------------------------------------- /Megatron-LM/tasks/orqa/supervised/finetune.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/tasks/orqa/supervised/finetune.py -------------------------------------------------------------------------------- /Megatron-LM/tasks/orqa/unsupervised/nq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/tasks/orqa/unsupervised/nq.py -------------------------------------------------------------------------------- /Megatron-LM/tasks/orqa/unsupervised/qa_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/tasks/orqa/unsupervised/qa_utils.py -------------------------------------------------------------------------------- /Megatron-LM/tasks/orqa/unsupervised/tokenizers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/tasks/orqa/unsupervised/tokenizers.py -------------------------------------------------------------------------------- /Megatron-LM/tasks/race/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/tasks/race/data.py -------------------------------------------------------------------------------- /Megatron-LM/tasks/race/finetune.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/tasks/race/finetune.py -------------------------------------------------------------------------------- /Megatron-LM/tasks/vision/classification.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/tasks/vision/classification.py -------------------------------------------------------------------------------- /Megatron-LM/tasks/vision/eval_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/tasks/vision/eval_utils.py -------------------------------------------------------------------------------- /Megatron-LM/tasks/vision/finetune_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/tasks/vision/finetune_utils.py -------------------------------------------------------------------------------- /Megatron-LM/tasks/vision/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/tasks/vision/main.py -------------------------------------------------------------------------------- /Megatron-LM/tasks/zeroshot_gpt/datasets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/tasks/zeroshot_gpt/datasets.py -------------------------------------------------------------------------------- /Megatron-LM/tasks/zeroshot_gpt/detokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/tasks/zeroshot_gpt/detokenizer.py -------------------------------------------------------------------------------- /Megatron-LM/tasks/zeroshot_gpt/evaluate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/tasks/zeroshot_gpt/evaluate.py -------------------------------------------------------------------------------- /Megatron-LM/tests/test_basic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/tests/test_basic.py -------------------------------------------------------------------------------- /Megatron-LM/tools/generate_samples_gpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/tools/generate_samples_gpt.py -------------------------------------------------------------------------------- /Megatron-LM/tools/linter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/tools/linter.py -------------------------------------------------------------------------------- /Megatron-LM/tools/merge_mp_partitions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/tools/merge_mp_partitions.py -------------------------------------------------------------------------------- /Megatron-LM/tools/openwebtext/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/tools/openwebtext/README.md -------------------------------------------------------------------------------- /Megatron-LM/tools/openwebtext/add_id.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/tools/openwebtext/add_id.py -------------------------------------------------------------------------------- /Megatron-LM/tools/openwebtext/blacklist_urls.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/tools/openwebtext/blacklist_urls.py -------------------------------------------------------------------------------- /Megatron-LM/tools/openwebtext/cleanup_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/tools/openwebtext/cleanup_dataset.py -------------------------------------------------------------------------------- /Megatron-LM/tools/openwebtext/cleanup_fix_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/tools/openwebtext/cleanup_fix_dataset.py -------------------------------------------------------------------------------- /Megatron-LM/tools/openwebtext/filter_ngrams.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/tools/openwebtext/filter_ngrams.py -------------------------------------------------------------------------------- /Megatron-LM/tools/openwebtext/find_duplicates.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/tools/openwebtext/find_duplicates.py -------------------------------------------------------------------------------- /Megatron-LM/tools/openwebtext/group_duplicate_url.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/tools/openwebtext/group_duplicate_url.py -------------------------------------------------------------------------------- /Megatron-LM/tools/openwebtext/merge_jsons.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/tools/openwebtext/merge_jsons.py -------------------------------------------------------------------------------- /Megatron-LM/tools/openwebtext/remove_group_duplicates.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/tools/openwebtext/remove_group_duplicates.py -------------------------------------------------------------------------------- /Megatron-LM/tools/preprocess_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/tools/preprocess_data.py -------------------------------------------------------------------------------- /Megatron-LM/tools/run_text_generation_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/tools/run_text_generation_server.py -------------------------------------------------------------------------------- /Megatron-LM/tools/text_generation_cli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/Megatron-LM/tools/text_generation_cli.py -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/README.md -------------------------------------------------------------------------------- /SHv0/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/.gitignore -------------------------------------------------------------------------------- /SHv0/.gitlab-ci.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/.gitlab-ci.yml -------------------------------------------------------------------------------- /SHv0/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/LICENSE -------------------------------------------------------------------------------- /SHv0/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/README.md -------------------------------------------------------------------------------- /SHv0/examples/evaluate_retriever_nq.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/examples/evaluate_retriever_nq.sh -------------------------------------------------------------------------------- /SHv0/examples/evaluate_zeroshot_gpt.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/examples/evaluate_zeroshot_gpt.sh -------------------------------------------------------------------------------- /SHv0/examples/finetune_mnli_distributed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/examples/finetune_mnli_distributed.sh -------------------------------------------------------------------------------- /SHv0/examples/finetune_race_distributed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/examples/finetune_race_distributed.sh -------------------------------------------------------------------------------- /SHv0/examples/finetune_retriever_distributed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/examples/finetune_retriever_distributed.sh -------------------------------------------------------------------------------- /SHv0/examples/generate_text.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/examples/generate_text.sh -------------------------------------------------------------------------------- /SHv0/examples/merge_mp_bert.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/examples/merge_mp_bert.sh -------------------------------------------------------------------------------- /SHv0/examples/pretrain_bert.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/examples/pretrain_bert.sh -------------------------------------------------------------------------------- /SHv0/examples/pretrain_bert_1gpu_baseline.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/examples/pretrain_bert_1gpu_baseline.sh -------------------------------------------------------------------------------- /SHv0/examples/pretrain_bert_1gpu_offloading.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/examples/pretrain_bert_1gpu_offloading.sh -------------------------------------------------------------------------------- /SHv0/examples/pretrain_bert_8gpus_tp_baseline.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/examples/pretrain_bert_8gpus_tp_baseline.sh -------------------------------------------------------------------------------- /SHv0/examples/pretrain_bert_8gpus_tp_offloading.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/examples/pretrain_bert_8gpus_tp_offloading.sh -------------------------------------------------------------------------------- /SHv0/examples/pretrain_bert_distributed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/examples/pretrain_bert_distributed.sh -------------------------------------------------------------------------------- /SHv0/examples/pretrain_bert_distributed_with_mp.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/examples/pretrain_bert_distributed_with_mp.sh -------------------------------------------------------------------------------- /SHv0/examples/pretrain_gpt.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/examples/pretrain_gpt.sh -------------------------------------------------------------------------------- /SHv0/examples/pretrain_gpt3_175B.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/examples/pretrain_gpt3_175B.sh -------------------------------------------------------------------------------- /SHv0/examples/pretrain_gpt_1gpu_baseline.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/examples/pretrain_gpt_1gpu_baseline.sh -------------------------------------------------------------------------------- /SHv0/examples/pretrain_gpt_1gpu_offloading.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/examples/pretrain_gpt_1gpu_offloading.sh -------------------------------------------------------------------------------- /SHv0/examples/pretrain_gpt_8gpus_tp_baseline.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/examples/pretrain_gpt_8gpus_tp_baseline.sh -------------------------------------------------------------------------------- /SHv0/examples/pretrain_gpt_8gpus_tp_offloading.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/examples/pretrain_gpt_8gpus_tp_offloading.sh -------------------------------------------------------------------------------- /SHv0/examples/pretrain_gpt_distributed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/examples/pretrain_gpt_distributed.sh -------------------------------------------------------------------------------- /SHv0/examples/pretrain_gpt_distributed_with_mp.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/examples/pretrain_gpt_distributed_with_mp.sh -------------------------------------------------------------------------------- /SHv0/examples/pretrain_ict.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/examples/pretrain_ict.sh -------------------------------------------------------------------------------- /SHv0/examples/pretrain_t5.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/examples/pretrain_t5.sh -------------------------------------------------------------------------------- /SHv0/examples/pretrain_t5_distributed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/examples/pretrain_t5_distributed.sh -------------------------------------------------------------------------------- /SHv0/examples/pretrain_t5_distributed_with_mp.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/examples/pretrain_t5_distributed_with_mp.sh -------------------------------------------------------------------------------- /SHv0/examples/run_text_generation_server_345M.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/examples/run_text_generation_server_345M.sh -------------------------------------------------------------------------------- /SHv0/examples/run_text_generation_server_345M_8_tensor_parallel.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/examples/run_text_generation_server_345M_8_tensor_parallel.sh -------------------------------------------------------------------------------- /SHv0/examples/sc21/CONFIG.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/examples/sc21/CONFIG.sh -------------------------------------------------------------------------------- /SHv0/examples/sc21/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/examples/sc21/README.md -------------------------------------------------------------------------------- /SHv0/examples/sc21/SBATCH.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/examples/sc21/SBATCH.sh -------------------------------------------------------------------------------- /SHv0/examples/sc21/SRUN.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/examples/sc21/SRUN.sh -------------------------------------------------------------------------------- /SHv0/examples/sc21/run_figure_11.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/examples/sc21/run_figure_11.sh -------------------------------------------------------------------------------- /SHv0/examples/sc21/run_figure_12.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/examples/sc21/run_figure_12.sh -------------------------------------------------------------------------------- /SHv0/examples/sc21/run_figure_13.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/examples/sc21/run_figure_13.sh -------------------------------------------------------------------------------- /SHv0/examples/sc21/run_figure_14.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/examples/sc21/run_figure_14.sh -------------------------------------------------------------------------------- /SHv0/examples/sc21/run_figure_15.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/examples/sc21/run_figure_15.sh -------------------------------------------------------------------------------- /SHv0/examples/sc21/run_figure_16.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/examples/sc21/run_figure_16.sh -------------------------------------------------------------------------------- /SHv0/examples/sc21/run_figure_17.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/examples/sc21/run_figure_17.sh -------------------------------------------------------------------------------- /SHv0/examples/sc21/run_figure_18.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/examples/sc21/run_figure_18.sh -------------------------------------------------------------------------------- /SHv0/examples/sc21/run_table_1.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/examples/sc21/run_table_1.sh -------------------------------------------------------------------------------- /SHv0/examples/sc22-gpt-sh.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/examples/sc22-gpt-sh.sh -------------------------------------------------------------------------------- /SHv0/images/cases_april2021.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/images/cases_april2021.png -------------------------------------------------------------------------------- /SHv0/megatron/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/__init__.py -------------------------------------------------------------------------------- /SHv0/megatron/arguments.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/arguments.py -------------------------------------------------------------------------------- /SHv0/megatron/checkpointing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/checkpointing.py -------------------------------------------------------------------------------- /SHv0/megatron/data/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/data/Makefile -------------------------------------------------------------------------------- /SHv0/megatron/data/__init__.py: -------------------------------------------------------------------------------- 1 | from . import indexed_dataset 2 | -------------------------------------------------------------------------------- /SHv0/megatron/data/autoaugment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/data/autoaugment.py -------------------------------------------------------------------------------- /SHv0/megatron/data/bert_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/data/bert_dataset.py -------------------------------------------------------------------------------- /SHv0/megatron/data/biencoder_dataset_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/data/biencoder_dataset_utils.py -------------------------------------------------------------------------------- /SHv0/megatron/data/blendable_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/data/blendable_dataset.py -------------------------------------------------------------------------------- /SHv0/megatron/data/data_samplers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/data/data_samplers.py -------------------------------------------------------------------------------- /SHv0/megatron/data/dataset_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/data/dataset_utils.py -------------------------------------------------------------------------------- /SHv0/megatron/data/gpt_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/data/gpt_dataset.py -------------------------------------------------------------------------------- /SHv0/megatron/data/helpers.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/data/helpers.cpp -------------------------------------------------------------------------------- /SHv0/megatron/data/ict_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/data/ict_dataset.py -------------------------------------------------------------------------------- /SHv0/megatron/data/indexed_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/data/indexed_dataset.py -------------------------------------------------------------------------------- /SHv0/megatron/data/orqa_wiki_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/data/orqa_wiki_dataset.py -------------------------------------------------------------------------------- /SHv0/megatron/data/realm_dataset_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/data/realm_dataset_utils.py -------------------------------------------------------------------------------- /SHv0/megatron/data/realm_index.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/data/realm_index.py -------------------------------------------------------------------------------- /SHv0/megatron/data/t5_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/data/t5_dataset.py -------------------------------------------------------------------------------- /SHv0/megatron/data/test/test_indexed_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/data/test/test_indexed_dataset.py -------------------------------------------------------------------------------- /SHv0/megatron/data/test/test_preprocess_data.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/data/test/test_preprocess_data.sh -------------------------------------------------------------------------------- /SHv0/megatron/data/vit_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/data/vit_dataset.py -------------------------------------------------------------------------------- /SHv0/megatron/fp16_deprecated/loss_scaler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/fp16_deprecated/loss_scaler.py -------------------------------------------------------------------------------- /SHv0/megatron/fused_kernels/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/fused_kernels/__init__.py -------------------------------------------------------------------------------- /SHv0/megatron/fused_kernels/compat.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/fused_kernels/compat.h -------------------------------------------------------------------------------- /SHv0/megatron/fused_kernels/layer_norm_cuda.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/fused_kernels/layer_norm_cuda.cpp -------------------------------------------------------------------------------- /SHv0/megatron/fused_kernels/layer_norm_cuda_kernel.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/fused_kernels/layer_norm_cuda_kernel.cu -------------------------------------------------------------------------------- /SHv0/megatron/fused_kernels/scaled_masked_softmax.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/fused_kernels/scaled_masked_softmax.cpp -------------------------------------------------------------------------------- /SHv0/megatron/fused_kernels/scaled_masked_softmax.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/fused_kernels/scaled_masked_softmax.h -------------------------------------------------------------------------------- /SHv0/megatron/fused_kernels/scaled_masked_softmax_cuda.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/fused_kernels/scaled_masked_softmax_cuda.cu -------------------------------------------------------------------------------- /SHv0/megatron/fused_kernels/scaled_upper_triang_masked_softmax.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/fused_kernels/scaled_upper_triang_masked_softmax.cpp -------------------------------------------------------------------------------- /SHv0/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h -------------------------------------------------------------------------------- /SHv0/megatron/fused_kernels/scaled_upper_triang_masked_softmax_cuda.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/fused_kernels/scaled_upper_triang_masked_softmax_cuda.cu -------------------------------------------------------------------------------- /SHv0/megatron/fused_kernels/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /SHv0/megatron/fused_kernels/tests/test_fused_kernels.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/fused_kernels/tests/test_fused_kernels.py -------------------------------------------------------------------------------- /SHv0/megatron/fused_kernels/type_shim.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/fused_kernels/type_shim.h -------------------------------------------------------------------------------- /SHv0/megatron/global_vars.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/global_vars.py -------------------------------------------------------------------------------- /SHv0/megatron/indexer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/indexer.py -------------------------------------------------------------------------------- /SHv0/megatron/initialize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/initialize.py -------------------------------------------------------------------------------- /SHv0/megatron/learning_rates.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/learning_rates.py -------------------------------------------------------------------------------- /SHv0/megatron/memory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/memory.py -------------------------------------------------------------------------------- /SHv0/megatron/microbatches.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/microbatches.py -------------------------------------------------------------------------------- /SHv0/megatron/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/model/__init__.py -------------------------------------------------------------------------------- /SHv0/megatron/model/bert_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/model/bert_model.py -------------------------------------------------------------------------------- /SHv0/megatron/model/biencoder_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/model/biencoder_model.py -------------------------------------------------------------------------------- /SHv0/megatron/model/classification.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/model/classification.py -------------------------------------------------------------------------------- /SHv0/megatron/model/distributed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/model/distributed.py -------------------------------------------------------------------------------- /SHv0/megatron/model/enums.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/model/enums.py -------------------------------------------------------------------------------- /SHv0/megatron/model/fused_bias_gelu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/model/fused_bias_gelu.py -------------------------------------------------------------------------------- /SHv0/megatron/model/fused_layer_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/model/fused_layer_norm.py -------------------------------------------------------------------------------- /SHv0/megatron/model/fused_softmax.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/model/fused_softmax.py -------------------------------------------------------------------------------- /SHv0/megatron/model/gpt_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/model/gpt_model.py -------------------------------------------------------------------------------- /SHv0/megatron/model/language_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/model/language_model.py -------------------------------------------------------------------------------- /SHv0/megatron/model/module.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/model/module.py -------------------------------------------------------------------------------- /SHv0/megatron/model/multiple_choice.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/model/multiple_choice.py -------------------------------------------------------------------------------- /SHv0/megatron/model/realm_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/model/realm_model.py -------------------------------------------------------------------------------- /SHv0/megatron/model/t5_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/model/t5_model.py -------------------------------------------------------------------------------- /SHv0/megatron/model/transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/model/transformer.py -------------------------------------------------------------------------------- /SHv0/megatron/model/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/model/utils.py -------------------------------------------------------------------------------- /SHv0/megatron/model/vit_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/model/vit_model.py -------------------------------------------------------------------------------- /SHv0/megatron/mpu/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/mpu/__init__.py -------------------------------------------------------------------------------- /SHv0/megatron/mpu/cross_entropy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/mpu/cross_entropy.py -------------------------------------------------------------------------------- /SHv0/megatron/mpu/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/mpu/data.py -------------------------------------------------------------------------------- /SHv0/megatron/mpu/initialize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/mpu/initialize.py -------------------------------------------------------------------------------- /SHv0/megatron/mpu/layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/mpu/layers.py -------------------------------------------------------------------------------- /SHv0/megatron/mpu/mappings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/mpu/mappings.py -------------------------------------------------------------------------------- /SHv0/megatron/mpu/random.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/mpu/random.py -------------------------------------------------------------------------------- /SHv0/megatron/mpu/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /SHv0/megatron/mpu/tests/commons.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/mpu/tests/commons.py -------------------------------------------------------------------------------- /SHv0/megatron/mpu/tests/test_cross_entropy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/mpu/tests/test_cross_entropy.py -------------------------------------------------------------------------------- /SHv0/megatron/mpu/tests/test_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/mpu/tests/test_data.py -------------------------------------------------------------------------------- /SHv0/megatron/mpu/tests/test_initialize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/mpu/tests/test_initialize.py -------------------------------------------------------------------------------- /SHv0/megatron/mpu/tests/test_layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/mpu/tests/test_layers.py -------------------------------------------------------------------------------- /SHv0/megatron/mpu/tests/test_random.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/mpu/tests/test_random.py -------------------------------------------------------------------------------- /SHv0/megatron/mpu/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/mpu/utils.py -------------------------------------------------------------------------------- /SHv0/megatron/optimizer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/optimizer/__init__.py -------------------------------------------------------------------------------- /SHv0/megatron/optimizer/clip_grads.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/optimizer/clip_grads.py -------------------------------------------------------------------------------- /SHv0/megatron/optimizer/cpp/GL_CPU_Float16OptimizerWithFloat16Params.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/optimizer/cpp/GL_CPU_Float16OptimizerWithFloat16Params.cpp -------------------------------------------------------------------------------- /SHv0/megatron/optimizer/cpp/GL_CPU_Float16OptimizerWithFloat16Params.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/optimizer/cpp/GL_CPU_Float16OptimizerWithFloat16Params.h -------------------------------------------------------------------------------- /SHv0/megatron/optimizer/cpp/deepspeed/StopWatch.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/optimizer/cpp/deepspeed/StopWatch.h -------------------------------------------------------------------------------- /SHv0/megatron/optimizer/cpp/deepspeed/context.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/optimizer/cpp/deepspeed/context.h -------------------------------------------------------------------------------- /SHv0/megatron/optimizer/cpp/deepspeed/cpu_adam.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/optimizer/cpp/deepspeed/cpu_adam.cpp -------------------------------------------------------------------------------- /SHv0/megatron/optimizer/cpp/deepspeed/cpu_adam.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/optimizer/cpp/deepspeed/cpu_adam.h -------------------------------------------------------------------------------- /SHv0/megatron/optimizer/cpp/deepspeed/cublas_wrappers.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/optimizer/cpp/deepspeed/cublas_wrappers.h -------------------------------------------------------------------------------- /SHv0/megatron/optimizer/cpp/deepspeed/custom_cuda_kernel.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/optimizer/cpp/deepspeed/custom_cuda_kernel.cu -------------------------------------------------------------------------------- /SHv0/megatron/optimizer/cpp/deepspeed/custom_cuda_layers.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/optimizer/cpp/deepspeed/custom_cuda_layers.h -------------------------------------------------------------------------------- /SHv0/megatron/optimizer/cpp/deepspeed/gemm_test.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/optimizer/cpp/deepspeed/gemm_test.h -------------------------------------------------------------------------------- /SHv0/megatron/optimizer/cpp/deepspeed/simd.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/optimizer/cpp/deepspeed/simd.h -------------------------------------------------------------------------------- /SHv0/megatron/optimizer/cpp/offloading_utils.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/optimizer/cpp/offloading_utils.cpp -------------------------------------------------------------------------------- /SHv0/megatron/optimizer/cpp/torch_csrc_export.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/optimizer/cpp/torch_csrc_export.h -------------------------------------------------------------------------------- /SHv0/megatron/optimizer/grad_scaler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/optimizer/grad_scaler.py -------------------------------------------------------------------------------- /SHv0/megatron/optimizer/optimizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/optimizer/optimizer.py -------------------------------------------------------------------------------- /SHv0/megatron/p2p_communication.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/p2p_communication.py -------------------------------------------------------------------------------- /SHv0/megatron/schedules.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/schedules.py -------------------------------------------------------------------------------- /SHv0/megatron/text_generation_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/text_generation_server.py -------------------------------------------------------------------------------- /SHv0/megatron/text_generation_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/text_generation_utils.py -------------------------------------------------------------------------------- /SHv0/megatron/tokenizer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/tokenizer/__init__.py -------------------------------------------------------------------------------- /SHv0/megatron/tokenizer/bert_tokenization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/tokenizer/bert_tokenization.py -------------------------------------------------------------------------------- /SHv0/megatron/tokenizer/gpt2_tokenization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/tokenizer/gpt2_tokenization.py -------------------------------------------------------------------------------- /SHv0/megatron/tokenizer/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/tokenizer/tokenizer.py -------------------------------------------------------------------------------- /SHv0/megatron/training.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/training.py -------------------------------------------------------------------------------- /SHv0/megatron/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/megatron/utils.py -------------------------------------------------------------------------------- /SHv0/pretrain_bert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/pretrain_bert.py -------------------------------------------------------------------------------- /SHv0/pretrain_gpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/pretrain_gpt.py -------------------------------------------------------------------------------- /SHv0/pretrain_ict.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/pretrain_ict.py -------------------------------------------------------------------------------- /SHv0/pretrain_t5.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/pretrain_t5.py -------------------------------------------------------------------------------- /SHv0/pretrain_vit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/pretrain_vit.py -------------------------------------------------------------------------------- /SHv0/scripts/clean-cache.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/scripts/clean-cache.sh -------------------------------------------------------------------------------- /SHv0/scripts/deepspeed_cpu_adam._gl_.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/scripts/deepspeed_cpu_adam._gl_.py -------------------------------------------------------------------------------- /SHv0/scripts/deepspeed_cpu_adam._v0.5.8_.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/scripts/deepspeed_cpu_adam._v0.5.8_.py -------------------------------------------------------------------------------- /SHv0/scripts/distributed_c10d._gl_.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/scripts/distributed_c10d._gl_.py -------------------------------------------------------------------------------- /SHv0/scripts/distributed_c10d._v1.10.0_.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/scripts/distributed_c10d._v1.10.0_.py -------------------------------------------------------------------------------- /SHv0/scripts/function._gl_.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/scripts/function._gl_.py -------------------------------------------------------------------------------- /SHv0/scripts/function._v1.10.0_.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/scripts/function._v1.10.0_.py -------------------------------------------------------------------------------- /SHv0/scripts/init-mps.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/scripts/init-mps.sh -------------------------------------------------------------------------------- /SHv0/scripts/kill-all-python-procs.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/scripts/kill-all-python-procs.sh -------------------------------------------------------------------------------- /SHv0/scripts/stop-mps.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/scripts/stop-mps.sh -------------------------------------------------------------------------------- /SHv0/tasks/data_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/tasks/data_utils.py -------------------------------------------------------------------------------- /SHv0/tasks/ensemble_classifier.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/tasks/ensemble_classifier.py -------------------------------------------------------------------------------- /SHv0/tasks/eval_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/tasks/eval_utils.py -------------------------------------------------------------------------------- /SHv0/tasks/finetune_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/tasks/finetune_utils.py -------------------------------------------------------------------------------- /SHv0/tasks/glue/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/tasks/glue/data.py -------------------------------------------------------------------------------- /SHv0/tasks/glue/finetune.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/tasks/glue/finetune.py -------------------------------------------------------------------------------- /SHv0/tasks/glue/mnli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/tasks/glue/mnli.py -------------------------------------------------------------------------------- /SHv0/tasks/glue/qqp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/tasks/glue/qqp.py -------------------------------------------------------------------------------- /SHv0/tasks/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/tasks/main.py -------------------------------------------------------------------------------- /SHv0/tasks/orqa/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/tasks/orqa/README.md -------------------------------------------------------------------------------- /SHv0/tasks/orqa/evaluate_orqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/tasks/orqa/evaluate_orqa.py -------------------------------------------------------------------------------- /SHv0/tasks/orqa/evaluate_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/tasks/orqa/evaluate_utils.py -------------------------------------------------------------------------------- /SHv0/tasks/orqa/supervised/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/tasks/orqa/supervised/data.py -------------------------------------------------------------------------------- /SHv0/tasks/orqa/supervised/eval_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/tasks/orqa/supervised/eval_utils.py -------------------------------------------------------------------------------- /SHv0/tasks/orqa/supervised/finetune.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/tasks/orqa/supervised/finetune.py -------------------------------------------------------------------------------- /SHv0/tasks/orqa/unsupervised/nq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/tasks/orqa/unsupervised/nq.py -------------------------------------------------------------------------------- /SHv0/tasks/orqa/unsupervised/qa_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/tasks/orqa/unsupervised/qa_utils.py -------------------------------------------------------------------------------- /SHv0/tasks/orqa/unsupervised/tokenizers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/tasks/orqa/unsupervised/tokenizers.py -------------------------------------------------------------------------------- /SHv0/tasks/race/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/tasks/race/data.py -------------------------------------------------------------------------------- /SHv0/tasks/race/finetune.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/tasks/race/finetune.py -------------------------------------------------------------------------------- /SHv0/tasks/vision/classification.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/tasks/vision/classification.py -------------------------------------------------------------------------------- /SHv0/tasks/vision/eval_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/tasks/vision/eval_utils.py -------------------------------------------------------------------------------- /SHv0/tasks/vision/finetune_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/tasks/vision/finetune_utils.py -------------------------------------------------------------------------------- /SHv0/tasks/vision/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/tasks/vision/main.py -------------------------------------------------------------------------------- /SHv0/tasks/zeroshot_gpt/datasets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/tasks/zeroshot_gpt/datasets.py -------------------------------------------------------------------------------- /SHv0/tasks/zeroshot_gpt/detokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/tasks/zeroshot_gpt/detokenizer.py -------------------------------------------------------------------------------- /SHv0/tasks/zeroshot_gpt/evaluate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/tasks/zeroshot_gpt/evaluate.py -------------------------------------------------------------------------------- /SHv0/tests/cpp/foreach_non_finite_check_and_unscale.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/tests/cpp/foreach_non_finite_check_and_unscale.cpp -------------------------------------------------------------------------------- /SHv0/tests/cpp/foreach_non_finite_check_and_unscale.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/tests/cpp/foreach_non_finite_check_and_unscale.h -------------------------------------------------------------------------------- /SHv0/tests/cpp/tensor_to_cpu_or_cuda.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/tests/cpp/tensor_to_cpu_or_cuda.cpp -------------------------------------------------------------------------------- /SHv0/tests/cpp/tensor_to_cpu_or_cuda.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/tests/cpp/tensor_to_cpu_or_cuda.h -------------------------------------------------------------------------------- /SHv0/tests/test_bandwidth.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/tests/test_bandwidth.py -------------------------------------------------------------------------------- /SHv0/tests/test_basic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/tests/test_basic.py -------------------------------------------------------------------------------- /SHv0/tests/test_cpp_extention_parallel_in_ray.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/tests/test_cpp_extention_parallel_in_ray.py -------------------------------------------------------------------------------- /SHv0/tests/test_cuda_stream_in_ray_1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/tests/test_cuda_stream_in_ray_1.py -------------------------------------------------------------------------------- /SHv0/tests/test_cuda_stream_in_ray_2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/tests/test_cuda_stream_in_ray_2.py -------------------------------------------------------------------------------- /SHv0/tests/test_cuda_stream_in_ray_3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/tests/test_cuda_stream_in_ray_3.py -------------------------------------------------------------------------------- /SHv0/tests/test_cuda_stream_in_ray_4.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/tests/test_cuda_stream_in_ray_4.py -------------------------------------------------------------------------------- /SHv0/tests/test_distil_roberta.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/tests/test_distil_roberta.py -------------------------------------------------------------------------------- /SHv0/tests/test_distributed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/tests/test_distributed.py -------------------------------------------------------------------------------- /SHv0/tests/test_ray.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/tests/test_ray.py -------------------------------------------------------------------------------- /SHv0/tests/test_tensor_to_function_parallel_in_ray_via_cpp_extention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/tests/test_tensor_to_function_parallel_in_ray_via_cpp_extention.py -------------------------------------------------------------------------------- /SHv0/tools/linter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/tools/linter.py -------------------------------------------------------------------------------- /SHv0/tools/merge_mp_partitions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/tools/merge_mp_partitions.py -------------------------------------------------------------------------------- /SHv0/tools/openwebtext/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/tools/openwebtext/README.md -------------------------------------------------------------------------------- /SHv0/tools/openwebtext/add_id.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/tools/openwebtext/add_id.py -------------------------------------------------------------------------------- /SHv0/tools/openwebtext/blacklist_urls.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/tools/openwebtext/blacklist_urls.py -------------------------------------------------------------------------------- /SHv0/tools/openwebtext/cleanup_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/tools/openwebtext/cleanup_dataset.py -------------------------------------------------------------------------------- /SHv0/tools/openwebtext/cleanup_fix_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/tools/openwebtext/cleanup_fix_dataset.py -------------------------------------------------------------------------------- /SHv0/tools/openwebtext/filter_ngrams.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/tools/openwebtext/filter_ngrams.py -------------------------------------------------------------------------------- /SHv0/tools/openwebtext/find_duplicates.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/tools/openwebtext/find_duplicates.py -------------------------------------------------------------------------------- /SHv0/tools/openwebtext/group_duplicate_url.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/tools/openwebtext/group_duplicate_url.py -------------------------------------------------------------------------------- /SHv0/tools/openwebtext/merge_jsons.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/tools/openwebtext/merge_jsons.py -------------------------------------------------------------------------------- /SHv0/tools/openwebtext/remove_group_duplicates.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/tools/openwebtext/remove_group_duplicates.py -------------------------------------------------------------------------------- /SHv0/tools/preprocess_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/tools/preprocess_data.py -------------------------------------------------------------------------------- /SHv0/tools/run_text_generation_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/tools/run_text_generation_server.py -------------------------------------------------------------------------------- /SHv0/tools/text_generation_cli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/SHv0/tools/text_generation_cli.py -------------------------------------------------------------------------------- /examples/case1.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/examples/case1.sh -------------------------------------------------------------------------------- /examples/case1_draw.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/examples/case1_draw.py -------------------------------------------------------------------------------- /examples/case1_extract.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/examples/case1_extract.sh -------------------------------------------------------------------------------- /examples/case2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/examples/case2.sh -------------------------------------------------------------------------------- /examples/case2_draw.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/examples/case2_draw.py -------------------------------------------------------------------------------- /examples/case2_extract.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/examples/case2_extract.sh -------------------------------------------------------------------------------- /examples/case3.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/examples/case3.sh -------------------------------------------------------------------------------- /examples/case3_draw.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/examples/case3_draw.py -------------------------------------------------------------------------------- /examples/case3_extract.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/examples/case3_extract.sh -------------------------------------------------------------------------------- /examples/case4.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/examples/case4.sh -------------------------------------------------------------------------------- /examples/case4_draw.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/examples/case4_draw.py -------------------------------------------------------------------------------- /examples/case4_extract.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/examples/case4_extract.sh -------------------------------------------------------------------------------- /examples/case5.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/examples/case5.sh -------------------------------------------------------------------------------- /examples/case5_draw.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/examples/case5_draw.py -------------------------------------------------------------------------------- /examples/case5_extract.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/examples/case5_extract.sh -------------------------------------------------------------------------------- /examples/run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/examples/run.sh -------------------------------------------------------------------------------- /notebook/sc22ae.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/notebook/sc22ae.ipynb -------------------------------------------------------------------------------- /results/case1.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/results/case1.csv -------------------------------------------------------------------------------- /results/case2.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/results/case2.csv -------------------------------------------------------------------------------- /results/case3.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/results/case3.csv -------------------------------------------------------------------------------- /results/case4.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/results/case4.csv -------------------------------------------------------------------------------- /results/case5.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/results/case5.csv -------------------------------------------------------------------------------- /results/log_l2l_l-32_hs-2048_bs-4_ws-4_2022-07-02.1656736709.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/results/log_l2l_l-32_hs-2048_bs-4_ws-4_2022-07-02.1656736709.txt -------------------------------------------------------------------------------- /results/log_l2l_l-78_hs-2048_bs-4_ws-4_2022-07-02.1656730580.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/results/log_l2l_l-78_hs-2048_bs-4_ws-4_2022-07-02.1656730580.txt -------------------------------------------------------------------------------- /results/log_megatron-lm_l-32_hs-2048_bs-4_ws-4_2022-07-02.1656730240.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/results/log_megatron-lm_l-32_hs-2048_bs-4_ws-4_2022-07-02.1656730240.txt -------------------------------------------------------------------------------- /results/log_stronghold_l-100_hs-2048_bs-4_ws-4_2022-07-02.1656733406.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/results/log_stronghold_l-100_hs-2048_bs-4_ws-4_2022-07-02.1656733406.txt -------------------------------------------------------------------------------- /results/log_stronghold_l-16_hs-2048_bs-4_ws-15_2022-07-02.1656742529.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/results/log_stronghold_l-16_hs-2048_bs-4_ws-15_2022-07-02.1656742529.txt -------------------------------------------------------------------------------- /results/log_stronghold_l-24_hs-2048_bs-4_ws-15_2022-07-02.1656742286.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/results/log_stronghold_l-24_hs-2048_bs-4_ws-15_2022-07-02.1656742286.txt -------------------------------------------------------------------------------- /results/log_stronghold_l-32_hs-2048_bs-4_ws-10_2022-07-02.1656744387.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/results/log_stronghold_l-32_hs-2048_bs-4_ws-10_2022-07-02.1656744387.txt -------------------------------------------------------------------------------- /results/log_stronghold_l-32_hs-2048_bs-4_ws-12_2022-07-02.1656744774.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/results/log_stronghold_l-32_hs-2048_bs-4_ws-12_2022-07-02.1656744774.txt -------------------------------------------------------------------------------- /results/log_stronghold_l-32_hs-2048_bs-4_ws-14_2022-07-02.1656745146.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/results/log_stronghold_l-32_hs-2048_bs-4_ws-14_2022-07-02.1656745146.txt -------------------------------------------------------------------------------- /results/log_stronghold_l-32_hs-2048_bs-4_ws-15_2022-07-02.1656734808.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/results/log_stronghold_l-32_hs-2048_bs-4_ws-15_2022-07-02.1656734808.txt -------------------------------------------------------------------------------- /results/log_stronghold_l-32_hs-2048_bs-4_ws-2_2022-07-02.1656742692.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/results/log_stronghold_l-32_hs-2048_bs-4_ws-2_2022-07-02.1656742692.txt -------------------------------------------------------------------------------- /results/log_stronghold_l-32_hs-2048_bs-4_ws-4_2022-07-02.1656743137.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/results/log_stronghold_l-32_hs-2048_bs-4_ws-4_2022-07-02.1656743137.txt -------------------------------------------------------------------------------- /results/log_stronghold_l-32_hs-2048_bs-4_ws-6_2022-07-02.1656743570.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/results/log_stronghold_l-32_hs-2048_bs-4_ws-6_2022-07-02.1656743570.txt -------------------------------------------------------------------------------- /results/log_stronghold_l-32_hs-2048_bs-4_ws-8_2022-07-02.1656743979.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/results/log_stronghold_l-32_hs-2048_bs-4_ws-8_2022-07-02.1656743979.txt -------------------------------------------------------------------------------- /results/log_stronghold_l-40_hs-2048_bs-4_ws-15_2022-07-02.1656741833.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/results/log_stronghold_l-40_hs-2048_bs-4_ws-15_2022-07-02.1656741833.txt -------------------------------------------------------------------------------- /results/log_stronghold_l-48_hs-2048_bs-4_ws-15_2022-07-02.1656735152.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/results/log_stronghold_l-48_hs-2048_bs-4_ws-15_2022-07-02.1656735152.txt -------------------------------------------------------------------------------- /results/log_stronghold_l-56_hs-2048_bs-4_ws-15_2022-07-02.1656741167.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/results/log_stronghold_l-56_hs-2048_bs-4_ws-15_2022-07-02.1656741167.txt -------------------------------------------------------------------------------- /results/log_stronghold_l-64_hs-2048_bs-4_ws-15_2022-07-02.1656740376.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/results/log_stronghold_l-64_hs-2048_bs-4_ws-15_2022-07-02.1656740376.txt -------------------------------------------------------------------------------- /results/log_stronghold_l-78_hs-2048_bs-4_ws-15_2022-07-02.1656735719.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/results/log_stronghold_l-78_hs-2048_bs-4_ws-15_2022-07-02.1656735719.txt -------------------------------------------------------------------------------- /results/log_stronghold_l-92_hs-2048_bs-4_ws-15_2022-07-02.1656739206.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/results/log_stronghold_l-92_hs-2048_bs-4_ws-15_2022-07-02.1656739206.txt -------------------------------------------------------------------------------- /results/log_zero-infinity_l-32_hs-2048_bs-4_ws-4_2022-07-02.1656738599.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/results/log_zero-infinity_l-32_hs-2048_bs-4_ws-4_2022-07-02.1656738599.txt -------------------------------------------------------------------------------- /results/log_zero-infinity_l-48_hs-2048_bs-4_ws-4_2022-07-02.1656732519.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/results/log_zero-infinity_l-48_hs-2048_bs-4_ws-4_2022-07-02.1656732519.txt -------------------------------------------------------------------------------- /results/log_zero-offload_l-32_hs-2048_bs-4_ws-4_2022-07-02.1656738007.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/results/log_zero-offload_l-32_hs-2048_bs-4_ws-4_2022-07-02.1656738007.txt -------------------------------------------------------------------------------- /results/log_zero-offload_l-48_hs-2048_bs-4_ws-4_2022-07-02.1656731649.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xshaun/sc22-ae/HEAD/results/log_zero-offload_l-48_hs-2048_bs-4_ws-4_2022-07-02.1656731649.txt --------------------------------------------------------------------------------