├── .coveragerc ├── .github └── workflows │ └── documentation.yml ├── .gitignore ├── AUTHORS ├── LICENSE ├── README.md ├── docs ├── Makefile ├── _templates │ └── autosummary │ │ ├── base.rst │ │ ├── class.rst │ │ └── module.rst ├── api │ └── index.rst ├── conf.py ├── guide │ ├── faq.md │ ├── getting_started.md │ ├── index.md │ ├── instruction_tuning.md │ ├── tokenization.md │ └── weights_conversion.md ├── imgs │ └── llama-falcon.png ├── index.rst ├── make.bat └── requirements.txt ├── examples ├── evaluate_retriever_nq.sh ├── evaluate_zeroshot_gpt.sh ├── finetune.sh ├── finetune_mnli_distributed.sh ├── finetune_race_distributed.sh ├── finetune_retriever_distributed.sh ├── hf_to_megatron.sh ├── merge_mp_bert.sh ├── msdp │ ├── README.md │ ├── data_processing.sh │ ├── eval_knwl_generation.sh │ ├── eval_resp_generation.sh │ ├── prep_resp_gen.sh │ ├── prompt_knwl_gen.sh │ └── prompt_resp_gen.sh ├── parallelize.sh ├── pretrain_bert.sh ├── pretrain_bert_distributed.sh ├── pretrain_bert_distributed_with_mp.sh ├── pretrain_gpt.sh ├── pretrain_gpt3_175B.sh ├── pretrain_gpt_distributed.sh ├── pretrain_gpt_distributed_with_mp.sh ├── pretrain_ict.sh ├── pretrain_t5.sh ├── pretrain_t5_distributed.sh ├── pretrain_t5_distributed_with_mp.sh ├── run_text_generation_server_345M.sh ├── run_text_generation_server_345M_8_tensor_parallel.sh ├── sc21 │ ├── CONFIG.sh │ ├── README.md │ ├── SBATCH.sh │ ├── SRUN.sh │ ├── run_figure_11.sh │ ├── run_figure_12.sh │ ├── run_figure_13.sh │ ├── run_figure_14.sh │ ├── run_figure_15.sh │ ├── run_figure_16.sh │ ├── run_figure_17.sh │ ├── run_figure_18.sh │ └── run_table_1.sh └── verify.sh ├── finetune.py ├── megatron ├── __init__.py ├── arguments.py ├── checkpointing.py ├── core │ ├── __init__.py │ ├── parallel_state.py │ ├── tensor_parallel │ │ ├── __init__.py │ │ ├── cross_entropy.py │ │ ├── data.py │ │ ├── layers.py │ │ ├── mappings.py │ │ ├── random.py │ │ └── utils.py │ └── utils.py ├── data │ ├── Makefile │ ├── __init__.py │ ├── autoaugment.py │ ├── bert_dataset.py │ ├── biencoder_dataset_utils.py │ ├── blendable_dataset.py │ ├── data_samplers.py │ ├── dataset_utils.py │ ├── gpt_dataset.py │ ├── helpers.cpp │ ├── ict_dataset.py │ ├── image_folder.py │ ├── indexed_dataset.py │ ├── instruction_dataset.py │ ├── orqa_wiki_dataset.py │ ├── realm_dataset_utils.py │ ├── realm_index.py │ ├── t5_dataset.py │ └── test │ │ ├── test_indexed_dataset.py │ │ └── test_preprocess_data.sh ├── dist_signal_handler.py ├── fp16_deprecated │ └── loss_scaler.py ├── fused_kernels │ ├── __init__.py │ ├── compat.h │ ├── fused_weight_gradient_dense.cpp │ ├── fused_weight_gradient_dense.cu │ ├── layer_norm_cuda.cpp │ ├── layer_norm_cuda_kernel.cu │ ├── scaled_masked_softmax.cpp │ ├── scaled_masked_softmax.h │ ├── scaled_masked_softmax_cuda.cu │ ├── scaled_softmax.cpp │ ├── scaled_softmax_cuda.cu │ ├── scaled_upper_triang_masked_softmax.cpp │ ├── scaled_upper_triang_masked_softmax.h │ ├── scaled_upper_triang_masked_softmax_cuda.cu │ ├── tests │ │ ├── __init__.py │ │ └── test_fused_kernels.py │ └── type_shim.h ├── global_vars.py ├── indexer.py ├── initialize.py ├── memory.py ├── metrics.py ├── microbatches.py ├── model │ ├── __init__.py │ ├── bert_model.py │ ├── biencoder_model.py │ ├── classification.py │ ├── distributed.py │ ├── enums.py │ ├── falcon_model.py │ ├── fused_bias_gelu.py │ ├── fused_layer_norm.py │ ├── fused_softmax.py │ ├── glu_activations.py │ ├── gpt_model.py │ ├── language_model.py │ ├── llama_model.py │ ├── mistral_model.py │ ├── module.py │ ├── multiple_choice.py │ ├── positional_embeddings.py │ ├── t5_model.py │ ├── transformer.py │ └── utils.py ├── mpu │ └── tests │ │ ├── __init__.py │ │ ├── commons.py │ │ ├── test_cross_entropy.py │ │ ├── test_data.py │ │ ├── test_initialize.py │ │ ├── test_layers.py │ │ └── test_random.py ├── optimizer │ ├── __init__.py │ ├── clip_grads.py │ ├── distrib_optimizer.py │ ├── grad_scaler.py │ └── optimizer.py ├── optimizer_param_scheduler.py ├── p2p_communication.py ├── schedules.py ├── static │ └── index.html ├── text_generation │ ├── __init__.py │ ├── api.py │ ├── beam_utils.py │ ├── communication.py │ ├── forward_step.py │ ├── generation.py │ ├── sampling.py │ └── tokenization.py ├── text_generation_server.py ├── timers.py ├── tokenizer │ ├── __init__.py │ ├── bert_tokenization.py │ ├── gpt2_tokenization.py │ └── tokenizer.py ├── training.py ├── utils.py └── wandb_logger.py ├── pretrain_bert.py ├── pretrain_ict.py ├── pretrain_t5.py ├── requirements.txt ├── setup.py ├── tasks ├── data_utils.py ├── eval_utils.py ├── finetune_utils.py ├── glue │ ├── data.py │ ├── finetune.py │ ├── mnli.py │ └── qqp.py ├── main.py ├── msdp │ ├── README.md │ ├── evaluate.py │ ├── main.py │ ├── metrics.py │ ├── preprocessing.py │ └── prompt.py ├── orqa │ ├── README.md │ ├── evaluate_orqa.py │ ├── evaluate_utils.py │ ├── supervised │ │ ├── data.py │ │ ├── eval_utils.py │ │ └── finetune.py │ └── unsupervised │ │ ├── nq.py │ │ ├── qa_utils.py │ │ └── tokenizers.py ├── race │ ├── data.py │ └── finetune.py └── zeroshot_gpt │ ├── datasets.py │ ├── detokenizer.py │ └── evaluate.py ├── tests ├── __init__.py ├── conftest.py ├── pytest.ini ├── tensor_parallel │ ├── test_cross_entropy.py │ ├── test_data.py │ ├── test_mappings.py │ ├── test_random.py │ └── test_tensor_parallel_utils.py ├── test_activations.py ├── test_basic.py ├── test_layernorm_order.py ├── test_llama_weights.py ├── test_parallel_state.py ├── test_utilities.py ├── test_utils.py └── test_wandb.py ├── tools ├── checkpoint_loader_megatron.py ├── checkpoint_saver_megatron.py ├── checkpoint_util.py ├── linter.py ├── merge_datasets.py ├── openwebtext │ ├── README.md │ ├── add_id.py │ ├── blacklist_urls.py │ ├── cleanup_dataset.py │ ├── cleanup_fix_dataset.py │ ├── filter_ngrams.py │ ├── find_duplicates.py │ ├── group_duplicate_url.py │ ├── merge_jsons.py │ └── remove_group_duplicates.py ├── preprocess_data.py ├── preprocess_instruct_data.py ├── push_to_hub.py ├── run_text_generation_server.py └── text_generation_cli.py ├── verify_correctness.py └── weights_conversion ├── hf_to_megatron.py ├── megatron_to_hf.py └── utils ├── __init__.py ├── merge_llama.py └── permute_qkv.py /.coveragerc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/.coveragerc -------------------------------------------------------------------------------- /.github/workflows/documentation.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/.github/workflows/documentation.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/.gitignore -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/AUTHORS -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/README.md -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/docs/Makefile -------------------------------------------------------------------------------- /docs/_templates/autosummary/base.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/docs/_templates/autosummary/base.rst -------------------------------------------------------------------------------- /docs/_templates/autosummary/class.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/docs/_templates/autosummary/class.rst -------------------------------------------------------------------------------- /docs/_templates/autosummary/module.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/docs/_templates/autosummary/module.rst -------------------------------------------------------------------------------- /docs/api/index.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/docs/api/index.rst -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/docs/conf.py -------------------------------------------------------------------------------- /docs/guide/faq.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/docs/guide/faq.md -------------------------------------------------------------------------------- /docs/guide/getting_started.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/docs/guide/getting_started.md -------------------------------------------------------------------------------- /docs/guide/index.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/docs/guide/index.md -------------------------------------------------------------------------------- /docs/guide/instruction_tuning.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/docs/guide/instruction_tuning.md -------------------------------------------------------------------------------- /docs/guide/tokenization.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/docs/guide/tokenization.md -------------------------------------------------------------------------------- /docs/guide/weights_conversion.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/docs/guide/weights_conversion.md -------------------------------------------------------------------------------- /docs/imgs/llama-falcon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/docs/imgs/llama-falcon.png -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/docs/index.rst -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/docs/make.bat -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/docs/requirements.txt -------------------------------------------------------------------------------- /examples/evaluate_retriever_nq.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/evaluate_retriever_nq.sh -------------------------------------------------------------------------------- /examples/evaluate_zeroshot_gpt.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/evaluate_zeroshot_gpt.sh -------------------------------------------------------------------------------- /examples/finetune.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/finetune.sh -------------------------------------------------------------------------------- /examples/finetune_mnli_distributed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/finetune_mnli_distributed.sh -------------------------------------------------------------------------------- /examples/finetune_race_distributed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/finetune_race_distributed.sh -------------------------------------------------------------------------------- /examples/finetune_retriever_distributed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/finetune_retriever_distributed.sh -------------------------------------------------------------------------------- /examples/hf_to_megatron.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/hf_to_megatron.sh -------------------------------------------------------------------------------- /examples/merge_mp_bert.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/merge_mp_bert.sh -------------------------------------------------------------------------------- /examples/msdp/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/msdp/README.md -------------------------------------------------------------------------------- /examples/msdp/data_processing.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/msdp/data_processing.sh -------------------------------------------------------------------------------- /examples/msdp/eval_knwl_generation.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/msdp/eval_knwl_generation.sh -------------------------------------------------------------------------------- /examples/msdp/eval_resp_generation.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/msdp/eval_resp_generation.sh -------------------------------------------------------------------------------- /examples/msdp/prep_resp_gen.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/msdp/prep_resp_gen.sh -------------------------------------------------------------------------------- /examples/msdp/prompt_knwl_gen.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/msdp/prompt_knwl_gen.sh -------------------------------------------------------------------------------- /examples/msdp/prompt_resp_gen.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/msdp/prompt_resp_gen.sh -------------------------------------------------------------------------------- /examples/parallelize.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/parallelize.sh -------------------------------------------------------------------------------- /examples/pretrain_bert.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/pretrain_bert.sh -------------------------------------------------------------------------------- /examples/pretrain_bert_distributed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/pretrain_bert_distributed.sh -------------------------------------------------------------------------------- /examples/pretrain_bert_distributed_with_mp.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/pretrain_bert_distributed_with_mp.sh -------------------------------------------------------------------------------- /examples/pretrain_gpt.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/pretrain_gpt.sh -------------------------------------------------------------------------------- /examples/pretrain_gpt3_175B.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/pretrain_gpt3_175B.sh -------------------------------------------------------------------------------- /examples/pretrain_gpt_distributed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/pretrain_gpt_distributed.sh -------------------------------------------------------------------------------- /examples/pretrain_gpt_distributed_with_mp.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/pretrain_gpt_distributed_with_mp.sh -------------------------------------------------------------------------------- /examples/pretrain_ict.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/pretrain_ict.sh -------------------------------------------------------------------------------- /examples/pretrain_t5.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/pretrain_t5.sh -------------------------------------------------------------------------------- /examples/pretrain_t5_distributed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/pretrain_t5_distributed.sh -------------------------------------------------------------------------------- /examples/pretrain_t5_distributed_with_mp.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/pretrain_t5_distributed_with_mp.sh -------------------------------------------------------------------------------- /examples/run_text_generation_server_345M.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/run_text_generation_server_345M.sh -------------------------------------------------------------------------------- /examples/run_text_generation_server_345M_8_tensor_parallel.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/run_text_generation_server_345M_8_tensor_parallel.sh -------------------------------------------------------------------------------- /examples/sc21/CONFIG.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/sc21/CONFIG.sh -------------------------------------------------------------------------------- /examples/sc21/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/sc21/README.md -------------------------------------------------------------------------------- /examples/sc21/SBATCH.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/sc21/SBATCH.sh -------------------------------------------------------------------------------- /examples/sc21/SRUN.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/sc21/SRUN.sh -------------------------------------------------------------------------------- /examples/sc21/run_figure_11.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/sc21/run_figure_11.sh -------------------------------------------------------------------------------- /examples/sc21/run_figure_12.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/sc21/run_figure_12.sh -------------------------------------------------------------------------------- /examples/sc21/run_figure_13.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/sc21/run_figure_13.sh -------------------------------------------------------------------------------- /examples/sc21/run_figure_14.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/sc21/run_figure_14.sh -------------------------------------------------------------------------------- /examples/sc21/run_figure_15.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/sc21/run_figure_15.sh -------------------------------------------------------------------------------- /examples/sc21/run_figure_16.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/sc21/run_figure_16.sh -------------------------------------------------------------------------------- /examples/sc21/run_figure_17.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/sc21/run_figure_17.sh -------------------------------------------------------------------------------- /examples/sc21/run_figure_18.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/sc21/run_figure_18.sh -------------------------------------------------------------------------------- /examples/sc21/run_table_1.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/sc21/run_table_1.sh -------------------------------------------------------------------------------- /examples/verify.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/verify.sh -------------------------------------------------------------------------------- /finetune.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/finetune.py -------------------------------------------------------------------------------- /megatron/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/__init__.py -------------------------------------------------------------------------------- /megatron/arguments.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/arguments.py -------------------------------------------------------------------------------- /megatron/checkpointing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/checkpointing.py -------------------------------------------------------------------------------- /megatron/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/core/__init__.py -------------------------------------------------------------------------------- /megatron/core/parallel_state.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/core/parallel_state.py -------------------------------------------------------------------------------- /megatron/core/tensor_parallel/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/core/tensor_parallel/__init__.py -------------------------------------------------------------------------------- /megatron/core/tensor_parallel/cross_entropy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/core/tensor_parallel/cross_entropy.py -------------------------------------------------------------------------------- /megatron/core/tensor_parallel/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/core/tensor_parallel/data.py -------------------------------------------------------------------------------- /megatron/core/tensor_parallel/layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/core/tensor_parallel/layers.py -------------------------------------------------------------------------------- /megatron/core/tensor_parallel/mappings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/core/tensor_parallel/mappings.py -------------------------------------------------------------------------------- /megatron/core/tensor_parallel/random.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/core/tensor_parallel/random.py -------------------------------------------------------------------------------- /megatron/core/tensor_parallel/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/core/tensor_parallel/utils.py -------------------------------------------------------------------------------- /megatron/core/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/core/utils.py -------------------------------------------------------------------------------- /megatron/data/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/data/Makefile -------------------------------------------------------------------------------- /megatron/data/__init__.py: -------------------------------------------------------------------------------- 1 | from . import indexed_dataset 2 | -------------------------------------------------------------------------------- /megatron/data/autoaugment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/data/autoaugment.py -------------------------------------------------------------------------------- /megatron/data/bert_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/data/bert_dataset.py -------------------------------------------------------------------------------- /megatron/data/biencoder_dataset_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/data/biencoder_dataset_utils.py -------------------------------------------------------------------------------- /megatron/data/blendable_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/data/blendable_dataset.py -------------------------------------------------------------------------------- /megatron/data/data_samplers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/data/data_samplers.py -------------------------------------------------------------------------------- /megatron/data/dataset_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/data/dataset_utils.py -------------------------------------------------------------------------------- /megatron/data/gpt_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/data/gpt_dataset.py -------------------------------------------------------------------------------- /megatron/data/helpers.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/data/helpers.cpp -------------------------------------------------------------------------------- /megatron/data/ict_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/data/ict_dataset.py -------------------------------------------------------------------------------- /megatron/data/image_folder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/data/image_folder.py -------------------------------------------------------------------------------- /megatron/data/indexed_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/data/indexed_dataset.py -------------------------------------------------------------------------------- /megatron/data/instruction_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/data/instruction_dataset.py -------------------------------------------------------------------------------- /megatron/data/orqa_wiki_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/data/orqa_wiki_dataset.py -------------------------------------------------------------------------------- /megatron/data/realm_dataset_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/data/realm_dataset_utils.py -------------------------------------------------------------------------------- /megatron/data/realm_index.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/data/realm_index.py -------------------------------------------------------------------------------- /megatron/data/t5_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/data/t5_dataset.py -------------------------------------------------------------------------------- /megatron/data/test/test_indexed_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/data/test/test_indexed_dataset.py -------------------------------------------------------------------------------- /megatron/data/test/test_preprocess_data.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/data/test/test_preprocess_data.sh -------------------------------------------------------------------------------- /megatron/dist_signal_handler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/dist_signal_handler.py -------------------------------------------------------------------------------- /megatron/fp16_deprecated/loss_scaler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/fp16_deprecated/loss_scaler.py -------------------------------------------------------------------------------- /megatron/fused_kernels/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/fused_kernels/__init__.py -------------------------------------------------------------------------------- /megatron/fused_kernels/compat.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/fused_kernels/compat.h -------------------------------------------------------------------------------- /megatron/fused_kernels/fused_weight_gradient_dense.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/fused_kernels/fused_weight_gradient_dense.cpp -------------------------------------------------------------------------------- /megatron/fused_kernels/fused_weight_gradient_dense.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/fused_kernels/fused_weight_gradient_dense.cu -------------------------------------------------------------------------------- /megatron/fused_kernels/layer_norm_cuda.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/fused_kernels/layer_norm_cuda.cpp -------------------------------------------------------------------------------- /megatron/fused_kernels/layer_norm_cuda_kernel.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/fused_kernels/layer_norm_cuda_kernel.cu -------------------------------------------------------------------------------- /megatron/fused_kernels/scaled_masked_softmax.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/fused_kernels/scaled_masked_softmax.cpp -------------------------------------------------------------------------------- /megatron/fused_kernels/scaled_masked_softmax.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/fused_kernels/scaled_masked_softmax.h -------------------------------------------------------------------------------- /megatron/fused_kernels/scaled_masked_softmax_cuda.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/fused_kernels/scaled_masked_softmax_cuda.cu -------------------------------------------------------------------------------- /megatron/fused_kernels/scaled_softmax.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/fused_kernels/scaled_softmax.cpp -------------------------------------------------------------------------------- /megatron/fused_kernels/scaled_softmax_cuda.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/fused_kernels/scaled_softmax_cuda.cu -------------------------------------------------------------------------------- /megatron/fused_kernels/scaled_upper_triang_masked_softmax.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/fused_kernels/scaled_upper_triang_masked_softmax.cpp -------------------------------------------------------------------------------- /megatron/fused_kernels/scaled_upper_triang_masked_softmax.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h -------------------------------------------------------------------------------- /megatron/fused_kernels/scaled_upper_triang_masked_softmax_cuda.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/fused_kernels/scaled_upper_triang_masked_softmax_cuda.cu -------------------------------------------------------------------------------- /megatron/fused_kernels/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /megatron/fused_kernels/tests/test_fused_kernels.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/fused_kernels/tests/test_fused_kernels.py -------------------------------------------------------------------------------- /megatron/fused_kernels/type_shim.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/fused_kernels/type_shim.h -------------------------------------------------------------------------------- /megatron/global_vars.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/global_vars.py -------------------------------------------------------------------------------- /megatron/indexer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/indexer.py -------------------------------------------------------------------------------- /megatron/initialize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/initialize.py -------------------------------------------------------------------------------- /megatron/memory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/memory.py -------------------------------------------------------------------------------- /megatron/metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/metrics.py -------------------------------------------------------------------------------- /megatron/microbatches.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/microbatches.py -------------------------------------------------------------------------------- /megatron/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/model/__init__.py -------------------------------------------------------------------------------- /megatron/model/bert_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/model/bert_model.py -------------------------------------------------------------------------------- /megatron/model/biencoder_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/model/biencoder_model.py -------------------------------------------------------------------------------- /megatron/model/classification.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/model/classification.py -------------------------------------------------------------------------------- /megatron/model/distributed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/model/distributed.py -------------------------------------------------------------------------------- /megatron/model/enums.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/model/enums.py -------------------------------------------------------------------------------- /megatron/model/falcon_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/model/falcon_model.py -------------------------------------------------------------------------------- /megatron/model/fused_bias_gelu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/model/fused_bias_gelu.py -------------------------------------------------------------------------------- /megatron/model/fused_layer_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/model/fused_layer_norm.py -------------------------------------------------------------------------------- /megatron/model/fused_softmax.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/model/fused_softmax.py -------------------------------------------------------------------------------- /megatron/model/glu_activations.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/model/glu_activations.py -------------------------------------------------------------------------------- /megatron/model/gpt_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/model/gpt_model.py -------------------------------------------------------------------------------- /megatron/model/language_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/model/language_model.py -------------------------------------------------------------------------------- /megatron/model/llama_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/model/llama_model.py -------------------------------------------------------------------------------- /megatron/model/mistral_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/model/mistral_model.py -------------------------------------------------------------------------------- /megatron/model/module.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/model/module.py -------------------------------------------------------------------------------- /megatron/model/multiple_choice.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/model/multiple_choice.py -------------------------------------------------------------------------------- /megatron/model/positional_embeddings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/model/positional_embeddings.py -------------------------------------------------------------------------------- /megatron/model/t5_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/model/t5_model.py -------------------------------------------------------------------------------- /megatron/model/transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/model/transformer.py -------------------------------------------------------------------------------- /megatron/model/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/model/utils.py -------------------------------------------------------------------------------- /megatron/mpu/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /megatron/mpu/tests/commons.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/mpu/tests/commons.py -------------------------------------------------------------------------------- /megatron/mpu/tests/test_cross_entropy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/mpu/tests/test_cross_entropy.py -------------------------------------------------------------------------------- /megatron/mpu/tests/test_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/mpu/tests/test_data.py -------------------------------------------------------------------------------- /megatron/mpu/tests/test_initialize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/mpu/tests/test_initialize.py -------------------------------------------------------------------------------- /megatron/mpu/tests/test_layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/mpu/tests/test_layers.py -------------------------------------------------------------------------------- /megatron/mpu/tests/test_random.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/mpu/tests/test_random.py -------------------------------------------------------------------------------- /megatron/optimizer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/optimizer/__init__.py -------------------------------------------------------------------------------- /megatron/optimizer/clip_grads.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/optimizer/clip_grads.py -------------------------------------------------------------------------------- /megatron/optimizer/distrib_optimizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/optimizer/distrib_optimizer.py -------------------------------------------------------------------------------- /megatron/optimizer/grad_scaler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/optimizer/grad_scaler.py -------------------------------------------------------------------------------- /megatron/optimizer/optimizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/optimizer/optimizer.py -------------------------------------------------------------------------------- /megatron/optimizer_param_scheduler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/optimizer_param_scheduler.py -------------------------------------------------------------------------------- /megatron/p2p_communication.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/p2p_communication.py -------------------------------------------------------------------------------- /megatron/schedules.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/schedules.py -------------------------------------------------------------------------------- /megatron/static/index.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/static/index.html -------------------------------------------------------------------------------- /megatron/text_generation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/text_generation/__init__.py -------------------------------------------------------------------------------- /megatron/text_generation/api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/text_generation/api.py -------------------------------------------------------------------------------- /megatron/text_generation/beam_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/text_generation/beam_utils.py -------------------------------------------------------------------------------- /megatron/text_generation/communication.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/text_generation/communication.py -------------------------------------------------------------------------------- /megatron/text_generation/forward_step.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/text_generation/forward_step.py -------------------------------------------------------------------------------- /megatron/text_generation/generation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/text_generation/generation.py -------------------------------------------------------------------------------- /megatron/text_generation/sampling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/text_generation/sampling.py -------------------------------------------------------------------------------- /megatron/text_generation/tokenization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/text_generation/tokenization.py -------------------------------------------------------------------------------- /megatron/text_generation_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/text_generation_server.py -------------------------------------------------------------------------------- /megatron/timers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/timers.py -------------------------------------------------------------------------------- /megatron/tokenizer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/tokenizer/__init__.py -------------------------------------------------------------------------------- /megatron/tokenizer/bert_tokenization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/tokenizer/bert_tokenization.py -------------------------------------------------------------------------------- /megatron/tokenizer/gpt2_tokenization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/tokenizer/gpt2_tokenization.py -------------------------------------------------------------------------------- /megatron/tokenizer/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/tokenizer/tokenizer.py -------------------------------------------------------------------------------- /megatron/training.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/training.py -------------------------------------------------------------------------------- /megatron/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/utils.py -------------------------------------------------------------------------------- /megatron/wandb_logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/wandb_logger.py -------------------------------------------------------------------------------- /pretrain_bert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/pretrain_bert.py -------------------------------------------------------------------------------- /pretrain_ict.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/pretrain_ict.py -------------------------------------------------------------------------------- /pretrain_t5.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/pretrain_t5.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/requirements.txt -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/setup.py -------------------------------------------------------------------------------- /tasks/data_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tasks/data_utils.py -------------------------------------------------------------------------------- /tasks/eval_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tasks/eval_utils.py -------------------------------------------------------------------------------- /tasks/finetune_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tasks/finetune_utils.py -------------------------------------------------------------------------------- /tasks/glue/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tasks/glue/data.py -------------------------------------------------------------------------------- /tasks/glue/finetune.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tasks/glue/finetune.py -------------------------------------------------------------------------------- /tasks/glue/mnli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tasks/glue/mnli.py -------------------------------------------------------------------------------- /tasks/glue/qqp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tasks/glue/qqp.py -------------------------------------------------------------------------------- /tasks/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tasks/main.py -------------------------------------------------------------------------------- /tasks/msdp/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tasks/msdp/README.md -------------------------------------------------------------------------------- /tasks/msdp/evaluate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tasks/msdp/evaluate.py -------------------------------------------------------------------------------- /tasks/msdp/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tasks/msdp/main.py -------------------------------------------------------------------------------- /tasks/msdp/metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tasks/msdp/metrics.py -------------------------------------------------------------------------------- /tasks/msdp/preprocessing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tasks/msdp/preprocessing.py -------------------------------------------------------------------------------- /tasks/msdp/prompt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tasks/msdp/prompt.py -------------------------------------------------------------------------------- /tasks/orqa/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tasks/orqa/README.md -------------------------------------------------------------------------------- /tasks/orqa/evaluate_orqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tasks/orqa/evaluate_orqa.py -------------------------------------------------------------------------------- /tasks/orqa/evaluate_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tasks/orqa/evaluate_utils.py -------------------------------------------------------------------------------- /tasks/orqa/supervised/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tasks/orqa/supervised/data.py -------------------------------------------------------------------------------- /tasks/orqa/supervised/eval_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tasks/orqa/supervised/eval_utils.py -------------------------------------------------------------------------------- /tasks/orqa/supervised/finetune.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tasks/orqa/supervised/finetune.py -------------------------------------------------------------------------------- /tasks/orqa/unsupervised/nq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tasks/orqa/unsupervised/nq.py -------------------------------------------------------------------------------- /tasks/orqa/unsupervised/qa_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tasks/orqa/unsupervised/qa_utils.py -------------------------------------------------------------------------------- /tasks/orqa/unsupervised/tokenizers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tasks/orqa/unsupervised/tokenizers.py -------------------------------------------------------------------------------- /tasks/race/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tasks/race/data.py -------------------------------------------------------------------------------- /tasks/race/finetune.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tasks/race/finetune.py -------------------------------------------------------------------------------- /tasks/zeroshot_gpt/datasets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tasks/zeroshot_gpt/datasets.py -------------------------------------------------------------------------------- /tasks/zeroshot_gpt/detokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tasks/zeroshot_gpt/detokenizer.py -------------------------------------------------------------------------------- /tasks/zeroshot_gpt/evaluate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tasks/zeroshot_gpt/evaluate.py -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tests/conftest.py -------------------------------------------------------------------------------- /tests/pytest.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tests/pytest.ini -------------------------------------------------------------------------------- /tests/tensor_parallel/test_cross_entropy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tests/tensor_parallel/test_cross_entropy.py -------------------------------------------------------------------------------- /tests/tensor_parallel/test_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tests/tensor_parallel/test_data.py -------------------------------------------------------------------------------- /tests/tensor_parallel/test_mappings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tests/tensor_parallel/test_mappings.py -------------------------------------------------------------------------------- /tests/tensor_parallel/test_random.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tests/tensor_parallel/test_random.py -------------------------------------------------------------------------------- /tests/tensor_parallel/test_tensor_parallel_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tests/tensor_parallel/test_tensor_parallel_utils.py -------------------------------------------------------------------------------- /tests/test_activations.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tests/test_activations.py -------------------------------------------------------------------------------- /tests/test_basic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tests/test_basic.py -------------------------------------------------------------------------------- /tests/test_layernorm_order.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tests/test_layernorm_order.py -------------------------------------------------------------------------------- /tests/test_llama_weights.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tests/test_llama_weights.py -------------------------------------------------------------------------------- /tests/test_parallel_state.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tests/test_parallel_state.py -------------------------------------------------------------------------------- /tests/test_utilities.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tests/test_utilities.py -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tests/test_utils.py -------------------------------------------------------------------------------- /tests/test_wandb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tests/test_wandb.py -------------------------------------------------------------------------------- /tools/checkpoint_loader_megatron.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tools/checkpoint_loader_megatron.py -------------------------------------------------------------------------------- /tools/checkpoint_saver_megatron.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tools/checkpoint_saver_megatron.py -------------------------------------------------------------------------------- /tools/checkpoint_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tools/checkpoint_util.py -------------------------------------------------------------------------------- /tools/linter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tools/linter.py -------------------------------------------------------------------------------- /tools/merge_datasets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tools/merge_datasets.py -------------------------------------------------------------------------------- /tools/openwebtext/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tools/openwebtext/README.md -------------------------------------------------------------------------------- /tools/openwebtext/add_id.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tools/openwebtext/add_id.py -------------------------------------------------------------------------------- /tools/openwebtext/blacklist_urls.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tools/openwebtext/blacklist_urls.py -------------------------------------------------------------------------------- /tools/openwebtext/cleanup_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tools/openwebtext/cleanup_dataset.py -------------------------------------------------------------------------------- /tools/openwebtext/cleanup_fix_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tools/openwebtext/cleanup_fix_dataset.py -------------------------------------------------------------------------------- /tools/openwebtext/filter_ngrams.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tools/openwebtext/filter_ngrams.py -------------------------------------------------------------------------------- /tools/openwebtext/find_duplicates.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tools/openwebtext/find_duplicates.py -------------------------------------------------------------------------------- /tools/openwebtext/group_duplicate_url.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tools/openwebtext/group_duplicate_url.py -------------------------------------------------------------------------------- /tools/openwebtext/merge_jsons.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tools/openwebtext/merge_jsons.py -------------------------------------------------------------------------------- /tools/openwebtext/remove_group_duplicates.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tools/openwebtext/remove_group_duplicates.py -------------------------------------------------------------------------------- /tools/preprocess_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tools/preprocess_data.py -------------------------------------------------------------------------------- /tools/preprocess_instruct_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tools/preprocess_instruct_data.py -------------------------------------------------------------------------------- /tools/push_to_hub.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tools/push_to_hub.py -------------------------------------------------------------------------------- /tools/run_text_generation_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tools/run_text_generation_server.py -------------------------------------------------------------------------------- /tools/text_generation_cli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tools/text_generation_cli.py -------------------------------------------------------------------------------- /verify_correctness.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/verify_correctness.py -------------------------------------------------------------------------------- /weights_conversion/hf_to_megatron.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/weights_conversion/hf_to_megatron.py -------------------------------------------------------------------------------- /weights_conversion/megatron_to_hf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/weights_conversion/megatron_to_hf.py -------------------------------------------------------------------------------- /weights_conversion/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/weights_conversion/utils/__init__.py -------------------------------------------------------------------------------- /weights_conversion/utils/merge_llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/weights_conversion/utils/merge_llama.py -------------------------------------------------------------------------------- /weights_conversion/utils/permute_qkv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/weights_conversion/utils/permute_qkv.py --------------------------------------------------------------------------------