├── .coveragerc
├── .github
    └── workflows
    │   └── documentation.yml
├── .gitignore
├── AUTHORS
├── LICENSE
├── README.md
├── docs
    ├── Makefile
    ├── _templates
    │   └── autosummary
    │   │   ├── base.rst
    │   │   ├── class.rst
    │   │   └── module.rst
    ├── api
    │   └── index.rst
    ├── conf.py
    ├── guide
    │   ├── faq.md
    │   ├── getting_started.md
    │   ├── index.md
    │   ├── instruction_tuning.md
    │   ├── tokenization.md
    │   └── weights_conversion.md
    ├── imgs
    │   └── llama-falcon.png
    ├── index.rst
    ├── make.bat
    └── requirements.txt
├── examples
    ├── evaluate_retriever_nq.sh
    ├── evaluate_zeroshot_gpt.sh
    ├── finetune.sh
    ├── finetune_mnli_distributed.sh
    ├── finetune_race_distributed.sh
    ├── finetune_retriever_distributed.sh
    ├── hf_to_megatron.sh
    ├── merge_mp_bert.sh
    ├── msdp
    │   ├── README.md
    │   ├── data_processing.sh
    │   ├── eval_knwl_generation.sh
    │   ├── eval_resp_generation.sh
    │   ├── prep_resp_gen.sh
    │   ├── prompt_knwl_gen.sh
    │   └── prompt_resp_gen.sh
    ├── parallelize.sh
    ├── pretrain_bert.sh
    ├── pretrain_bert_distributed.sh
    ├── pretrain_bert_distributed_with_mp.sh
    ├── pretrain_gpt.sh
    ├── pretrain_gpt3_175B.sh
    ├── pretrain_gpt_distributed.sh
    ├── pretrain_gpt_distributed_with_mp.sh
    ├── pretrain_ict.sh
    ├── pretrain_t5.sh
    ├── pretrain_t5_distributed.sh
    ├── pretrain_t5_distributed_with_mp.sh
    ├── run_text_generation_server_345M.sh
    ├── run_text_generation_server_345M_8_tensor_parallel.sh
    ├── sc21
    │   ├── CONFIG.sh
    │   ├── README.md
    │   ├── SBATCH.sh
    │   ├── SRUN.sh
    │   ├── run_figure_11.sh
    │   ├── run_figure_12.sh
    │   ├── run_figure_13.sh
    │   ├── run_figure_14.sh
    │   ├── run_figure_15.sh
    │   ├── run_figure_16.sh
    │   ├── run_figure_17.sh
    │   ├── run_figure_18.sh
    │   └── run_table_1.sh
    └── verify.sh
├── finetune.py
├── megatron
    ├── __init__.py
    ├── arguments.py
    ├── checkpointing.py
    ├── core
    │   ├── __init__.py
    │   ├── parallel_state.py
    │   ├── tensor_parallel
    │   │   ├── __init__.py
    │   │   ├── cross_entropy.py
    │   │   ├── data.py
    │   │   ├── layers.py
    │   │   ├── mappings.py
    │   │   ├── random.py
    │   │   └── utils.py
    │   └── utils.py
    ├── data
    │   ├── Makefile
    │   ├── __init__.py
    │   ├── autoaugment.py
    │   ├── bert_dataset.py
    │   ├── biencoder_dataset_utils.py
    │   ├── blendable_dataset.py
    │   ├── data_samplers.py
    │   ├── dataset_utils.py
    │   ├── gpt_dataset.py
    │   ├── helpers.cpp
    │   ├── ict_dataset.py
    │   ├── image_folder.py
    │   ├── indexed_dataset.py
    │   ├── instruction_dataset.py
    │   ├── orqa_wiki_dataset.py
    │   ├── realm_dataset_utils.py
    │   ├── realm_index.py
    │   ├── t5_dataset.py
    │   └── test
    │   │   ├── test_indexed_dataset.py
    │   │   └── test_preprocess_data.sh
    ├── dist_signal_handler.py
    ├── fp16_deprecated
    │   └── loss_scaler.py
    ├── fused_kernels
    │   ├── __init__.py
    │   ├── compat.h
    │   ├── fused_weight_gradient_dense.cpp
    │   ├── fused_weight_gradient_dense.cu
    │   ├── layer_norm_cuda.cpp
    │   ├── layer_norm_cuda_kernel.cu
    │   ├── scaled_masked_softmax.cpp
    │   ├── scaled_masked_softmax.h
    │   ├── scaled_masked_softmax_cuda.cu
    │   ├── scaled_softmax.cpp
    │   ├── scaled_softmax_cuda.cu
    │   ├── scaled_upper_triang_masked_softmax.cpp
    │   ├── scaled_upper_triang_masked_softmax.h
    │   ├── scaled_upper_triang_masked_softmax_cuda.cu
    │   ├── tests
    │   │   ├── __init__.py
    │   │   └── test_fused_kernels.py
    │   └── type_shim.h
    ├── global_vars.py
    ├── indexer.py
    ├── initialize.py
    ├── memory.py
    ├── metrics.py
    ├── microbatches.py
    ├── model
    │   ├── __init__.py
    │   ├── bert_model.py
    │   ├── biencoder_model.py
    │   ├── classification.py
    │   ├── distributed.py
    │   ├── enums.py
    │   ├── falcon_model.py
    │   ├── fused_bias_gelu.py
    │   ├── fused_layer_norm.py
    │   ├── fused_softmax.py
    │   ├── glu_activations.py
    │   ├── gpt_model.py
    │   ├── language_model.py
    │   ├── llama_model.py
    │   ├── mistral_model.py
    │   ├── module.py
    │   ├── multiple_choice.py
    │   ├── positional_embeddings.py
    │   ├── t5_model.py
    │   ├── transformer.py
    │   └── utils.py
    ├── mpu
    │   └── tests
    │   │   ├── __init__.py
    │   │   ├── commons.py
    │   │   ├── test_cross_entropy.py
    │   │   ├── test_data.py
    │   │   ├── test_initialize.py
    │   │   ├── test_layers.py
    │   │   └── test_random.py
    ├── optimizer
    │   ├── __init__.py
    │   ├── clip_grads.py
    │   ├── distrib_optimizer.py
    │   ├── grad_scaler.py
    │   └── optimizer.py
    ├── optimizer_param_scheduler.py
    ├── p2p_communication.py
    ├── schedules.py
    ├── static
    │   └── index.html
    ├── text_generation
    │   ├── __init__.py
    │   ├── api.py
    │   ├── beam_utils.py
    │   ├── communication.py
    │   ├── forward_step.py
    │   ├── generation.py
    │   ├── sampling.py
    │   └── tokenization.py
    ├── text_generation_server.py
    ├── timers.py
    ├── tokenizer
    │   ├── __init__.py
    │   ├── bert_tokenization.py
    │   ├── gpt2_tokenization.py
    │   └── tokenizer.py
    ├── training.py
    ├── utils.py
    └── wandb_logger.py
├── pretrain_bert.py
├── pretrain_ict.py
├── pretrain_t5.py
├── requirements.txt
├── setup.py
├── tasks
    ├── data_utils.py
    ├── eval_utils.py
    ├── finetune_utils.py
    ├── glue
    │   ├── data.py
    │   ├── finetune.py
    │   ├── mnli.py
    │   └── qqp.py
    ├── main.py
    ├── msdp
    │   ├── README.md
    │   ├── evaluate.py
    │   ├── main.py
    │   ├── metrics.py
    │   ├── preprocessing.py
    │   └── prompt.py
    ├── orqa
    │   ├── README.md
    │   ├── evaluate_orqa.py
    │   ├── evaluate_utils.py
    │   ├── supervised
    │   │   ├── data.py
    │   │   ├── eval_utils.py
    │   │   └── finetune.py
    │   └── unsupervised
    │   │   ├── nq.py
    │   │   ├── qa_utils.py
    │   │   └── tokenizers.py
    ├── race
    │   ├── data.py
    │   └── finetune.py
    └── zeroshot_gpt
    │   ├── datasets.py
    │   ├── detokenizer.py
    │   └── evaluate.py
├── tests
    ├── __init__.py
    ├── conftest.py
    ├── pytest.ini
    ├── tensor_parallel
    │   ├── test_cross_entropy.py
    │   ├── test_data.py
    │   ├── test_mappings.py
    │   ├── test_random.py
    │   └── test_tensor_parallel_utils.py
    ├── test_activations.py
    ├── test_basic.py
    ├── test_layernorm_order.py
    ├── test_llama_weights.py
    ├── test_parallel_state.py
    ├── test_utilities.py
    ├── test_utils.py
    └── test_wandb.py
├── tools
    ├── checkpoint_loader_megatron.py
    ├── checkpoint_saver_megatron.py
    ├── checkpoint_util.py
    ├── linter.py
    ├── merge_datasets.py
    ├── openwebtext
    │   ├── README.md
    │   ├── add_id.py
    │   ├── blacklist_urls.py
    │   ├── cleanup_dataset.py
    │   ├── cleanup_fix_dataset.py
    │   ├── filter_ngrams.py
    │   ├── find_duplicates.py
    │   ├── group_duplicate_url.py
    │   ├── merge_jsons.py
    │   └── remove_group_duplicates.py
    ├── preprocess_data.py
    ├── preprocess_instruct_data.py
    ├── push_to_hub.py
    ├── run_text_generation_server.py
    └── text_generation_cli.py
├── verify_correctness.py
└── weights_conversion
    ├── hf_to_megatron.py
    ├── megatron_to_hf.py
    └── utils
        ├── __init__.py
        ├── merge_llama.py
        └── permute_qkv.py


/.coveragerc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/.coveragerc


--------------------------------------------------------------------------------
/.github/workflows/documentation.yml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/.github/workflows/documentation.yml


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/.gitignore


--------------------------------------------------------------------------------
/AUTHORS:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/AUTHORS


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/LICENSE


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/README.md


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/docs/Makefile


--------------------------------------------------------------------------------
/docs/_templates/autosummary/base.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/docs/_templates/autosummary/base.rst


--------------------------------------------------------------------------------
/docs/_templates/autosummary/class.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/docs/_templates/autosummary/class.rst


--------------------------------------------------------------------------------
/docs/_templates/autosummary/module.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/docs/_templates/autosummary/module.rst


--------------------------------------------------------------------------------
/docs/api/index.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/docs/api/index.rst


--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/docs/conf.py


--------------------------------------------------------------------------------
/docs/guide/faq.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/docs/guide/faq.md


--------------------------------------------------------------------------------
/docs/guide/getting_started.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/docs/guide/getting_started.md


--------------------------------------------------------------------------------
/docs/guide/index.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/docs/guide/index.md


--------------------------------------------------------------------------------
/docs/guide/instruction_tuning.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/docs/guide/instruction_tuning.md


--------------------------------------------------------------------------------
/docs/guide/tokenization.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/docs/guide/tokenization.md


--------------------------------------------------------------------------------
/docs/guide/weights_conversion.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/docs/guide/weights_conversion.md


--------------------------------------------------------------------------------
/docs/imgs/llama-falcon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/docs/imgs/llama-falcon.png


--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/docs/index.rst


--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/docs/make.bat


--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/docs/requirements.txt


--------------------------------------------------------------------------------
/examples/evaluate_retriever_nq.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/evaluate_retriever_nq.sh


--------------------------------------------------------------------------------
/examples/evaluate_zeroshot_gpt.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/evaluate_zeroshot_gpt.sh


--------------------------------------------------------------------------------
/examples/finetune.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/finetune.sh


--------------------------------------------------------------------------------
/examples/finetune_mnli_distributed.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/finetune_mnli_distributed.sh


--------------------------------------------------------------------------------
/examples/finetune_race_distributed.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/finetune_race_distributed.sh


--------------------------------------------------------------------------------
/examples/finetune_retriever_distributed.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/finetune_retriever_distributed.sh


--------------------------------------------------------------------------------
/examples/hf_to_megatron.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/hf_to_megatron.sh


--------------------------------------------------------------------------------
/examples/merge_mp_bert.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/merge_mp_bert.sh


--------------------------------------------------------------------------------
/examples/msdp/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/msdp/README.md


--------------------------------------------------------------------------------
/examples/msdp/data_processing.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/msdp/data_processing.sh


--------------------------------------------------------------------------------
/examples/msdp/eval_knwl_generation.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/msdp/eval_knwl_generation.sh


--------------------------------------------------------------------------------
/examples/msdp/eval_resp_generation.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/msdp/eval_resp_generation.sh


--------------------------------------------------------------------------------
/examples/msdp/prep_resp_gen.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/msdp/prep_resp_gen.sh


--------------------------------------------------------------------------------
/examples/msdp/prompt_knwl_gen.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/msdp/prompt_knwl_gen.sh


--------------------------------------------------------------------------------
/examples/msdp/prompt_resp_gen.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/msdp/prompt_resp_gen.sh


--------------------------------------------------------------------------------
/examples/parallelize.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/parallelize.sh


--------------------------------------------------------------------------------
/examples/pretrain_bert.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/pretrain_bert.sh


--------------------------------------------------------------------------------
/examples/pretrain_bert_distributed.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/pretrain_bert_distributed.sh


--------------------------------------------------------------------------------
/examples/pretrain_bert_distributed_with_mp.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/pretrain_bert_distributed_with_mp.sh


--------------------------------------------------------------------------------
/examples/pretrain_gpt.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/pretrain_gpt.sh


--------------------------------------------------------------------------------
/examples/pretrain_gpt3_175B.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/pretrain_gpt3_175B.sh


--------------------------------------------------------------------------------
/examples/pretrain_gpt_distributed.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/pretrain_gpt_distributed.sh


--------------------------------------------------------------------------------
/examples/pretrain_gpt_distributed_with_mp.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/pretrain_gpt_distributed_with_mp.sh


--------------------------------------------------------------------------------
/examples/pretrain_ict.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/pretrain_ict.sh


--------------------------------------------------------------------------------
/examples/pretrain_t5.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/pretrain_t5.sh


--------------------------------------------------------------------------------
/examples/pretrain_t5_distributed.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/pretrain_t5_distributed.sh


--------------------------------------------------------------------------------
/examples/pretrain_t5_distributed_with_mp.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/pretrain_t5_distributed_with_mp.sh


--------------------------------------------------------------------------------
/examples/run_text_generation_server_345M.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/run_text_generation_server_345M.sh


--------------------------------------------------------------------------------
/examples/run_text_generation_server_345M_8_tensor_parallel.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/run_text_generation_server_345M_8_tensor_parallel.sh


--------------------------------------------------------------------------------
/examples/sc21/CONFIG.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/sc21/CONFIG.sh


--------------------------------------------------------------------------------
/examples/sc21/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/sc21/README.md


--------------------------------------------------------------------------------
/examples/sc21/SBATCH.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/sc21/SBATCH.sh


--------------------------------------------------------------------------------
/examples/sc21/SRUN.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/sc21/SRUN.sh


--------------------------------------------------------------------------------
/examples/sc21/run_figure_11.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/sc21/run_figure_11.sh


--------------------------------------------------------------------------------
/examples/sc21/run_figure_12.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/sc21/run_figure_12.sh


--------------------------------------------------------------------------------
/examples/sc21/run_figure_13.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/sc21/run_figure_13.sh


--------------------------------------------------------------------------------
/examples/sc21/run_figure_14.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/sc21/run_figure_14.sh


--------------------------------------------------------------------------------
/examples/sc21/run_figure_15.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/sc21/run_figure_15.sh


--------------------------------------------------------------------------------
/examples/sc21/run_figure_16.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/sc21/run_figure_16.sh


--------------------------------------------------------------------------------
/examples/sc21/run_figure_17.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/sc21/run_figure_17.sh


--------------------------------------------------------------------------------
/examples/sc21/run_figure_18.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/sc21/run_figure_18.sh


--------------------------------------------------------------------------------
/examples/sc21/run_table_1.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/sc21/run_table_1.sh


--------------------------------------------------------------------------------
/examples/verify.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/examples/verify.sh


--------------------------------------------------------------------------------
/finetune.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/finetune.py


--------------------------------------------------------------------------------
/megatron/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/__init__.py


--------------------------------------------------------------------------------
/megatron/arguments.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/arguments.py


--------------------------------------------------------------------------------
/megatron/checkpointing.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/checkpointing.py


--------------------------------------------------------------------------------
/megatron/core/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/core/__init__.py


--------------------------------------------------------------------------------
/megatron/core/parallel_state.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/core/parallel_state.py


--------------------------------------------------------------------------------
/megatron/core/tensor_parallel/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/core/tensor_parallel/__init__.py


--------------------------------------------------------------------------------
/megatron/core/tensor_parallel/cross_entropy.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/core/tensor_parallel/cross_entropy.py


--------------------------------------------------------------------------------
/megatron/core/tensor_parallel/data.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/core/tensor_parallel/data.py


--------------------------------------------------------------------------------
/megatron/core/tensor_parallel/layers.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/core/tensor_parallel/layers.py


--------------------------------------------------------------------------------
/megatron/core/tensor_parallel/mappings.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/core/tensor_parallel/mappings.py


--------------------------------------------------------------------------------
/megatron/core/tensor_parallel/random.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/core/tensor_parallel/random.py


--------------------------------------------------------------------------------
/megatron/core/tensor_parallel/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/core/tensor_parallel/utils.py


--------------------------------------------------------------------------------
/megatron/core/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/core/utils.py


--------------------------------------------------------------------------------
/megatron/data/Makefile:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/data/Makefile


--------------------------------------------------------------------------------
/megatron/data/__init__.py:
--------------------------------------------------------------------------------
1 | from . import indexed_dataset
2 | 


--------------------------------------------------------------------------------
/megatron/data/autoaugment.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/data/autoaugment.py


--------------------------------------------------------------------------------
/megatron/data/bert_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/data/bert_dataset.py


--------------------------------------------------------------------------------
/megatron/data/biencoder_dataset_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/data/biencoder_dataset_utils.py


--------------------------------------------------------------------------------
/megatron/data/blendable_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/data/blendable_dataset.py


--------------------------------------------------------------------------------
/megatron/data/data_samplers.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/data/data_samplers.py


--------------------------------------------------------------------------------
/megatron/data/dataset_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/data/dataset_utils.py


--------------------------------------------------------------------------------
/megatron/data/gpt_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/data/gpt_dataset.py


--------------------------------------------------------------------------------
/megatron/data/helpers.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/data/helpers.cpp


--------------------------------------------------------------------------------
/megatron/data/ict_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/data/ict_dataset.py


--------------------------------------------------------------------------------
/megatron/data/image_folder.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/data/image_folder.py


--------------------------------------------------------------------------------
/megatron/data/indexed_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/data/indexed_dataset.py


--------------------------------------------------------------------------------
/megatron/data/instruction_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/data/instruction_dataset.py


--------------------------------------------------------------------------------
/megatron/data/orqa_wiki_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/data/orqa_wiki_dataset.py


--------------------------------------------------------------------------------
/megatron/data/realm_dataset_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/data/realm_dataset_utils.py


--------------------------------------------------------------------------------
/megatron/data/realm_index.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/data/realm_index.py


--------------------------------------------------------------------------------
/megatron/data/t5_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/data/t5_dataset.py


--------------------------------------------------------------------------------
/megatron/data/test/test_indexed_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/data/test/test_indexed_dataset.py


--------------------------------------------------------------------------------
/megatron/data/test/test_preprocess_data.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/data/test/test_preprocess_data.sh


--------------------------------------------------------------------------------
/megatron/dist_signal_handler.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/dist_signal_handler.py


--------------------------------------------------------------------------------
/megatron/fp16_deprecated/loss_scaler.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/fp16_deprecated/loss_scaler.py


--------------------------------------------------------------------------------
/megatron/fused_kernels/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/fused_kernels/__init__.py


--------------------------------------------------------------------------------
/megatron/fused_kernels/compat.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/fused_kernels/compat.h


--------------------------------------------------------------------------------
/megatron/fused_kernels/fused_weight_gradient_dense.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/fused_kernels/fused_weight_gradient_dense.cpp


--------------------------------------------------------------------------------
/megatron/fused_kernels/fused_weight_gradient_dense.cu:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/fused_kernels/fused_weight_gradient_dense.cu


--------------------------------------------------------------------------------
/megatron/fused_kernels/layer_norm_cuda.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/fused_kernels/layer_norm_cuda.cpp


--------------------------------------------------------------------------------
/megatron/fused_kernels/layer_norm_cuda_kernel.cu:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/fused_kernels/layer_norm_cuda_kernel.cu


--------------------------------------------------------------------------------
/megatron/fused_kernels/scaled_masked_softmax.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/fused_kernels/scaled_masked_softmax.cpp


--------------------------------------------------------------------------------
/megatron/fused_kernels/scaled_masked_softmax.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/fused_kernels/scaled_masked_softmax.h


--------------------------------------------------------------------------------
/megatron/fused_kernels/scaled_masked_softmax_cuda.cu:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/fused_kernels/scaled_masked_softmax_cuda.cu


--------------------------------------------------------------------------------
/megatron/fused_kernels/scaled_softmax.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/fused_kernels/scaled_softmax.cpp


--------------------------------------------------------------------------------
/megatron/fused_kernels/scaled_softmax_cuda.cu:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/fused_kernels/scaled_softmax_cuda.cu


--------------------------------------------------------------------------------
/megatron/fused_kernels/scaled_upper_triang_masked_softmax.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/fused_kernels/scaled_upper_triang_masked_softmax.cpp


--------------------------------------------------------------------------------
/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h


--------------------------------------------------------------------------------
/megatron/fused_kernels/scaled_upper_triang_masked_softmax_cuda.cu:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/fused_kernels/scaled_upper_triang_masked_softmax_cuda.cu


--------------------------------------------------------------------------------
/megatron/fused_kernels/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/megatron/fused_kernels/tests/test_fused_kernels.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/fused_kernels/tests/test_fused_kernels.py


--------------------------------------------------------------------------------
/megatron/fused_kernels/type_shim.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/fused_kernels/type_shim.h


--------------------------------------------------------------------------------
/megatron/global_vars.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/global_vars.py


--------------------------------------------------------------------------------
/megatron/indexer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/indexer.py


--------------------------------------------------------------------------------
/megatron/initialize.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/initialize.py


--------------------------------------------------------------------------------
/megatron/memory.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/memory.py


--------------------------------------------------------------------------------
/megatron/metrics.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/metrics.py


--------------------------------------------------------------------------------
/megatron/microbatches.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/microbatches.py


--------------------------------------------------------------------------------
/megatron/model/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/model/__init__.py


--------------------------------------------------------------------------------
/megatron/model/bert_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/model/bert_model.py


--------------------------------------------------------------------------------
/megatron/model/biencoder_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/model/biencoder_model.py


--------------------------------------------------------------------------------
/megatron/model/classification.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/model/classification.py


--------------------------------------------------------------------------------
/megatron/model/distributed.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/model/distributed.py


--------------------------------------------------------------------------------
/megatron/model/enums.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/model/enums.py


--------------------------------------------------------------------------------
/megatron/model/falcon_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/model/falcon_model.py


--------------------------------------------------------------------------------
/megatron/model/fused_bias_gelu.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/model/fused_bias_gelu.py


--------------------------------------------------------------------------------
/megatron/model/fused_layer_norm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/model/fused_layer_norm.py


--------------------------------------------------------------------------------
/megatron/model/fused_softmax.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/model/fused_softmax.py


--------------------------------------------------------------------------------
/megatron/model/glu_activations.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/model/glu_activations.py


--------------------------------------------------------------------------------
/megatron/model/gpt_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/model/gpt_model.py


--------------------------------------------------------------------------------
/megatron/model/language_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/model/language_model.py


--------------------------------------------------------------------------------
/megatron/model/llama_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/model/llama_model.py


--------------------------------------------------------------------------------
/megatron/model/mistral_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/model/mistral_model.py


--------------------------------------------------------------------------------
/megatron/model/module.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/model/module.py


--------------------------------------------------------------------------------
/megatron/model/multiple_choice.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/model/multiple_choice.py


--------------------------------------------------------------------------------
/megatron/model/positional_embeddings.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/model/positional_embeddings.py


--------------------------------------------------------------------------------
/megatron/model/t5_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/model/t5_model.py


--------------------------------------------------------------------------------
/megatron/model/transformer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/model/transformer.py


--------------------------------------------------------------------------------
/megatron/model/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/model/utils.py


--------------------------------------------------------------------------------
/megatron/mpu/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/megatron/mpu/tests/commons.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/mpu/tests/commons.py


--------------------------------------------------------------------------------
/megatron/mpu/tests/test_cross_entropy.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/mpu/tests/test_cross_entropy.py


--------------------------------------------------------------------------------
/megatron/mpu/tests/test_data.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/mpu/tests/test_data.py


--------------------------------------------------------------------------------
/megatron/mpu/tests/test_initialize.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/mpu/tests/test_initialize.py


--------------------------------------------------------------------------------
/megatron/mpu/tests/test_layers.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/mpu/tests/test_layers.py


--------------------------------------------------------------------------------
/megatron/mpu/tests/test_random.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/mpu/tests/test_random.py


--------------------------------------------------------------------------------
/megatron/optimizer/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/optimizer/__init__.py


--------------------------------------------------------------------------------
/megatron/optimizer/clip_grads.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/optimizer/clip_grads.py


--------------------------------------------------------------------------------
/megatron/optimizer/distrib_optimizer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/optimizer/distrib_optimizer.py


--------------------------------------------------------------------------------
/megatron/optimizer/grad_scaler.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/optimizer/grad_scaler.py


--------------------------------------------------------------------------------
/megatron/optimizer/optimizer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/optimizer/optimizer.py


--------------------------------------------------------------------------------
/megatron/optimizer_param_scheduler.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/optimizer_param_scheduler.py


--------------------------------------------------------------------------------
/megatron/p2p_communication.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/p2p_communication.py


--------------------------------------------------------------------------------
/megatron/schedules.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/schedules.py


--------------------------------------------------------------------------------
/megatron/static/index.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/static/index.html


--------------------------------------------------------------------------------
/megatron/text_generation/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/text_generation/__init__.py


--------------------------------------------------------------------------------
/megatron/text_generation/api.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/text_generation/api.py


--------------------------------------------------------------------------------
/megatron/text_generation/beam_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/text_generation/beam_utils.py


--------------------------------------------------------------------------------
/megatron/text_generation/communication.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/text_generation/communication.py


--------------------------------------------------------------------------------
/megatron/text_generation/forward_step.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/text_generation/forward_step.py


--------------------------------------------------------------------------------
/megatron/text_generation/generation.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/text_generation/generation.py


--------------------------------------------------------------------------------
/megatron/text_generation/sampling.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/text_generation/sampling.py


--------------------------------------------------------------------------------
/megatron/text_generation/tokenization.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/text_generation/tokenization.py


--------------------------------------------------------------------------------
/megatron/text_generation_server.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/text_generation_server.py


--------------------------------------------------------------------------------
/megatron/timers.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/timers.py


--------------------------------------------------------------------------------
/megatron/tokenizer/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/tokenizer/__init__.py


--------------------------------------------------------------------------------
/megatron/tokenizer/bert_tokenization.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/tokenizer/bert_tokenization.py


--------------------------------------------------------------------------------
/megatron/tokenizer/gpt2_tokenization.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/tokenizer/gpt2_tokenization.py


--------------------------------------------------------------------------------
/megatron/tokenizer/tokenizer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/tokenizer/tokenizer.py


--------------------------------------------------------------------------------
/megatron/training.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/training.py


--------------------------------------------------------------------------------
/megatron/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/utils.py


--------------------------------------------------------------------------------
/megatron/wandb_logger.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/megatron/wandb_logger.py


--------------------------------------------------------------------------------
/pretrain_bert.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/pretrain_bert.py


--------------------------------------------------------------------------------
/pretrain_ict.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/pretrain_ict.py


--------------------------------------------------------------------------------
/pretrain_t5.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/pretrain_t5.py


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/requirements.txt


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/setup.py


--------------------------------------------------------------------------------
/tasks/data_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tasks/data_utils.py


--------------------------------------------------------------------------------
/tasks/eval_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tasks/eval_utils.py


--------------------------------------------------------------------------------
/tasks/finetune_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tasks/finetune_utils.py


--------------------------------------------------------------------------------
/tasks/glue/data.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tasks/glue/data.py


--------------------------------------------------------------------------------
/tasks/glue/finetune.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tasks/glue/finetune.py


--------------------------------------------------------------------------------
/tasks/glue/mnli.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tasks/glue/mnli.py


--------------------------------------------------------------------------------
/tasks/glue/qqp.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tasks/glue/qqp.py


--------------------------------------------------------------------------------
/tasks/main.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tasks/main.py


--------------------------------------------------------------------------------
/tasks/msdp/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tasks/msdp/README.md


--------------------------------------------------------------------------------
/tasks/msdp/evaluate.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tasks/msdp/evaluate.py


--------------------------------------------------------------------------------
/tasks/msdp/main.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tasks/msdp/main.py


--------------------------------------------------------------------------------
/tasks/msdp/metrics.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tasks/msdp/metrics.py


--------------------------------------------------------------------------------
/tasks/msdp/preprocessing.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tasks/msdp/preprocessing.py


--------------------------------------------------------------------------------
/tasks/msdp/prompt.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tasks/msdp/prompt.py


--------------------------------------------------------------------------------
/tasks/orqa/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tasks/orqa/README.md


--------------------------------------------------------------------------------
/tasks/orqa/evaluate_orqa.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tasks/orqa/evaluate_orqa.py


--------------------------------------------------------------------------------
/tasks/orqa/evaluate_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tasks/orqa/evaluate_utils.py


--------------------------------------------------------------------------------
/tasks/orqa/supervised/data.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tasks/orqa/supervised/data.py


--------------------------------------------------------------------------------
/tasks/orqa/supervised/eval_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tasks/orqa/supervised/eval_utils.py


--------------------------------------------------------------------------------
/tasks/orqa/supervised/finetune.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tasks/orqa/supervised/finetune.py


--------------------------------------------------------------------------------
/tasks/orqa/unsupervised/nq.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tasks/orqa/unsupervised/nq.py


--------------------------------------------------------------------------------
/tasks/orqa/unsupervised/qa_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tasks/orqa/unsupervised/qa_utils.py


--------------------------------------------------------------------------------
/tasks/orqa/unsupervised/tokenizers.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tasks/orqa/unsupervised/tokenizers.py


--------------------------------------------------------------------------------
/tasks/race/data.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tasks/race/data.py


--------------------------------------------------------------------------------
/tasks/race/finetune.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tasks/race/finetune.py


--------------------------------------------------------------------------------
/tasks/zeroshot_gpt/datasets.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tasks/zeroshot_gpt/datasets.py


--------------------------------------------------------------------------------
/tasks/zeroshot_gpt/detokenizer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tasks/zeroshot_gpt/detokenizer.py


--------------------------------------------------------------------------------
/tasks/zeroshot_gpt/evaluate.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tasks/zeroshot_gpt/evaluate.py


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tests/conftest.py


--------------------------------------------------------------------------------
/tests/pytest.ini:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tests/pytest.ini


--------------------------------------------------------------------------------
/tests/tensor_parallel/test_cross_entropy.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tests/tensor_parallel/test_cross_entropy.py


--------------------------------------------------------------------------------
/tests/tensor_parallel/test_data.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tests/tensor_parallel/test_data.py


--------------------------------------------------------------------------------
/tests/tensor_parallel/test_mappings.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tests/tensor_parallel/test_mappings.py


--------------------------------------------------------------------------------
/tests/tensor_parallel/test_random.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tests/tensor_parallel/test_random.py


--------------------------------------------------------------------------------
/tests/tensor_parallel/test_tensor_parallel_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tests/tensor_parallel/test_tensor_parallel_utils.py


--------------------------------------------------------------------------------
/tests/test_activations.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tests/test_activations.py


--------------------------------------------------------------------------------
/tests/test_basic.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tests/test_basic.py


--------------------------------------------------------------------------------
/tests/test_layernorm_order.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tests/test_layernorm_order.py


--------------------------------------------------------------------------------
/tests/test_llama_weights.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tests/test_llama_weights.py


--------------------------------------------------------------------------------
/tests/test_parallel_state.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tests/test_parallel_state.py


--------------------------------------------------------------------------------
/tests/test_utilities.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tests/test_utilities.py


--------------------------------------------------------------------------------
/tests/test_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tests/test_utils.py


--------------------------------------------------------------------------------
/tests/test_wandb.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tests/test_wandb.py


--------------------------------------------------------------------------------
/tools/checkpoint_loader_megatron.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tools/checkpoint_loader_megatron.py


--------------------------------------------------------------------------------
/tools/checkpoint_saver_megatron.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tools/checkpoint_saver_megatron.py


--------------------------------------------------------------------------------
/tools/checkpoint_util.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tools/checkpoint_util.py


--------------------------------------------------------------------------------
/tools/linter.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tools/linter.py


--------------------------------------------------------------------------------
/tools/merge_datasets.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tools/merge_datasets.py


--------------------------------------------------------------------------------
/tools/openwebtext/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tools/openwebtext/README.md


--------------------------------------------------------------------------------
/tools/openwebtext/add_id.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tools/openwebtext/add_id.py


--------------------------------------------------------------------------------
/tools/openwebtext/blacklist_urls.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tools/openwebtext/blacklist_urls.py


--------------------------------------------------------------------------------
/tools/openwebtext/cleanup_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tools/openwebtext/cleanup_dataset.py


--------------------------------------------------------------------------------
/tools/openwebtext/cleanup_fix_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tools/openwebtext/cleanup_fix_dataset.py


--------------------------------------------------------------------------------
/tools/openwebtext/filter_ngrams.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tools/openwebtext/filter_ngrams.py


--------------------------------------------------------------------------------
/tools/openwebtext/find_duplicates.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tools/openwebtext/find_duplicates.py


--------------------------------------------------------------------------------
/tools/openwebtext/group_duplicate_url.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tools/openwebtext/group_duplicate_url.py


--------------------------------------------------------------------------------
/tools/openwebtext/merge_jsons.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tools/openwebtext/merge_jsons.py


--------------------------------------------------------------------------------
/tools/openwebtext/remove_group_duplicates.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tools/openwebtext/remove_group_duplicates.py


--------------------------------------------------------------------------------
/tools/preprocess_data.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tools/preprocess_data.py


--------------------------------------------------------------------------------
/tools/preprocess_instruct_data.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tools/preprocess_instruct_data.py


--------------------------------------------------------------------------------
/tools/push_to_hub.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tools/push_to_hub.py


--------------------------------------------------------------------------------
/tools/run_text_generation_server.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tools/run_text_generation_server.py


--------------------------------------------------------------------------------
/tools/text_generation_cli.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/tools/text_generation_cli.py


--------------------------------------------------------------------------------
/verify_correctness.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/verify_correctness.py


--------------------------------------------------------------------------------
/weights_conversion/hf_to_megatron.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/weights_conversion/hf_to_megatron.py


--------------------------------------------------------------------------------
/weights_conversion/megatron_to_hf.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/weights_conversion/megatron_to_hf.py


--------------------------------------------------------------------------------
/weights_conversion/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/weights_conversion/utils/__init__.py


--------------------------------------------------------------------------------
/weights_conversion/utils/merge_llama.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/weights_conversion/utils/merge_llama.py


--------------------------------------------------------------------------------
/weights_conversion/utils/permute_qkv.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epfLLM/Megatron-LLM/HEAD/weights_conversion/utils/permute_qkv.py


--------------------------------------------------------------------------------