├── .flake8 ├── .github ├── CODEOWNERS ├── ISSUE_TEMPLATE │ ├── bug_report.md │ ├── config.yml │ ├── feature_request.md │ ├── question.md │ └── regression.md ├── actions │ ├── action.yml │ └── check-nvidia-sso-membership │ │ └── action.yml ├── copy-pr-bot.yaml ├── pull_request_template.md └── workflows │ ├── _build_test_publish_wheel.yml │ ├── _update_dependencies.yml │ ├── auto-assign-milestone.yml │ ├── auto-reminder-bot.yml │ ├── auto-swap-labels.yml │ ├── auto-update-copy-pr-bot.yml │ ├── build-test-publish-wheel.yml │ ├── check_api_backwards_compatibility_workflow.yml │ ├── cherry-pick-release-commit.yml │ ├── cicd-approve-test-queue.yml │ ├── cicd-main.yml │ ├── close-inactive-issue-pr.yml │ ├── community-bot.yml │ ├── copyright-check.yml │ ├── dependabot.yml │ ├── install-test.yml │ └── multi-approval-bot.yml ├── .gitignore ├── .gitlab-ci.yml ├── .gitlab ├── labeler-config.yml ├── scripts │ ├── build.sh │ ├── check_imports.py │ └── fetch-legacy-suite.sh └── stages │ ├── 00.pre.yml │ ├── 01.build.yml │ ├── 02.test.yml │ ├── 03.integration-tests.yml │ ├── 04.functional-tests.yml │ └── 05.publish.yml ├── .pre-commit-config.yaml ├── .pylintrc ├── .python-version ├── CHANGELOG.md ├── CONTRIBUTING.md ├── LICENSE ├── MANIFEST.in ├── README.md ├── docker ├── .ngc_version.dev ├── .ngc_version.lts ├── Dockerfile.ci.dev ├── Dockerfile.ci.nemo ├── Dockerfile.linting ├── common │ ├── install.sh │ └── install_source_wheels.sh └── patches │ └── deepep.patch ├── docs ├── api-backwards-compatibility-check.md ├── llama_mistral.md └── source │ ├── api-guide │ ├── context_parallel.rst │ ├── custom_fsdp.md │ ├── datasets.rst │ ├── dist_checkpointing.rst │ ├── dist_checkpointing.strategies.rst │ ├── dist_optimizer.md │ ├── distributed.rst │ ├── fusions.rst │ ├── index.rst │ ├── models.bert.rst │ ├── models.gpt.rst │ ├── models.rst │ ├── models.t5.rst │ ├── moe.rst │ ├── multi_latent_attention.rst │ ├── multi_token_prediction.md │ ├── num_microbatches_calculator.rst │ ├── optimizer_cpu_offload.rst │ ├── optimizer_param_scheduler.rst │ ├── pipeline_parallel.rst │ ├── pipeline_parallel_layout.md │ ├── tensor_parallel.rst │ ├── tokenizers.md │ └── transformer.rst │ ├── images │ ├── context_parallel │ │ ├── CP_overview.png │ │ └── CP_results.png │ ├── custom_fsdp │ │ ├── FSDP_Allreduce.png │ │ ├── FSDP_workflow.png │ │ └── MCore_Custom_FSDP_Class_Diagram.png │ ├── distrib_optimizer │ │ ├── data_flow.png │ │ └── sharding_scheme.png │ ├── moe │ │ └── token_drop.png │ └── multi_token_prediction │ │ └── MTP_implementation.png │ ├── index.rst │ └── user-guide │ └── index.rst ├── examples ├── __init__.py ├── academic_paper_scripts │ ├── detxoify_lm │ │ ├── README.md │ │ ├── annotations │ │ │ ├── filter-selfgeneration.py │ │ │ ├── perspective_api_annotate.py │ │ │ └── preprocess.sh │ │ ├── finetune_gpt.py │ │ ├── finetune_gpt_distributed-1.3b.sh │ │ ├── generate-1.3b.sh │ │ ├── generate_samples_gpt.py │ │ ├── perspective_api.py │ │ └── self_generation │ │ │ └── selfgenerate-1.3b-unconditional.sh │ ├── msdp │ │ ├── README.md │ │ ├── data_processing.sh │ │ ├── eval_knwl_generation.sh │ │ ├── eval_resp_generation.sh │ │ ├── prep_resp_gen.sh │ │ ├── prompt_knwl_gen.sh │ │ └── prompt_resp_gen.sh │ └── sc21 │ │ ├── CONFIG.sh │ │ ├── README.md │ │ ├── SBATCH.sh │ │ ├── SRUN.sh │ │ ├── run_figure_11.sh │ │ ├── run_figure_12.sh │ │ ├── run_figure_13.sh │ │ ├── run_figure_14.sh │ │ ├── run_figure_15.sh │ │ ├── run_figure_16.sh │ │ ├── run_figure_17.sh │ │ ├── run_figure_18.sh │ │ └── run_table_1.sh ├── bert │ ├── README.md │ └── train_bert_340m_distributed.sh ├── export │ ├── README.md │ └── trtllm_export │ │ ├── README.md │ │ ├── distributed_export │ │ └── gpt_distributed_gpu_export.py │ │ └── single_device_export │ │ └── gpt_single_device_cpu_export.py ├── gpt3 │ ├── README.md │ ├── gpt_config.yaml │ └── train_gpt3_175b_distributed.sh ├── inference │ ├── README.md │ ├── gpt │ │ ├── gpt_dynamic_inference.py │ │ ├── gpt_dynamic_inference_12b.sh │ │ ├── gpt_dynamic_inference_357m.sh │ │ ├── gpt_dynamic_inference_with_coordinator.py │ │ ├── gpt_static_inference.py │ │ └── utils.py │ ├── llama_mistral │ │ ├── huggingface_reference.py │ │ ├── run_static_inference_llama4_scout.sh │ │ ├── run_text_generation_llama3.1.sh │ │ ├── run_text_generation_llama3.sh │ │ └── run_text_generation_mistral.sh │ ├── run_text_generation_server_345M.sh │ ├── run_text_generation_server_345M_8_tensor_parallel.sh │ └── t5 │ │ └── simple_t5_batch_inference.py ├── llama │ ├── README.md │ └── train_llama3_8b_h100_fp8.sh ├── mamba │ ├── .gitignore │ ├── Dockerfile │ ├── README.md │ ├── run_text_gen_server_8b.sh │ ├── run_text_gen_server_8b_gpt3.sh │ └── train.sh ├── mimo │ ├── __init__.py │ ├── avlm_inference.py │ ├── configs │ │ ├── llava_avlm.py │ │ ├── llava_vlm.py │ │ └── mock.py │ ├── data │ │ ├── __init__.py │ │ ├── avlm_sample_loader.py │ │ ├── energon_avlm_task_encoder.py │ │ ├── energon_vlm_task_encoder.py │ │ ├── mock.py │ │ ├── prepare_video_llava_data.py │ │ └── utils │ │ │ └── calculate_audio_tokens.py │ ├── model_providers │ │ ├── __init__.py │ │ ├── hf_clip_encoder.py │ │ ├── hf_whisper_encoder.py │ │ ├── llava_avlm.py │ │ ├── llava_vlm.py │ │ └── mock.py │ ├── scripts │ │ ├── run_avlm_train.sh │ │ ├── run_mock_train.sh │ │ ├── run_video_vlm_train.sh │ │ └── run_vlm_train.sh │ ├── train.py │ └── utils │ │ ├── __init__.py │ │ ├── data_helpers.py │ │ ├── logging.py │ │ └── model_helpers.py ├── mixtral │ ├── README.md │ └── train_mixtral_8x7b_distributed.sh ├── multimodal │ ├── Dockerfile │ ├── README.md │ ├── assets │ │ └── pretrain_curves.png │ ├── combine_lm_vision_checkpoints.sh │ ├── combine_state_dicts.py │ ├── config.py │ ├── convert_llava_pretrain_to_wds.py │ ├── dataloader_provider.py │ ├── dataset_helpers.py │ ├── energon_util.py │ ├── evaluation │ │ ├── evaluate_ai2d.py │ │ ├── evaluate_chartqa.py │ │ ├── evaluate_coco.py │ │ ├── evaluate_infovqa.py │ │ ├── evaluate_mathvista.py │ │ ├── evaluate_mmmu.py │ │ ├── evaluate_ocrbench.py │ │ ├── evaluate_ocrbench_v2.py │ │ ├── evaluate_rd_tablebench.py │ │ ├── evaluate_realworldqa.py │ │ ├── evaluate_spdocvqa.py │ │ ├── evaluate_textvqa.py │ │ ├── evaluate_video_motionbench.py │ │ ├── evaluate_video_mvbench.py │ │ ├── evaluate_video_phys_game_bench.py │ │ ├── evaluate_vqav2.py │ │ ├── evaluation_datasets.py │ │ └── mmmu_utils.py │ ├── image_processing.py │ ├── layer_scaling.py │ ├── layer_specs.py │ ├── llama_3p1_nemotron_nano_vl_8b_v1 │ │ ├── Dockerfile │ │ ├── README.md │ │ ├── pretraining_llama_3p1_nemotron_nano_vl_8b_v1.sh │ │ ├── sft_llama_3p1_nemotron_nano_vl_8b_v1.sh │ │ └── text_generation.sh │ ├── manual_prompts.json │ ├── model.py │ ├── model_converter │ │ ├── clip_converter.py │ │ ├── internvit_converter.py │ │ ├── radio_converter.py │ │ ├── siglip_converter.py │ │ └── vision_model_tester.py │ ├── multimodal_args.py │ ├── nvlm │ │ ├── README.md │ │ ├── internvit.py │ │ ├── nvlm_prompts.json │ │ ├── pp_checkpoint_converter.py │ │ ├── pretrain_blend.yaml │ │ ├── pretrain_qwen20_72b_internvit_6b.sh │ │ ├── pretrain_yi_34b_internvit_6b.sh │ │ ├── run_text_generation_qwen20_72b_internvit_6b.sh │ │ ├── run_text_generation_qwen25_7b_internvit_video.sh │ │ ├── run_text_generation_qwen25_7b_siglip.sh │ │ ├── run_text_generation_yi_34b_internvit_6b.sh │ │ ├── sft_34b_internvit.sh │ │ ├── sft_blend.yaml │ │ ├── sft_qwen20_72b_internvit_6b.sh │ │ └── sft_qwen2p5_7b_internvit_6b_video.sh │ ├── pretrain_dataset.yaml │ ├── pretrain_mistral_clip.sh │ ├── radio │ │ └── radio_g.py │ ├── run_text_generation.py │ ├── sft_dataset.yaml │ ├── sft_mistral_clip.sh │ ├── text_generation_mistral_clip.sh │ └── train.py ├── post_training │ └── modelopt │ │ ├── .gitignore │ │ ├── ADVANCED.md │ │ ├── Dockerfile │ │ ├── README.md │ │ ├── conf │ │ ├── Qwen │ │ │ ├── Qwen2.5-0.5B-Instruct.sh │ │ │ ├── Qwen2.5-7B-Instruct.sh │ │ │ ├── Qwen3-0.6B.sh │ │ │ ├── Qwen3-235B-A22B.sh │ │ │ ├── Qwen3-30B-A3B.sh │ │ │ └── Qwen3-8B.sh │ │ ├── arguments.sh │ │ ├── deepseek-ai │ │ │ ├── DeepSeek-R1.sh │ │ │ └── DeepSeek-V2-Lite.sh │ │ ├── meta-llama │ │ │ ├── Llama-3.1-8B-Instruct.sh │ │ │ ├── Llama-3.2-1B-Instruct.sh │ │ │ ├── Llama-4-Maverick-17B-128E-Instruct.sh │ │ │ └── Llama-4-Scout-17B-16E-Instruct.sh │ │ ├── moonshotai │ │ │ ├── Kimi-K2-Instruct.sh │ │ │ ├── kimi_k2_instruct.sh │ │ │ └── kimi_k2_instruct_export.sh │ │ ├── nvidia │ │ │ ├── NVIDIA-Nemotron-Nano-9B-v2-Base.sh │ │ │ ├── NVIDIA-Nemotron-Nano-9B-v2.sh │ │ │ ├── Nemotron-H-4B-Instruct.sh │ │ │ ├── Nemotron-H-8B-Base-8K.sh │ │ │ └── Nemotron-Mini-4B-Instruct.sh │ │ └── openai │ │ │ ├── gpt-oss-120b.sh │ │ │ └── gpt-oss-20b.sh │ │ ├── convert.sh │ │ ├── convert_model.py │ │ ├── eagle3.sh │ │ ├── export.py │ │ ├── export.sh │ │ ├── finetune.py │ │ ├── finetune.sh │ │ ├── generate.py │ │ ├── generate.sh │ │ ├── generation_server.sh │ │ ├── mmlu.py │ │ ├── mmlu.sh │ │ ├── offline_feature_extract.py │ │ ├── offline_feature_extract.sh │ │ ├── prune.py │ │ ├── prune.sh │ │ ├── quantize.py │ │ ├── quantize.sh │ │ ├── requirements.txt │ │ ├── requirements_ssm.txt │ │ ├── slurm │ │ ├── env_setup_template.sh │ │ └── sbatch.sh │ │ ├── speculative.md │ │ ├── validate.py │ │ └── validate.sh ├── retro │ ├── README.md │ ├── preprocess_data.sh │ └── train_retro_2b_distributed.sh ├── rl │ ├── README.md │ ├── environment_configs │ │ ├── dapo.yaml │ │ ├── default.yaml │ │ ├── gsm8k.yaml │ │ └── math.yaml │ └── environments │ │ ├── __init__.py │ │ ├── countdown │ │ ├── README.md │ │ ├── __init__.py │ │ ├── countdown.py │ │ └── countdown_agent.py │ │ └── math │ │ ├── __init__.py │ │ ├── aime_agent.py │ │ ├── bigmath_agent.py │ │ ├── dapo_agent.py │ │ ├── gsm8k_agent.py │ │ ├── math_agent.py │ │ └── openmath_agent.py ├── run_simple_mcore_train_loop.py └── t5 │ ├── README.md │ ├── t5_mcore_train_curve.png │ └── train_t5_220m_distributed.sh ├── gpt_builders.py ├── images ├── model_table.png ├── strong_scaling.png └── weak_scaling.png ├── mamba_builders.py ├── megatron ├── core │ ├── MSC_Integration.md │ ├── QuickStart.md │ ├── README.md │ ├── README_STRAGGLER.md │ ├── __init__.py │ ├── activations.py │ ├── config.py │ ├── config_logger.py │ ├── datasets │ │ ├── Makefile │ │ ├── __init__.py │ │ ├── bert_dataset.py │ │ ├── blended_dataset.py │ │ ├── blended_megatron_dataset_builder.py │ │ ├── blended_megatron_dataset_config.py │ │ ├── gpt_dataset.py │ │ ├── helpers.cpp │ │ ├── helpers.py │ │ ├── indexed_dataset.py │ │ ├── masked_dataset.py │ │ ├── megatron_dataset.py │ │ ├── megatron_tokenizer.py │ │ ├── multimodal_dataset.py │ │ ├── object_storage_utils.py │ │ ├── readme.md │ │ ├── retro │ │ │ ├── __init__.py │ │ │ ├── config │ │ │ │ ├── __init__.py │ │ │ │ ├── bert_embedders.py │ │ │ │ ├── config.py │ │ │ │ ├── gpt_chunk_datasets.py │ │ │ │ └── tokenizers.py │ │ │ ├── db │ │ │ │ ├── __init__.py │ │ │ │ ├── build.py │ │ │ │ ├── dataset.py │ │ │ │ └── utils.py │ │ │ ├── external_libs.py │ │ │ ├── index │ │ │ │ ├── __init__.py │ │ │ │ ├── build.py │ │ │ │ ├── factory.py │ │ │ │ ├── index.py │ │ │ │ ├── indexes │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── faiss_base.py │ │ │ │ │ └── faiss_par_add.py │ │ │ │ ├── utils.py │ │ │ │ └── validate.py │ │ │ ├── query │ │ │ │ ├── __init__.py │ │ │ │ ├── gpt_chunk_dataset.py │ │ │ │ ├── multi_split_gpt_dataset.py │ │ │ │ ├── query.py │ │ │ │ ├── retro_dataset.py │ │ │ │ └── utils.py │ │ │ └── utils.py │ │ ├── t5_dataset.py │ │ ├── utils.py │ │ └── utils_s3.py │ ├── dist_checkpointing │ │ ├── __init__.py │ │ ├── core.py │ │ ├── dict_utils.py │ │ ├── exchange_utils.py │ │ ├── mapping.py │ │ ├── optimizer.py │ │ ├── serialization.py │ │ ├── state_dict_utils.py │ │ ├── strategies │ │ │ ├── __init__.py │ │ │ ├── async_utils.py │ │ │ ├── base.py │ │ │ ├── cached_metadata_filesystem_reader.py │ │ │ ├── checkpointable.py │ │ │ ├── common.py │ │ │ ├── filesystem_async.py │ │ │ ├── fully_parallel.py │ │ │ ├── resharding.py │ │ │ ├── state_dict_saver.py │ │ │ ├── tensorstore.py │ │ │ ├── torch.py │ │ │ ├── two_stage.py │ │ │ └── zarr.py │ │ ├── tensor_aware_state_dict.py │ │ ├── utils.py │ │ └── validation.py │ ├── distributed │ │ ├── README.md │ │ ├── __init__.py │ │ ├── data_parallel_base.py │ │ ├── distributed_data_parallel.py │ │ ├── distributed_data_parallel_config.py │ │ ├── finalize_model_grads.py │ │ ├── fsdp │ │ │ ├── __init__.py │ │ │ ├── mcore_fsdp_adapter.py │ │ │ └── src │ │ │ │ ├── README.md │ │ │ │ ├── __init__.py │ │ │ │ ├── megatron_fsdp │ │ │ │ ├── __init__.py │ │ │ │ ├── distributed_data_parallel_config.py │ │ │ │ ├── fully_shard.py │ │ │ │ ├── megatron_fsdp.py │ │ │ │ ├── package_info.py │ │ │ │ ├── param_and_grad_buffer.py │ │ │ │ ├── uneven_dtensor.py │ │ │ │ └── utils.py │ │ │ │ └── pyproject.toml │ │ ├── param_and_grad_buffer.py │ │ ├── reduce_scatter_with_fp32_accumulation.py │ │ ├── torch_fully_sharded_data_parallel.py │ │ └── torch_fully_sharded_data_parallel_config.py │ ├── energy_monitor.py │ ├── enums.py │ ├── export │ │ ├── __init__.py │ │ ├── data_type.py │ │ ├── export_config.py │ │ ├── model_type.py │ │ └── trtllm │ │ │ ├── __init__.py │ │ │ ├── engine_builder │ │ │ ├── __init__.py │ │ │ └── trtllm_engine_builder.py │ │ │ ├── model_to_trllm_mapping │ │ │ ├── __init__.py │ │ │ └── default_conversion_dict.py │ │ │ ├── trt_model_config.py │ │ │ ├── trt_model_type.py │ │ │ ├── trtllm_helper.py │ │ │ ├── trtllm_layers.py │ │ │ └── trtllm_weights_converter │ │ │ ├── __init__.py │ │ │ ├── distributed_trtllm_model_weights_converter.py │ │ │ ├── single_device_trtllm_model_weights_converter.py │ │ │ └── utils.py │ ├── extensions │ │ ├── __init__.py │ │ ├── kitchen.py │ │ ├── transformer_engine.py │ │ └── transformer_engine_spec_provider.py │ ├── fp4_utils.py │ ├── fp8_utils.py │ ├── full_cuda_graph.py │ ├── fusions │ │ ├── __init__.py │ │ ├── fused_bias_dropout.py │ │ ├── fused_bias_geglu.py │ │ ├── fused_bias_gelu.py │ │ ├── fused_bias_swiglu.py │ │ ├── fused_cross_entropy.py │ │ ├── fused_indices_converter.py │ │ ├── fused_layer_norm.py │ │ ├── fused_mla_yarn_rope_apply.py │ │ ├── fused_pad_routing_map.py │ │ ├── fused_softmax.py │ │ └── fused_weighted_squared_relu.py │ ├── hyper_comm_grid.py │ ├── inference │ │ ├── __init__.py │ │ ├── async_stream.py │ │ ├── batch_dimensions_utils.py │ │ ├── common_inference_params.py │ │ ├── communication_utils.py │ │ ├── contexts │ │ │ ├── __init__.py │ │ │ ├── attention_context │ │ │ │ ├── mamba_metadata.py │ │ │ │ ├── metadata_base.py │ │ │ │ └── mha_metadata.py │ │ │ ├── base_context.py │ │ │ ├── dynamic_block_allocator.py │ │ │ ├── dynamic_context.py │ │ │ ├── fused_kv_append_kernel.py │ │ │ └── static_context.py │ │ ├── data_parallel_inference_coordinator.py │ │ ├── engines │ │ │ ├── __init__.py │ │ │ ├── abstract_engine.py │ │ │ ├── dynamic_engine.py │ │ │ ├── mcore_engine.py │ │ │ └── static_engine.py │ │ ├── headers.py │ │ ├── inference_client.py │ │ ├── inference_request.py │ │ ├── model_inference_wrappers │ │ │ ├── __init__.py │ │ │ ├── abstract_model_inference_wrapper.py │ │ │ ├── gpt │ │ │ │ ├── __init__.py │ │ │ │ └── gpt_inference_wrapper.py │ │ │ ├── inference_wrapper_config.py │ │ │ ├── multimodal │ │ │ │ └── vlm_inference_wrapper.py │ │ │ └── t5 │ │ │ │ ├── __init__.py │ │ │ │ └── t5_inference_wrapper.py │ │ ├── sampling_params.py │ │ ├── scheduler.py │ │ ├── text_generation_controllers │ │ │ ├── __init__.py │ │ │ ├── encoder_decoder_text_generation_controller.py │ │ │ ├── simple_text_generation_controller.py │ │ │ ├── text_generation_controller.py │ │ │ └── vlm_text_generation_controller.py │ │ ├── text_generation_server │ │ │ ├── __init__.py │ │ │ ├── endpoints │ │ │ │ ├── common.py │ │ │ │ └── completions.py │ │ │ ├── run_mcore_engine.py │ │ │ ├── text_generation_server.py │ │ │ └── tokenization.py │ │ ├── unified_memory.py │ │ └── utils.py │ ├── inference_params.py │ ├── jit.py │ ├── model_parallel_config.py │ ├── models │ │ ├── T5 │ │ │ ├── __init__.py │ │ │ ├── t5_model.py │ │ │ └── t5_spec.py │ │ ├── __init__.py │ │ ├── backends.py │ │ ├── bert │ │ │ ├── __init__.py │ │ │ ├── bert_layer_specs.py │ │ │ ├── bert_lm_head.py │ │ │ ├── bert_model.py │ │ │ └── pooler.py │ │ ├── common │ │ │ ├── __init__.py │ │ │ ├── embeddings │ │ │ │ ├── __init__.py │ │ │ │ ├── language_model_embedding.py │ │ │ │ ├── relative_pos_embedding.py │ │ │ │ ├── rope_utils.py │ │ │ │ ├── rotary_pos_embedding.py │ │ │ │ └── yarn_rotary_pos_embedding.py │ │ │ ├── language_module │ │ │ │ ├── __init__.py │ │ │ │ └── language_module.py │ │ │ ├── model_chunk_schedule_plan.py │ │ │ └── vision_module │ │ │ │ ├── __init__.py │ │ │ │ └── vision_module.py │ │ ├── gpt │ │ │ ├── __init__.py │ │ │ ├── fine_grained_callables.py │ │ │ ├── gpt_layer_specs.py │ │ │ ├── gpt_model.py │ │ │ ├── heterogeneous │ │ │ │ └── heterogeneous_layer_specs.py │ │ │ └── moe_module_specs.py │ │ ├── huggingface │ │ │ ├── __init__.py │ │ │ ├── clip_model.py │ │ │ ├── module.py │ │ │ └── qwen_model.py │ │ ├── mamba │ │ │ ├── __init__.py │ │ │ ├── mamba_layer_specs.py │ │ │ └── mamba_model.py │ │ ├── mimo │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── config │ │ │ │ ├── __init__.py │ │ │ │ └── base_configs.py │ │ │ ├── model │ │ │ │ ├── __init__.py │ │ │ │ └── base.py │ │ │ └── submodules │ │ │ │ ├── audio.py │ │ │ │ ├── base.py │ │ │ │ └── vision.py │ │ ├── multimodal │ │ │ ├── __init__.py │ │ │ ├── context_parallel.py │ │ │ ├── llava_model.py │ │ │ └── llava_spec.py │ │ ├── retro │ │ │ ├── __init__.py │ │ │ ├── base_attention.py │ │ │ ├── config.py │ │ │ ├── decoder_attention.py │ │ │ ├── decoder_spec.py │ │ │ ├── encoder_attention.py │ │ │ ├── encoder_spec.py │ │ │ ├── model.py │ │ │ └── utils.py │ │ └── vision │ │ │ ├── __init__.py │ │ │ ├── clip_vit_model.py │ │ │ ├── multimodal_projector.py │ │ │ ├── radio.py │ │ │ └── vit_layer_specs.py │ ├── msc_utils.py │ ├── nccl_allocator.py │ ├── num_microbatches_calculator.py │ ├── optimizer │ │ ├── __init__.py │ │ ├── clip_grads.py │ │ ├── cpu_offloading │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ └── hybrid_optimizer.py │ │ ├── distrib_optimizer.py │ │ ├── grad_scaler.py │ │ ├── optimizer.py │ │ └── optimizer_config.py │ ├── optimizer_param_scheduler.py │ ├── package_info.py │ ├── packed_seq_params.py │ ├── parallel_state.py │ ├── pipeline_parallel │ │ ├── __init__.py │ │ ├── bridge_communicator.py │ │ ├── combined_1f1b.py │ │ ├── p2p_communication.py │ │ ├── schedules.py │ │ └── utils.py │ ├── post_training │ │ ├── __init__.py │ │ └── modelopt │ │ │ ├── __init__.py │ │ │ ├── gpt │ │ │ ├── __init__.py │ │ │ ├── model_specs.py │ │ │ └── state_dict_hooks.py │ │ │ ├── layers.py │ │ │ └── mamba │ │ │ ├── __init__.py │ │ │ └── model_specs.py │ ├── process_groups_config.py │ ├── quantization │ │ ├── __init__.py │ │ ├── quant_config.py │ │ └── utils.py │ ├── requirements.txt │ ├── rerun_state_machine.py │ ├── safe_globals.py │ ├── ssm │ │ ├── __init__.py │ │ ├── mamba_block.py │ │ ├── mamba_context_parallel.py │ │ ├── mamba_hybrid_layer_allocation.py │ │ ├── mamba_layer.py │ │ ├── mamba_mixer.py │ │ ├── mlp_layer.py │ │ └── triton_cache_manager.py │ ├── tensor_parallel │ │ ├── __init__.py │ │ ├── cross_entropy.py │ │ ├── data.py │ │ ├── inference_layers.py │ │ ├── layers.py │ │ ├── mappings.py │ │ ├── random.py │ │ └── utils.py │ ├── timers.py │ ├── tokenizers │ │ ├── __init__.py │ │ ├── base_tokenizer.py │ │ ├── megatron_tokenizer.py │ │ └── text │ │ │ ├── __init__.py │ │ │ ├── libraries │ │ │ ├── __init__.py │ │ │ ├── abstract_tokenizer.py │ │ │ ├── bytelevel_tokenizer.py │ │ │ ├── chat_template.py │ │ │ ├── huggingface_tokenizer.py │ │ │ ├── megatron_hf_tokenizer.py │ │ │ ├── null_tokenizer.py │ │ │ ├── sentencepiece_tokenizer.py │ │ │ └── tiktoken_tokenizer.py │ │ │ ├── models │ │ │ ├── __init__.py │ │ │ ├── bert_tokenizer.py │ │ │ ├── default_tokenizer.py │ │ │ ├── gpt_tokenizer.py │ │ │ ├── mamba_tokenizer.py │ │ │ ├── retro_tokenizer.py │ │ │ └── t5_tokenizer.py │ │ │ ├── text_tokenizer.py │ │ │ └── utils │ │ │ └── build_tokenizer.py │ ├── transformer │ │ ├── __init__.py │ │ ├── attention.py │ │ ├── cuda_graphs.py │ │ ├── custom_layers │ │ │ ├── __init__.py │ │ │ └── transformer_engine.py │ │ ├── dot_product_attention.py │ │ ├── enums.py │ │ ├── fsdp_dtensor_checkpoint.py │ │ ├── heterogeneous │ │ │ ├── heterogeneous_config.py │ │ │ └── linear_replacements.py │ │ ├── identity_op.py │ │ ├── mlp.py │ │ ├── module.py │ │ ├── moe │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── experts.py │ │ │ ├── fused_a2a.py │ │ │ ├── grouped_gemm_util.py │ │ │ ├── moe_layer.py │ │ │ ├── moe_utils.py │ │ │ ├── router.py │ │ │ ├── shared_experts.py │ │ │ ├── token_dispatcher.py │ │ │ └── upcycling_utils.py │ │ ├── multi_latent_attention.py │ │ ├── multi_token_prediction.py │ │ ├── pipeline_parallel_layer_layout.py │ │ ├── spec_utils.py │ │ ├── torch_layer_norm.py │ │ ├── torch_norm.py │ │ ├── transformer_block.py │ │ ├── transformer_config.py │ │ ├── transformer_layer.py │ │ └── utils.py │ └── utils.py ├── legacy │ ├── data │ │ ├── __init__.py │ │ ├── autoaugment.py │ │ ├── biencoder_dataset_utils.py │ │ ├── dataset_utils.py │ │ ├── ict_dataset.py │ │ ├── image_folder.py │ │ ├── multimodal_dataset.py │ │ ├── orqa_wiki_dataset.py │ │ ├── realm_dataset_utils.py │ │ ├── realm_index.py │ │ └── vit_dataset.py │ ├── fp16_deprecated │ │ └── loss_scaler.py │ ├── fused_kernels │ │ ├── __init__.py │ │ ├── compat.h │ │ ├── tests │ │ │ ├── __init__.py │ │ │ └── test_fused_kernels.py │ │ └── type_shim.h │ ├── indexer.py │ ├── model │ │ ├── __init__.py │ │ ├── bert_model.py │ │ ├── biencoder_model.py │ │ ├── classification.py │ │ ├── enums.py │ │ ├── fused_bias_gelu.py │ │ ├── fused_layer_norm.py │ │ ├── fused_softmax.py │ │ ├── gpt_model.py │ │ ├── language_model.py │ │ ├── module.py │ │ ├── multiple_choice.py │ │ ├── realm_model.py │ │ ├── rms_norm.py │ │ ├── t5_model.py │ │ ├── transformer.py │ │ ├── utils.py │ │ └── vision │ │ │ ├── classification.py │ │ │ ├── dino.py │ │ │ ├── esvit_swin_backbone.py │ │ │ ├── inpainting.py │ │ │ ├── knn_monitor.py │ │ │ ├── mit_backbone.py │ │ │ ├── swin_backbone.py │ │ │ ├── utils.py │ │ │ └── vit_backbone.py │ └── mpu │ │ └── tests │ │ ├── __init__.py │ │ ├── commons.py │ │ ├── test_cross_entropy.py │ │ ├── test_data.py │ │ ├── test_initialize.py │ │ ├── test_layers.py │ │ └── test_random.py ├── post_training │ ├── __init__.py │ ├── arguments.py │ ├── checkpointing.py │ ├── docs │ │ └── distillation.md │ ├── generate.py │ ├── loss_func.py │ ├── model_builder.py │ ├── non_loss_data_func.py │ └── utils.py ├── rl │ ├── README.md │ ├── __init__.py │ ├── agent │ │ ├── __init__.py │ │ ├── api.py │ │ ├── pass_at_evaluation_agent.py │ │ ├── remote_agent.py │ │ ├── reward_only_agent.py │ │ └── weighted_multi_task.py │ ├── inference │ │ ├── __init__.py │ │ ├── api.py │ │ ├── chat_templates.py │ │ ├── inference_interface.py │ │ └── megatron.py │ ├── logging.py │ ├── rl_utils.py │ └── server │ │ ├── __init__.py │ │ ├── agent │ │ ├── __init__.py │ │ └── fastapi_env_server.py │ │ ├── api.py │ │ └── inference │ │ ├── __init__.py │ │ └── inference_interface_server.py └── training │ ├── __init__.py │ ├── arguments.py │ ├── async_utils.py │ ├── checkpointing.py │ ├── datasets │ ├── README.md │ ├── __init__.py │ ├── data_samplers.py │ ├── fim_dataset.py │ └── sft_dataset.py │ ├── dist_signal_handler.py │ ├── ft_integration.py │ ├── global_vars.py │ ├── initialize.py │ ├── inprocess_restart.py │ ├── log_handler.py │ ├── one_logger_utils.py │ ├── theoretical_memory_usage.py │ ├── tokenizer │ ├── __init__.py │ ├── bert_tokenization.py │ ├── gpt2_tokenization.py │ ├── multimodal_tokenizer.py │ ├── sft_tokenizer.py │ └── tokenizer.py │ ├── training.py │ ├── utils.py │ ├── wandb_utils.py │ └── yaml_arguments.py ├── model_provider.py ├── pretrain_bert.py ├── pretrain_gpt.py ├── pretrain_ict.py ├── pretrain_mamba.py ├── pretrain_retro.py ├── pretrain_t5.py ├── pretrain_vision_classify.py ├── pretrain_vision_dino.py ├── pretrain_vision_inpaint.py ├── pretrain_vlm.py ├── pyproject.toml ├── scripts └── check_api_backwards_compatibility.py ├── setup.py ├── tasks ├── data_utils.py ├── eval_utils.py └── finetune_utils.py ├── tests ├── __init__.py ├── functional_tests │ ├── __init__.py │ ├── python_test_utils │ │ ├── __init__.py │ │ ├── common.py │ │ ├── conftest.py │ │ ├── get_test_results_from_tensorboard_logs.py │ │ ├── test_inference_regular_pipeline.py │ │ ├── test_optimizer_grads_match.py │ │ ├── test_pretraining_regular_pipeline.py │ │ └── test_pretraining_resume_checkpoint_pipeline.py │ ├── shell_test_utils │ │ ├── _run_training.sh │ │ ├── run_ci_test.sh │ │ └── start_interactive_job.sh │ └── test_cases │ │ ├── bert │ │ ├── bert_mcore_tp1_pp2 │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── bert_mcore_tp1_pp4_vp2 │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── bert_mcore_tp2_pp2 │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── bert_mcore_tp2_pp2_frozen_resume_torch_dist │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── bert_mcore_tp2_pp2_local_spec │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── bert_mcore_tp2_pp2_resume_torch_dist │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── bert_mcore_tp2_pp2_resume_torch_dist_local_spec │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── bert_mcore_tp4_pp1 │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ └── bert_release │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── common │ │ └── ckpt_converter │ │ │ ├── __main__.py │ │ │ └── model_config.yaml │ │ ├── gpt-nemo │ │ ├── bert-nemo_340m_mr_mbs2_gbs32_mcore_te_tp2_pp2_1N8G │ │ │ └── model_config.yaml │ │ ├── gemma2-nemo_2b_mr_mbs1_gbs8_mcore_te_tp4_pp1_cp1_1N8G │ │ │ └── model_config.yaml │ │ ├── llama3-nemo_8b_mr_mbs1_gbs8_mcore_te_8experts_tp2_ep2_pp2_dgx_a100_1N8G │ │ │ └── model_config.yaml │ │ ├── llama3-nemo_8b_mr_mbs4_gbs64_mcore_te_tp1_pp1_cp2_dgx_a100_1N8G │ │ │ └── model_config.yaml │ │ ├── mixtral-nemo_8x7b_mr_mbs1_gbs8_mcore_te_tp2_pp1_ep2_1N8G │ │ │ └── model_config.yaml │ │ └── t5-nemo_220m_mr_mbs4_gbs64_te_tp1_pp1_1N8G │ │ │ └── model_config.yaml │ │ ├── gpt │ │ ├── gpt3_15b_8t_release │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_15b_8t_release_sm │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_7b_tp1_pp4_memory_speed │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_7b_tp4_pp1_memory_speed │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_reruns_disable │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_reruns_enable │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_reruns_persistent_1 │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_reruns_persistent_2 │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_reruns_reshard │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_reruns_resume │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_reruns_resume_check_grads │ │ │ ├── README.md │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_reruns_transient │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp1_pp1_dist_optimizer_fim_dataset │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ ├── golden_values_lts_dgxa100_dracooci-ord.json │ │ │ ├── golden_values_lts_dgxa100_dracooci.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp1_pp1_frozen_resume_torch_dist_dist_optimizer │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ ├── golden_values_lts_dgxa100_dracooci-ord.json │ │ │ ├── golden_values_lts_dgxa100_dracooci.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_no_mmap_bin_files │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ ├── golden_values_lts_dgxa100_dracooci-ord.json │ │ │ ├── golden_values_lts_dgxa100_dracooci.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp1_pp1_uniform_full_recompute │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp1_pp2_cp4_a2a_p2p_nondeterministic │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp1_pp2_resume_torch_dist_cp4_a2a_p2p_nondeterministic │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ ├── golden_values_lts_dgxa100_dracooci-ord.json │ │ │ ├── golden_values_lts_dgxa100_dracooci.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp1_pp2_rope_embeddings │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp1_pp2_rope_embeddings_interleaved_no_fusion │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp1_pp4_disable_bias_linear │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp1_pp4_frozen_resume_torch_dist_swiglu │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp1_pp4_persistent_ckpt_disable_bias_linear │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ ├── golden_values_lts_dgxa100_dracooci-ord.json │ │ │ ├── golden_values_lts_dgxa100_dracooci.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp1_pp4_resume_torch_dist_sequence_parallel │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp1_pp4_resume_torch_dist_swiglu │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ ├── golden_values_lts_dgxa100_dracooci-ord.json │ │ │ ├── golden_values_lts_dgxa100_dracooci.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp1_pp4_sequence_parallel │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp1_pp4_swiglu │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp1_pp4_untie_embeddings_and_outputs │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp1_pp4_vp1 │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ ├── golden_values_lts_dgxa100_dracooci-ord.json │ │ │ ├── golden_values_lts_dgxa100_dracooci.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp1_pp4_vp1_calculate_per_token_loss │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp1_pp4_vp1_decoupled_lr │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_dev_dgxh100_dgxc.json │ │ │ ├── golden_values_lts.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ ├── golden_values_lts_dgxa100_dracooci-ord.json │ │ │ ├── golden_values_lts_dgxa100_dracooci.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_untied │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ ├── golden_values_lts_dgxa100_dracooci-ord.json │ │ │ ├── golden_values_lts_dgxa100_dracooci.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ ├── golden_values_lts_dgxa100_dracooci-ord.json │ │ │ ├── golden_values_lts_dgxa100_dracooci.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ ├── golden_values_lts_dgxa100_dracooci-ord.json │ │ │ ├── golden_values_lts_dgxa100_dracooci.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ ├── golden_values_lts_dgxa100_dracooci-ord.json │ │ │ ├── golden_values_lts_dgxa100_dracooci.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ ├── golden_values_lts_dgxa100_dracooci-ord.json │ │ │ ├── golden_values_lts_dgxa100_dracooci.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ ├── golden_values_lts_dgxa100_dracooci-ord.json │ │ │ ├── golden_values_lts_dgxa100_dracooci.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ ├── golden_values_lts_dgxa100_dracooci-ord.json │ │ │ ├── golden_values_lts_dgxa100_dracooci.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ ├── golden_values_lts_dgxa100_dracooci-ord.json │ │ │ ├── golden_values_lts_dgxa100_dracooci.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ ├── golden_values_lts_dgxa100_dracooci-ord.json │ │ │ ├── golden_values_lts_dgxa100_dracooci.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp2_pp1_cp2_nondeterministic │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_cp2_nondeterministic │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp2_pp1_fsdp2_resume_torch_dist │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp2_pp1_modelopt_distill_resume │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp2_pp1_multi_dist_optimizer_instances │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ ├── golden_values_lts_dgxa100_dracooci-ord.json │ │ │ ├── golden_values_lts_dgxa100_dracooci.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ ├── golden_values_lts_dgxa100_dracooci-ord.json │ │ │ ├── golden_values_lts_dgxa100_dracooci.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp2_pp2 │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp2_pp2_cp2 │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ ├── golden_values_lts_dgxa100_dracooci-ord.json │ │ │ ├── golden_values_lts_dgxa100_dracooci.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ ├── golden_values_lts_dgxa100_dracooci-ord.json │ │ │ ├── golden_values_lts_dgxa100_dracooci.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp2_pp2_cp2_etp4_dp_last │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ ├── golden_values_lts_dgxa100_dracooci-ord.json │ │ │ ├── golden_values_lts_dgxa100_dracooci.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp2_pp2_cp2_nondeterministic │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ ├── golden_values_lts_dgxa100_dracooci-ord.json │ │ │ ├── golden_values_lts_dgxa100_dracooci.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ ├── golden_values_lts_dgxa100_dracooci-ord.json │ │ │ ├── golden_values_lts_dgxa100_dracooci.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp2_pp2_ddp_average_in_collective │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp2_pp2_defer_embedding_wgrad_compute │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed │ │ │ └── golden_values_dev_dgxh100_dgxc.json │ │ ├── gpt3_mcore_te_tp2_pp2_mla │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp2_pp2_no_create_attention_mask_in_dataloader │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp2_pp2_no_mmap_bin_files │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp2_pp2_resume_torch_dist │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ ├── golden_values_lts_dgxa100_dracooci-ord.json │ │ │ ├── golden_values_lts_dgxa100_dracooci.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ ├── golden_values_lts_dgxa100_dracooci-ord.json │ │ │ ├── golden_values_lts_dgxa100_dracooci.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ ├── golden_values_lts_dgxa100_dracooci-ord.json │ │ │ ├── golden_values_lts_dgxa100_dracooci.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ ├── golden_values_lts_dgxa100_dracooci-ord.json │ │ │ ├── golden_values_lts_dgxa100_dracooci.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ ├── golden_values_lts_dgxa100_dracooci-ord.json │ │ │ ├── golden_values_lts_dgxa100_dracooci.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ ├── golden_values_lts_dgxa100_dracooci-ord.json │ │ │ ├── golden_values_lts_dgxa100_dracooci.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_mmap_bin_files │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ ├── golden_values_lts_dgxa100_dracooci-ord.json │ │ │ ├── golden_values_lts_dgxa100_dracooci.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp2_zp_z3_resume_fsdp_dtensor │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ ├── golden_values_lts_dgxa100_dracooci-ord.json │ │ │ ├── golden_values_lts_dgxa100_dracooci.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp4_pp1_qk_layernorm_test_mode │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ ├── golden_values_lts_dgxa100_dracooci-ord.json │ │ │ ├── golden_values_lts_dgxa100_dracooci.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_dev_dgxh100_dgxc.json │ │ │ ├── golden_values_lts.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ ├── golden_values_lts_dgxa100_dracooci-ord.json │ │ │ ├── golden_values_lts_dgxa100_dracooci.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ ├── golden_values_lts_dgxa100_dracooci-ord.json │ │ │ ├── golden_values_lts_dgxa100_dracooci.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ ├── golden_values_lts_dgxa100_dracooci-ord.json │ │ │ ├── golden_values_lts_dgxa100_dracooci.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_tp1_pp1_dist_optimizer_overlap_grad_reduce_param_gather │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_tp1_pp1_fsdp2_resume_torch_dist_te │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ ├── golden_values_lts_dgxa100_dracooci-ord.json │ │ │ ├── golden_values_lts_dgxa100_dracooci.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_tp1_pp2 │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ ├── golden_values_lts_dgxa100_dracooci-ord.json │ │ │ ├── golden_values_lts_dgxa100_dracooci.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_tp1_pp2_fp16 │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_tp1_pp2_resume_torch_dist │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgxa100_dracooci-ord.json │ │ │ ├── golden_values_dev_dgxa100_dracooci.json │ │ │ ├── golden_values_lts.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ ├── golden_values_lts_dgxa100_dracooci-ord.json │ │ │ ├── golden_values_lts_dgxa100_dracooci.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_tp1_pp4 │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ ├── golden_values_lts_dgxa100_dracooci-ord.json │ │ │ ├── golden_values_lts_dgxa100_dracooci.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_tp1_pp4_resume_torch_dist │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgxa100_dracooci-ord.json │ │ │ ├── golden_values_dev_dgxa100_dracooci.json │ │ │ ├── golden_values_lts.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ ├── golden_values_lts_dgxa100_dracooci-ord.json │ │ │ ├── golden_values_lts_dgxa100_dracooci.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_tp2_pp2_resume_torch_dist_uninstall_te │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_tp2_pp2_uninstall_te │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_tp4_pp1 │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_tp4_pp1_resume_torch │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ ├── golden_values_lts_dgxa100_dracooci-ord.json │ │ │ ├── golden_values_lts_dgxa100_dracooci.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_tp4_pp1_resume_torch_dist │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ ├── golden_values_lts_dgxa100_dracooci-ord.json │ │ │ ├── golden_values_lts_dgxa100_dracooci.json │ │ │ └── model_config.yaml │ │ ├── gpt3_weekly_dgx_b200_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap │ │ │ ├── model_config.yaml │ │ │ └── tp_comm_overlap_cfg.yaml │ │ ├── gpt3_weekly_dgx_b200_mcore_tp4_cp2_current_scaling_native_fp8_tp_fsdp │ │ │ └── model_config.yaml │ │ ├── gpt3_weekly_dgx_b200_mcore_tp4_cp2_current_scaling_native_fp8_tp_sp_cp_tp_overlap │ │ │ └── model_config.yaml │ │ ├── gpt3_weekly_dgx_b200_mcore_tp4_cp2_mxfp8_tp_sp_cp │ │ │ └── model_config.yaml │ │ ├── gpt3_weekly_dgx_b200_mcore_tp4_cp2_native_fp8_tp_sp_cp_tp_overlap │ │ │ └── model_config.yaml │ │ ├── gpt3_weekly_dgx_h100_mcore_nondet_tp1_pp1_fp8_no_model_parallel │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_weekly_dgx_h100_mcore_tp1_pp1_bf16_baseline │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_weekly_dgx_h100_mcore_tp1_pp1_fp8_no_model_parallel │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_weekly_dgx_h100_mcore_tp1_pp2_fp8_pp │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_weekly_dgx_h100_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap │ │ │ ├── model_config.yaml │ │ │ └── tp_comm_overlap_cfg.yaml │ │ ├── gpt3_weekly_dgx_h100_mcore_tp2_pp2_fp8_tp_pp │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_weekly_dgx_h100_mcore_tp2_pp2_fp8_tp_pp_sp │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_weekly_dgx_h100_mcore_tp2_pp2_native_fp8_tp_pp_sp │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_weekly_dgx_h100_mcore_tp4_cp2_current_scaling_native_fp8_tp_fsdp │ │ │ └── model_config.yaml │ │ ├── gpt3_weekly_dgx_h100_mcore_tp4_cp2_current_scaling_native_fp8_tp_sp_cp_tp_overlap │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_weekly_dgx_h100_mcore_tp4_cp2_native_fp8_tp_sp_cp_tp_overlap │ │ │ └── model_config.yaml │ │ ├── gpt3_weekly_dgx_h100_mcore_tp4_pp2_fp8_tp_pp │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_fp8_logitsmatch │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ └── model_config.yaml │ │ ├── gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_logitsmatch_decode_graphs_only │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ └── model_config.yaml │ │ ├── gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_validation │ │ │ ├── cuda_graphs.py │ │ │ ├── cuda_graphs.sh │ │ │ └── model_config.yaml │ │ ├── gpt_dynamic_inference_tp1_pp1_583m_logitsmatch │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ └── model_config.yaml │ │ ├── gpt_dynamic_inference_tp1_pp1_dp8_583m_logitsmatch_zmq │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ └── model_config.yaml │ │ ├── gpt_dynamic_inference_tp1_pp1_dp8_583m_throughputtest_zmq │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ └── model_config.yaml │ │ ├── gpt_dynamic_inference_tp1_pp8_dp1_583m_logitsmatch_zmq │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ └── model_config.yaml │ │ ├── gpt_dynamic_inference_tp2_pp2_dp2_583m_logitsmatch_zmq │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ └── model_config.yaml │ │ ├── gpt_dynamic_inference_tp8_pp1_583m_logitsmatch │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ └── model_config.yaml │ │ ├── gpt_dynamic_inference_tp8_pp1_dp1_583m_logitsmatch_zmq │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ └── model_config.yaml │ │ ├── gpt_static_inference_tp1_pp1_16b_multiprompt_tokensmatch │ │ │ ├── README.md │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── model_config.yaml │ │ │ └── test_prompts.jsonl │ │ ├── gpt_static_inference_tp1_pp1_583m_cudagraphs │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ └── model_config.yaml │ │ ├── gpt_static_inference_tp1_pp1_583m_fp8_cudagraphs │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ └── model_config.yaml │ │ └── gpt_static_inference_tp1_pp1_583m_logitsmatch │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ └── model_config.yaml │ │ ├── hybrid │ │ ├── hybrid_dynamic_inference_tp1_pp1_dp8_583m │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ └── model_config.yaml │ │ ├── hybrid_mr_mcore_te_tp1_pp1_cp1_dgx_a100_1N8G │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── hybrid_mr_mcore_te_tp1_pp4_cp1_dgx_a100_1N8G │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── hybrid_mr_mcore_te_tp2_pp1_cp1_dgx_a100_1N8G │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── hybrid_mr_mcore_te_tp2_pp1_cp4_dgx_a100_1N8G │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── hybrid_static_inference_tp1_pp1_2B_cudagraphs │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ └── model_config.yaml │ │ └── hybrid_static_inference_tp1_pp1_2B_logitsmatch │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ └── model_config.yaml │ │ ├── mimo │ │ └── mimo_vlm_pretrain_convergence_tp1_pp1_cp1_dp8 │ │ │ ├── golden_values_dev.json │ │ │ └── model_config.yaml │ │ ├── mixtral │ │ ├── deepseekv3_proxy_flex_tp1pp4emp16etp1cp1_release │ │ │ └── model_config.yaml │ │ ├── mixtral_8x22b_tp2pp8ep8vpp1_release │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── mixtral_8x7b_alltoall_tp2pp4ep4_release │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── mixtral_8x7b_alltoall_tp2pp4ep4_release_sm │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ └── model_config.yaml │ │ └── mixtral_8x7b_tp1pp4ep8vpp8_release │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── moe │ │ ├── gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_dev_dgxa100_dracooci-ord.json │ │ │ ├── golden_values_dev_dgxa100_dracooci.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ ├── golden_values_lts_dgxa100_dracooci-ord.json │ │ │ ├── golden_values_lts_dgxa100_dracooci.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_dev_dgxa100_dracooci-ord.json │ │ │ ├── golden_values_dev_dgxa100_dracooci.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ ├── golden_values_lts_dgxa100_dracooci-ord.json │ │ │ ├── golden_values_lts_dgxa100_dracooci.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_dist_optimizer │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_groupedGEMM │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_groupedGEMM │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_top2router │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ ├── golden_values_lts_dgxa100_dracooci-ord.json │ │ │ ├── golden_values_lts_dgxa100_dracooci.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp2_pp1_te_8experts2parallel_dist_optimizer │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp2_pp1_te_8experts2parallel_top2router │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp2_pp1_te_8experts_etp1_ep4 │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ ├── golden_values_lts_dgxa100_dracooci-ord.json │ │ │ ├── golden_values_lts_dgxa100_dracooci.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4 │ │ │ ├── golden_values_dev.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_dev_dgxh100_dgxc.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8 │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp2_pp2_ep4_etp1_selective_recompute_experimental │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ ├── golden_values_lts_dgxa100_dracooci.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_dev_dgxa100_dracooci-ord.json │ │ │ ├── golden_values_dev_dgxa100_dracooci.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_dev_dgxa100_dracooci-ord.json │ │ │ ├── golden_values_dev_dgxa100_dracooci.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_dev_dgxa100_dracooci-ord.json │ │ │ ├── golden_values_dev_dgxa100_dracooci.json │ │ │ ├── golden_values_lts.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ ├── golden_values_lts_dgxa100_dracooci-ord.json │ │ │ ├── golden_values_lts_dgxa100_dracooci.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_dev_dgxa100_dracooci-ord.json │ │ │ ├── golden_values_dev_dgxa100_dracooci.json │ │ │ ├── golden_values_lts.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ ├── golden_values_lts_dgxa100_dracooci-ord.json │ │ │ ├── golden_values_lts_dgxa100_dracooci.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_tp2_pp2_ep2_resume_torch_dist_te_4experts2parallel │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel │ │ │ ├── golden_values_dev.json │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_dev_dgxa100_dracooci-ord.json │ │ │ ├── golden_values_dev_dgxa100_dracooci.json │ │ │ ├── golden_values_lts.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ ├── golden_values_lts_dgxa100_dracooci-ord.json │ │ │ ├── golden_values_lts_dgxa100_dracooci.json │ │ │ └── model_config.yaml │ │ ├── gpt3_mcore_tp2_pp2_resume_torch_dist_te_2experts │ │ │ ├── golden_values_dev.json │ │ │ ├── golden_values_lts.json │ │ │ └── model_config.yaml │ │ ├── gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_optimizer │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ ├── gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_dev_dgxh100_dgxc.json │ │ │ └── model_config.yaml │ │ ├── gpt_dynamic_inference_cuda_graphs_pad_tp4_pp1_ep4_16B_logitsmatch │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ └── model_config.yaml │ │ ├── gpt_dynamic_inference_tp4_pp1_ep4_16B_logitsmatch │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ └── model_config.yaml │ │ ├── gpt_static_inference_cuda_graphs_pad_tp4_pp1_ep4_16B_logitsmatch │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ └── model_config.yaml │ │ ├── gpt_static_inference_tp1_pp1_ep1_16B_logitsmatch │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ └── model_config.yaml │ │ └── gpt_static_inference_tp4_pp1_ep4_16B_logitsmatch │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ └── model_config.yaml │ │ ├── multimodal-llava │ │ ├── multimodal_llava_mcore_te_tp1_pp1 │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ └── multimodal_llava_mcore_te_tp4_sp_cp2 │ │ │ ├── golden_values_dev_dgx_a100.json │ │ │ ├── golden_values_dev_dgx_h100.json │ │ │ ├── golden_values_lts_dgx_a100.json │ │ │ └── model_config.yaml │ │ └── t5 │ │ ├── t5_11b_mcore_tp4_pp1 │ │ ├── golden_values_dev_dgx_a100.json │ │ ├── golden_values_dev_dgx_h100.json │ │ └── model_config.yaml │ │ ├── t5_mcore_te_tp1_pp1_vp1_resume_torch │ │ ├── golden_values_dev_dgx_a100.json │ │ ├── golden_values_dev_dgx_h100.json │ │ ├── golden_values_dev_dgxa100_dracooci-ord.json │ │ ├── golden_values_dev_dgxa100_dracooci.json │ │ ├── golden_values_lts_dgx_a100.json │ │ └── model_config.yaml │ │ ├── t5_mcore_te_tp2_pp1_vp1 │ │ ├── golden_values_dev_dgx_a100.json │ │ ├── golden_values_dev_dgx_h100.json │ │ ├── golden_values_dev_dgxa100_dracooci-ord.json │ │ ├── golden_values_dev_dgxa100_dracooci.json │ │ ├── golden_values_lts_dgx_a100.json │ │ └── model_config.yaml │ │ ├── t5_mcore_te_tp2_pp1_vp1_sequence_parallel │ │ ├── golden_values_dev_dgx_a100.json │ │ ├── golden_values_dev_dgx_h100.json │ │ ├── golden_values_dev_dgxa100_dracooci-ord.json │ │ ├── golden_values_dev_dgxa100_dracooci.json │ │ ├── golden_values_lts_dgx_a100.json │ │ └── model_config.yaml │ │ ├── t5_mcore_te_tp4_pp1 │ │ ├── golden_values_dev_dgx_a100.json │ │ ├── golden_values_dev_dgx_h100.json │ │ ├── golden_values_lts_dgx_a100.json │ │ └── model_config.yaml │ │ ├── t5_mcore_te_tp4_pp1_resume_torch_dist │ │ ├── golden_values_dev_dgx_a100.json │ │ ├── golden_values_dev_dgx_h100.json │ │ ├── golden_values_lts_dgx_a100.json │ │ └── model_config.yaml │ │ ├── t5_mcore_tp1_pp1_vp1 │ │ ├── golden_values_dev_dgx_a100.json │ │ ├── golden_values_dev_dgx_h100.json │ │ ├── golden_values_dev_dgxa100_dracooci-ord.json │ │ ├── golden_values_dev_dgxa100_dracooci.json │ │ ├── golden_values_lts_dgx_a100.json │ │ └── model_config.yaml │ │ ├── t5_mcore_tp1_pp1_vp1_resume_torch │ │ ├── golden_values_dev_dgx_a100.json │ │ ├── golden_values_dev_dgx_h100.json │ │ ├── golden_values_dev_dgxa100_dracooci-ord.json │ │ ├── golden_values_dev_dgxa100_dracooci.json │ │ ├── golden_values_lts_dgx_a100.json │ │ └── model_config.yaml │ │ ├── t5_mcore_tp2_pp1_vp1 │ │ ├── golden_values_dev_dgx_a100.json │ │ ├── golden_values_dev_dgx_h100.json │ │ ├── golden_values_dev_dgxa100_dracooci-ord.json │ │ ├── golden_values_dev_dgxa100_dracooci.json │ │ ├── golden_values_lts_dgx_a100.json │ │ └── model_config.yaml │ │ ├── t5_mcore_tp4_pp1 │ │ ├── golden_values_dev_dgx_a100.json │ │ ├── golden_values_dev_dgx_h100.json │ │ ├── golden_values_lts_dgx_a100.json │ │ └── model_config.yaml │ │ ├── t5_mcore_tp4_pp1_resume_torch_dist │ │ ├── golden_values_dev_dgx_a100.json │ │ ├── golden_values_dev_dgx_h100.json │ │ ├── golden_values_lts_dgx_a100.json │ │ └── model_config.yaml │ │ ├── t5_release │ │ ├── golden_values_dev_dgx_h100.json │ │ ├── golden_values_lts_dgx_a100.json │ │ └── model_config.yaml │ │ ├── t5_weekly_mcore_te_tp2_pp1_vp1 │ │ └── golden_values_lts_dgx_a100.json │ │ └── t5_weekly_mcore_te_tp2_pp1_vp1_sequence_parallel │ │ └── golden_values_lts_dgx_a100.json ├── test_utils │ ├── python_scripts │ │ ├── approve_merge_gate.py │ │ ├── auto_reminder.py │ │ ├── auto_reminder_github.py │ │ ├── check_status_of_main.py │ │ ├── dashboard.py │ │ ├── download_coverage_results.py │ │ ├── download_golden_values.py │ │ ├── download_unit_tests_dataset.py │ │ ├── generate_jet_trigger_job.py │ │ ├── generate_local_jobs.py │ │ ├── launch_jet_workload.py │ │ ├── launch_nemo_run_workload.py │ │ ├── notify.py │ │ ├── recipe_parser.py │ │ ├── swap_pr_labels.py │ │ └── wait_for_resources.py │ └── recipes │ │ ├── _build-mcore-dev.yaml │ │ ├── _build-mcore-lts.yaml │ │ ├── _build-nemo.yaml │ │ ├── bert.yaml │ │ ├── ckpt_converter.yaml │ │ ├── gpt-dynamic-inference-cuda-graphs.yaml │ │ ├── gpt-dynamic-inference-with-coordinator.yaml │ │ ├── gpt-dynamic-inference.yaml │ │ ├── gpt-grads.yaml │ │ ├── gpt-nemo.yaml │ │ ├── gpt-static-inference.yaml │ │ ├── gpt.yaml │ │ ├── mamba-dynamic-inference.yaml │ │ ├── mamba-static-inference.yaml │ │ ├── mamba.yaml │ │ ├── mimo.yaml │ │ ├── moe-dynamic-inference.yaml │ │ ├── moe-static-inference.yaml │ │ ├── moe.yaml │ │ ├── multimodal-llava.yaml │ │ ├── t5.yaml │ │ └── unit-tests.yaml └── unit_tests │ ├── __init__.py │ ├── a2a_overlap │ ├── test_schedule_chunk_1f1b.py │ ├── test_schedule_layer_1f1b.py │ └── utils.py │ ├── conftest.py │ ├── data │ ├── __init__.py │ ├── test_bin_reader.py │ ├── test_builder.py │ ├── test_fim_dataset.py │ ├── test_gpt_dataset.py │ ├── test_multimodal_dataset.py │ ├── test_preprocess_data.py │ └── test_preprocess_mmdata.py │ ├── dist_checkpointing │ ├── __init__.py │ ├── conftest.py │ ├── models │ │ ├── __init__.py │ │ ├── common.py │ │ ├── test_bert_model.py │ │ ├── test_gpt_model.py │ │ ├── test_mamba.py │ │ ├── test_mlp_glu.py │ │ ├── test_moe_experts.py │ │ └── test_t5_model.py │ ├── test_async_save.py │ ├── test_checkpointable.py │ ├── test_flattened_resharding.py │ ├── test_fp8.py │ ├── test_fully_parallel.py │ ├── test_global_metadata_reuse.py │ ├── test_local.py │ ├── test_mapping.py │ ├── test_msc.py │ ├── test_nonpersistent.py │ ├── test_optimizer.py │ ├── test_pipeline_parallel_layout.py │ ├── test_replication.py │ ├── test_safe_globals.py │ ├── test_serialization.py │ ├── test_strict.py │ ├── test_torch_dist.py │ └── utils.py │ ├── distributed │ ├── fsdp │ │ └── test_mfsdp_fully_shard.py │ ├── test_distributed_data_parallel.py │ ├── test_finalize_model_grads.py │ ├── test_grad_reduce_for_replicated_embedder.py │ ├── test_grad_sync_with_expert_parallel.py │ ├── test_mcore_fully_sharded_data_parallel.py │ ├── test_param_and_grad_buffer.py │ ├── test_reduce_scatter_with_fp32_accumulation.py │ └── test_torch_fully_sharded_parallel.py │ ├── export │ └── trtllm │ │ ├── __init__.py │ │ ├── test_distributed_fp8.py │ │ ├── test_single_device_fp8.py │ │ ├── test_trtllm_distributed_gpu_converter.py │ │ ├── test_trtllm_helper.py │ │ ├── test_trtllm_layers.py │ │ └── test_trtllm_single_device_converter.py │ ├── find_test_cases.py │ ├── fusions │ ├── test_bias_dropout_fusion.py │ ├── test_mla_yarn_rope_apply.py │ ├── test_swiglu_fusion.py │ ├── test_torch_softmax.py │ └── test_weighted_squared_relu_fusion.py │ ├── inference │ ├── __init__.py │ ├── contexts │ │ └── test_dynamic_context.py │ ├── engines │ │ ├── __init__.py │ │ ├── test_dynamic_engine.py │ │ └── test_static_engine.py │ ├── model_inference_wrappers │ │ ├── __init__.py │ │ ├── gpt │ │ │ └── test_gpt_inference_wrapper.py │ │ ├── t5 │ │ │ └── test_t5_inference_wrapper.py │ │ └── test_model_inference_wrapper_config.py │ ├── test_common_inference_params.py │ ├── test_communication_utils.py │ ├── test_data_parallel_inference_coordinator.py │ ├── test_flash_decode.py │ ├── test_inference_utils.py │ ├── test_scheduler.py │ ├── test_wandb_logging.py │ └── text_generation_controllers │ │ ├── __init__.py │ │ ├── test_encoder_decoder_text_generation_controller.py │ │ ├── test_simple_text_generation_controller.py │ │ └── test_vlm_text_generation_controller.py │ ├── models │ ├── __init__.py │ ├── test_base_embedding.py │ ├── test_bert_model.py │ ├── test_clip_vit_model.py │ ├── test_gpt_model.py │ ├── test_gpt_model_quantization.py │ ├── test_heterogeneous_gpt_model.py │ ├── test_llava_model.py │ ├── test_mamba_model.py │ ├── test_mimo_audio_submodules.py │ ├── test_mimo_embedding_alignment.py │ ├── test_mimo_model.py │ ├── test_mimo_submodules.py │ ├── test_multimodal_projector.py │ ├── test_radio_model.py │ └── test_t5_model.py │ ├── pipeline_parallel │ ├── __init__.py │ ├── test_bridge_communicator.py │ ├── test_helpers.py │ ├── test_pipeline_layout.py │ └── test_schedules.py │ ├── post_training │ ├── __init__.py │ └── test_modelopt_module_spec.py │ ├── run_ci_test.sh │ ├── ssm │ ├── test_mamba_block.py │ ├── test_mamba_context_parallel.py │ ├── test_mamba_hybrid_layer_allocation.py │ ├── test_mamba_layer.py │ └── test_mamba_mixer.py │ ├── tensor_parallel │ ├── __init__.py │ ├── test_cross_entropy.py │ ├── test_data.py │ ├── test_initialization.py │ ├── test_layers.py │ ├── test_mappings.py │ ├── test_random.py │ └── test_tensor_parallel_utils.py │ ├── test_api_backwards_compat_setup.py │ ├── test_basic.py │ ├── test_checkpointing.py │ ├── test_fp8_param.py │ ├── test_fp8_utils.py │ ├── test_hyper_comm_grid.py │ ├── test_imports.py │ ├── test_inference.py │ ├── test_local_multi_tensor_fns.py │ ├── test_model_configs.py │ ├── test_nccl_allocator.py │ ├── test_num_microbatches_calculator.py │ ├── test_optimizer.py │ ├── test_optimizer_cpu_offloading.py │ ├── test_optimizer_param_scheduler.py │ ├── test_parallel_state.py │ ├── test_process_groups_config.py │ ├── test_rl_utils.py │ ├── test_tokenizer.py │ ├── test_training.py │ ├── test_utilities.py │ ├── test_utils.py │ ├── tokenizers │ └── test_tokenizer.py │ ├── transformer │ ├── __init__.py │ ├── moe │ │ ├── __init__.py │ │ ├── conftest.py │ │ ├── test_a2a_token_dispatcher.py │ │ ├── test_aux_loss.py │ │ ├── test_grouped_mlp.py │ │ ├── test_moe_layer.py │ │ ├── test_moe_layer_discrepancy.py │ │ ├── test_multihot_indices_converter.py │ │ ├── test_routers.py │ │ ├── test_sequential_mlp.py │ │ ├── test_shared_experts.py │ │ ├── test_token_dispatcher.py │ │ └── test_upcycling.py │ ├── test_attention.py │ ├── test_attention_no_rope.py │ ├── test_attention_packed_seq.py │ ├── test_core_attention.py │ ├── test_cuda_graphs.py │ ├── test_full_cuda_graph.py │ ├── test_mlp.py │ ├── test_module.py │ ├── test_multi_latent_attention.py │ ├── test_multi_token_prediction.py │ ├── test_quantization_config.py │ ├── test_relative_attention.py │ ├── test_retro_attention.py │ ├── test_rope.py │ ├── test_spec_customization.py │ ├── test_submodule_callables.py │ ├── test_transformer_block.py │ ├── test_transformer_block_custom_pgs.py │ ├── test_transformer_layer.py │ └── test_utils.py │ └── utils │ └── test_experimental_log_once.py ├── tools ├── __init__.py ├── autoformat.sh ├── bert_embedding │ ├── __init__.py │ ├── dataset.py │ ├── embed.py │ ├── external_libs.py │ └── huggingface.py ├── check_copyright.py ├── checkpoint │ ├── checkpoint_inspector.py │ ├── convert.py │ ├── hybrid_conversion.py │ ├── loader_base.py │ ├── loader_core.py │ ├── loader_legacy.py │ ├── loader_llama_mistral.py │ ├── loader_llava.py │ ├── loader_mixtral_hf.py │ ├── saver_base.py │ ├── saver_core.py │ ├── saver_hf_llava.py │ ├── saver_legacy.py │ ├── saver_llava.py │ ├── schema_base.py │ ├── schema_core.py │ ├── schema_hf.py │ └── utils.py ├── copyright.sh ├── linter.py ├── merge_datasets.py ├── preprocess_data.py ├── preprocess_data_nmt.py ├── preprocess_mmdata.py ├── report_theoretical_memory.py ├── retro │ ├── README.md │ ├── build_db.md │ ├── cli │ │ ├── __init__.py │ │ ├── __main__.py │ │ └── cli.py │ ├── config_utils.py │ ├── docker │ │ └── Dockerfile │ ├── preprocess_data.py │ ├── sft │ │ ├── README.md │ │ ├── dataset_conv.py │ │ ├── open_inst.sh │ │ ├── sft_retro.py │ │ └── sft_retro_lm.sh │ └── text_generation │ │ ├── evaluate.py │ │ ├── metrics.py │ │ ├── retro_api.py │ │ ├── retro_generate.sh │ │ ├── retro_generation.py │ │ └── retro_text_generation.py ├── run_inference_performance_test.py ├── run_mamba_text_generation_server.py ├── run_mamba_text_generation_server_completions.py ├── run_text_generation_server.py ├── run_vlm_text_generation.py ├── text_generation_cli.py ├── upgrade_dependencies.sh └── wait_daemon.sh ├── train_rl.py └── uv.lock /.flake8: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/.flake8 -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/.github/CODEOWNERS -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/.github/ISSUE_TEMPLATE/bug_report.md -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false 2 | 3 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/.github/ISSUE_TEMPLATE/feature_request.md -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/question.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/.github/ISSUE_TEMPLATE/question.md -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/regression.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/.github/ISSUE_TEMPLATE/regression.md -------------------------------------------------------------------------------- /.github/actions/action.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/.github/actions/action.yml -------------------------------------------------------------------------------- /.github/actions/check-nvidia-sso-membership/action.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/.github/actions/check-nvidia-sso-membership/action.yml -------------------------------------------------------------------------------- /.github/copy-pr-bot.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/.github/copy-pr-bot.yaml -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/.github/pull_request_template.md -------------------------------------------------------------------------------- /.github/workflows/_build_test_publish_wheel.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/.github/workflows/_build_test_publish_wheel.yml -------------------------------------------------------------------------------- /.github/workflows/_update_dependencies.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/.github/workflows/_update_dependencies.yml -------------------------------------------------------------------------------- /.github/workflows/auto-assign-milestone.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/.github/workflows/auto-assign-milestone.yml -------------------------------------------------------------------------------- /.github/workflows/auto-reminder-bot.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/.github/workflows/auto-reminder-bot.yml -------------------------------------------------------------------------------- /.github/workflows/auto-swap-labels.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/.github/workflows/auto-swap-labels.yml -------------------------------------------------------------------------------- /.github/workflows/auto-update-copy-pr-bot.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/.github/workflows/auto-update-copy-pr-bot.yml -------------------------------------------------------------------------------- /.github/workflows/build-test-publish-wheel.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/.github/workflows/build-test-publish-wheel.yml -------------------------------------------------------------------------------- /.github/workflows/cherry-pick-release-commit.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/.github/workflows/cherry-pick-release-commit.yml -------------------------------------------------------------------------------- /.github/workflows/cicd-approve-test-queue.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/.github/workflows/cicd-approve-test-queue.yml -------------------------------------------------------------------------------- /.github/workflows/cicd-main.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/.github/workflows/cicd-main.yml -------------------------------------------------------------------------------- /.github/workflows/close-inactive-issue-pr.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/.github/workflows/close-inactive-issue-pr.yml -------------------------------------------------------------------------------- /.github/workflows/community-bot.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/.github/workflows/community-bot.yml -------------------------------------------------------------------------------- /.github/workflows/copyright-check.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/.github/workflows/copyright-check.yml -------------------------------------------------------------------------------- /.github/workflows/dependabot.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/.github/workflows/dependabot.yml -------------------------------------------------------------------------------- /.github/workflows/install-test.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/.github/workflows/install-test.yml -------------------------------------------------------------------------------- /.github/workflows/multi-approval-bot.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/.github/workflows/multi-approval-bot.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/.gitignore -------------------------------------------------------------------------------- /.gitlab-ci.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/.gitlab-ci.yml -------------------------------------------------------------------------------- /.gitlab/labeler-config.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/.gitlab/labeler-config.yml -------------------------------------------------------------------------------- /.gitlab/scripts/build.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/.gitlab/scripts/build.sh -------------------------------------------------------------------------------- /.gitlab/scripts/check_imports.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/.gitlab/scripts/check_imports.py -------------------------------------------------------------------------------- /.gitlab/scripts/fetch-legacy-suite.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/.gitlab/scripts/fetch-legacy-suite.sh -------------------------------------------------------------------------------- /.gitlab/stages/00.pre.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/.gitlab/stages/00.pre.yml -------------------------------------------------------------------------------- /.gitlab/stages/01.build.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/.gitlab/stages/01.build.yml -------------------------------------------------------------------------------- /.gitlab/stages/02.test.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/.gitlab/stages/02.test.yml -------------------------------------------------------------------------------- /.gitlab/stages/03.integration-tests.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/.gitlab/stages/03.integration-tests.yml -------------------------------------------------------------------------------- /.gitlab/stages/04.functional-tests.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/.gitlab/stages/04.functional-tests.yml -------------------------------------------------------------------------------- /.gitlab/stages/05.publish.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/.gitlab/stages/05.publish.yml -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/.pre-commit-config.yaml -------------------------------------------------------------------------------- /.pylintrc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/.pylintrc -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- 1 | 3.12 -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/CHANGELOG.md -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/CONTRIBUTING.md -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/LICENSE -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/MANIFEST.in -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/README.md -------------------------------------------------------------------------------- /docker/.ngc_version.dev: -------------------------------------------------------------------------------- 1 | nvcr.io/nvidia/pytorch:25.09-py3 -------------------------------------------------------------------------------- /docker/.ngc_version.lts: -------------------------------------------------------------------------------- 1 | nvcr.io/nvidia/pytorch:25.09-py3 -------------------------------------------------------------------------------- /docker/Dockerfile.ci.dev: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/docker/Dockerfile.ci.dev -------------------------------------------------------------------------------- /docker/Dockerfile.ci.nemo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/docker/Dockerfile.ci.nemo -------------------------------------------------------------------------------- /docker/Dockerfile.linting: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/docker/Dockerfile.linting -------------------------------------------------------------------------------- /docker/common/install.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/docker/common/install.sh -------------------------------------------------------------------------------- /docker/common/install_source_wheels.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/docker/common/install_source_wheels.sh -------------------------------------------------------------------------------- /docker/patches/deepep.patch: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/docker/patches/deepep.patch -------------------------------------------------------------------------------- /docs/api-backwards-compatibility-check.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/docs/api-backwards-compatibility-check.md -------------------------------------------------------------------------------- /docs/llama_mistral.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/docs/llama_mistral.md -------------------------------------------------------------------------------- /docs/source/api-guide/context_parallel.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/docs/source/api-guide/context_parallel.rst -------------------------------------------------------------------------------- /docs/source/api-guide/custom_fsdp.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/docs/source/api-guide/custom_fsdp.md -------------------------------------------------------------------------------- /docs/source/api-guide/datasets.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/docs/source/api-guide/datasets.rst -------------------------------------------------------------------------------- /docs/source/api-guide/dist_checkpointing.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/docs/source/api-guide/dist_checkpointing.rst -------------------------------------------------------------------------------- /docs/source/api-guide/dist_optimizer.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/docs/source/api-guide/dist_optimizer.md -------------------------------------------------------------------------------- /docs/source/api-guide/distributed.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/docs/source/api-guide/distributed.rst -------------------------------------------------------------------------------- /docs/source/api-guide/fusions.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/docs/source/api-guide/fusions.rst -------------------------------------------------------------------------------- /docs/source/api-guide/index.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/docs/source/api-guide/index.rst -------------------------------------------------------------------------------- /docs/source/api-guide/models.bert.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/docs/source/api-guide/models.bert.rst -------------------------------------------------------------------------------- /docs/source/api-guide/models.gpt.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/docs/source/api-guide/models.gpt.rst -------------------------------------------------------------------------------- /docs/source/api-guide/models.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/docs/source/api-guide/models.rst -------------------------------------------------------------------------------- /docs/source/api-guide/models.t5.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/docs/source/api-guide/models.t5.rst -------------------------------------------------------------------------------- /docs/source/api-guide/moe.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/docs/source/api-guide/moe.rst -------------------------------------------------------------------------------- /docs/source/api-guide/multi_latent_attention.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/docs/source/api-guide/multi_latent_attention.rst -------------------------------------------------------------------------------- /docs/source/api-guide/multi_token_prediction.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/docs/source/api-guide/multi_token_prediction.md -------------------------------------------------------------------------------- /docs/source/api-guide/num_microbatches_calculator.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/docs/source/api-guide/num_microbatches_calculator.rst -------------------------------------------------------------------------------- /docs/source/api-guide/optimizer_cpu_offload.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/docs/source/api-guide/optimizer_cpu_offload.rst -------------------------------------------------------------------------------- /docs/source/api-guide/optimizer_param_scheduler.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/docs/source/api-guide/optimizer_param_scheduler.rst -------------------------------------------------------------------------------- /docs/source/api-guide/pipeline_parallel.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/docs/source/api-guide/pipeline_parallel.rst -------------------------------------------------------------------------------- /docs/source/api-guide/pipeline_parallel_layout.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/docs/source/api-guide/pipeline_parallel_layout.md -------------------------------------------------------------------------------- /docs/source/api-guide/tensor_parallel.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/docs/source/api-guide/tensor_parallel.rst -------------------------------------------------------------------------------- /docs/source/api-guide/tokenizers.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/docs/source/api-guide/tokenizers.md -------------------------------------------------------------------------------- /docs/source/api-guide/transformer.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/docs/source/api-guide/transformer.rst -------------------------------------------------------------------------------- /docs/source/images/context_parallel/CP_overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/docs/source/images/context_parallel/CP_overview.png -------------------------------------------------------------------------------- /docs/source/images/context_parallel/CP_results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/docs/source/images/context_parallel/CP_results.png -------------------------------------------------------------------------------- /docs/source/images/custom_fsdp/FSDP_Allreduce.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/docs/source/images/custom_fsdp/FSDP_Allreduce.png -------------------------------------------------------------------------------- /docs/source/images/custom_fsdp/FSDP_workflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/docs/source/images/custom_fsdp/FSDP_workflow.png -------------------------------------------------------------------------------- /docs/source/images/distrib_optimizer/data_flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/docs/source/images/distrib_optimizer/data_flow.png -------------------------------------------------------------------------------- /docs/source/images/moe/token_drop.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/docs/source/images/moe/token_drop.png -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/docs/source/index.rst -------------------------------------------------------------------------------- /docs/source/user-guide/index.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/docs/source/user-guide/index.rst -------------------------------------------------------------------------------- /examples/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/academic_paper_scripts/detxoify_lm/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/academic_paper_scripts/detxoify_lm/README.md -------------------------------------------------------------------------------- /examples/academic_paper_scripts/msdp/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/academic_paper_scripts/msdp/README.md -------------------------------------------------------------------------------- /examples/academic_paper_scripts/sc21/CONFIG.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/academic_paper_scripts/sc21/CONFIG.sh -------------------------------------------------------------------------------- /examples/academic_paper_scripts/sc21/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/academic_paper_scripts/sc21/README.md -------------------------------------------------------------------------------- /examples/academic_paper_scripts/sc21/SBATCH.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/academic_paper_scripts/sc21/SBATCH.sh -------------------------------------------------------------------------------- /examples/academic_paper_scripts/sc21/SRUN.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/academic_paper_scripts/sc21/SRUN.sh -------------------------------------------------------------------------------- /examples/academic_paper_scripts/sc21/run_table_1.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/academic_paper_scripts/sc21/run_table_1.sh -------------------------------------------------------------------------------- /examples/bert/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/bert/README.md -------------------------------------------------------------------------------- /examples/bert/train_bert_340m_distributed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/bert/train_bert_340m_distributed.sh -------------------------------------------------------------------------------- /examples/export/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/export/README.md -------------------------------------------------------------------------------- /examples/export/trtllm_export/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/export/trtllm_export/README.md -------------------------------------------------------------------------------- /examples/gpt3/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/gpt3/README.md -------------------------------------------------------------------------------- /examples/gpt3/gpt_config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/gpt3/gpt_config.yaml -------------------------------------------------------------------------------- /examples/gpt3/train_gpt3_175b_distributed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/gpt3/train_gpt3_175b_distributed.sh -------------------------------------------------------------------------------- /examples/inference/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/inference/README.md -------------------------------------------------------------------------------- /examples/inference/gpt/gpt_dynamic_inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/inference/gpt/gpt_dynamic_inference.py -------------------------------------------------------------------------------- /examples/inference/gpt/gpt_dynamic_inference_12b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/inference/gpt/gpt_dynamic_inference_12b.sh -------------------------------------------------------------------------------- /examples/inference/gpt/gpt_dynamic_inference_357m.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/inference/gpt/gpt_dynamic_inference_357m.sh -------------------------------------------------------------------------------- /examples/inference/gpt/gpt_static_inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/inference/gpt/gpt_static_inference.py -------------------------------------------------------------------------------- /examples/inference/gpt/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/inference/gpt/utils.py -------------------------------------------------------------------------------- /examples/inference/t5/simple_t5_batch_inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/inference/t5/simple_t5_batch_inference.py -------------------------------------------------------------------------------- /examples/llama/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/llama/README.md -------------------------------------------------------------------------------- /examples/llama/train_llama3_8b_h100_fp8.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/llama/train_llama3_8b_h100_fp8.sh -------------------------------------------------------------------------------- /examples/mamba/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/mamba/.gitignore -------------------------------------------------------------------------------- /examples/mamba/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/mamba/Dockerfile -------------------------------------------------------------------------------- /examples/mamba/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/mamba/README.md -------------------------------------------------------------------------------- /examples/mamba/run_text_gen_server_8b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/mamba/run_text_gen_server_8b.sh -------------------------------------------------------------------------------- /examples/mamba/run_text_gen_server_8b_gpt3.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/mamba/run_text_gen_server_8b_gpt3.sh -------------------------------------------------------------------------------- /examples/mamba/train.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/mamba/train.sh -------------------------------------------------------------------------------- /examples/mimo/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/mimo/avlm_inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/mimo/avlm_inference.py -------------------------------------------------------------------------------- /examples/mimo/configs/llava_avlm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/mimo/configs/llava_avlm.py -------------------------------------------------------------------------------- /examples/mimo/configs/llava_vlm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/mimo/configs/llava_vlm.py -------------------------------------------------------------------------------- /examples/mimo/configs/mock.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/mimo/configs/mock.py -------------------------------------------------------------------------------- /examples/mimo/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/mimo/data/__init__.py -------------------------------------------------------------------------------- /examples/mimo/data/avlm_sample_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/mimo/data/avlm_sample_loader.py -------------------------------------------------------------------------------- /examples/mimo/data/energon_avlm_task_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/mimo/data/energon_avlm_task_encoder.py -------------------------------------------------------------------------------- /examples/mimo/data/energon_vlm_task_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/mimo/data/energon_vlm_task_encoder.py -------------------------------------------------------------------------------- /examples/mimo/data/mock.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/mimo/data/mock.py -------------------------------------------------------------------------------- /examples/mimo/data/prepare_video_llava_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/mimo/data/prepare_video_llava_data.py -------------------------------------------------------------------------------- /examples/mimo/data/utils/calculate_audio_tokens.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/mimo/data/utils/calculate_audio_tokens.py -------------------------------------------------------------------------------- /examples/mimo/model_providers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/mimo/model_providers/hf_clip_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/mimo/model_providers/hf_clip_encoder.py -------------------------------------------------------------------------------- /examples/mimo/model_providers/hf_whisper_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/mimo/model_providers/hf_whisper_encoder.py -------------------------------------------------------------------------------- /examples/mimo/model_providers/llava_avlm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/mimo/model_providers/llava_avlm.py -------------------------------------------------------------------------------- /examples/mimo/model_providers/llava_vlm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/mimo/model_providers/llava_vlm.py -------------------------------------------------------------------------------- /examples/mimo/model_providers/mock.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/mimo/model_providers/mock.py -------------------------------------------------------------------------------- /examples/mimo/scripts/run_avlm_train.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/mimo/scripts/run_avlm_train.sh -------------------------------------------------------------------------------- /examples/mimo/scripts/run_mock_train.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/mimo/scripts/run_mock_train.sh -------------------------------------------------------------------------------- /examples/mimo/scripts/run_video_vlm_train.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/mimo/scripts/run_video_vlm_train.sh -------------------------------------------------------------------------------- /examples/mimo/scripts/run_vlm_train.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/mimo/scripts/run_vlm_train.sh -------------------------------------------------------------------------------- /examples/mimo/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/mimo/train.py -------------------------------------------------------------------------------- /examples/mimo/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/mimo/utils/data_helpers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/mimo/utils/data_helpers.py -------------------------------------------------------------------------------- /examples/mimo/utils/logging.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/mimo/utils/logging.py -------------------------------------------------------------------------------- /examples/mimo/utils/model_helpers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/mimo/utils/model_helpers.py -------------------------------------------------------------------------------- /examples/mixtral/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/mixtral/README.md -------------------------------------------------------------------------------- /examples/mixtral/train_mixtral_8x7b_distributed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/mixtral/train_mixtral_8x7b_distributed.sh -------------------------------------------------------------------------------- /examples/multimodal/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/multimodal/Dockerfile -------------------------------------------------------------------------------- /examples/multimodal/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/multimodal/README.md -------------------------------------------------------------------------------- /examples/multimodal/assets/pretrain_curves.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/multimodal/assets/pretrain_curves.png -------------------------------------------------------------------------------- /examples/multimodal/combine_lm_vision_checkpoints.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/multimodal/combine_lm_vision_checkpoints.sh -------------------------------------------------------------------------------- /examples/multimodal/combine_state_dicts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/multimodal/combine_state_dicts.py -------------------------------------------------------------------------------- /examples/multimodal/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/multimodal/config.py -------------------------------------------------------------------------------- /examples/multimodal/convert_llava_pretrain_to_wds.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/multimodal/convert_llava_pretrain_to_wds.py -------------------------------------------------------------------------------- /examples/multimodal/dataloader_provider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/multimodal/dataloader_provider.py -------------------------------------------------------------------------------- /examples/multimodal/dataset_helpers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/multimodal/dataset_helpers.py -------------------------------------------------------------------------------- /examples/multimodal/energon_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/multimodal/energon_util.py -------------------------------------------------------------------------------- /examples/multimodal/evaluation/evaluate_ai2d.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/multimodal/evaluation/evaluate_ai2d.py -------------------------------------------------------------------------------- /examples/multimodal/evaluation/evaluate_chartqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/multimodal/evaluation/evaluate_chartqa.py -------------------------------------------------------------------------------- /examples/multimodal/evaluation/evaluate_coco.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/multimodal/evaluation/evaluate_coco.py -------------------------------------------------------------------------------- /examples/multimodal/evaluation/evaluate_infovqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/multimodal/evaluation/evaluate_infovqa.py -------------------------------------------------------------------------------- /examples/multimodal/evaluation/evaluate_mathvista.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/multimodal/evaluation/evaluate_mathvista.py -------------------------------------------------------------------------------- /examples/multimodal/evaluation/evaluate_mmmu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/multimodal/evaluation/evaluate_mmmu.py -------------------------------------------------------------------------------- /examples/multimodal/evaluation/evaluate_ocrbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/multimodal/evaluation/evaluate_ocrbench.py -------------------------------------------------------------------------------- /examples/multimodal/evaluation/evaluate_spdocvqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/multimodal/evaluation/evaluate_spdocvqa.py -------------------------------------------------------------------------------- /examples/multimodal/evaluation/evaluate_textvqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/multimodal/evaluation/evaluate_textvqa.py -------------------------------------------------------------------------------- /examples/multimodal/evaluation/evaluate_vqav2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/multimodal/evaluation/evaluate_vqav2.py -------------------------------------------------------------------------------- /examples/multimodal/evaluation/mmmu_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/multimodal/evaluation/mmmu_utils.py -------------------------------------------------------------------------------- /examples/multimodal/image_processing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/multimodal/image_processing.py -------------------------------------------------------------------------------- /examples/multimodal/layer_scaling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/multimodal/layer_scaling.py -------------------------------------------------------------------------------- /examples/multimodal/layer_specs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/multimodal/layer_specs.py -------------------------------------------------------------------------------- /examples/multimodal/manual_prompts.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/multimodal/manual_prompts.json -------------------------------------------------------------------------------- /examples/multimodal/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/multimodal/model.py -------------------------------------------------------------------------------- /examples/multimodal/multimodal_args.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/multimodal/multimodal_args.py -------------------------------------------------------------------------------- /examples/multimodal/nvlm/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/multimodal/nvlm/README.md -------------------------------------------------------------------------------- /examples/multimodal/nvlm/internvit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/multimodal/nvlm/internvit.py -------------------------------------------------------------------------------- /examples/multimodal/nvlm/nvlm_prompts.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/multimodal/nvlm/nvlm_prompts.json -------------------------------------------------------------------------------- /examples/multimodal/nvlm/pp_checkpoint_converter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/multimodal/nvlm/pp_checkpoint_converter.py -------------------------------------------------------------------------------- /examples/multimodal/nvlm/pretrain_blend.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/multimodal/nvlm/pretrain_blend.yaml -------------------------------------------------------------------------------- /examples/multimodal/nvlm/sft_34b_internvit.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/multimodal/nvlm/sft_34b_internvit.sh -------------------------------------------------------------------------------- /examples/multimodal/nvlm/sft_blend.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/multimodal/nvlm/sft_blend.yaml -------------------------------------------------------------------------------- /examples/multimodal/pretrain_dataset.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/multimodal/pretrain_dataset.yaml -------------------------------------------------------------------------------- /examples/multimodal/pretrain_mistral_clip.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/multimodal/pretrain_mistral_clip.sh -------------------------------------------------------------------------------- /examples/multimodal/radio/radio_g.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/multimodal/radio/radio_g.py -------------------------------------------------------------------------------- /examples/multimodal/run_text_generation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/multimodal/run_text_generation.py -------------------------------------------------------------------------------- /examples/multimodal/sft_dataset.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/multimodal/sft_dataset.yaml -------------------------------------------------------------------------------- /examples/multimodal/sft_mistral_clip.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/multimodal/sft_mistral_clip.sh -------------------------------------------------------------------------------- /examples/multimodal/text_generation_mistral_clip.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/multimodal/text_generation_mistral_clip.sh -------------------------------------------------------------------------------- /examples/multimodal/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/multimodal/train.py -------------------------------------------------------------------------------- /examples/post_training/modelopt/.gitignore: -------------------------------------------------------------------------------- 1 | !slurm* 2 | -------------------------------------------------------------------------------- /examples/post_training/modelopt/ADVANCED.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/post_training/modelopt/ADVANCED.md -------------------------------------------------------------------------------- /examples/post_training/modelopt/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/post_training/modelopt/Dockerfile -------------------------------------------------------------------------------- /examples/post_training/modelopt/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/post_training/modelopt/README.md -------------------------------------------------------------------------------- /examples/post_training/modelopt/conf/arguments.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/post_training/modelopt/conf/arguments.sh -------------------------------------------------------------------------------- /examples/post_training/modelopt/conf/nvidia/NVIDIA-Nemotron-Nano-9B-v2-Base.sh: -------------------------------------------------------------------------------- 1 | NVIDIA-Nemotron-Nano-9B-v2.sh -------------------------------------------------------------------------------- /examples/post_training/modelopt/convert.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/post_training/modelopt/convert.sh -------------------------------------------------------------------------------- /examples/post_training/modelopt/convert_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/post_training/modelopt/convert_model.py -------------------------------------------------------------------------------- /examples/post_training/modelopt/eagle3.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/post_training/modelopt/eagle3.sh -------------------------------------------------------------------------------- /examples/post_training/modelopt/export.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/post_training/modelopt/export.py -------------------------------------------------------------------------------- /examples/post_training/modelopt/export.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/post_training/modelopt/export.sh -------------------------------------------------------------------------------- /examples/post_training/modelopt/finetune.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/post_training/modelopt/finetune.py -------------------------------------------------------------------------------- /examples/post_training/modelopt/finetune.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/post_training/modelopt/finetune.sh -------------------------------------------------------------------------------- /examples/post_training/modelopt/generate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/post_training/modelopt/generate.py -------------------------------------------------------------------------------- /examples/post_training/modelopt/generate.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/post_training/modelopt/generate.sh -------------------------------------------------------------------------------- /examples/post_training/modelopt/generation_server.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/post_training/modelopt/generation_server.sh -------------------------------------------------------------------------------- /examples/post_training/modelopt/mmlu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/post_training/modelopt/mmlu.py -------------------------------------------------------------------------------- /examples/post_training/modelopt/mmlu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/post_training/modelopt/mmlu.sh -------------------------------------------------------------------------------- /examples/post_training/modelopt/prune.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/post_training/modelopt/prune.py -------------------------------------------------------------------------------- /examples/post_training/modelopt/prune.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/post_training/modelopt/prune.sh -------------------------------------------------------------------------------- /examples/post_training/modelopt/quantize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/post_training/modelopt/quantize.py -------------------------------------------------------------------------------- /examples/post_training/modelopt/quantize.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/post_training/modelopt/quantize.sh -------------------------------------------------------------------------------- /examples/post_training/modelopt/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/post_training/modelopt/requirements.txt -------------------------------------------------------------------------------- /examples/post_training/modelopt/requirements_ssm.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/post_training/modelopt/requirements_ssm.txt -------------------------------------------------------------------------------- /examples/post_training/modelopt/slurm/sbatch.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/post_training/modelopt/slurm/sbatch.sh -------------------------------------------------------------------------------- /examples/post_training/modelopt/speculative.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/post_training/modelopt/speculative.md -------------------------------------------------------------------------------- /examples/post_training/modelopt/validate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/post_training/modelopt/validate.py -------------------------------------------------------------------------------- /examples/post_training/modelopt/validate.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/post_training/modelopt/validate.sh -------------------------------------------------------------------------------- /examples/retro/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/retro/README.md -------------------------------------------------------------------------------- /examples/retro/preprocess_data.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/retro/preprocess_data.sh -------------------------------------------------------------------------------- /examples/retro/train_retro_2b_distributed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/retro/train_retro_2b_distributed.sh -------------------------------------------------------------------------------- /examples/rl/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/rl/README.md -------------------------------------------------------------------------------- /examples/rl/environment_configs/dapo.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/rl/environment_configs/dapo.yaml -------------------------------------------------------------------------------- /examples/rl/environment_configs/default.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/rl/environment_configs/default.yaml -------------------------------------------------------------------------------- /examples/rl/environment_configs/gsm8k.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/rl/environment_configs/gsm8k.yaml -------------------------------------------------------------------------------- /examples/rl/environment_configs/math.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/rl/environment_configs/math.yaml -------------------------------------------------------------------------------- /examples/rl/environments/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/rl/environments/countdown/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/rl/environments/countdown/README.md -------------------------------------------------------------------------------- /examples/rl/environments/countdown/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/rl/environments/countdown/countdown.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/rl/environments/countdown/countdown.py -------------------------------------------------------------------------------- /examples/rl/environments/math/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/rl/environments/math/aime_agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/rl/environments/math/aime_agent.py -------------------------------------------------------------------------------- /examples/rl/environments/math/bigmath_agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/rl/environments/math/bigmath_agent.py -------------------------------------------------------------------------------- /examples/rl/environments/math/dapo_agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/rl/environments/math/dapo_agent.py -------------------------------------------------------------------------------- /examples/rl/environments/math/gsm8k_agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/rl/environments/math/gsm8k_agent.py -------------------------------------------------------------------------------- /examples/rl/environments/math/math_agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/rl/environments/math/math_agent.py -------------------------------------------------------------------------------- /examples/rl/environments/math/openmath_agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/rl/environments/math/openmath_agent.py -------------------------------------------------------------------------------- /examples/run_simple_mcore_train_loop.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/run_simple_mcore_train_loop.py -------------------------------------------------------------------------------- /examples/t5/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/t5/README.md -------------------------------------------------------------------------------- /examples/t5/t5_mcore_train_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/t5/t5_mcore_train_curve.png -------------------------------------------------------------------------------- /examples/t5/train_t5_220m_distributed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/examples/t5/train_t5_220m_distributed.sh -------------------------------------------------------------------------------- /gpt_builders.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/gpt_builders.py -------------------------------------------------------------------------------- /images/model_table.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/images/model_table.png -------------------------------------------------------------------------------- /images/strong_scaling.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/images/strong_scaling.png -------------------------------------------------------------------------------- /images/weak_scaling.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/images/weak_scaling.png -------------------------------------------------------------------------------- /mamba_builders.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/mamba_builders.py -------------------------------------------------------------------------------- /megatron/core/MSC_Integration.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/MSC_Integration.md -------------------------------------------------------------------------------- /megatron/core/QuickStart.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/QuickStart.md -------------------------------------------------------------------------------- /megatron/core/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/README.md -------------------------------------------------------------------------------- /megatron/core/README_STRAGGLER.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/README_STRAGGLER.md -------------------------------------------------------------------------------- /megatron/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/__init__.py -------------------------------------------------------------------------------- /megatron/core/activations.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/activations.py -------------------------------------------------------------------------------- /megatron/core/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/config.py -------------------------------------------------------------------------------- /megatron/core/config_logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/config_logger.py -------------------------------------------------------------------------------- /megatron/core/datasets/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/datasets/Makefile -------------------------------------------------------------------------------- /megatron/core/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /megatron/core/datasets/bert_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/datasets/bert_dataset.py -------------------------------------------------------------------------------- /megatron/core/datasets/blended_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/datasets/blended_dataset.py -------------------------------------------------------------------------------- /megatron/core/datasets/gpt_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/datasets/gpt_dataset.py -------------------------------------------------------------------------------- /megatron/core/datasets/helpers.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/datasets/helpers.cpp -------------------------------------------------------------------------------- /megatron/core/datasets/helpers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/datasets/helpers.py -------------------------------------------------------------------------------- /megatron/core/datasets/indexed_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/datasets/indexed_dataset.py -------------------------------------------------------------------------------- /megatron/core/datasets/masked_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/datasets/masked_dataset.py -------------------------------------------------------------------------------- /megatron/core/datasets/megatron_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/datasets/megatron_dataset.py -------------------------------------------------------------------------------- /megatron/core/datasets/megatron_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/datasets/megatron_tokenizer.py -------------------------------------------------------------------------------- /megatron/core/datasets/multimodal_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/datasets/multimodal_dataset.py -------------------------------------------------------------------------------- /megatron/core/datasets/object_storage_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/datasets/object_storage_utils.py -------------------------------------------------------------------------------- /megatron/core/datasets/readme.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/datasets/readme.md -------------------------------------------------------------------------------- /megatron/core/datasets/retro/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/datasets/retro/__init__.py -------------------------------------------------------------------------------- /megatron/core/datasets/retro/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/datasets/retro/config/__init__.py -------------------------------------------------------------------------------- /megatron/core/datasets/retro/config/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/datasets/retro/config/config.py -------------------------------------------------------------------------------- /megatron/core/datasets/retro/config/tokenizers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/datasets/retro/config/tokenizers.py -------------------------------------------------------------------------------- /megatron/core/datasets/retro/db/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/datasets/retro/db/__init__.py -------------------------------------------------------------------------------- /megatron/core/datasets/retro/db/build.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/datasets/retro/db/build.py -------------------------------------------------------------------------------- /megatron/core/datasets/retro/db/dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/datasets/retro/db/dataset.py -------------------------------------------------------------------------------- /megatron/core/datasets/retro/db/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/datasets/retro/db/utils.py -------------------------------------------------------------------------------- /megatron/core/datasets/retro/external_libs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/datasets/retro/external_libs.py -------------------------------------------------------------------------------- /megatron/core/datasets/retro/index/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/datasets/retro/index/__init__.py -------------------------------------------------------------------------------- /megatron/core/datasets/retro/index/build.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/datasets/retro/index/build.py -------------------------------------------------------------------------------- /megatron/core/datasets/retro/index/factory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/datasets/retro/index/factory.py -------------------------------------------------------------------------------- /megatron/core/datasets/retro/index/index.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/datasets/retro/index/index.py -------------------------------------------------------------------------------- /megatron/core/datasets/retro/index/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/datasets/retro/index/utils.py -------------------------------------------------------------------------------- /megatron/core/datasets/retro/index/validate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/datasets/retro/index/validate.py -------------------------------------------------------------------------------- /megatron/core/datasets/retro/query/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/datasets/retro/query/__init__.py -------------------------------------------------------------------------------- /megatron/core/datasets/retro/query/query.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/datasets/retro/query/query.py -------------------------------------------------------------------------------- /megatron/core/datasets/retro/query/retro_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/datasets/retro/query/retro_dataset.py -------------------------------------------------------------------------------- /megatron/core/datasets/retro/query/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/datasets/retro/query/utils.py -------------------------------------------------------------------------------- /megatron/core/datasets/retro/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/datasets/retro/utils.py -------------------------------------------------------------------------------- /megatron/core/datasets/t5_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/datasets/t5_dataset.py -------------------------------------------------------------------------------- /megatron/core/datasets/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/datasets/utils.py -------------------------------------------------------------------------------- /megatron/core/datasets/utils_s3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/datasets/utils_s3.py -------------------------------------------------------------------------------- /megatron/core/dist_checkpointing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/dist_checkpointing/__init__.py -------------------------------------------------------------------------------- /megatron/core/dist_checkpointing/core.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/dist_checkpointing/core.py -------------------------------------------------------------------------------- /megatron/core/dist_checkpointing/dict_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/dist_checkpointing/dict_utils.py -------------------------------------------------------------------------------- /megatron/core/dist_checkpointing/exchange_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/dist_checkpointing/exchange_utils.py -------------------------------------------------------------------------------- /megatron/core/dist_checkpointing/mapping.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/dist_checkpointing/mapping.py -------------------------------------------------------------------------------- /megatron/core/dist_checkpointing/optimizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/dist_checkpointing/optimizer.py -------------------------------------------------------------------------------- /megatron/core/dist_checkpointing/serialization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/dist_checkpointing/serialization.py -------------------------------------------------------------------------------- /megatron/core/dist_checkpointing/state_dict_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/dist_checkpointing/state_dict_utils.py -------------------------------------------------------------------------------- /megatron/core/dist_checkpointing/strategies/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/dist_checkpointing/strategies/base.py -------------------------------------------------------------------------------- /megatron/core/dist_checkpointing/strategies/torch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/dist_checkpointing/strategies/torch.py -------------------------------------------------------------------------------- /megatron/core/dist_checkpointing/strategies/zarr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/dist_checkpointing/strategies/zarr.py -------------------------------------------------------------------------------- /megatron/core/dist_checkpointing/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/dist_checkpointing/utils.py -------------------------------------------------------------------------------- /megatron/core/dist_checkpointing/validation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/dist_checkpointing/validation.py -------------------------------------------------------------------------------- /megatron/core/distributed/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/distributed/README.md -------------------------------------------------------------------------------- /megatron/core/distributed/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/distributed/__init__.py -------------------------------------------------------------------------------- /megatron/core/distributed/data_parallel_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/distributed/data_parallel_base.py -------------------------------------------------------------------------------- /megatron/core/distributed/finalize_model_grads.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/distributed/finalize_model_grads.py -------------------------------------------------------------------------------- /megatron/core/distributed/fsdp/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/distributed/fsdp/__init__.py -------------------------------------------------------------------------------- /megatron/core/distributed/fsdp/mcore_fsdp_adapter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/distributed/fsdp/mcore_fsdp_adapter.py -------------------------------------------------------------------------------- /megatron/core/distributed/fsdp/src/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/distributed/fsdp/src/README.md -------------------------------------------------------------------------------- /megatron/core/distributed/fsdp/src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/distributed/fsdp/src/__init__.py -------------------------------------------------------------------------------- /megatron/core/distributed/fsdp/src/pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/distributed/fsdp/src/pyproject.toml -------------------------------------------------------------------------------- /megatron/core/distributed/param_and_grad_buffer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/distributed/param_and_grad_buffer.py -------------------------------------------------------------------------------- /megatron/core/energy_monitor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/energy_monitor.py -------------------------------------------------------------------------------- /megatron/core/enums.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/enums.py -------------------------------------------------------------------------------- /megatron/core/export/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/export/__init__.py -------------------------------------------------------------------------------- /megatron/core/export/data_type.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/export/data_type.py -------------------------------------------------------------------------------- /megatron/core/export/export_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/export/export_config.py -------------------------------------------------------------------------------- /megatron/core/export/model_type.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/export/model_type.py -------------------------------------------------------------------------------- /megatron/core/export/trtllm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/export/trtllm/__init__.py -------------------------------------------------------------------------------- /megatron/core/export/trtllm/trt_model_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/export/trtllm/trt_model_config.py -------------------------------------------------------------------------------- /megatron/core/export/trtllm/trt_model_type.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/export/trtllm/trt_model_type.py -------------------------------------------------------------------------------- /megatron/core/export/trtllm/trtllm_helper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/export/trtllm/trtllm_helper.py -------------------------------------------------------------------------------- /megatron/core/export/trtllm/trtllm_layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/export/trtllm/trtllm_layers.py -------------------------------------------------------------------------------- /megatron/core/extensions/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /megatron/core/extensions/kitchen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/extensions/kitchen.py -------------------------------------------------------------------------------- /megatron/core/extensions/transformer_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/extensions/transformer_engine.py -------------------------------------------------------------------------------- /megatron/core/fp4_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/fp4_utils.py -------------------------------------------------------------------------------- /megatron/core/fp8_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/fp8_utils.py -------------------------------------------------------------------------------- /megatron/core/full_cuda_graph.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/full_cuda_graph.py -------------------------------------------------------------------------------- /megatron/core/fusions/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /megatron/core/fusions/fused_bias_dropout.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/fusions/fused_bias_dropout.py -------------------------------------------------------------------------------- /megatron/core/fusions/fused_bias_geglu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/fusions/fused_bias_geglu.py -------------------------------------------------------------------------------- /megatron/core/fusions/fused_bias_gelu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/fusions/fused_bias_gelu.py -------------------------------------------------------------------------------- /megatron/core/fusions/fused_bias_swiglu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/fusions/fused_bias_swiglu.py -------------------------------------------------------------------------------- /megatron/core/fusions/fused_cross_entropy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/fusions/fused_cross_entropy.py -------------------------------------------------------------------------------- /megatron/core/fusions/fused_indices_converter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/fusions/fused_indices_converter.py -------------------------------------------------------------------------------- /megatron/core/fusions/fused_layer_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/fusions/fused_layer_norm.py -------------------------------------------------------------------------------- /megatron/core/fusions/fused_mla_yarn_rope_apply.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/fusions/fused_mla_yarn_rope_apply.py -------------------------------------------------------------------------------- /megatron/core/fusions/fused_pad_routing_map.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/fusions/fused_pad_routing_map.py -------------------------------------------------------------------------------- /megatron/core/fusions/fused_softmax.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/fusions/fused_softmax.py -------------------------------------------------------------------------------- /megatron/core/fusions/fused_weighted_squared_relu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/fusions/fused_weighted_squared_relu.py -------------------------------------------------------------------------------- /megatron/core/hyper_comm_grid.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/hyper_comm_grid.py -------------------------------------------------------------------------------- /megatron/core/inference/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/inference/__init__.py -------------------------------------------------------------------------------- /megatron/core/inference/async_stream.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/inference/async_stream.py -------------------------------------------------------------------------------- /megatron/core/inference/batch_dimensions_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/inference/batch_dimensions_utils.py -------------------------------------------------------------------------------- /megatron/core/inference/common_inference_params.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/inference/common_inference_params.py -------------------------------------------------------------------------------- /megatron/core/inference/communication_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/inference/communication_utils.py -------------------------------------------------------------------------------- /megatron/core/inference/contexts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/inference/contexts/__init__.py -------------------------------------------------------------------------------- /megatron/core/inference/contexts/base_context.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/inference/contexts/base_context.py -------------------------------------------------------------------------------- /megatron/core/inference/contexts/dynamic_context.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/inference/contexts/dynamic_context.py -------------------------------------------------------------------------------- /megatron/core/inference/contexts/static_context.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/inference/contexts/static_context.py -------------------------------------------------------------------------------- /megatron/core/inference/engines/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/inference/engines/__init__.py -------------------------------------------------------------------------------- /megatron/core/inference/engines/abstract_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/inference/engines/abstract_engine.py -------------------------------------------------------------------------------- /megatron/core/inference/engines/dynamic_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/inference/engines/dynamic_engine.py -------------------------------------------------------------------------------- /megatron/core/inference/engines/mcore_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/inference/engines/mcore_engine.py -------------------------------------------------------------------------------- /megatron/core/inference/engines/static_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/inference/engines/static_engine.py -------------------------------------------------------------------------------- /megatron/core/inference/headers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/inference/headers.py -------------------------------------------------------------------------------- /megatron/core/inference/inference_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/inference/inference_client.py -------------------------------------------------------------------------------- /megatron/core/inference/inference_request.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/inference/inference_request.py -------------------------------------------------------------------------------- /megatron/core/inference/sampling_params.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/inference/sampling_params.py -------------------------------------------------------------------------------- /megatron/core/inference/scheduler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/inference/scheduler.py -------------------------------------------------------------------------------- /megatron/core/inference/unified_memory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/inference/unified_memory.py -------------------------------------------------------------------------------- /megatron/core/inference/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/inference/utils.py -------------------------------------------------------------------------------- /megatron/core/inference_params.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/inference_params.py -------------------------------------------------------------------------------- /megatron/core/jit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/jit.py -------------------------------------------------------------------------------- /megatron/core/model_parallel_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/model_parallel_config.py -------------------------------------------------------------------------------- /megatron/core/models/T5/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/models/T5/__init__.py -------------------------------------------------------------------------------- /megatron/core/models/T5/t5_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/models/T5/t5_model.py -------------------------------------------------------------------------------- /megatron/core/models/T5/t5_spec.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/models/T5/t5_spec.py -------------------------------------------------------------------------------- /megatron/core/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/models/__init__.py -------------------------------------------------------------------------------- /megatron/core/models/backends.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/models/backends.py -------------------------------------------------------------------------------- /megatron/core/models/bert/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /megatron/core/models/bert/bert_layer_specs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/models/bert/bert_layer_specs.py -------------------------------------------------------------------------------- /megatron/core/models/bert/bert_lm_head.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/models/bert/bert_lm_head.py -------------------------------------------------------------------------------- /megatron/core/models/bert/bert_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/models/bert/bert_model.py -------------------------------------------------------------------------------- /megatron/core/models/bert/pooler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/models/bert/pooler.py -------------------------------------------------------------------------------- /megatron/core/models/common/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /megatron/core/models/common/embeddings/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/models/common/embeddings/__init__.py -------------------------------------------------------------------------------- /megatron/core/models/common/embeddings/rope_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/models/common/embeddings/rope_utils.py -------------------------------------------------------------------------------- /megatron/core/models/common/language_module/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /megatron/core/models/common/vision_module/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /megatron/core/models/gpt/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/models/gpt/__init__.py -------------------------------------------------------------------------------- /megatron/core/models/gpt/fine_grained_callables.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/models/gpt/fine_grained_callables.py -------------------------------------------------------------------------------- /megatron/core/models/gpt/gpt_layer_specs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/models/gpt/gpt_layer_specs.py -------------------------------------------------------------------------------- /megatron/core/models/gpt/gpt_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/models/gpt/gpt_model.py -------------------------------------------------------------------------------- /megatron/core/models/gpt/moe_module_specs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/models/gpt/moe_module_specs.py -------------------------------------------------------------------------------- /megatron/core/models/huggingface/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/models/huggingface/__init__.py -------------------------------------------------------------------------------- /megatron/core/models/huggingface/clip_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/models/huggingface/clip_model.py -------------------------------------------------------------------------------- /megatron/core/models/huggingface/module.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/models/huggingface/module.py -------------------------------------------------------------------------------- /megatron/core/models/huggingface/qwen_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/models/huggingface/qwen_model.py -------------------------------------------------------------------------------- /megatron/core/models/mamba/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/models/mamba/__init__.py -------------------------------------------------------------------------------- /megatron/core/models/mamba/mamba_layer_specs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/models/mamba/mamba_layer_specs.py -------------------------------------------------------------------------------- /megatron/core/models/mamba/mamba_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/models/mamba/mamba_model.py -------------------------------------------------------------------------------- /megatron/core/models/mimo/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/models/mimo/README.md -------------------------------------------------------------------------------- /megatron/core/models/mimo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/models/mimo/__init__.py -------------------------------------------------------------------------------- /megatron/core/models/mimo/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/models/mimo/config/__init__.py -------------------------------------------------------------------------------- /megatron/core/models/mimo/config/base_configs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/models/mimo/config/base_configs.py -------------------------------------------------------------------------------- /megatron/core/models/mimo/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/models/mimo/model/__init__.py -------------------------------------------------------------------------------- /megatron/core/models/mimo/model/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/models/mimo/model/base.py -------------------------------------------------------------------------------- /megatron/core/models/mimo/submodules/audio.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/models/mimo/submodules/audio.py -------------------------------------------------------------------------------- /megatron/core/models/mimo/submodules/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/models/mimo/submodules/base.py -------------------------------------------------------------------------------- /megatron/core/models/mimo/submodules/vision.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/models/mimo/submodules/vision.py -------------------------------------------------------------------------------- /megatron/core/models/multimodal/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/models/multimodal/__init__.py -------------------------------------------------------------------------------- /megatron/core/models/multimodal/context_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/models/multimodal/context_parallel.py -------------------------------------------------------------------------------- /megatron/core/models/multimodal/llava_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/models/multimodal/llava_model.py -------------------------------------------------------------------------------- /megatron/core/models/multimodal/llava_spec.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/models/multimodal/llava_spec.py -------------------------------------------------------------------------------- /megatron/core/models/retro/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/models/retro/__init__.py -------------------------------------------------------------------------------- /megatron/core/models/retro/base_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/models/retro/base_attention.py -------------------------------------------------------------------------------- /megatron/core/models/retro/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/models/retro/config.py -------------------------------------------------------------------------------- /megatron/core/models/retro/decoder_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/models/retro/decoder_attention.py -------------------------------------------------------------------------------- /megatron/core/models/retro/decoder_spec.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/models/retro/decoder_spec.py -------------------------------------------------------------------------------- /megatron/core/models/retro/encoder_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/models/retro/encoder_attention.py -------------------------------------------------------------------------------- /megatron/core/models/retro/encoder_spec.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/models/retro/encoder_spec.py -------------------------------------------------------------------------------- /megatron/core/models/retro/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/models/retro/model.py -------------------------------------------------------------------------------- /megatron/core/models/retro/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/models/retro/utils.py -------------------------------------------------------------------------------- /megatron/core/models/vision/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /megatron/core/models/vision/clip_vit_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/models/vision/clip_vit_model.py -------------------------------------------------------------------------------- /megatron/core/models/vision/multimodal_projector.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/models/vision/multimodal_projector.py -------------------------------------------------------------------------------- /megatron/core/models/vision/radio.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/models/vision/radio.py -------------------------------------------------------------------------------- /megatron/core/models/vision/vit_layer_specs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/models/vision/vit_layer_specs.py -------------------------------------------------------------------------------- /megatron/core/msc_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/msc_utils.py -------------------------------------------------------------------------------- /megatron/core/nccl_allocator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/nccl_allocator.py -------------------------------------------------------------------------------- /megatron/core/num_microbatches_calculator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/num_microbatches_calculator.py -------------------------------------------------------------------------------- /megatron/core/optimizer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/optimizer/__init__.py -------------------------------------------------------------------------------- /megatron/core/optimizer/clip_grads.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/optimizer/clip_grads.py -------------------------------------------------------------------------------- /megatron/core/optimizer/cpu_offloading/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/optimizer/cpu_offloading/README.md -------------------------------------------------------------------------------- /megatron/core/optimizer/cpu_offloading/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/optimizer/cpu_offloading/__init__.py -------------------------------------------------------------------------------- /megatron/core/optimizer/distrib_optimizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/optimizer/distrib_optimizer.py -------------------------------------------------------------------------------- /megatron/core/optimizer/grad_scaler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/optimizer/grad_scaler.py -------------------------------------------------------------------------------- /megatron/core/optimizer/optimizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/optimizer/optimizer.py -------------------------------------------------------------------------------- /megatron/core/optimizer/optimizer_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/optimizer/optimizer_config.py -------------------------------------------------------------------------------- /megatron/core/optimizer_param_scheduler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/optimizer_param_scheduler.py -------------------------------------------------------------------------------- /megatron/core/package_info.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/package_info.py -------------------------------------------------------------------------------- /megatron/core/packed_seq_params.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/packed_seq_params.py -------------------------------------------------------------------------------- /megatron/core/parallel_state.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/parallel_state.py -------------------------------------------------------------------------------- /megatron/core/pipeline_parallel/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/pipeline_parallel/__init__.py -------------------------------------------------------------------------------- /megatron/core/pipeline_parallel/combined_1f1b.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/pipeline_parallel/combined_1f1b.py -------------------------------------------------------------------------------- /megatron/core/pipeline_parallel/p2p_communication.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/pipeline_parallel/p2p_communication.py -------------------------------------------------------------------------------- /megatron/core/pipeline_parallel/schedules.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/pipeline_parallel/schedules.py -------------------------------------------------------------------------------- /megatron/core/pipeline_parallel/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/pipeline_parallel/utils.py -------------------------------------------------------------------------------- /megatron/core/post_training/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/post_training/__init__.py -------------------------------------------------------------------------------- /megatron/core/post_training/modelopt/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/post_training/modelopt/__init__.py -------------------------------------------------------------------------------- /megatron/core/post_training/modelopt/gpt/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/post_training/modelopt/gpt/__init__.py -------------------------------------------------------------------------------- /megatron/core/post_training/modelopt/layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/post_training/modelopt/layers.py -------------------------------------------------------------------------------- /megatron/core/process_groups_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/process_groups_config.py -------------------------------------------------------------------------------- /megatron/core/quantization/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/quantization/__init__.py -------------------------------------------------------------------------------- /megatron/core/quantization/quant_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/quantization/quant_config.py -------------------------------------------------------------------------------- /megatron/core/quantization/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/quantization/utils.py -------------------------------------------------------------------------------- /megatron/core/requirements.txt: -------------------------------------------------------------------------------- 1 | torch 2 | packaging 3 | -------------------------------------------------------------------------------- /megatron/core/rerun_state_machine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/rerun_state_machine.py -------------------------------------------------------------------------------- /megatron/core/safe_globals.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/safe_globals.py -------------------------------------------------------------------------------- /megatron/core/ssm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/ssm/__init__.py -------------------------------------------------------------------------------- /megatron/core/ssm/mamba_block.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/ssm/mamba_block.py -------------------------------------------------------------------------------- /megatron/core/ssm/mamba_context_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/ssm/mamba_context_parallel.py -------------------------------------------------------------------------------- /megatron/core/ssm/mamba_hybrid_layer_allocation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/ssm/mamba_hybrid_layer_allocation.py -------------------------------------------------------------------------------- /megatron/core/ssm/mamba_layer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/ssm/mamba_layer.py -------------------------------------------------------------------------------- /megatron/core/ssm/mamba_mixer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/ssm/mamba_mixer.py -------------------------------------------------------------------------------- /megatron/core/ssm/mlp_layer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/ssm/mlp_layer.py -------------------------------------------------------------------------------- /megatron/core/ssm/triton_cache_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/ssm/triton_cache_manager.py -------------------------------------------------------------------------------- /megatron/core/tensor_parallel/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/tensor_parallel/__init__.py -------------------------------------------------------------------------------- /megatron/core/tensor_parallel/cross_entropy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/tensor_parallel/cross_entropy.py -------------------------------------------------------------------------------- /megatron/core/tensor_parallel/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/tensor_parallel/data.py -------------------------------------------------------------------------------- /megatron/core/tensor_parallel/inference_layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/tensor_parallel/inference_layers.py -------------------------------------------------------------------------------- /megatron/core/tensor_parallel/layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/tensor_parallel/layers.py -------------------------------------------------------------------------------- /megatron/core/tensor_parallel/mappings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/tensor_parallel/mappings.py -------------------------------------------------------------------------------- /megatron/core/tensor_parallel/random.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/tensor_parallel/random.py -------------------------------------------------------------------------------- /megatron/core/tensor_parallel/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/tensor_parallel/utils.py -------------------------------------------------------------------------------- /megatron/core/timers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/timers.py -------------------------------------------------------------------------------- /megatron/core/tokenizers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/tokenizers/__init__.py -------------------------------------------------------------------------------- /megatron/core/tokenizers/base_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/tokenizers/base_tokenizer.py -------------------------------------------------------------------------------- /megatron/core/tokenizers/megatron_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/tokenizers/megatron_tokenizer.py -------------------------------------------------------------------------------- /megatron/core/tokenizers/text/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/tokenizers/text/__init__.py -------------------------------------------------------------------------------- /megatron/core/tokenizers/text/libraries/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/tokenizers/text/libraries/__init__.py -------------------------------------------------------------------------------- /megatron/core/tokenizers/text/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/tokenizers/text/models/__init__.py -------------------------------------------------------------------------------- /megatron/core/tokenizers/text/models/t5_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/tokenizers/text/models/t5_tokenizer.py -------------------------------------------------------------------------------- /megatron/core/tokenizers/text/text_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/tokenizers/text/text_tokenizer.py -------------------------------------------------------------------------------- /megatron/core/transformer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/transformer/__init__.py -------------------------------------------------------------------------------- /megatron/core/transformer/attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/transformer/attention.py -------------------------------------------------------------------------------- /megatron/core/transformer/cuda_graphs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/transformer/cuda_graphs.py -------------------------------------------------------------------------------- /megatron/core/transformer/custom_layers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /megatron/core/transformer/dot_product_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/transformer/dot_product_attention.py -------------------------------------------------------------------------------- /megatron/core/transformer/enums.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/transformer/enums.py -------------------------------------------------------------------------------- /megatron/core/transformer/fsdp_dtensor_checkpoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/transformer/fsdp_dtensor_checkpoint.py -------------------------------------------------------------------------------- /megatron/core/transformer/identity_op.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/transformer/identity_op.py -------------------------------------------------------------------------------- /megatron/core/transformer/mlp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/transformer/mlp.py -------------------------------------------------------------------------------- /megatron/core/transformer/module.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/transformer/module.py -------------------------------------------------------------------------------- /megatron/core/transformer/moe/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/transformer/moe/README.md -------------------------------------------------------------------------------- /megatron/core/transformer/moe/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /megatron/core/transformer/moe/experts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/transformer/moe/experts.py -------------------------------------------------------------------------------- /megatron/core/transformer/moe/fused_a2a.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/transformer/moe/fused_a2a.py -------------------------------------------------------------------------------- /megatron/core/transformer/moe/grouped_gemm_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/transformer/moe/grouped_gemm_util.py -------------------------------------------------------------------------------- /megatron/core/transformer/moe/moe_layer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/transformer/moe/moe_layer.py -------------------------------------------------------------------------------- /megatron/core/transformer/moe/moe_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/transformer/moe/moe_utils.py -------------------------------------------------------------------------------- /megatron/core/transformer/moe/router.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/transformer/moe/router.py -------------------------------------------------------------------------------- /megatron/core/transformer/moe/shared_experts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/transformer/moe/shared_experts.py -------------------------------------------------------------------------------- /megatron/core/transformer/moe/token_dispatcher.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/transformer/moe/token_dispatcher.py -------------------------------------------------------------------------------- /megatron/core/transformer/moe/upcycling_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/transformer/moe/upcycling_utils.py -------------------------------------------------------------------------------- /megatron/core/transformer/multi_latent_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/transformer/multi_latent_attention.py -------------------------------------------------------------------------------- /megatron/core/transformer/multi_token_prediction.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/transformer/multi_token_prediction.py -------------------------------------------------------------------------------- /megatron/core/transformer/spec_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/transformer/spec_utils.py -------------------------------------------------------------------------------- /megatron/core/transformer/torch_layer_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/transformer/torch_layer_norm.py -------------------------------------------------------------------------------- /megatron/core/transformer/torch_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/transformer/torch_norm.py -------------------------------------------------------------------------------- /megatron/core/transformer/transformer_block.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/transformer/transformer_block.py -------------------------------------------------------------------------------- /megatron/core/transformer/transformer_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/transformer/transformer_config.py -------------------------------------------------------------------------------- /megatron/core/transformer/transformer_layer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/transformer/transformer_layer.py -------------------------------------------------------------------------------- /megatron/core/transformer/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/transformer/utils.py -------------------------------------------------------------------------------- /megatron/core/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/core/utils.py -------------------------------------------------------------------------------- /megatron/legacy/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/legacy/data/__init__.py -------------------------------------------------------------------------------- /megatron/legacy/data/autoaugment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/legacy/data/autoaugment.py -------------------------------------------------------------------------------- /megatron/legacy/data/biencoder_dataset_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/legacy/data/biencoder_dataset_utils.py -------------------------------------------------------------------------------- /megatron/legacy/data/dataset_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/legacy/data/dataset_utils.py -------------------------------------------------------------------------------- /megatron/legacy/data/ict_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/legacy/data/ict_dataset.py -------------------------------------------------------------------------------- /megatron/legacy/data/image_folder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/legacy/data/image_folder.py -------------------------------------------------------------------------------- /megatron/legacy/data/multimodal_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/legacy/data/multimodal_dataset.py -------------------------------------------------------------------------------- /megatron/legacy/data/orqa_wiki_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/legacy/data/orqa_wiki_dataset.py -------------------------------------------------------------------------------- /megatron/legacy/data/realm_dataset_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/legacy/data/realm_dataset_utils.py -------------------------------------------------------------------------------- /megatron/legacy/data/realm_index.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/legacy/data/realm_index.py -------------------------------------------------------------------------------- /megatron/legacy/data/vit_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/legacy/data/vit_dataset.py -------------------------------------------------------------------------------- /megatron/legacy/fp16_deprecated/loss_scaler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/legacy/fp16_deprecated/loss_scaler.py -------------------------------------------------------------------------------- /megatron/legacy/fused_kernels/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/legacy/fused_kernels/__init__.py -------------------------------------------------------------------------------- /megatron/legacy/fused_kernels/compat.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/legacy/fused_kernels/compat.h -------------------------------------------------------------------------------- /megatron/legacy/fused_kernels/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /megatron/legacy/fused_kernels/type_shim.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/legacy/fused_kernels/type_shim.h -------------------------------------------------------------------------------- /megatron/legacy/indexer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/legacy/indexer.py -------------------------------------------------------------------------------- /megatron/legacy/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/legacy/model/__init__.py -------------------------------------------------------------------------------- /megatron/legacy/model/bert_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/legacy/model/bert_model.py -------------------------------------------------------------------------------- /megatron/legacy/model/biencoder_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/legacy/model/biencoder_model.py -------------------------------------------------------------------------------- /megatron/legacy/model/classification.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/legacy/model/classification.py -------------------------------------------------------------------------------- /megatron/legacy/model/enums.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/legacy/model/enums.py -------------------------------------------------------------------------------- /megatron/legacy/model/fused_bias_gelu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/legacy/model/fused_bias_gelu.py -------------------------------------------------------------------------------- /megatron/legacy/model/fused_layer_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/legacy/model/fused_layer_norm.py -------------------------------------------------------------------------------- /megatron/legacy/model/fused_softmax.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/legacy/model/fused_softmax.py -------------------------------------------------------------------------------- /megatron/legacy/model/gpt_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/legacy/model/gpt_model.py -------------------------------------------------------------------------------- /megatron/legacy/model/language_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/legacy/model/language_model.py -------------------------------------------------------------------------------- /megatron/legacy/model/module.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/legacy/model/module.py -------------------------------------------------------------------------------- /megatron/legacy/model/multiple_choice.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/legacy/model/multiple_choice.py -------------------------------------------------------------------------------- /megatron/legacy/model/realm_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/legacy/model/realm_model.py -------------------------------------------------------------------------------- /megatron/legacy/model/rms_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/legacy/model/rms_norm.py -------------------------------------------------------------------------------- /megatron/legacy/model/t5_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/legacy/model/t5_model.py -------------------------------------------------------------------------------- /megatron/legacy/model/transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/legacy/model/transformer.py -------------------------------------------------------------------------------- /megatron/legacy/model/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/legacy/model/utils.py -------------------------------------------------------------------------------- /megatron/legacy/model/vision/classification.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/legacy/model/vision/classification.py -------------------------------------------------------------------------------- /megatron/legacy/model/vision/dino.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/legacy/model/vision/dino.py -------------------------------------------------------------------------------- /megatron/legacy/model/vision/esvit_swin_backbone.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/legacy/model/vision/esvit_swin_backbone.py -------------------------------------------------------------------------------- /megatron/legacy/model/vision/inpainting.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/legacy/model/vision/inpainting.py -------------------------------------------------------------------------------- /megatron/legacy/model/vision/knn_monitor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/legacy/model/vision/knn_monitor.py -------------------------------------------------------------------------------- /megatron/legacy/model/vision/mit_backbone.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/legacy/model/vision/mit_backbone.py -------------------------------------------------------------------------------- /megatron/legacy/model/vision/swin_backbone.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/legacy/model/vision/swin_backbone.py -------------------------------------------------------------------------------- /megatron/legacy/model/vision/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/legacy/model/vision/utils.py -------------------------------------------------------------------------------- /megatron/legacy/model/vision/vit_backbone.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/legacy/model/vision/vit_backbone.py -------------------------------------------------------------------------------- /megatron/legacy/mpu/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /megatron/legacy/mpu/tests/commons.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/legacy/mpu/tests/commons.py -------------------------------------------------------------------------------- /megatron/legacy/mpu/tests/test_cross_entropy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/legacy/mpu/tests/test_cross_entropy.py -------------------------------------------------------------------------------- /megatron/legacy/mpu/tests/test_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/legacy/mpu/tests/test_data.py -------------------------------------------------------------------------------- /megatron/legacy/mpu/tests/test_initialize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/legacy/mpu/tests/test_initialize.py -------------------------------------------------------------------------------- /megatron/legacy/mpu/tests/test_layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/legacy/mpu/tests/test_layers.py -------------------------------------------------------------------------------- /megatron/legacy/mpu/tests/test_random.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/legacy/mpu/tests/test_random.py -------------------------------------------------------------------------------- /megatron/post_training/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/post_training/__init__.py -------------------------------------------------------------------------------- /megatron/post_training/arguments.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/post_training/arguments.py -------------------------------------------------------------------------------- /megatron/post_training/checkpointing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/post_training/checkpointing.py -------------------------------------------------------------------------------- /megatron/post_training/docs/distillation.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/post_training/docs/distillation.md -------------------------------------------------------------------------------- /megatron/post_training/generate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/post_training/generate.py -------------------------------------------------------------------------------- /megatron/post_training/loss_func.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/post_training/loss_func.py -------------------------------------------------------------------------------- /megatron/post_training/model_builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/post_training/model_builder.py -------------------------------------------------------------------------------- /megatron/post_training/non_loss_data_func.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/post_training/non_loss_data_func.py -------------------------------------------------------------------------------- /megatron/post_training/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/post_training/utils.py -------------------------------------------------------------------------------- /megatron/rl/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/rl/README.md -------------------------------------------------------------------------------- /megatron/rl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/rl/__init__.py -------------------------------------------------------------------------------- /megatron/rl/agent/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/rl/agent/__init__.py -------------------------------------------------------------------------------- /megatron/rl/agent/api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/rl/agent/api.py -------------------------------------------------------------------------------- /megatron/rl/agent/pass_at_evaluation_agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/rl/agent/pass_at_evaluation_agent.py -------------------------------------------------------------------------------- /megatron/rl/agent/remote_agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/rl/agent/remote_agent.py -------------------------------------------------------------------------------- /megatron/rl/agent/reward_only_agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/rl/agent/reward_only_agent.py -------------------------------------------------------------------------------- /megatron/rl/agent/weighted_multi_task.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/rl/agent/weighted_multi_task.py -------------------------------------------------------------------------------- /megatron/rl/inference/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/rl/inference/__init__.py -------------------------------------------------------------------------------- /megatron/rl/inference/api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/rl/inference/api.py -------------------------------------------------------------------------------- /megatron/rl/inference/chat_templates.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/rl/inference/chat_templates.py -------------------------------------------------------------------------------- /megatron/rl/inference/inference_interface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/rl/inference/inference_interface.py -------------------------------------------------------------------------------- /megatron/rl/inference/megatron.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/rl/inference/megatron.py -------------------------------------------------------------------------------- /megatron/rl/logging.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/rl/logging.py -------------------------------------------------------------------------------- /megatron/rl/rl_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/rl/rl_utils.py -------------------------------------------------------------------------------- /megatron/rl/server/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/rl/server/__init__.py -------------------------------------------------------------------------------- /megatron/rl/server/agent/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/rl/server/agent/__init__.py -------------------------------------------------------------------------------- /megatron/rl/server/agent/fastapi_env_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/rl/server/agent/fastapi_env_server.py -------------------------------------------------------------------------------- /megatron/rl/server/api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/rl/server/api.py -------------------------------------------------------------------------------- /megatron/rl/server/inference/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/rl/server/inference/__init__.py -------------------------------------------------------------------------------- /megatron/training/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/training/__init__.py -------------------------------------------------------------------------------- /megatron/training/arguments.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/training/arguments.py -------------------------------------------------------------------------------- /megatron/training/async_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/training/async_utils.py -------------------------------------------------------------------------------- /megatron/training/checkpointing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/training/checkpointing.py -------------------------------------------------------------------------------- /megatron/training/datasets/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/training/datasets/README.md -------------------------------------------------------------------------------- /megatron/training/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/training/datasets/__init__.py -------------------------------------------------------------------------------- /megatron/training/datasets/data_samplers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/training/datasets/data_samplers.py -------------------------------------------------------------------------------- /megatron/training/datasets/fim_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/training/datasets/fim_dataset.py -------------------------------------------------------------------------------- /megatron/training/datasets/sft_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/training/datasets/sft_dataset.py -------------------------------------------------------------------------------- /megatron/training/dist_signal_handler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/training/dist_signal_handler.py -------------------------------------------------------------------------------- /megatron/training/ft_integration.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/training/ft_integration.py -------------------------------------------------------------------------------- /megatron/training/global_vars.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/training/global_vars.py -------------------------------------------------------------------------------- /megatron/training/initialize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/training/initialize.py -------------------------------------------------------------------------------- /megatron/training/inprocess_restart.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/training/inprocess_restart.py -------------------------------------------------------------------------------- /megatron/training/log_handler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/training/log_handler.py -------------------------------------------------------------------------------- /megatron/training/one_logger_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/training/one_logger_utils.py -------------------------------------------------------------------------------- /megatron/training/theoretical_memory_usage.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/training/theoretical_memory_usage.py -------------------------------------------------------------------------------- /megatron/training/tokenizer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/training/tokenizer/__init__.py -------------------------------------------------------------------------------- /megatron/training/tokenizer/bert_tokenization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/training/tokenizer/bert_tokenization.py -------------------------------------------------------------------------------- /megatron/training/tokenizer/gpt2_tokenization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/training/tokenizer/gpt2_tokenization.py -------------------------------------------------------------------------------- /megatron/training/tokenizer/multimodal_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/training/tokenizer/multimodal_tokenizer.py -------------------------------------------------------------------------------- /megatron/training/tokenizer/sft_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/training/tokenizer/sft_tokenizer.py -------------------------------------------------------------------------------- /megatron/training/tokenizer/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/training/tokenizer/tokenizer.py -------------------------------------------------------------------------------- /megatron/training/training.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/training/training.py -------------------------------------------------------------------------------- /megatron/training/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/training/utils.py -------------------------------------------------------------------------------- /megatron/training/wandb_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/training/wandb_utils.py -------------------------------------------------------------------------------- /megatron/training/yaml_arguments.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/megatron/training/yaml_arguments.py -------------------------------------------------------------------------------- /model_provider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/model_provider.py -------------------------------------------------------------------------------- /pretrain_bert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/pretrain_bert.py -------------------------------------------------------------------------------- /pretrain_gpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/pretrain_gpt.py -------------------------------------------------------------------------------- /pretrain_ict.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/pretrain_ict.py -------------------------------------------------------------------------------- /pretrain_mamba.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/pretrain_mamba.py -------------------------------------------------------------------------------- /pretrain_retro.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/pretrain_retro.py -------------------------------------------------------------------------------- /pretrain_t5.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/pretrain_t5.py -------------------------------------------------------------------------------- /pretrain_vision_classify.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/pretrain_vision_classify.py -------------------------------------------------------------------------------- /pretrain_vision_dino.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/pretrain_vision_dino.py -------------------------------------------------------------------------------- /pretrain_vision_inpaint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/pretrain_vision_inpaint.py -------------------------------------------------------------------------------- /pretrain_vlm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/pretrain_vlm.py -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/pyproject.toml -------------------------------------------------------------------------------- /scripts/check_api_backwards_compatibility.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/scripts/check_api_backwards_compatibility.py -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/setup.py -------------------------------------------------------------------------------- /tasks/data_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tasks/data_utils.py -------------------------------------------------------------------------------- /tasks/eval_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tasks/eval_utils.py -------------------------------------------------------------------------------- /tasks/finetune_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tasks/finetune_utils.py -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/functional_tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/functional_tests/python_test_utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/functional_tests/python_test_utils/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/functional_tests/python_test_utils/common.py -------------------------------------------------------------------------------- /tests/functional_tests/python_test_utils/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/functional_tests/python_test_utils/conftest.py -------------------------------------------------------------------------------- /tests/functional_tests/test_cases/gpt/gpt3_7b_tp4_pp1_memory_speed/golden_values_lts_dgx_a100.json: -------------------------------------------------------------------------------- 1 | {} -------------------------------------------------------------------------------- /tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings/golden_values_lts_dgx_a100.json: -------------------------------------------------------------------------------- 1 | {} -------------------------------------------------------------------------------- /tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_modelopt_distill_resume/golden_values_lts_dgx_a100.json: -------------------------------------------------------------------------------- 1 | {} -------------------------------------------------------------------------------- /tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/golden_values_dev_dgxh100_dgxc.json: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_zp_z3_resume_fsdp_dtensor/golden_values_lts_dgx_a100.json: -------------------------------------------------------------------------------- 1 | {} -------------------------------------------------------------------------------- /tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/golden_values_lts_dgxa100_dracooci.json: -------------------------------------------------------------------------------- 1 | {} -------------------------------------------------------------------------------- /tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_optimizer/golden_values_lts_dgx_a100.json: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/test_utils/python_scripts/auto_reminder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/test_utils/python_scripts/auto_reminder.py -------------------------------------------------------------------------------- /tests/test_utils/python_scripts/dashboard.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/test_utils/python_scripts/dashboard.py -------------------------------------------------------------------------------- /tests/test_utils/python_scripts/notify.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/test_utils/python_scripts/notify.py -------------------------------------------------------------------------------- /tests/test_utils/python_scripts/recipe_parser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/test_utils/python_scripts/recipe_parser.py -------------------------------------------------------------------------------- /tests/test_utils/python_scripts/swap_pr_labels.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/test_utils/python_scripts/swap_pr_labels.py -------------------------------------------------------------------------------- /tests/test_utils/recipes/_build-mcore-dev.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/test_utils/recipes/_build-mcore-dev.yaml -------------------------------------------------------------------------------- /tests/test_utils/recipes/_build-mcore-lts.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/test_utils/recipes/_build-mcore-lts.yaml -------------------------------------------------------------------------------- /tests/test_utils/recipes/_build-nemo.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/test_utils/recipes/_build-nemo.yaml -------------------------------------------------------------------------------- /tests/test_utils/recipes/bert.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/test_utils/recipes/bert.yaml -------------------------------------------------------------------------------- /tests/test_utils/recipes/ckpt_converter.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/test_utils/recipes/ckpt_converter.yaml -------------------------------------------------------------------------------- /tests/test_utils/recipes/gpt-dynamic-inference.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/test_utils/recipes/gpt-dynamic-inference.yaml -------------------------------------------------------------------------------- /tests/test_utils/recipes/gpt-grads.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/test_utils/recipes/gpt-grads.yaml -------------------------------------------------------------------------------- /tests/test_utils/recipes/gpt-nemo.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/test_utils/recipes/gpt-nemo.yaml -------------------------------------------------------------------------------- /tests/test_utils/recipes/gpt-static-inference.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/test_utils/recipes/gpt-static-inference.yaml -------------------------------------------------------------------------------- /tests/test_utils/recipes/gpt.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/test_utils/recipes/gpt.yaml -------------------------------------------------------------------------------- /tests/test_utils/recipes/mamba-static-inference.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/test_utils/recipes/mamba-static-inference.yaml -------------------------------------------------------------------------------- /tests/test_utils/recipes/mamba.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/test_utils/recipes/mamba.yaml -------------------------------------------------------------------------------- /tests/test_utils/recipes/mimo.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/test_utils/recipes/mimo.yaml -------------------------------------------------------------------------------- /tests/test_utils/recipes/moe-dynamic-inference.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/test_utils/recipes/moe-dynamic-inference.yaml -------------------------------------------------------------------------------- /tests/test_utils/recipes/moe-static-inference.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/test_utils/recipes/moe-static-inference.yaml -------------------------------------------------------------------------------- /tests/test_utils/recipes/moe.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/test_utils/recipes/moe.yaml -------------------------------------------------------------------------------- /tests/test_utils/recipes/multimodal-llava.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/test_utils/recipes/multimodal-llava.yaml -------------------------------------------------------------------------------- /tests/test_utils/recipes/t5.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/test_utils/recipes/t5.yaml -------------------------------------------------------------------------------- /tests/test_utils/recipes/unit-tests.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/test_utils/recipes/unit-tests.yaml -------------------------------------------------------------------------------- /tests/unit_tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/__init__.py -------------------------------------------------------------------------------- /tests/unit_tests/a2a_overlap/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/a2a_overlap/utils.py -------------------------------------------------------------------------------- /tests/unit_tests/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/conftest.py -------------------------------------------------------------------------------- /tests/unit_tests/data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/unit_tests/data/test_bin_reader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/data/test_bin_reader.py -------------------------------------------------------------------------------- /tests/unit_tests/data/test_builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/data/test_builder.py -------------------------------------------------------------------------------- /tests/unit_tests/data/test_fim_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/data/test_fim_dataset.py -------------------------------------------------------------------------------- /tests/unit_tests/data/test_gpt_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/data/test_gpt_dataset.py -------------------------------------------------------------------------------- /tests/unit_tests/data/test_multimodal_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/data/test_multimodal_dataset.py -------------------------------------------------------------------------------- /tests/unit_tests/data/test_preprocess_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/data/test_preprocess_data.py -------------------------------------------------------------------------------- /tests/unit_tests/data/test_preprocess_mmdata.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/data/test_preprocess_mmdata.py -------------------------------------------------------------------------------- /tests/unit_tests/dist_checkpointing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/dist_checkpointing/__init__.py -------------------------------------------------------------------------------- /tests/unit_tests/dist_checkpointing/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/dist_checkpointing/conftest.py -------------------------------------------------------------------------------- /tests/unit_tests/dist_checkpointing/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/unit_tests/dist_checkpointing/models/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/dist_checkpointing/models/common.py -------------------------------------------------------------------------------- /tests/unit_tests/dist_checkpointing/test_fp8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/dist_checkpointing/test_fp8.py -------------------------------------------------------------------------------- /tests/unit_tests/dist_checkpointing/test_local.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/dist_checkpointing/test_local.py -------------------------------------------------------------------------------- /tests/unit_tests/dist_checkpointing/test_mapping.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/dist_checkpointing/test_mapping.py -------------------------------------------------------------------------------- /tests/unit_tests/dist_checkpointing/test_msc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/dist_checkpointing/test_msc.py -------------------------------------------------------------------------------- /tests/unit_tests/dist_checkpointing/test_strict.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/dist_checkpointing/test_strict.py -------------------------------------------------------------------------------- /tests/unit_tests/dist_checkpointing/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/dist_checkpointing/utils.py -------------------------------------------------------------------------------- /tests/unit_tests/export/trtllm/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/unit_tests/export/trtllm/test_trtllm_helper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/export/trtllm/test_trtllm_helper.py -------------------------------------------------------------------------------- /tests/unit_tests/export/trtllm/test_trtllm_layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/export/trtllm/test_trtllm_layers.py -------------------------------------------------------------------------------- /tests/unit_tests/find_test_cases.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/find_test_cases.py -------------------------------------------------------------------------------- /tests/unit_tests/fusions/test_bias_dropout_fusion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/fusions/test_bias_dropout_fusion.py -------------------------------------------------------------------------------- /tests/unit_tests/fusions/test_mla_yarn_rope_apply.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/fusions/test_mla_yarn_rope_apply.py -------------------------------------------------------------------------------- /tests/unit_tests/fusions/test_swiglu_fusion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/fusions/test_swiglu_fusion.py -------------------------------------------------------------------------------- /tests/unit_tests/fusions/test_torch_softmax.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/fusions/test_torch_softmax.py -------------------------------------------------------------------------------- /tests/unit_tests/inference/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/unit_tests/inference/engines/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/unit_tests/inference/model_inference_wrappers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/unit_tests/inference/test_flash_decode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/inference/test_flash_decode.py -------------------------------------------------------------------------------- /tests/unit_tests/inference/test_inference_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/inference/test_inference_utils.py -------------------------------------------------------------------------------- /tests/unit_tests/inference/test_scheduler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/inference/test_scheduler.py -------------------------------------------------------------------------------- /tests/unit_tests/inference/test_wandb_logging.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/inference/test_wandb_logging.py -------------------------------------------------------------------------------- /tests/unit_tests/inference/text_generation_controllers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/unit_tests/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/unit_tests/models/test_base_embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/models/test_base_embedding.py -------------------------------------------------------------------------------- /tests/unit_tests/models/test_bert_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/models/test_bert_model.py -------------------------------------------------------------------------------- /tests/unit_tests/models/test_clip_vit_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/models/test_clip_vit_model.py -------------------------------------------------------------------------------- /tests/unit_tests/models/test_gpt_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/models/test_gpt_model.py -------------------------------------------------------------------------------- /tests/unit_tests/models/test_llava_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/models/test_llava_model.py -------------------------------------------------------------------------------- /tests/unit_tests/models/test_mamba_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/models/test_mamba_model.py -------------------------------------------------------------------------------- /tests/unit_tests/models/test_mimo_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/models/test_mimo_model.py -------------------------------------------------------------------------------- /tests/unit_tests/models/test_mimo_submodules.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/models/test_mimo_submodules.py -------------------------------------------------------------------------------- /tests/unit_tests/models/test_multimodal_projector.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/models/test_multimodal_projector.py -------------------------------------------------------------------------------- /tests/unit_tests/models/test_radio_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/models/test_radio_model.py -------------------------------------------------------------------------------- /tests/unit_tests/models/test_t5_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/models/test_t5_model.py -------------------------------------------------------------------------------- /tests/unit_tests/pipeline_parallel/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/unit_tests/pipeline_parallel/test_helpers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/pipeline_parallel/test_helpers.py -------------------------------------------------------------------------------- /tests/unit_tests/pipeline_parallel/test_schedules.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/pipeline_parallel/test_schedules.py -------------------------------------------------------------------------------- /tests/unit_tests/post_training/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/unit_tests/run_ci_test.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/run_ci_test.sh -------------------------------------------------------------------------------- /tests/unit_tests/ssm/test_mamba_block.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/ssm/test_mamba_block.py -------------------------------------------------------------------------------- /tests/unit_tests/ssm/test_mamba_context_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/ssm/test_mamba_context_parallel.py -------------------------------------------------------------------------------- /tests/unit_tests/ssm/test_mamba_layer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/ssm/test_mamba_layer.py -------------------------------------------------------------------------------- /tests/unit_tests/ssm/test_mamba_mixer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/ssm/test_mamba_mixer.py -------------------------------------------------------------------------------- /tests/unit_tests/tensor_parallel/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/unit_tests/tensor_parallel/test_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/tensor_parallel/test_data.py -------------------------------------------------------------------------------- /tests/unit_tests/tensor_parallel/test_layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/tensor_parallel/test_layers.py -------------------------------------------------------------------------------- /tests/unit_tests/tensor_parallel/test_mappings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/tensor_parallel/test_mappings.py -------------------------------------------------------------------------------- /tests/unit_tests/tensor_parallel/test_random.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/tensor_parallel/test_random.py -------------------------------------------------------------------------------- /tests/unit_tests/test_api_backwards_compat_setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/test_api_backwards_compat_setup.py -------------------------------------------------------------------------------- /tests/unit_tests/test_basic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/test_basic.py -------------------------------------------------------------------------------- /tests/unit_tests/test_checkpointing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/test_checkpointing.py -------------------------------------------------------------------------------- /tests/unit_tests/test_fp8_param.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/test_fp8_param.py -------------------------------------------------------------------------------- /tests/unit_tests/test_fp8_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/test_fp8_utils.py -------------------------------------------------------------------------------- /tests/unit_tests/test_hyper_comm_grid.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/test_hyper_comm_grid.py -------------------------------------------------------------------------------- /tests/unit_tests/test_imports.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/test_imports.py -------------------------------------------------------------------------------- /tests/unit_tests/test_inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/test_inference.py -------------------------------------------------------------------------------- /tests/unit_tests/test_local_multi_tensor_fns.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/test_local_multi_tensor_fns.py -------------------------------------------------------------------------------- /tests/unit_tests/test_model_configs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/test_model_configs.py -------------------------------------------------------------------------------- /tests/unit_tests/test_nccl_allocator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/test_nccl_allocator.py -------------------------------------------------------------------------------- /tests/unit_tests/test_num_microbatches_calculator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/test_num_microbatches_calculator.py -------------------------------------------------------------------------------- /tests/unit_tests/test_optimizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/test_optimizer.py -------------------------------------------------------------------------------- /tests/unit_tests/test_optimizer_cpu_offloading.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/test_optimizer_cpu_offloading.py -------------------------------------------------------------------------------- /tests/unit_tests/test_optimizer_param_scheduler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/test_optimizer_param_scheduler.py -------------------------------------------------------------------------------- /tests/unit_tests/test_parallel_state.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/test_parallel_state.py -------------------------------------------------------------------------------- /tests/unit_tests/test_process_groups_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/test_process_groups_config.py -------------------------------------------------------------------------------- /tests/unit_tests/test_rl_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/test_rl_utils.py -------------------------------------------------------------------------------- /tests/unit_tests/test_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/test_tokenizer.py -------------------------------------------------------------------------------- /tests/unit_tests/test_training.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/test_training.py -------------------------------------------------------------------------------- /tests/unit_tests/test_utilities.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/test_utilities.py -------------------------------------------------------------------------------- /tests/unit_tests/test_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/test_utils.py -------------------------------------------------------------------------------- /tests/unit_tests/tokenizers/test_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/tokenizers/test_tokenizer.py -------------------------------------------------------------------------------- /tests/unit_tests/transformer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/unit_tests/transformer/moe/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/unit_tests/transformer/moe/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/transformer/moe/conftest.py -------------------------------------------------------------------------------- /tests/unit_tests/transformer/moe/test_aux_loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/transformer/moe/test_aux_loss.py -------------------------------------------------------------------------------- /tests/unit_tests/transformer/moe/test_grouped_mlp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/transformer/moe/test_grouped_mlp.py -------------------------------------------------------------------------------- /tests/unit_tests/transformer/moe/test_moe_layer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/transformer/moe/test_moe_layer.py -------------------------------------------------------------------------------- /tests/unit_tests/transformer/moe/test_routers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/transformer/moe/test_routers.py -------------------------------------------------------------------------------- /tests/unit_tests/transformer/moe/test_upcycling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/transformer/moe/test_upcycling.py -------------------------------------------------------------------------------- /tests/unit_tests/transformer/test_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/transformer/test_attention.py -------------------------------------------------------------------------------- /tests/unit_tests/transformer/test_core_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/transformer/test_core_attention.py -------------------------------------------------------------------------------- /tests/unit_tests/transformer/test_cuda_graphs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/transformer/test_cuda_graphs.py -------------------------------------------------------------------------------- /tests/unit_tests/transformer/test_full_cuda_graph.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/transformer/test_full_cuda_graph.py -------------------------------------------------------------------------------- /tests/unit_tests/transformer/test_mlp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/transformer/test_mlp.py -------------------------------------------------------------------------------- /tests/unit_tests/transformer/test_module.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/transformer/test_module.py -------------------------------------------------------------------------------- /tests/unit_tests/transformer/test_retro_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/transformer/test_retro_attention.py -------------------------------------------------------------------------------- /tests/unit_tests/transformer/test_rope.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/transformer/test_rope.py -------------------------------------------------------------------------------- /tests/unit_tests/transformer/test_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/transformer/test_utils.py -------------------------------------------------------------------------------- /tests/unit_tests/utils/test_experimental_log_once.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tests/unit_tests/utils/test_experimental_log_once.py -------------------------------------------------------------------------------- /tools/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tools/autoformat.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tools/autoformat.sh -------------------------------------------------------------------------------- /tools/bert_embedding/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tools/bert_embedding/__init__.py -------------------------------------------------------------------------------- /tools/bert_embedding/dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tools/bert_embedding/dataset.py -------------------------------------------------------------------------------- /tools/bert_embedding/embed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tools/bert_embedding/embed.py -------------------------------------------------------------------------------- /tools/bert_embedding/external_libs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tools/bert_embedding/external_libs.py -------------------------------------------------------------------------------- /tools/bert_embedding/huggingface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tools/bert_embedding/huggingface.py -------------------------------------------------------------------------------- /tools/check_copyright.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tools/check_copyright.py -------------------------------------------------------------------------------- /tools/checkpoint/checkpoint_inspector.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tools/checkpoint/checkpoint_inspector.py -------------------------------------------------------------------------------- /tools/checkpoint/convert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tools/checkpoint/convert.py -------------------------------------------------------------------------------- /tools/checkpoint/hybrid_conversion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tools/checkpoint/hybrid_conversion.py -------------------------------------------------------------------------------- /tools/checkpoint/loader_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tools/checkpoint/loader_base.py -------------------------------------------------------------------------------- /tools/checkpoint/loader_core.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tools/checkpoint/loader_core.py -------------------------------------------------------------------------------- /tools/checkpoint/loader_legacy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tools/checkpoint/loader_legacy.py -------------------------------------------------------------------------------- /tools/checkpoint/loader_llama_mistral.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tools/checkpoint/loader_llama_mistral.py -------------------------------------------------------------------------------- /tools/checkpoint/loader_llava.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tools/checkpoint/loader_llava.py -------------------------------------------------------------------------------- /tools/checkpoint/loader_mixtral_hf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tools/checkpoint/loader_mixtral_hf.py -------------------------------------------------------------------------------- /tools/checkpoint/saver_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tools/checkpoint/saver_base.py -------------------------------------------------------------------------------- /tools/checkpoint/saver_core.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tools/checkpoint/saver_core.py -------------------------------------------------------------------------------- /tools/checkpoint/saver_hf_llava.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tools/checkpoint/saver_hf_llava.py -------------------------------------------------------------------------------- /tools/checkpoint/saver_legacy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tools/checkpoint/saver_legacy.py -------------------------------------------------------------------------------- /tools/checkpoint/saver_llava.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tools/checkpoint/saver_llava.py -------------------------------------------------------------------------------- /tools/checkpoint/schema_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tools/checkpoint/schema_base.py -------------------------------------------------------------------------------- /tools/checkpoint/schema_core.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tools/checkpoint/schema_core.py -------------------------------------------------------------------------------- /tools/checkpoint/schema_hf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tools/checkpoint/schema_hf.py -------------------------------------------------------------------------------- /tools/checkpoint/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tools/checkpoint/utils.py -------------------------------------------------------------------------------- /tools/copyright.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tools/copyright.sh -------------------------------------------------------------------------------- /tools/linter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tools/linter.py -------------------------------------------------------------------------------- /tools/merge_datasets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tools/merge_datasets.py -------------------------------------------------------------------------------- /tools/preprocess_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tools/preprocess_data.py -------------------------------------------------------------------------------- /tools/preprocess_data_nmt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tools/preprocess_data_nmt.py -------------------------------------------------------------------------------- /tools/preprocess_mmdata.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tools/preprocess_mmdata.py -------------------------------------------------------------------------------- /tools/report_theoretical_memory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tools/report_theoretical_memory.py -------------------------------------------------------------------------------- /tools/retro/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tools/retro/README.md -------------------------------------------------------------------------------- /tools/retro/build_db.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tools/retro/build_db.md -------------------------------------------------------------------------------- /tools/retro/cli/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tools/retro/cli/__init__.py -------------------------------------------------------------------------------- /tools/retro/cli/__main__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tools/retro/cli/__main__.py -------------------------------------------------------------------------------- /tools/retro/cli/cli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tools/retro/cli/cli.py -------------------------------------------------------------------------------- /tools/retro/config_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tools/retro/config_utils.py -------------------------------------------------------------------------------- /tools/retro/docker/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tools/retro/docker/Dockerfile -------------------------------------------------------------------------------- /tools/retro/preprocess_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tools/retro/preprocess_data.py -------------------------------------------------------------------------------- /tools/retro/sft/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tools/retro/sft/README.md -------------------------------------------------------------------------------- /tools/retro/sft/dataset_conv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tools/retro/sft/dataset_conv.py -------------------------------------------------------------------------------- /tools/retro/sft/open_inst.sh: -------------------------------------------------------------------------------- 1 | DATA_BLEND="1.0 open_inst" 2 | -------------------------------------------------------------------------------- /tools/retro/sft/sft_retro.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tools/retro/sft/sft_retro.py -------------------------------------------------------------------------------- /tools/retro/sft/sft_retro_lm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tools/retro/sft/sft_retro_lm.sh -------------------------------------------------------------------------------- /tools/retro/text_generation/evaluate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tools/retro/text_generation/evaluate.py -------------------------------------------------------------------------------- /tools/retro/text_generation/metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tools/retro/text_generation/metrics.py -------------------------------------------------------------------------------- /tools/retro/text_generation/retro_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tools/retro/text_generation/retro_api.py -------------------------------------------------------------------------------- /tools/retro/text_generation/retro_generate.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tools/retro/text_generation/retro_generate.sh -------------------------------------------------------------------------------- /tools/retro/text_generation/retro_generation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tools/retro/text_generation/retro_generation.py -------------------------------------------------------------------------------- /tools/retro/text_generation/retro_text_generation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tools/retro/text_generation/retro_text_generation.py -------------------------------------------------------------------------------- /tools/run_inference_performance_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tools/run_inference_performance_test.py -------------------------------------------------------------------------------- /tools/run_mamba_text_generation_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tools/run_mamba_text_generation_server.py -------------------------------------------------------------------------------- /tools/run_text_generation_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tools/run_text_generation_server.py -------------------------------------------------------------------------------- /tools/run_vlm_text_generation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tools/run_vlm_text_generation.py -------------------------------------------------------------------------------- /tools/text_generation_cli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tools/text_generation_cli.py -------------------------------------------------------------------------------- /tools/upgrade_dependencies.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tools/upgrade_dependencies.sh -------------------------------------------------------------------------------- /tools/wait_daemon.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/tools/wait_daemon.sh -------------------------------------------------------------------------------- /train_rl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/train_rl.py -------------------------------------------------------------------------------- /uv.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/Megatron-LM/HEAD/uv.lock --------------------------------------------------------------------------------