├── .coveragerc ├── .github ├── ISSUE_TEMPLATE │ ├── bug.md │ ├── enhancement.md │ ├── question.md │ └── regression.md └── workflows │ └── stale.yml ├── .gitignore ├── .gitlab-ci.yml ├── CODEOWNERS ├── CONTRIBUTING.md ├── Dockerfile.ci ├── Dockerfile.linting ├── LICENSE ├── MANIFEST.in ├── README.md ├── artifact ├── README.md ├── example-results │ ├── full-exp.txt │ └── quick-exp.txt ├── exp_one_host.csv ├── full_exp.sh ├── quick_exp.sh └── show_result_full_exp.py ├── docs ├── llama_mistral.md └── source │ ├── api-guide │ ├── context_parallel.rst │ ├── datasets.rst │ ├── dist_checkpointing.rst │ ├── dist_checkpointing.strategies.rst │ ├── distributed.rst │ ├── fusions.rst │ ├── index.rst │ ├── models.bert.rst │ ├── models.gpt.rst │ ├── models.rst │ ├── models.t5.rst │ ├── moe.rst │ ├── num_microbatches_calculator.rst │ ├── pipeline_parallel.rst │ ├── tensor_parallel.rst │ └── transformer.rst │ ├── distrib_optimizer.md │ ├── images │ ├── context_parallel │ │ ├── CP_overview.png │ │ └── CP_results.png │ └── distrib_optimizer │ │ ├── data_flow.png │ │ └── sharding_scheme.png │ ├── index.rst │ └── user-guide │ └── index.rst ├── examples ├── academic_paper_scripts │ ├── detxoify_lm │ │ ├── README.md │ │ ├── annotations │ │ │ ├── filter-selfgeneration.py │ │ │ ├── perspective_api_annotate.py │ │ │ └── preprocess.sh │ │ ├── finetune_gpt.py │ │ ├── finetune_gpt_distributed-1.3b.sh │ │ ├── generate-1.3b.sh │ │ ├── generate_samples_gpt.py │ │ ├── perspective_api.py │ │ └── self_generation │ │ │ └── selfgenerate-1.3b-unconditional.sh │ ├── msdp │ │ ├── README.md │ │ ├── data_processing.sh │ │ ├── eval_knwl_generation.sh │ │ ├── eval_resp_generation.sh │ │ ├── prep_resp_gen.sh │ │ ├── prompt_knwl_gen.sh │ │ └── prompt_resp_gen.sh │ └── sc21 │ │ ├── CONFIG.sh │ │ ├── README.md │ │ ├── SBATCH.sh │ │ ├── SRUN.sh │ │ ├── run_figure_11.sh │ │ ├── run_figure_12.sh │ │ ├── run_figure_13.sh │ │ ├── run_figure_14.sh │ │ ├── run_figure_15.sh │ │ ├── run_figure_16.sh │ │ ├── run_figure_17.sh │ │ ├── run_figure_18.sh │ │ └── run_table_1.sh ├── bert │ ├── README.md │ └── train_bert_340m_distributed.sh ├── gpt3 │ ├── README.md │ ├── gpt_config.yaml │ └── train_gpt3_175b_distributed.sh ├── inference │ ├── README.md │ ├── gpt │ │ └── simple_gpt_batch_inference.py │ ├── quantization │ │ ├── README.md │ │ ├── ptq_trtllm_llama_7b.sh │ │ ├── ptq_trtllm_nemotron3_8b.sh │ │ ├── text_generation_ptq.py │ │ └── trtllm_text_generation.py │ ├── run_text_generation_server_345M.sh │ └── run_text_generation_server_345M_8_tensor_parallel.sh ├── mamba │ ├── .gitignore │ ├── Dockerfile │ ├── README.md │ ├── run_text_gen_server_8b.sh │ ├── run_text_gen_server_8b_gpt3.sh │ └── train.sh ├── mixtral │ ├── README.md │ └── train_mixtral_8x7b_distributed.sh ├── multimodal │ ├── Dockerfile │ ├── README.md │ ├── assets │ │ └── pretrain_curves.png │ ├── clip_converter.py │ ├── combine_mistral_clip.sh │ ├── combine_state_dicts.py │ ├── config.py │ ├── convert_llava_pretrain_to_wds.py │ ├── dataloader_provider.py │ ├── dataset_helpers.py │ ├── evaluate_coco.py │ ├── evaluate_mmmu.py │ ├── evaluate_textvqa.py │ ├── evaluate_vqav2.py │ ├── layer_specs.py │ ├── manual_prompts.json │ ├── pretrain_dataset.yaml │ ├── pretrain_mistral_clip.sh │ ├── run_text_generation.py │ ├── sft_dataset.yaml │ ├── sft_mistral_clip.sh │ ├── text_generation_mistral_clip.sh │ └── train.py ├── retro │ ├── README.md │ ├── preprocess_data.sh │ └── train_retro_2b_distributed.sh ├── run_simple_mcore_train_loop.py └── t5 │ ├── README.md │ ├── t5_mcore_train_curve.png │ └── train_t5_220m_distributed.sh ├── images ├── expt-pp32-flops.png ├── expt-pp32-mem.png ├── model_table.png ├── schedule-interlaced.png ├── schedule-vocab-1.png ├── schedule-vocab-2.png ├── st-passes-1.png ├── st-passes-2.png ├── strong_scaling.png └── weak_scaling.png ├── input_store.py ├── jet-tests.yml ├── megatron ├── core │ ├── QuickStart.md │ ├── README.md │ ├── README_STRAGGLER.md │ ├── __init__.py │ ├── datasets │ │ ├── Makefile │ │ ├── __init__.py │ │ ├── bert_dataset.py │ │ ├── blended_dataset.py │ │ ├── blended_megatron_dataset_builder.py │ │ ├── blended_megatron_dataset_config.py │ │ ├── gpt_dataset.py │ │ ├── helpers.cpp │ │ ├── indexed_dataset.py │ │ ├── masked_dataset.py │ │ ├── megatron_dataset.py │ │ ├── megatron_tokenizer.py │ │ ├── multimodal_dataset.py │ │ ├── readme.md │ │ ├── retro │ │ │ ├── __init__.py │ │ │ ├── config │ │ │ │ ├── __init__.py │ │ │ │ ├── bert_embedders.py │ │ │ │ ├── config.py │ │ │ │ ├── gpt_chunk_datasets.py │ │ │ │ └── tokenizers.py │ │ │ ├── db │ │ │ │ ├── __init__.py │ │ │ │ ├── build.py │ │ │ │ ├── dataset.py │ │ │ │ └── utils.py │ │ │ ├── external_libs.py │ │ │ ├── index │ │ │ │ ├── __init__.py │ │ │ │ ├── build.py │ │ │ │ ├── factory.py │ │ │ │ ├── index.py │ │ │ │ ├── indexes │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── faiss_base.py │ │ │ │ │ └── faiss_par_add.py │ │ │ │ ├── utils.py │ │ │ │ └── validate.py │ │ │ ├── query │ │ │ │ ├── __init__.py │ │ │ │ ├── gpt_chunk_dataset.py │ │ │ │ ├── multi_split_gpt_dataset.py │ │ │ │ ├── query.py │ │ │ │ ├── retro_dataset.py │ │ │ │ └── utils.py │ │ │ └── utils.py │ │ ├── t5_dataset.py │ │ ├── utils.py │ │ └── utils_s3.py │ ├── dist_checkpointing │ │ ├── __init__.py │ │ ├── core.py │ │ ├── dict_utils.py │ │ ├── mapping.py │ │ ├── optimizer.py │ │ ├── serialization.py │ │ ├── strategies │ │ │ ├── __init__.py │ │ │ ├── async_utils.py │ │ │ ├── base.py │ │ │ ├── common.py │ │ │ ├── filesystem_async.py │ │ │ ├── fully_parallel.py │ │ │ ├── resharding.py │ │ │ ├── state_dict_saver.py │ │ │ ├── tensorstore.py │ │ │ ├── torch.py │ │ │ ├── two_stage.py │ │ │ └── zarr.py │ │ ├── utils.py │ │ └── validation.py │ ├── distributed │ │ ├── __init__.py │ │ ├── distributed_data_parallel.py │ │ ├── distributed_data_parallel_config.py │ │ ├── finalize_model_grads.py │ │ └── param_and_grad_buffer.py │ ├── enums.py │ ├── fusions │ │ ├── __init__.py │ │ ├── fused_bias_dropout.py │ │ ├── fused_bias_geglu.py │ │ ├── fused_bias_gelu.py │ │ ├── fused_bias_swiglu.py │ │ ├── fused_cross_entropy.py │ │ ├── fused_layer_norm.py │ │ └── fused_softmax.py │ ├── inference │ │ ├── __init__.py │ │ ├── ammo_support │ │ │ ├── __init__.py │ │ │ └── gpt │ │ │ │ ├── __init__.py │ │ │ │ ├── model_specs.py │ │ │ │ └── state_dict_hooks.py │ │ ├── common_inference_params.py │ │ ├── communication_utils.py │ │ ├── engines │ │ │ ├── __init__.py │ │ │ ├── abstract_engine.py │ │ │ └── mcore_engine.py │ │ ├── inference_request.py │ │ ├── model_inference_wrappers │ │ │ ├── __init__.py │ │ │ ├── abstract_model_inference_wrapper.py │ │ │ ├── gpt │ │ │ │ ├── __init__.py │ │ │ │ └── gpt_inference_wrapper.py │ │ │ └── inference_wrapper_config.py │ │ ├── scheduler.py │ │ ├── text_generation_controllers │ │ │ ├── __init__.py │ │ │ └── simple_text_generation_controller.py │ │ └── utils.py │ ├── inference_params.py │ ├── jit.py │ ├── model_parallel_config.py │ ├── models │ │ ├── T5 │ │ │ ├── __init__.py │ │ │ ├── t5_model.py │ │ │ └── t5_spec.py │ │ ├── __init__.py │ │ ├── bert │ │ │ ├── __init__.py │ │ │ ├── bert_layer_specs.py │ │ │ ├── bert_lm_head.py │ │ │ ├── bert_model.py │ │ │ └── pooler.py │ │ ├── common │ │ │ ├── __init__.py │ │ │ ├── embeddings │ │ │ │ ├── __init__.py │ │ │ │ ├── language_model_embedding.py │ │ │ │ └── rotary_pos_embedding.py │ │ │ ├── language_module │ │ │ │ ├── __init__.py │ │ │ │ └── language_module.py │ │ │ └── vision_module │ │ │ │ ├── __init__.py │ │ │ │ └── vision_module.py │ │ ├── gpt │ │ │ ├── __init__.py │ │ │ ├── gpt_layer_specs.py │ │ │ └── gpt_model.py │ │ ├── mamba │ │ │ ├── __init__.py │ │ │ ├── mamba_layer_specs.py │ │ │ └── mamba_model.py │ │ ├── multimodal │ │ │ ├── __init__.py │ │ │ ├── llava_model.py │ │ │ └── llava_spec.py │ │ ├── retro │ │ │ ├── __init__.py │ │ │ ├── base_attention.py │ │ │ ├── config.py │ │ │ ├── decoder_attention.py │ │ │ ├── decoder_spec.py │ │ │ ├── encoder_attention.py │ │ │ ├── encoder_spec.py │ │ │ ├── model.py │ │ │ └── utils.py │ │ └── vision │ │ │ ├── __init__.py │ │ │ ├── clip_vit_model.py │ │ │ ├── multimodal_projector.py │ │ │ └── vit_layer_specs.py │ ├── num_microbatches_calculator.py │ ├── optimizer │ │ ├── __init__.py │ │ ├── clip_grads.py │ │ ├── distrib_optimizer.py │ │ ├── grad_scaler.py │ │ ├── optimizer.py │ │ └── optimizer_config.py │ ├── package_info.py │ ├── packed_seq_params.py │ ├── parallel_state.py │ ├── pipeline_parallel │ │ ├── __init__.py │ │ ├── interlaced_schedule.py │ │ ├── p2p_communication.py │ │ ├── schedule_timers.py │ │ ├── schedules.py │ │ └── vocab_parallel_schedule.py │ ├── requirements.txt │ ├── ssm │ │ ├── __init__.py │ │ ├── mamba_block.py │ │ ├── mamba_hybrid_layer_allocation.py │ │ ├── mamba_layer.py │ │ ├── mamba_mixer.py │ │ └── triton_cache_manager.py │ ├── tensor_parallel │ │ ├── __init__.py │ │ ├── cross_entropy.py │ │ ├── data.py │ │ ├── layers.py │ │ ├── mappings.py │ │ ├── random.py │ │ ├── utils.py │ │ ├── vocab_input.py │ │ ├── vocab_input_store.py │ │ ├── vocab_output.py │ │ └── vocab_output_store.py │ ├── timers.py │ ├── transformer │ │ ├── __init__.py │ │ ├── attention.py │ │ ├── custom_layers │ │ │ ├── __init__.py │ │ │ └── transformer_engine.py │ │ ├── dot_product_attention.py │ │ ├── enums.py │ │ ├── identity_op.py │ │ ├── mlp.py │ │ ├── module.py │ │ ├── moe │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── experts.py │ │ │ ├── grouped_gemm_util.py │ │ │ ├── moe_layer.py │ │ │ ├── moe_utils.py │ │ │ ├── router.py │ │ │ └── token_dispatcher.py │ │ ├── spec_utils.py │ │ ├── torch_layer_norm.py │ │ ├── transformer_block.py │ │ ├── transformer_config.py │ │ ├── transformer_layer.py │ │ └── utils.py │ └── utils.py ├── inference │ ├── __init__.py │ ├── arguments.py │ ├── checkpointing.py │ ├── gpt │ │ ├── __init__.py │ │ └── model_provider.py │ ├── static │ │ └── index.html │ ├── text_generation │ │ ├── __init__.py │ │ ├── api.py │ │ ├── beam_utils.py │ │ ├── communication.py │ │ ├── forward_step.py │ │ ├── generation.py │ │ ├── sampling.py │ │ └── tokenization.py │ └── text_generation_server.py ├── legacy │ ├── data │ │ ├── __init__.py │ │ ├── autoaugment.py │ │ ├── biencoder_dataset_utils.py │ │ ├── data_samplers.py │ │ ├── dataset_utils.py │ │ ├── ict_dataset.py │ │ ├── image_folder.py │ │ ├── multimodal_dataset.py │ │ ├── orqa_wiki_dataset.py │ │ ├── realm_dataset_utils.py │ │ ├── realm_index.py │ │ └── vit_dataset.py │ ├── fp16_deprecated │ │ └── loss_scaler.py │ ├── fused_kernels │ │ ├── __init__.py │ │ ├── compat.h │ │ ├── tests │ │ │ ├── __init__.py │ │ │ └── test_fused_kernels.py │ │ └── type_shim.h │ ├── indexer.py │ ├── model │ │ ├── __init__.py │ │ ├── bert_model.py │ │ ├── biencoder_model.py │ │ ├── classification.py │ │ ├── enums.py │ │ ├── fused_bias_gelu.py │ │ ├── fused_layer_norm.py │ │ ├── fused_softmax.py │ │ ├── gpt_model.py │ │ ├── language_model.py │ │ ├── module.py │ │ ├── multiple_choice.py │ │ ├── realm_model.py │ │ ├── rms_norm.py │ │ ├── t5_model.py │ │ ├── transformer.py │ │ ├── utils.py │ │ └── vision │ │ │ ├── classification.py │ │ │ ├── dino.py │ │ │ ├── esvit_swin_backbone.py │ │ │ ├── inpainting.py │ │ │ ├── knn_monitor.py │ │ │ ├── mit_backbone.py │ │ │ ├── swin_backbone.py │ │ │ ├── utils.py │ │ │ └── vit_backbone.py │ └── mpu │ │ └── tests │ │ ├── __init__.py │ │ ├── commons.py │ │ ├── test_cross_entropy.py │ │ ├── test_data.py │ │ ├── test_initialize.py │ │ ├── test_layers.py │ │ └── test_random.py └── training │ ├── __init__.py │ ├── activations.py │ ├── arguments.py │ ├── async_utils.py │ ├── checkpointing.py │ ├── dist_signal_handler.py │ ├── global_vars.py │ ├── initialize.py │ ├── log_handler.py │ ├── one_logger_utils.py │ ├── optimizer_param_scheduler.py │ ├── theoretical_memory_usage.py │ ├── tokenizer │ ├── __init__.py │ ├── bert_tokenization.py │ ├── gpt2_tokenization.py │ └── tokenizer.py │ ├── training.py │ ├── utils.py │ └── yaml_arguments.py ├── pretrain_bert.py ├── pretrain_gpt.py ├── pretrain_gpt.sh ├── pretrain_ict.py ├── pretrain_mamba.py ├── pretrain_retro.py ├── pretrain_t5.py ├── pretrain_vision_classify.py ├── pretrain_vision_dino.py ├── pretrain_vision_inpaint.py ├── pretrain_vlm.py ├── pyproject.toml ├── setup.py ├── tasks ├── data_utils.py ├── ensemble_classifier.py ├── eval_utils.py ├── finetune_utils.py ├── glue │ ├── data.py │ ├── finetune.py │ ├── mnli.py │ └── qqp.py ├── main.py ├── msdp │ ├── README.md │ ├── evaluate.py │ ├── main.py │ ├── metrics.py │ ├── preprocessing.py │ └── prompt.py ├── orqa │ ├── README.md │ ├── evaluate_orqa.py │ ├── evaluate_utils.py │ ├── supervised │ │ ├── data.py │ │ ├── eval_utils.py │ │ └── finetune.py │ └── unsupervised │ │ ├── nq.py │ │ ├── qa_utils.py │ │ └── tokenizers.py ├── race │ ├── data.py │ └── finetune.py ├── vision │ ├── classification │ │ ├── classification.py │ │ └── eval_utils.py │ ├── finetune_utils.py │ ├── main.py │ └── segmentation │ │ ├── cityscapes.py │ │ ├── data.py │ │ ├── finetune_segformer.py │ │ ├── finetune_setr.py │ │ ├── metrics.py │ │ ├── seg_heads.py │ │ ├── seg_models.py │ │ ├── transforms.py │ │ └── utils.py └── zeroshot_gpt │ ├── datasets.py │ ├── detokenizer.py │ └── evaluate.py ├── tests ├── __init__.py ├── functional_tests │ ├── __init__.py │ ├── jet_recipes │ │ ├── MR-bert.yaml │ │ ├── MR-gpt-nemo.yaml │ │ ├── MR-gpt.yaml │ │ ├── MR-multimodal.yaml │ │ ├── MR-t5.yaml │ │ ├── build-pyt.yaml │ │ ├── local-generator.py │ │ ├── nightly-bert.yaml │ │ ├── nightly-gpt.yaml │ │ ├── weekly-gpt.yaml │ │ └── weekly-t5.yaml │ ├── python_test_utils │ │ ├── __init__.py │ │ ├── common.py │ │ ├── get_test_results_from_tensorboard_logs.py │ │ ├── jet_test_pipeline.py │ │ ├── multitest_ci_pipeline.py │ │ ├── test_ci_pipeline.py │ │ ├── test_fp8_ci_pipeline.py │ │ └── test_resume_checkpoint_pipeline.py │ ├── shell_test_utils │ │ ├── _run_local_training.sh │ │ ├── restart_jet_log_jobs.sh │ │ └── run_release_record.sh │ ├── test_results │ │ └── jet │ │ │ ├── bert_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2.json │ │ │ ├── bert_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_vp2.json │ │ │ ├── bert_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1.json │ │ │ ├── bert_345m_nightly_dgx_a100_1N8G_tp1_pp2.json │ │ │ ├── bert_345m_nightly_dgx_a100_1N8G_tp4_pp1.json │ │ │ ├── bert_mr_mcore_tp2_pp2_dgx_a100_1N8G.json │ │ │ ├── bert_mr_mcore_tp2_pp2_local_spec_dgx_a100_1N8G.json │ │ │ ├── bert_mr_resume_tp1_pp2dgx_a100_1N8G_.json │ │ │ ├── bert_mr_tp1_pp4_vp2_dgx_a100_1N8G.json │ │ │ ├── bert_mr_tp2_pp2_dgx_a100_1N8G.json │ │ │ ├── gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_dist_optimizer_overlap_grad_reduce_param_gather.json │ │ │ ├── gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather.json │ │ │ ├── gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2.json │ │ │ ├── gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_resume_torch_dist.json │ │ │ ├── gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4.json │ │ │ ├── gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_resume_torch_dist.json │ │ │ ├── gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_resume_torch_dist_te_2experts.json │ │ │ ├── gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_resume_torch_dist_te_4experts2parallel.json │ │ │ ├── gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_te_2experts.json │ │ │ ├── gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_te_4experts2parallel.json │ │ │ ├── gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1.json │ │ │ ├── gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch.json │ │ │ ├── gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist.json │ │ │ ├── gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp1_dist_optimizer_overlap_grad_reduce.json │ │ │ ├── gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp1_overlap_grad_reduce.json │ │ │ ├── gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp2.json │ │ │ ├── gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp2_resume_torch.json │ │ │ ├── gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp4.json │ │ │ ├── gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp4_overlap_grad_reduce.json │ │ │ ├── gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp4_resume_torch.json │ │ │ ├── gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp4_vp1_overlap_grad_reduce.json │ │ │ ├── gpt3_345m_nightly_dgx_a100_1N8G_tp2_pp2_4experts.json │ │ │ ├── gpt3_345m_nightly_dgx_a100_1N8G_tp2_pp2_overlap_grad_reduce.json │ │ │ ├── gpt3_345m_nightly_dgx_a100_1N8G_tp2_pp2_resume_torch_4experts.json │ │ │ ├── gpt3_345m_nightly_dgx_a100_1N8G_tp2_pp2_resume_torch_overlap_grad_reduce.json │ │ │ ├── gpt3_345m_nightly_dgx_a100_1N8G_tp4_pp1.json │ │ │ ├── gpt3_345m_nightly_dgx_a100_1N8G_tp4_pp1_overlap_grad_reduce.json │ │ │ ├── gpt3_345m_nightly_dgx_a100_1N8G_tp4_pp1_resume_torch.json │ │ │ ├── gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_dgx_a100_1N8G.json │ │ │ ├── gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G.json │ │ │ ├── gpt3_mr_mcore_te_tp1_pp1_uniform_full_recompute_dgx_a100_1N8G.json │ │ │ ├── gpt3_mr_mcore_te_tp1_pp2_rope_embeddings_dgx_a100_1N8G.json │ │ │ ├── gpt3_mr_mcore_te_tp1_pp2_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G.json │ │ │ ├── gpt3_mr_mcore_te_tp1_pp4_disable_bias_linear_dgx_a100_1N8G.json │ │ │ ├── gpt3_mr_mcore_te_tp1_pp4_sequence_parallel_dgx_a100_1N8G.json │ │ │ ├── gpt3_mr_mcore_te_tp1_pp4_swiglu_dgx_a100_1N8G.json │ │ │ ├── gpt3_mr_mcore_te_tp1_pp4_untie_embeddings_and_outputs_dgx_a100_1N8G.json │ │ │ ├── gpt3_mr_mcore_te_tp1_pp4_vp1_calculate_per_token_loss_dgx_a100_1N8G.json │ │ │ ├── gpt3_mr_mcore_te_tp1_pp4_vp1_decoupled_lr_dgx_a100_1N8G.json │ │ │ ├── gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G.json │ │ │ ├── gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G.json │ │ │ ├── gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G.json │ │ │ ├── gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G.json │ │ │ ├── gpt3_mr_mcore_te_tp2_pp1_cp2_nondeterministic_dgx_a100_1N8G.json │ │ │ ├── gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_dgx_a100_1N8G.json │ │ │ ├── gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G.json │ │ │ ├── gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_groupedGEMM_dgx_a100_1N8G.json │ │ │ ├── gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM_dgx_a100_1N8G.json │ │ │ ├── gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_top2router_dgx_a100_1N8G.json │ │ │ ├── gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G.json │ │ │ ├── gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G.json │ │ │ ├── gpt3_mr_mcore_te_tp2_pp2_ddp_average_in_collective_dgx_a100_1N8G.json │ │ │ ├── gpt3_mr_mcore_te_tp2_pp2_defer_embedding_wgrad_compute_dgx_a100_1N8G.json │ │ │ ├── gpt3_mr_mcore_te_tp2_pp2_dgx_a100_1N8G.json │ │ │ ├── gpt3_mr_mcore_te_tp2_pp2_no_create_attention_mask_in_dataloader_dgx_a100_1N8G.json │ │ │ ├── gpt3_mr_mcore_te_tp2_pp2_no_mmap_bin_files_dgx_a100_1N8G.json │ │ │ ├── gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G.json │ │ │ ├── gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G.json │ │ │ ├── gpt3_mr_mcore_te_tp4_pp1_qk_layernorm_test_mode_dgx_a100_1N8G.json │ │ │ ├── gpt3_mr_resume_dgx_a100_1N8G_tp1_pp2.json │ │ │ ├── gpt3_mr_te_tp2_pp2_dgx_a100_1N8G.json │ │ │ ├── gpt3_mr_tp1_pp4_vp1_dgx_a100_1N8G.json │ │ │ ├── gpt3_mr_tp2_pp2_dgx_a100_1N8G.json │ │ │ ├── multimodal_llava_mr_mcore_te_tp1_pp1_dgx_a100_1N8G.json │ │ │ ├── multimodal_llava_mr_mcore_te_tp2_pp4_dgx_a100_1N8G.json │ │ │ └── t5_220m_mr_mcore_tp2_pp4_dgx_a100_1N8G.json │ └── test_scripts │ │ ├── bert │ │ └── pretrain_bert_distributed_test.sh │ │ ├── gpt3 │ │ ├── pretrain_gpt3_distributed_test.sh │ │ └── pretrain_gpt3_nemo_test.sh │ │ ├── multimodal │ │ └── pretrain_llava_distributed_test.sh │ │ ├── retro │ │ └── pretrain_retro_distributed_test.sh │ │ └── t5 │ │ └── pretrain_t5_distributed_test.sh └── unit_tests │ ├── __init__.py │ ├── data │ ├── __init__.py │ ├── test_bin_reader.py │ ├── test_builder.py │ ├── test_gpt_dataset.py │ ├── test_multimodal_dataset.py │ ├── test_preprocess_data.py │ └── test_preprocess_mmdata.py │ ├── dist_checkpointing │ ├── __init__.py │ ├── conftest.py │ ├── models │ │ ├── __init__.py │ │ ├── common.py │ │ ├── test_bert_model.py │ │ ├── test_gpt_model.py │ │ ├── test_grouped_mlp.py │ │ ├── test_mlp_glu.py │ │ ├── test_retro_model.py │ │ ├── test_sequential_mlp.py │ │ └── test_t5_model.py │ ├── test_async_save.py │ ├── test_cached_metadata.py │ ├── test_flattened_resharding.py │ ├── test_fully_parallel.py │ ├── test_mapping.py │ ├── test_optimizer.py │ └── test_serialization.py │ ├── distributed │ └── test_param_and_grad_buffer.py │ ├── fusions │ └── test_torch_softmax.py │ ├── inference │ ├── __init__.py │ ├── engines │ │ ├── __init__.py │ │ └── test_mcore_engine.py │ ├── model_inference_wrappers │ │ ├── __init__.py │ │ ├── gpt │ │ │ └── test_gpt_inference_wrapper.py │ │ └── test_model_inference_wrapper_config.py │ ├── test_common_inference_params.py │ ├── test_inference_utils.py │ ├── test_modelopt_gpt_model.py │ ├── test_scheduler.py │ └── text_generation_controllers │ │ ├── __init__.py │ │ └── test_simple_text_generation_controller.py │ ├── models │ ├── __init__.py │ ├── test_base_embedding.py │ ├── test_bert_model.py │ ├── test_clip_vit_model.py │ ├── test_gpt_model.py │ ├── test_llava_model.py │ ├── test_mamba_model.py │ ├── test_multimodal_projector.py │ └── test_t5_model.py │ ├── pipeline_parallel │ ├── __init__.py │ └── test_schedules.py │ ├── tensor_parallel │ ├── __init__.py │ ├── test_cross_entropy.py │ ├── test_data.py │ ├── test_initialization.py │ ├── test_layers.py │ ├── test_mappings.py │ ├── test_random.py │ └── test_tensor_parallel_utils.py │ ├── test_basic.py │ ├── test_imports.py │ ├── test_local_multi_tensor_fns.py │ ├── test_num_microbatches_calculator.py │ ├── test_optimizer.py │ ├── test_parallel_state.py │ ├── test_training.py │ ├── test_utilities.py │ ├── test_utils.py │ └── transformer │ ├── __init__.py │ ├── moe │ ├── __init__.py │ ├── test_a2a_token_dispatcher.py │ ├── test_aux_loss.py │ ├── test_grouped_mlp.py │ ├── test_routers.py │ ├── test_sequential_mlp.py │ └── test_token_dispatcher.py │ ├── test_attention.py │ ├── test_attention_packed_seq.py │ ├── test_core_attention.py │ ├── test_mlp.py │ ├── test_module.py │ ├── test_retro_attention.py │ ├── test_spec_customization.py │ ├── test_transformer_block.py │ └── test_transformer_layer.py └── tools ├── autoformat.sh ├── bert_embedding ├── __init__.py ├── dataset.py ├── embed.py ├── external_libs.py └── huggingface.py ├── checkpoint ├── convert.py ├── hybrid_conversion.py ├── loader_llama_mistral.py ├── loader_mcore.py ├── loader_megatron.py ├── loader_mixtral_hf.py ├── saver_mcore.py ├── saver_megatron.py ├── setter.py └── utils.py ├── linter.py ├── merge_datasets.py ├── openwebtext ├── README.md ├── add_id.py ├── blacklist_urls.py ├── cleanup_dataset.py ├── cleanup_fix_dataset.py ├── filter_ngrams.py ├── find_duplicates.py ├── group_duplicate_url.py ├── merge_jsons.py └── remove_group_duplicates.py ├── preprocess_data.py ├── preprocess_data_nmt.py ├── preprocess_mmdata.py ├── report_theoretical_memory.py ├── retro ├── README.md ├── build_db.md ├── cli │ ├── __init__.py │ ├── __main__.py │ └── cli.py ├── config_utils.py ├── docker │ └── Dockerfile ├── preprocess_data.py ├── sft │ ├── README.md │ ├── dataset_conv.py │ ├── open_inst.sh │ ├── sft_retro.py │ └── sft_retro_lm.sh └── text_generation │ ├── evaluate.py │ ├── metrics.py │ ├── retro_api.py │ ├── retro_generate.sh │ ├── retro_generation.py │ └── retro_text_generation.py ├── run_mamba_text_generation_server.py ├── run_text_generation_server.py ├── run_vlm_text_generation.py └── text_generation_cli.py /.coveragerc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/.coveragerc -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/.github/ISSUE_TEMPLATE/bug.md -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/enhancement.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/.github/ISSUE_TEMPLATE/enhancement.md -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/question.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/.github/ISSUE_TEMPLATE/question.md -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/regression.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/.github/ISSUE_TEMPLATE/regression.md -------------------------------------------------------------------------------- /.github/workflows/stale.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/.github/workflows/stale.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/.gitignore -------------------------------------------------------------------------------- /.gitlab-ci.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/.gitlab-ci.yml -------------------------------------------------------------------------------- /CODEOWNERS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/CODEOWNERS -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/CONTRIBUTING.md -------------------------------------------------------------------------------- /Dockerfile.ci: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/Dockerfile.ci -------------------------------------------------------------------------------- /Dockerfile.linting: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/Dockerfile.linting -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/LICENSE -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include megatron/core/requirements.txt 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/README.md -------------------------------------------------------------------------------- /artifact/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/artifact/README.md -------------------------------------------------------------------------------- /artifact/example-results/full-exp.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/artifact/example-results/full-exp.txt -------------------------------------------------------------------------------- /artifact/example-results/quick-exp.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/artifact/example-results/quick-exp.txt -------------------------------------------------------------------------------- /artifact/exp_one_host.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/artifact/exp_one_host.csv -------------------------------------------------------------------------------- /artifact/full_exp.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/artifact/full_exp.sh -------------------------------------------------------------------------------- /artifact/quick_exp.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/artifact/quick_exp.sh -------------------------------------------------------------------------------- /artifact/show_result_full_exp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/artifact/show_result_full_exp.py -------------------------------------------------------------------------------- /docs/llama_mistral.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/docs/llama_mistral.md -------------------------------------------------------------------------------- /docs/source/api-guide/context_parallel.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/docs/source/api-guide/context_parallel.rst -------------------------------------------------------------------------------- /docs/source/api-guide/datasets.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/docs/source/api-guide/datasets.rst -------------------------------------------------------------------------------- /docs/source/api-guide/dist_checkpointing.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/docs/source/api-guide/dist_checkpointing.rst -------------------------------------------------------------------------------- /docs/source/api-guide/dist_checkpointing.strategies.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/docs/source/api-guide/dist_checkpointing.strategies.rst -------------------------------------------------------------------------------- /docs/source/api-guide/distributed.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/docs/source/api-guide/distributed.rst -------------------------------------------------------------------------------- /docs/source/api-guide/fusions.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/docs/source/api-guide/fusions.rst -------------------------------------------------------------------------------- /docs/source/api-guide/index.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/docs/source/api-guide/index.rst -------------------------------------------------------------------------------- /docs/source/api-guide/models.bert.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/docs/source/api-guide/models.bert.rst -------------------------------------------------------------------------------- /docs/source/api-guide/models.gpt.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/docs/source/api-guide/models.gpt.rst -------------------------------------------------------------------------------- /docs/source/api-guide/models.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/docs/source/api-guide/models.rst -------------------------------------------------------------------------------- /docs/source/api-guide/models.t5.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/docs/source/api-guide/models.t5.rst -------------------------------------------------------------------------------- /docs/source/api-guide/moe.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/docs/source/api-guide/moe.rst -------------------------------------------------------------------------------- /docs/source/api-guide/num_microbatches_calculator.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/docs/source/api-guide/num_microbatches_calculator.rst -------------------------------------------------------------------------------- /docs/source/api-guide/pipeline_parallel.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/docs/source/api-guide/pipeline_parallel.rst -------------------------------------------------------------------------------- /docs/source/api-guide/tensor_parallel.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/docs/source/api-guide/tensor_parallel.rst -------------------------------------------------------------------------------- /docs/source/api-guide/transformer.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/docs/source/api-guide/transformer.rst -------------------------------------------------------------------------------- /docs/source/distrib_optimizer.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/docs/source/distrib_optimizer.md -------------------------------------------------------------------------------- /docs/source/images/context_parallel/CP_overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/docs/source/images/context_parallel/CP_overview.png -------------------------------------------------------------------------------- /docs/source/images/context_parallel/CP_results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/docs/source/images/context_parallel/CP_results.png -------------------------------------------------------------------------------- /docs/source/images/distrib_optimizer/data_flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/docs/source/images/distrib_optimizer/data_flow.png -------------------------------------------------------------------------------- /docs/source/images/distrib_optimizer/sharding_scheme.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/docs/source/images/distrib_optimizer/sharding_scheme.png -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/docs/source/index.rst -------------------------------------------------------------------------------- /docs/source/user-guide/index.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/docs/source/user-guide/index.rst -------------------------------------------------------------------------------- /examples/academic_paper_scripts/detxoify_lm/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/academic_paper_scripts/detxoify_lm/README.md -------------------------------------------------------------------------------- /examples/academic_paper_scripts/detxoify_lm/annotations/filter-selfgeneration.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/academic_paper_scripts/detxoify_lm/annotations/filter-selfgeneration.py -------------------------------------------------------------------------------- /examples/academic_paper_scripts/detxoify_lm/annotations/perspective_api_annotate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/academic_paper_scripts/detxoify_lm/annotations/perspective_api_annotate.py -------------------------------------------------------------------------------- /examples/academic_paper_scripts/detxoify_lm/annotations/preprocess.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/academic_paper_scripts/detxoify_lm/annotations/preprocess.sh -------------------------------------------------------------------------------- /examples/academic_paper_scripts/detxoify_lm/finetune_gpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/academic_paper_scripts/detxoify_lm/finetune_gpt.py -------------------------------------------------------------------------------- /examples/academic_paper_scripts/detxoify_lm/finetune_gpt_distributed-1.3b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/academic_paper_scripts/detxoify_lm/finetune_gpt_distributed-1.3b.sh -------------------------------------------------------------------------------- /examples/academic_paper_scripts/detxoify_lm/generate-1.3b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/academic_paper_scripts/detxoify_lm/generate-1.3b.sh -------------------------------------------------------------------------------- /examples/academic_paper_scripts/detxoify_lm/generate_samples_gpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/academic_paper_scripts/detxoify_lm/generate_samples_gpt.py -------------------------------------------------------------------------------- /examples/academic_paper_scripts/detxoify_lm/perspective_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/academic_paper_scripts/detxoify_lm/perspective_api.py -------------------------------------------------------------------------------- /examples/academic_paper_scripts/detxoify_lm/self_generation/selfgenerate-1.3b-unconditional.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/academic_paper_scripts/detxoify_lm/self_generation/selfgenerate-1.3b-unconditional.sh -------------------------------------------------------------------------------- /examples/academic_paper_scripts/msdp/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/academic_paper_scripts/msdp/README.md -------------------------------------------------------------------------------- /examples/academic_paper_scripts/msdp/data_processing.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/academic_paper_scripts/msdp/data_processing.sh -------------------------------------------------------------------------------- /examples/academic_paper_scripts/msdp/eval_knwl_generation.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/academic_paper_scripts/msdp/eval_knwl_generation.sh -------------------------------------------------------------------------------- /examples/academic_paper_scripts/msdp/eval_resp_generation.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/academic_paper_scripts/msdp/eval_resp_generation.sh -------------------------------------------------------------------------------- /examples/academic_paper_scripts/msdp/prep_resp_gen.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/academic_paper_scripts/msdp/prep_resp_gen.sh -------------------------------------------------------------------------------- /examples/academic_paper_scripts/msdp/prompt_knwl_gen.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/academic_paper_scripts/msdp/prompt_knwl_gen.sh -------------------------------------------------------------------------------- /examples/academic_paper_scripts/msdp/prompt_resp_gen.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/academic_paper_scripts/msdp/prompt_resp_gen.sh -------------------------------------------------------------------------------- /examples/academic_paper_scripts/sc21/CONFIG.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/academic_paper_scripts/sc21/CONFIG.sh -------------------------------------------------------------------------------- /examples/academic_paper_scripts/sc21/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/academic_paper_scripts/sc21/README.md -------------------------------------------------------------------------------- /examples/academic_paper_scripts/sc21/SBATCH.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/academic_paper_scripts/sc21/SBATCH.sh -------------------------------------------------------------------------------- /examples/academic_paper_scripts/sc21/SRUN.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/academic_paper_scripts/sc21/SRUN.sh -------------------------------------------------------------------------------- /examples/academic_paper_scripts/sc21/run_figure_11.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/academic_paper_scripts/sc21/run_figure_11.sh -------------------------------------------------------------------------------- /examples/academic_paper_scripts/sc21/run_figure_12.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/academic_paper_scripts/sc21/run_figure_12.sh -------------------------------------------------------------------------------- /examples/academic_paper_scripts/sc21/run_figure_13.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/academic_paper_scripts/sc21/run_figure_13.sh -------------------------------------------------------------------------------- /examples/academic_paper_scripts/sc21/run_figure_14.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/academic_paper_scripts/sc21/run_figure_14.sh -------------------------------------------------------------------------------- /examples/academic_paper_scripts/sc21/run_figure_15.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/academic_paper_scripts/sc21/run_figure_15.sh -------------------------------------------------------------------------------- /examples/academic_paper_scripts/sc21/run_figure_16.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/academic_paper_scripts/sc21/run_figure_16.sh -------------------------------------------------------------------------------- /examples/academic_paper_scripts/sc21/run_figure_17.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/academic_paper_scripts/sc21/run_figure_17.sh -------------------------------------------------------------------------------- /examples/academic_paper_scripts/sc21/run_figure_18.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/academic_paper_scripts/sc21/run_figure_18.sh -------------------------------------------------------------------------------- /examples/academic_paper_scripts/sc21/run_table_1.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/academic_paper_scripts/sc21/run_table_1.sh -------------------------------------------------------------------------------- /examples/bert/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/bert/README.md -------------------------------------------------------------------------------- /examples/bert/train_bert_340m_distributed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/bert/train_bert_340m_distributed.sh -------------------------------------------------------------------------------- /examples/gpt3/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/gpt3/README.md -------------------------------------------------------------------------------- /examples/gpt3/gpt_config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/gpt3/gpt_config.yaml -------------------------------------------------------------------------------- /examples/gpt3/train_gpt3_175b_distributed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/gpt3/train_gpt3_175b_distributed.sh -------------------------------------------------------------------------------- /examples/inference/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/inference/README.md -------------------------------------------------------------------------------- /examples/inference/gpt/simple_gpt_batch_inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/inference/gpt/simple_gpt_batch_inference.py -------------------------------------------------------------------------------- /examples/inference/quantization/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/inference/quantization/README.md -------------------------------------------------------------------------------- /examples/inference/quantization/ptq_trtllm_llama_7b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/inference/quantization/ptq_trtllm_llama_7b.sh -------------------------------------------------------------------------------- /examples/inference/quantization/ptq_trtllm_nemotron3_8b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/inference/quantization/ptq_trtllm_nemotron3_8b.sh -------------------------------------------------------------------------------- /examples/inference/quantization/text_generation_ptq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/inference/quantization/text_generation_ptq.py -------------------------------------------------------------------------------- /examples/inference/quantization/trtllm_text_generation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/inference/quantization/trtllm_text_generation.py -------------------------------------------------------------------------------- /examples/inference/run_text_generation_server_345M.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/inference/run_text_generation_server_345M.sh -------------------------------------------------------------------------------- /examples/inference/run_text_generation_server_345M_8_tensor_parallel.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/inference/run_text_generation_server_345M_8_tensor_parallel.sh -------------------------------------------------------------------------------- /examples/mamba/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/mamba/.gitignore -------------------------------------------------------------------------------- /examples/mamba/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/mamba/Dockerfile -------------------------------------------------------------------------------- /examples/mamba/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/mamba/README.md -------------------------------------------------------------------------------- /examples/mamba/run_text_gen_server_8b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/mamba/run_text_gen_server_8b.sh -------------------------------------------------------------------------------- /examples/mamba/run_text_gen_server_8b_gpt3.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/mamba/run_text_gen_server_8b_gpt3.sh -------------------------------------------------------------------------------- /examples/mamba/train.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/mamba/train.sh -------------------------------------------------------------------------------- /examples/mixtral/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/mixtral/README.md -------------------------------------------------------------------------------- /examples/mixtral/train_mixtral_8x7b_distributed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/mixtral/train_mixtral_8x7b_distributed.sh -------------------------------------------------------------------------------- /examples/multimodal/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/multimodal/Dockerfile -------------------------------------------------------------------------------- /examples/multimodal/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/multimodal/README.md -------------------------------------------------------------------------------- /examples/multimodal/assets/pretrain_curves.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/multimodal/assets/pretrain_curves.png -------------------------------------------------------------------------------- /examples/multimodal/clip_converter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/multimodal/clip_converter.py -------------------------------------------------------------------------------- /examples/multimodal/combine_mistral_clip.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/multimodal/combine_mistral_clip.sh -------------------------------------------------------------------------------- /examples/multimodal/combine_state_dicts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/multimodal/combine_state_dicts.py -------------------------------------------------------------------------------- /examples/multimodal/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/multimodal/config.py -------------------------------------------------------------------------------- /examples/multimodal/convert_llava_pretrain_to_wds.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/multimodal/convert_llava_pretrain_to_wds.py -------------------------------------------------------------------------------- /examples/multimodal/dataloader_provider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/multimodal/dataloader_provider.py -------------------------------------------------------------------------------- /examples/multimodal/dataset_helpers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/multimodal/dataset_helpers.py -------------------------------------------------------------------------------- /examples/multimodal/evaluate_coco.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/multimodal/evaluate_coco.py -------------------------------------------------------------------------------- /examples/multimodal/evaluate_mmmu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/multimodal/evaluate_mmmu.py -------------------------------------------------------------------------------- /examples/multimodal/evaluate_textvqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/multimodal/evaluate_textvqa.py -------------------------------------------------------------------------------- /examples/multimodal/evaluate_vqav2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/multimodal/evaluate_vqav2.py -------------------------------------------------------------------------------- /examples/multimodal/layer_specs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/multimodal/layer_specs.py -------------------------------------------------------------------------------- /examples/multimodal/manual_prompts.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/multimodal/manual_prompts.json -------------------------------------------------------------------------------- /examples/multimodal/pretrain_dataset.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/multimodal/pretrain_dataset.yaml -------------------------------------------------------------------------------- /examples/multimodal/pretrain_mistral_clip.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/multimodal/pretrain_mistral_clip.sh -------------------------------------------------------------------------------- /examples/multimodal/run_text_generation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/multimodal/run_text_generation.py -------------------------------------------------------------------------------- /examples/multimodal/sft_dataset.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/multimodal/sft_dataset.yaml -------------------------------------------------------------------------------- /examples/multimodal/sft_mistral_clip.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/multimodal/sft_mistral_clip.sh -------------------------------------------------------------------------------- /examples/multimodal/text_generation_mistral_clip.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/multimodal/text_generation_mistral_clip.sh -------------------------------------------------------------------------------- /examples/multimodal/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/multimodal/train.py -------------------------------------------------------------------------------- /examples/retro/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/retro/README.md -------------------------------------------------------------------------------- /examples/retro/preprocess_data.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/retro/preprocess_data.sh -------------------------------------------------------------------------------- /examples/retro/train_retro_2b_distributed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/retro/train_retro_2b_distributed.sh -------------------------------------------------------------------------------- /examples/run_simple_mcore_train_loop.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/run_simple_mcore_train_loop.py -------------------------------------------------------------------------------- /examples/t5/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/t5/README.md -------------------------------------------------------------------------------- /examples/t5/t5_mcore_train_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/t5/t5_mcore_train_curve.png -------------------------------------------------------------------------------- /examples/t5/train_t5_220m_distributed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/examples/t5/train_t5_220m_distributed.sh -------------------------------------------------------------------------------- /images/expt-pp32-flops.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/images/expt-pp32-flops.png -------------------------------------------------------------------------------- /images/expt-pp32-mem.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/images/expt-pp32-mem.png -------------------------------------------------------------------------------- /images/model_table.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/images/model_table.png -------------------------------------------------------------------------------- /images/schedule-interlaced.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/images/schedule-interlaced.png -------------------------------------------------------------------------------- /images/schedule-vocab-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/images/schedule-vocab-1.png -------------------------------------------------------------------------------- /images/schedule-vocab-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/images/schedule-vocab-2.png -------------------------------------------------------------------------------- /images/st-passes-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/images/st-passes-1.png -------------------------------------------------------------------------------- /images/st-passes-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/images/st-passes-2.png -------------------------------------------------------------------------------- /images/strong_scaling.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/images/strong_scaling.png -------------------------------------------------------------------------------- /images/weak_scaling.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/images/weak_scaling.png -------------------------------------------------------------------------------- /input_store.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/input_store.py -------------------------------------------------------------------------------- /jet-tests.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/jet-tests.yml -------------------------------------------------------------------------------- /megatron/core/QuickStart.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/QuickStart.md -------------------------------------------------------------------------------- /megatron/core/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/README.md -------------------------------------------------------------------------------- /megatron/core/README_STRAGGLER.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/README_STRAGGLER.md -------------------------------------------------------------------------------- /megatron/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/__init__.py -------------------------------------------------------------------------------- /megatron/core/datasets/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/datasets/Makefile -------------------------------------------------------------------------------- /megatron/core/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /megatron/core/datasets/bert_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/datasets/bert_dataset.py -------------------------------------------------------------------------------- /megatron/core/datasets/blended_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/datasets/blended_dataset.py -------------------------------------------------------------------------------- /megatron/core/datasets/blended_megatron_dataset_builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/datasets/blended_megatron_dataset_builder.py -------------------------------------------------------------------------------- /megatron/core/datasets/blended_megatron_dataset_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/datasets/blended_megatron_dataset_config.py -------------------------------------------------------------------------------- /megatron/core/datasets/gpt_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/datasets/gpt_dataset.py -------------------------------------------------------------------------------- /megatron/core/datasets/helpers.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/datasets/helpers.cpp -------------------------------------------------------------------------------- /megatron/core/datasets/indexed_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/datasets/indexed_dataset.py -------------------------------------------------------------------------------- /megatron/core/datasets/masked_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/datasets/masked_dataset.py -------------------------------------------------------------------------------- /megatron/core/datasets/megatron_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/datasets/megatron_dataset.py -------------------------------------------------------------------------------- /megatron/core/datasets/megatron_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/datasets/megatron_tokenizer.py -------------------------------------------------------------------------------- /megatron/core/datasets/multimodal_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/datasets/multimodal_dataset.py -------------------------------------------------------------------------------- /megatron/core/datasets/readme.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/datasets/readme.md -------------------------------------------------------------------------------- /megatron/core/datasets/retro/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/datasets/retro/__init__.py -------------------------------------------------------------------------------- /megatron/core/datasets/retro/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/datasets/retro/config/__init__.py -------------------------------------------------------------------------------- /megatron/core/datasets/retro/config/bert_embedders.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/datasets/retro/config/bert_embedders.py -------------------------------------------------------------------------------- /megatron/core/datasets/retro/config/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/datasets/retro/config/config.py -------------------------------------------------------------------------------- /megatron/core/datasets/retro/config/gpt_chunk_datasets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/datasets/retro/config/gpt_chunk_datasets.py -------------------------------------------------------------------------------- /megatron/core/datasets/retro/config/tokenizers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/datasets/retro/config/tokenizers.py -------------------------------------------------------------------------------- /megatron/core/datasets/retro/db/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/datasets/retro/db/__init__.py -------------------------------------------------------------------------------- /megatron/core/datasets/retro/db/build.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/datasets/retro/db/build.py -------------------------------------------------------------------------------- /megatron/core/datasets/retro/db/dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/datasets/retro/db/dataset.py -------------------------------------------------------------------------------- /megatron/core/datasets/retro/db/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/datasets/retro/db/utils.py -------------------------------------------------------------------------------- /megatron/core/datasets/retro/external_libs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/datasets/retro/external_libs.py -------------------------------------------------------------------------------- /megatron/core/datasets/retro/index/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/datasets/retro/index/__init__.py -------------------------------------------------------------------------------- /megatron/core/datasets/retro/index/build.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/datasets/retro/index/build.py -------------------------------------------------------------------------------- /megatron/core/datasets/retro/index/factory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/datasets/retro/index/factory.py -------------------------------------------------------------------------------- /megatron/core/datasets/retro/index/index.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/datasets/retro/index/index.py -------------------------------------------------------------------------------- /megatron/core/datasets/retro/index/indexes/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/datasets/retro/index/indexes/__init__.py -------------------------------------------------------------------------------- /megatron/core/datasets/retro/index/indexes/faiss_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/datasets/retro/index/indexes/faiss_base.py -------------------------------------------------------------------------------- /megatron/core/datasets/retro/index/indexes/faiss_par_add.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/datasets/retro/index/indexes/faiss_par_add.py -------------------------------------------------------------------------------- /megatron/core/datasets/retro/index/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/datasets/retro/index/utils.py -------------------------------------------------------------------------------- /megatron/core/datasets/retro/index/validate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/datasets/retro/index/validate.py -------------------------------------------------------------------------------- /megatron/core/datasets/retro/query/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/datasets/retro/query/__init__.py -------------------------------------------------------------------------------- /megatron/core/datasets/retro/query/gpt_chunk_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/datasets/retro/query/gpt_chunk_dataset.py -------------------------------------------------------------------------------- /megatron/core/datasets/retro/query/multi_split_gpt_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/datasets/retro/query/multi_split_gpt_dataset.py -------------------------------------------------------------------------------- /megatron/core/datasets/retro/query/query.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/datasets/retro/query/query.py -------------------------------------------------------------------------------- /megatron/core/datasets/retro/query/retro_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/datasets/retro/query/retro_dataset.py -------------------------------------------------------------------------------- /megatron/core/datasets/retro/query/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/datasets/retro/query/utils.py -------------------------------------------------------------------------------- /megatron/core/datasets/retro/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/datasets/retro/utils.py -------------------------------------------------------------------------------- /megatron/core/datasets/t5_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/datasets/t5_dataset.py -------------------------------------------------------------------------------- /megatron/core/datasets/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/datasets/utils.py -------------------------------------------------------------------------------- /megatron/core/datasets/utils_s3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/datasets/utils_s3.py -------------------------------------------------------------------------------- /megatron/core/dist_checkpointing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/dist_checkpointing/__init__.py -------------------------------------------------------------------------------- /megatron/core/dist_checkpointing/core.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/dist_checkpointing/core.py -------------------------------------------------------------------------------- /megatron/core/dist_checkpointing/dict_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/dist_checkpointing/dict_utils.py -------------------------------------------------------------------------------- /megatron/core/dist_checkpointing/mapping.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/dist_checkpointing/mapping.py -------------------------------------------------------------------------------- /megatron/core/dist_checkpointing/optimizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/dist_checkpointing/optimizer.py -------------------------------------------------------------------------------- /megatron/core/dist_checkpointing/serialization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/dist_checkpointing/serialization.py -------------------------------------------------------------------------------- /megatron/core/dist_checkpointing/strategies/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/dist_checkpointing/strategies/__init__.py -------------------------------------------------------------------------------- /megatron/core/dist_checkpointing/strategies/async_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/dist_checkpointing/strategies/async_utils.py -------------------------------------------------------------------------------- /megatron/core/dist_checkpointing/strategies/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/dist_checkpointing/strategies/base.py -------------------------------------------------------------------------------- /megatron/core/dist_checkpointing/strategies/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/dist_checkpointing/strategies/common.py -------------------------------------------------------------------------------- /megatron/core/dist_checkpointing/strategies/filesystem_async.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/dist_checkpointing/strategies/filesystem_async.py -------------------------------------------------------------------------------- /megatron/core/dist_checkpointing/strategies/fully_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/dist_checkpointing/strategies/fully_parallel.py -------------------------------------------------------------------------------- /megatron/core/dist_checkpointing/strategies/resharding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/dist_checkpointing/strategies/resharding.py -------------------------------------------------------------------------------- /megatron/core/dist_checkpointing/strategies/state_dict_saver.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/dist_checkpointing/strategies/state_dict_saver.py -------------------------------------------------------------------------------- /megatron/core/dist_checkpointing/strategies/tensorstore.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/dist_checkpointing/strategies/tensorstore.py -------------------------------------------------------------------------------- /megatron/core/dist_checkpointing/strategies/torch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/dist_checkpointing/strategies/torch.py -------------------------------------------------------------------------------- /megatron/core/dist_checkpointing/strategies/two_stage.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/dist_checkpointing/strategies/two_stage.py -------------------------------------------------------------------------------- /megatron/core/dist_checkpointing/strategies/zarr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/dist_checkpointing/strategies/zarr.py -------------------------------------------------------------------------------- /megatron/core/dist_checkpointing/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/dist_checkpointing/utils.py -------------------------------------------------------------------------------- /megatron/core/dist_checkpointing/validation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/dist_checkpointing/validation.py -------------------------------------------------------------------------------- /megatron/core/distributed/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/distributed/__init__.py -------------------------------------------------------------------------------- /megatron/core/distributed/distributed_data_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/distributed/distributed_data_parallel.py -------------------------------------------------------------------------------- /megatron/core/distributed/distributed_data_parallel_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/distributed/distributed_data_parallel_config.py -------------------------------------------------------------------------------- /megatron/core/distributed/finalize_model_grads.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/distributed/finalize_model_grads.py -------------------------------------------------------------------------------- /megatron/core/distributed/param_and_grad_buffer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/distributed/param_and_grad_buffer.py -------------------------------------------------------------------------------- /megatron/core/enums.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/enums.py -------------------------------------------------------------------------------- /megatron/core/fusions/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /megatron/core/fusions/fused_bias_dropout.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/fusions/fused_bias_dropout.py -------------------------------------------------------------------------------- /megatron/core/fusions/fused_bias_geglu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/fusions/fused_bias_geglu.py -------------------------------------------------------------------------------- /megatron/core/fusions/fused_bias_gelu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/fusions/fused_bias_gelu.py -------------------------------------------------------------------------------- /megatron/core/fusions/fused_bias_swiglu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/fusions/fused_bias_swiglu.py -------------------------------------------------------------------------------- /megatron/core/fusions/fused_cross_entropy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/fusions/fused_cross_entropy.py -------------------------------------------------------------------------------- /megatron/core/fusions/fused_layer_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/fusions/fused_layer_norm.py -------------------------------------------------------------------------------- /megatron/core/fusions/fused_softmax.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/fusions/fused_softmax.py -------------------------------------------------------------------------------- /megatron/core/inference/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/inference/__init__.py -------------------------------------------------------------------------------- /megatron/core/inference/ammo_support/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /megatron/core/inference/ammo_support/gpt/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/inference/ammo_support/gpt/__init__.py -------------------------------------------------------------------------------- /megatron/core/inference/ammo_support/gpt/model_specs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/inference/ammo_support/gpt/model_specs.py -------------------------------------------------------------------------------- /megatron/core/inference/ammo_support/gpt/state_dict_hooks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/inference/ammo_support/gpt/state_dict_hooks.py -------------------------------------------------------------------------------- /megatron/core/inference/common_inference_params.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/inference/common_inference_params.py -------------------------------------------------------------------------------- /megatron/core/inference/communication_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/inference/communication_utils.py -------------------------------------------------------------------------------- /megatron/core/inference/engines/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /megatron/core/inference/engines/abstract_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/inference/engines/abstract_engine.py -------------------------------------------------------------------------------- /megatron/core/inference/engines/mcore_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/inference/engines/mcore_engine.py -------------------------------------------------------------------------------- /megatron/core/inference/inference_request.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/inference/inference_request.py -------------------------------------------------------------------------------- /megatron/core/inference/model_inference_wrappers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /megatron/core/inference/model_inference_wrappers/abstract_model_inference_wrapper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/inference/model_inference_wrappers/abstract_model_inference_wrapper.py -------------------------------------------------------------------------------- /megatron/core/inference/model_inference_wrappers/gpt/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /megatron/core/inference/model_inference_wrappers/gpt/gpt_inference_wrapper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/inference/model_inference_wrappers/gpt/gpt_inference_wrapper.py -------------------------------------------------------------------------------- /megatron/core/inference/model_inference_wrappers/inference_wrapper_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/inference/model_inference_wrappers/inference_wrapper_config.py -------------------------------------------------------------------------------- /megatron/core/inference/scheduler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/inference/scheduler.py -------------------------------------------------------------------------------- /megatron/core/inference/text_generation_controllers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /megatron/core/inference/text_generation_controllers/simple_text_generation_controller.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/inference/text_generation_controllers/simple_text_generation_controller.py -------------------------------------------------------------------------------- /megatron/core/inference/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/inference/utils.py -------------------------------------------------------------------------------- /megatron/core/inference_params.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/inference_params.py -------------------------------------------------------------------------------- /megatron/core/jit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/jit.py -------------------------------------------------------------------------------- /megatron/core/model_parallel_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/model_parallel_config.py -------------------------------------------------------------------------------- /megatron/core/models/T5/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/models/T5/__init__.py -------------------------------------------------------------------------------- /megatron/core/models/T5/t5_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/models/T5/t5_model.py -------------------------------------------------------------------------------- /megatron/core/models/T5/t5_spec.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/models/T5/t5_spec.py -------------------------------------------------------------------------------- /megatron/core/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /megatron/core/models/bert/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /megatron/core/models/bert/bert_layer_specs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/models/bert/bert_layer_specs.py -------------------------------------------------------------------------------- /megatron/core/models/bert/bert_lm_head.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/models/bert/bert_lm_head.py -------------------------------------------------------------------------------- /megatron/core/models/bert/bert_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/models/bert/bert_model.py -------------------------------------------------------------------------------- /megatron/core/models/bert/pooler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/models/bert/pooler.py -------------------------------------------------------------------------------- /megatron/core/models/common/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /megatron/core/models/common/embeddings/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /megatron/core/models/common/embeddings/language_model_embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/models/common/embeddings/language_model_embedding.py -------------------------------------------------------------------------------- /megatron/core/models/common/embeddings/rotary_pos_embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/models/common/embeddings/rotary_pos_embedding.py -------------------------------------------------------------------------------- /megatron/core/models/common/language_module/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /megatron/core/models/common/language_module/language_module.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/models/common/language_module/language_module.py -------------------------------------------------------------------------------- /megatron/core/models/common/vision_module/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /megatron/core/models/common/vision_module/vision_module.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/models/common/vision_module/vision_module.py -------------------------------------------------------------------------------- /megatron/core/models/gpt/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/models/gpt/__init__.py -------------------------------------------------------------------------------- /megatron/core/models/gpt/gpt_layer_specs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/models/gpt/gpt_layer_specs.py -------------------------------------------------------------------------------- /megatron/core/models/gpt/gpt_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/models/gpt/gpt_model.py -------------------------------------------------------------------------------- /megatron/core/models/mamba/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/models/mamba/__init__.py -------------------------------------------------------------------------------- /megatron/core/models/mamba/mamba_layer_specs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/models/mamba/mamba_layer_specs.py -------------------------------------------------------------------------------- /megatron/core/models/mamba/mamba_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/models/mamba/mamba_model.py -------------------------------------------------------------------------------- /megatron/core/models/multimodal/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /megatron/core/models/multimodal/llava_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/models/multimodal/llava_model.py -------------------------------------------------------------------------------- /megatron/core/models/multimodal/llava_spec.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/models/multimodal/llava_spec.py -------------------------------------------------------------------------------- /megatron/core/models/retro/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/models/retro/__init__.py -------------------------------------------------------------------------------- /megatron/core/models/retro/base_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/models/retro/base_attention.py -------------------------------------------------------------------------------- /megatron/core/models/retro/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/models/retro/config.py -------------------------------------------------------------------------------- /megatron/core/models/retro/decoder_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/models/retro/decoder_attention.py -------------------------------------------------------------------------------- /megatron/core/models/retro/decoder_spec.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/models/retro/decoder_spec.py -------------------------------------------------------------------------------- /megatron/core/models/retro/encoder_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/models/retro/encoder_attention.py -------------------------------------------------------------------------------- /megatron/core/models/retro/encoder_spec.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/models/retro/encoder_spec.py -------------------------------------------------------------------------------- /megatron/core/models/retro/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/models/retro/model.py -------------------------------------------------------------------------------- /megatron/core/models/retro/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/models/retro/utils.py -------------------------------------------------------------------------------- /megatron/core/models/vision/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /megatron/core/models/vision/clip_vit_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/models/vision/clip_vit_model.py -------------------------------------------------------------------------------- /megatron/core/models/vision/multimodal_projector.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/models/vision/multimodal_projector.py -------------------------------------------------------------------------------- /megatron/core/models/vision/vit_layer_specs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/models/vision/vit_layer_specs.py -------------------------------------------------------------------------------- /megatron/core/num_microbatches_calculator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/num_microbatches_calculator.py -------------------------------------------------------------------------------- /megatron/core/optimizer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/optimizer/__init__.py -------------------------------------------------------------------------------- /megatron/core/optimizer/clip_grads.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/optimizer/clip_grads.py -------------------------------------------------------------------------------- /megatron/core/optimizer/distrib_optimizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/optimizer/distrib_optimizer.py -------------------------------------------------------------------------------- /megatron/core/optimizer/grad_scaler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/optimizer/grad_scaler.py -------------------------------------------------------------------------------- /megatron/core/optimizer/optimizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/optimizer/optimizer.py -------------------------------------------------------------------------------- /megatron/core/optimizer/optimizer_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/optimizer/optimizer_config.py -------------------------------------------------------------------------------- /megatron/core/package_info.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/package_info.py -------------------------------------------------------------------------------- /megatron/core/packed_seq_params.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/packed_seq_params.py -------------------------------------------------------------------------------- /megatron/core/parallel_state.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/parallel_state.py -------------------------------------------------------------------------------- /megatron/core/pipeline_parallel/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/pipeline_parallel/__init__.py -------------------------------------------------------------------------------- /megatron/core/pipeline_parallel/interlaced_schedule.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/pipeline_parallel/interlaced_schedule.py -------------------------------------------------------------------------------- /megatron/core/pipeline_parallel/p2p_communication.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/pipeline_parallel/p2p_communication.py -------------------------------------------------------------------------------- /megatron/core/pipeline_parallel/schedule_timers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/pipeline_parallel/schedule_timers.py -------------------------------------------------------------------------------- /megatron/core/pipeline_parallel/schedules.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/pipeline_parallel/schedules.py -------------------------------------------------------------------------------- /megatron/core/pipeline_parallel/vocab_parallel_schedule.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/pipeline_parallel/vocab_parallel_schedule.py -------------------------------------------------------------------------------- /megatron/core/requirements.txt: -------------------------------------------------------------------------------- 1 | torch -------------------------------------------------------------------------------- /megatron/core/ssm/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /megatron/core/ssm/mamba_block.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/ssm/mamba_block.py -------------------------------------------------------------------------------- /megatron/core/ssm/mamba_hybrid_layer_allocation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/ssm/mamba_hybrid_layer_allocation.py -------------------------------------------------------------------------------- /megatron/core/ssm/mamba_layer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/ssm/mamba_layer.py -------------------------------------------------------------------------------- /megatron/core/ssm/mamba_mixer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/ssm/mamba_mixer.py -------------------------------------------------------------------------------- /megatron/core/ssm/triton_cache_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/ssm/triton_cache_manager.py -------------------------------------------------------------------------------- /megatron/core/tensor_parallel/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/tensor_parallel/__init__.py -------------------------------------------------------------------------------- /megatron/core/tensor_parallel/cross_entropy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/tensor_parallel/cross_entropy.py -------------------------------------------------------------------------------- /megatron/core/tensor_parallel/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/tensor_parallel/data.py -------------------------------------------------------------------------------- /megatron/core/tensor_parallel/layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/tensor_parallel/layers.py -------------------------------------------------------------------------------- /megatron/core/tensor_parallel/mappings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/tensor_parallel/mappings.py -------------------------------------------------------------------------------- /megatron/core/tensor_parallel/random.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/tensor_parallel/random.py -------------------------------------------------------------------------------- /megatron/core/tensor_parallel/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/tensor_parallel/utils.py -------------------------------------------------------------------------------- /megatron/core/tensor_parallel/vocab_input.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/tensor_parallel/vocab_input.py -------------------------------------------------------------------------------- /megatron/core/tensor_parallel/vocab_input_store.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/tensor_parallel/vocab_input_store.py -------------------------------------------------------------------------------- /megatron/core/tensor_parallel/vocab_output.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/tensor_parallel/vocab_output.py -------------------------------------------------------------------------------- /megatron/core/tensor_parallel/vocab_output_store.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/tensor_parallel/vocab_output_store.py -------------------------------------------------------------------------------- /megatron/core/timers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/timers.py -------------------------------------------------------------------------------- /megatron/core/transformer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/transformer/__init__.py -------------------------------------------------------------------------------- /megatron/core/transformer/attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/transformer/attention.py -------------------------------------------------------------------------------- /megatron/core/transformer/custom_layers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /megatron/core/transformer/custom_layers/transformer_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/transformer/custom_layers/transformer_engine.py -------------------------------------------------------------------------------- /megatron/core/transformer/dot_product_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/transformer/dot_product_attention.py -------------------------------------------------------------------------------- /megatron/core/transformer/enums.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/transformer/enums.py -------------------------------------------------------------------------------- /megatron/core/transformer/identity_op.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/transformer/identity_op.py -------------------------------------------------------------------------------- /megatron/core/transformer/mlp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/transformer/mlp.py -------------------------------------------------------------------------------- /megatron/core/transformer/module.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/transformer/module.py -------------------------------------------------------------------------------- /megatron/core/transformer/moe/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/transformer/moe/README.md -------------------------------------------------------------------------------- /megatron/core/transformer/moe/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /megatron/core/transformer/moe/experts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/transformer/moe/experts.py -------------------------------------------------------------------------------- /megatron/core/transformer/moe/grouped_gemm_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/transformer/moe/grouped_gemm_util.py -------------------------------------------------------------------------------- /megatron/core/transformer/moe/moe_layer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/transformer/moe/moe_layer.py -------------------------------------------------------------------------------- /megatron/core/transformer/moe/moe_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/transformer/moe/moe_utils.py -------------------------------------------------------------------------------- /megatron/core/transformer/moe/router.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/transformer/moe/router.py -------------------------------------------------------------------------------- /megatron/core/transformer/moe/token_dispatcher.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/transformer/moe/token_dispatcher.py -------------------------------------------------------------------------------- /megatron/core/transformer/spec_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/transformer/spec_utils.py -------------------------------------------------------------------------------- /megatron/core/transformer/torch_layer_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/transformer/torch_layer_norm.py -------------------------------------------------------------------------------- /megatron/core/transformer/transformer_block.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/transformer/transformer_block.py -------------------------------------------------------------------------------- /megatron/core/transformer/transformer_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/transformer/transformer_config.py -------------------------------------------------------------------------------- /megatron/core/transformer/transformer_layer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/transformer/transformer_layer.py -------------------------------------------------------------------------------- /megatron/core/transformer/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/transformer/utils.py -------------------------------------------------------------------------------- /megatron/core/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/core/utils.py -------------------------------------------------------------------------------- /megatron/inference/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/inference/__init__.py -------------------------------------------------------------------------------- /megatron/inference/arguments.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/inference/arguments.py -------------------------------------------------------------------------------- /megatron/inference/checkpointing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/inference/checkpointing.py -------------------------------------------------------------------------------- /megatron/inference/gpt/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/inference/gpt/__init__.py -------------------------------------------------------------------------------- /megatron/inference/gpt/model_provider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/inference/gpt/model_provider.py -------------------------------------------------------------------------------- /megatron/inference/static/index.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/inference/static/index.html -------------------------------------------------------------------------------- /megatron/inference/text_generation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/inference/text_generation/__init__.py -------------------------------------------------------------------------------- /megatron/inference/text_generation/api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/inference/text_generation/api.py -------------------------------------------------------------------------------- /megatron/inference/text_generation/beam_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/inference/text_generation/beam_utils.py -------------------------------------------------------------------------------- /megatron/inference/text_generation/communication.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/inference/text_generation/communication.py -------------------------------------------------------------------------------- /megatron/inference/text_generation/forward_step.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/inference/text_generation/forward_step.py -------------------------------------------------------------------------------- /megatron/inference/text_generation/generation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/inference/text_generation/generation.py -------------------------------------------------------------------------------- /megatron/inference/text_generation/sampling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/inference/text_generation/sampling.py -------------------------------------------------------------------------------- /megatron/inference/text_generation/tokenization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/inference/text_generation/tokenization.py -------------------------------------------------------------------------------- /megatron/inference/text_generation_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/inference/text_generation_server.py -------------------------------------------------------------------------------- /megatron/legacy/data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /megatron/legacy/data/autoaugment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/legacy/data/autoaugment.py -------------------------------------------------------------------------------- /megatron/legacy/data/biencoder_dataset_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/legacy/data/biencoder_dataset_utils.py -------------------------------------------------------------------------------- /megatron/legacy/data/data_samplers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/legacy/data/data_samplers.py -------------------------------------------------------------------------------- /megatron/legacy/data/dataset_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/legacy/data/dataset_utils.py -------------------------------------------------------------------------------- /megatron/legacy/data/ict_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/legacy/data/ict_dataset.py -------------------------------------------------------------------------------- /megatron/legacy/data/image_folder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/legacy/data/image_folder.py -------------------------------------------------------------------------------- /megatron/legacy/data/multimodal_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/legacy/data/multimodal_dataset.py -------------------------------------------------------------------------------- /megatron/legacy/data/orqa_wiki_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/legacy/data/orqa_wiki_dataset.py -------------------------------------------------------------------------------- /megatron/legacy/data/realm_dataset_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/legacy/data/realm_dataset_utils.py -------------------------------------------------------------------------------- /megatron/legacy/data/realm_index.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/legacy/data/realm_index.py -------------------------------------------------------------------------------- /megatron/legacy/data/vit_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/legacy/data/vit_dataset.py -------------------------------------------------------------------------------- /megatron/legacy/fp16_deprecated/loss_scaler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/legacy/fp16_deprecated/loss_scaler.py -------------------------------------------------------------------------------- /megatron/legacy/fused_kernels/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/legacy/fused_kernels/__init__.py -------------------------------------------------------------------------------- /megatron/legacy/fused_kernels/compat.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/legacy/fused_kernels/compat.h -------------------------------------------------------------------------------- /megatron/legacy/fused_kernels/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /megatron/legacy/fused_kernels/tests/test_fused_kernels.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/legacy/fused_kernels/tests/test_fused_kernels.py -------------------------------------------------------------------------------- /megatron/legacy/fused_kernels/type_shim.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/legacy/fused_kernels/type_shim.h -------------------------------------------------------------------------------- /megatron/legacy/indexer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/legacy/indexer.py -------------------------------------------------------------------------------- /megatron/legacy/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/legacy/model/__init__.py -------------------------------------------------------------------------------- /megatron/legacy/model/bert_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/legacy/model/bert_model.py -------------------------------------------------------------------------------- /megatron/legacy/model/biencoder_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/legacy/model/biencoder_model.py -------------------------------------------------------------------------------- /megatron/legacy/model/classification.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/legacy/model/classification.py -------------------------------------------------------------------------------- /megatron/legacy/model/enums.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/legacy/model/enums.py -------------------------------------------------------------------------------- /megatron/legacy/model/fused_bias_gelu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/legacy/model/fused_bias_gelu.py -------------------------------------------------------------------------------- /megatron/legacy/model/fused_layer_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/legacy/model/fused_layer_norm.py -------------------------------------------------------------------------------- /megatron/legacy/model/fused_softmax.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/legacy/model/fused_softmax.py -------------------------------------------------------------------------------- /megatron/legacy/model/gpt_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/legacy/model/gpt_model.py -------------------------------------------------------------------------------- /megatron/legacy/model/language_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/legacy/model/language_model.py -------------------------------------------------------------------------------- /megatron/legacy/model/module.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/legacy/model/module.py -------------------------------------------------------------------------------- /megatron/legacy/model/multiple_choice.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/legacy/model/multiple_choice.py -------------------------------------------------------------------------------- /megatron/legacy/model/realm_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/legacy/model/realm_model.py -------------------------------------------------------------------------------- /megatron/legacy/model/rms_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/legacy/model/rms_norm.py -------------------------------------------------------------------------------- /megatron/legacy/model/t5_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/legacy/model/t5_model.py -------------------------------------------------------------------------------- /megatron/legacy/model/transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/legacy/model/transformer.py -------------------------------------------------------------------------------- /megatron/legacy/model/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/legacy/model/utils.py -------------------------------------------------------------------------------- /megatron/legacy/model/vision/classification.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/legacy/model/vision/classification.py -------------------------------------------------------------------------------- /megatron/legacy/model/vision/dino.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/legacy/model/vision/dino.py -------------------------------------------------------------------------------- /megatron/legacy/model/vision/esvit_swin_backbone.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/legacy/model/vision/esvit_swin_backbone.py -------------------------------------------------------------------------------- /megatron/legacy/model/vision/inpainting.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/legacy/model/vision/inpainting.py -------------------------------------------------------------------------------- /megatron/legacy/model/vision/knn_monitor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/legacy/model/vision/knn_monitor.py -------------------------------------------------------------------------------- /megatron/legacy/model/vision/mit_backbone.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/legacy/model/vision/mit_backbone.py -------------------------------------------------------------------------------- /megatron/legacy/model/vision/swin_backbone.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/legacy/model/vision/swin_backbone.py -------------------------------------------------------------------------------- /megatron/legacy/model/vision/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/legacy/model/vision/utils.py -------------------------------------------------------------------------------- /megatron/legacy/model/vision/vit_backbone.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/legacy/model/vision/vit_backbone.py -------------------------------------------------------------------------------- /megatron/legacy/mpu/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /megatron/legacy/mpu/tests/commons.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/legacy/mpu/tests/commons.py -------------------------------------------------------------------------------- /megatron/legacy/mpu/tests/test_cross_entropy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/legacy/mpu/tests/test_cross_entropy.py -------------------------------------------------------------------------------- /megatron/legacy/mpu/tests/test_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/legacy/mpu/tests/test_data.py -------------------------------------------------------------------------------- /megatron/legacy/mpu/tests/test_initialize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/legacy/mpu/tests/test_initialize.py -------------------------------------------------------------------------------- /megatron/legacy/mpu/tests/test_layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/legacy/mpu/tests/test_layers.py -------------------------------------------------------------------------------- /megatron/legacy/mpu/tests/test_random.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/legacy/mpu/tests/test_random.py -------------------------------------------------------------------------------- /megatron/training/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/training/__init__.py -------------------------------------------------------------------------------- /megatron/training/activations.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/training/activations.py -------------------------------------------------------------------------------- /megatron/training/arguments.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/training/arguments.py -------------------------------------------------------------------------------- /megatron/training/async_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/training/async_utils.py -------------------------------------------------------------------------------- /megatron/training/checkpointing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/training/checkpointing.py -------------------------------------------------------------------------------- /megatron/training/dist_signal_handler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/training/dist_signal_handler.py -------------------------------------------------------------------------------- /megatron/training/global_vars.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/training/global_vars.py -------------------------------------------------------------------------------- /megatron/training/initialize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/training/initialize.py -------------------------------------------------------------------------------- /megatron/training/log_handler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/training/log_handler.py -------------------------------------------------------------------------------- /megatron/training/one_logger_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/training/one_logger_utils.py -------------------------------------------------------------------------------- /megatron/training/optimizer_param_scheduler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/training/optimizer_param_scheduler.py -------------------------------------------------------------------------------- /megatron/training/theoretical_memory_usage.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/training/theoretical_memory_usage.py -------------------------------------------------------------------------------- /megatron/training/tokenizer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/training/tokenizer/__init__.py -------------------------------------------------------------------------------- /megatron/training/tokenizer/bert_tokenization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/training/tokenizer/bert_tokenization.py -------------------------------------------------------------------------------- /megatron/training/tokenizer/gpt2_tokenization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/training/tokenizer/gpt2_tokenization.py -------------------------------------------------------------------------------- /megatron/training/tokenizer/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/training/tokenizer/tokenizer.py -------------------------------------------------------------------------------- /megatron/training/training.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/training/training.py -------------------------------------------------------------------------------- /megatron/training/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/training/utils.py -------------------------------------------------------------------------------- /megatron/training/yaml_arguments.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/megatron/training/yaml_arguments.py -------------------------------------------------------------------------------- /pretrain_bert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/pretrain_bert.py -------------------------------------------------------------------------------- /pretrain_gpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/pretrain_gpt.py -------------------------------------------------------------------------------- /pretrain_gpt.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/pretrain_gpt.sh -------------------------------------------------------------------------------- /pretrain_ict.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/pretrain_ict.py -------------------------------------------------------------------------------- /pretrain_mamba.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/pretrain_mamba.py -------------------------------------------------------------------------------- /pretrain_retro.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/pretrain_retro.py -------------------------------------------------------------------------------- /pretrain_t5.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/pretrain_t5.py -------------------------------------------------------------------------------- /pretrain_vision_classify.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/pretrain_vision_classify.py -------------------------------------------------------------------------------- /pretrain_vision_dino.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/pretrain_vision_dino.py -------------------------------------------------------------------------------- /pretrain_vision_inpaint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/pretrain_vision_inpaint.py -------------------------------------------------------------------------------- /pretrain_vlm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/pretrain_vlm.py -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/pyproject.toml -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/setup.py -------------------------------------------------------------------------------- /tasks/data_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tasks/data_utils.py -------------------------------------------------------------------------------- /tasks/ensemble_classifier.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tasks/ensemble_classifier.py -------------------------------------------------------------------------------- /tasks/eval_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tasks/eval_utils.py -------------------------------------------------------------------------------- /tasks/finetune_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tasks/finetune_utils.py -------------------------------------------------------------------------------- /tasks/glue/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tasks/glue/data.py -------------------------------------------------------------------------------- /tasks/glue/finetune.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tasks/glue/finetune.py -------------------------------------------------------------------------------- /tasks/glue/mnli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tasks/glue/mnli.py -------------------------------------------------------------------------------- /tasks/glue/qqp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tasks/glue/qqp.py -------------------------------------------------------------------------------- /tasks/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tasks/main.py -------------------------------------------------------------------------------- /tasks/msdp/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tasks/msdp/README.md -------------------------------------------------------------------------------- /tasks/msdp/evaluate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tasks/msdp/evaluate.py -------------------------------------------------------------------------------- /tasks/msdp/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tasks/msdp/main.py -------------------------------------------------------------------------------- /tasks/msdp/metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tasks/msdp/metrics.py -------------------------------------------------------------------------------- /tasks/msdp/preprocessing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tasks/msdp/preprocessing.py -------------------------------------------------------------------------------- /tasks/msdp/prompt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tasks/msdp/prompt.py -------------------------------------------------------------------------------- /tasks/orqa/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tasks/orqa/README.md -------------------------------------------------------------------------------- /tasks/orqa/evaluate_orqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tasks/orqa/evaluate_orqa.py -------------------------------------------------------------------------------- /tasks/orqa/evaluate_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tasks/orqa/evaluate_utils.py -------------------------------------------------------------------------------- /tasks/orqa/supervised/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tasks/orqa/supervised/data.py -------------------------------------------------------------------------------- /tasks/orqa/supervised/eval_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tasks/orqa/supervised/eval_utils.py -------------------------------------------------------------------------------- /tasks/orqa/supervised/finetune.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tasks/orqa/supervised/finetune.py -------------------------------------------------------------------------------- /tasks/orqa/unsupervised/nq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tasks/orqa/unsupervised/nq.py -------------------------------------------------------------------------------- /tasks/orqa/unsupervised/qa_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tasks/orqa/unsupervised/qa_utils.py -------------------------------------------------------------------------------- /tasks/orqa/unsupervised/tokenizers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tasks/orqa/unsupervised/tokenizers.py -------------------------------------------------------------------------------- /tasks/race/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tasks/race/data.py -------------------------------------------------------------------------------- /tasks/race/finetune.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tasks/race/finetune.py -------------------------------------------------------------------------------- /tasks/vision/classification/classification.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tasks/vision/classification/classification.py -------------------------------------------------------------------------------- /tasks/vision/classification/eval_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tasks/vision/classification/eval_utils.py -------------------------------------------------------------------------------- /tasks/vision/finetune_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tasks/vision/finetune_utils.py -------------------------------------------------------------------------------- /tasks/vision/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tasks/vision/main.py -------------------------------------------------------------------------------- /tasks/vision/segmentation/cityscapes.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tasks/vision/segmentation/cityscapes.py -------------------------------------------------------------------------------- /tasks/vision/segmentation/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tasks/vision/segmentation/data.py -------------------------------------------------------------------------------- /tasks/vision/segmentation/finetune_segformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tasks/vision/segmentation/finetune_segformer.py -------------------------------------------------------------------------------- /tasks/vision/segmentation/finetune_setr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tasks/vision/segmentation/finetune_setr.py -------------------------------------------------------------------------------- /tasks/vision/segmentation/metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tasks/vision/segmentation/metrics.py -------------------------------------------------------------------------------- /tasks/vision/segmentation/seg_heads.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tasks/vision/segmentation/seg_heads.py -------------------------------------------------------------------------------- /tasks/vision/segmentation/seg_models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tasks/vision/segmentation/seg_models.py -------------------------------------------------------------------------------- /tasks/vision/segmentation/transforms.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tasks/vision/segmentation/transforms.py -------------------------------------------------------------------------------- /tasks/vision/segmentation/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tasks/vision/segmentation/utils.py -------------------------------------------------------------------------------- /tasks/zeroshot_gpt/datasets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tasks/zeroshot_gpt/datasets.py -------------------------------------------------------------------------------- /tasks/zeroshot_gpt/detokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tasks/zeroshot_gpt/detokenizer.py -------------------------------------------------------------------------------- /tasks/zeroshot_gpt/evaluate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tasks/zeroshot_gpt/evaluate.py -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/functional_tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/functional_tests/jet_recipes/MR-bert.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/jet_recipes/MR-bert.yaml -------------------------------------------------------------------------------- /tests/functional_tests/jet_recipes/MR-gpt-nemo.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/jet_recipes/MR-gpt-nemo.yaml -------------------------------------------------------------------------------- /tests/functional_tests/jet_recipes/MR-gpt.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/jet_recipes/MR-gpt.yaml -------------------------------------------------------------------------------- /tests/functional_tests/jet_recipes/MR-multimodal.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/jet_recipes/MR-multimodal.yaml -------------------------------------------------------------------------------- /tests/functional_tests/jet_recipes/MR-t5.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/jet_recipes/MR-t5.yaml -------------------------------------------------------------------------------- /tests/functional_tests/jet_recipes/build-pyt.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/jet_recipes/build-pyt.yaml -------------------------------------------------------------------------------- /tests/functional_tests/jet_recipes/local-generator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/jet_recipes/local-generator.py -------------------------------------------------------------------------------- /tests/functional_tests/jet_recipes/nightly-bert.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/jet_recipes/nightly-bert.yaml -------------------------------------------------------------------------------- /tests/functional_tests/jet_recipes/nightly-gpt.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/jet_recipes/nightly-gpt.yaml -------------------------------------------------------------------------------- /tests/functional_tests/jet_recipes/weekly-gpt.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/jet_recipes/weekly-gpt.yaml -------------------------------------------------------------------------------- /tests/functional_tests/jet_recipes/weekly-t5.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/jet_recipes/weekly-t5.yaml -------------------------------------------------------------------------------- /tests/functional_tests/python_test_utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/functional_tests/python_test_utils/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/python_test_utils/common.py -------------------------------------------------------------------------------- /tests/functional_tests/python_test_utils/get_test_results_from_tensorboard_logs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/python_test_utils/get_test_results_from_tensorboard_logs.py -------------------------------------------------------------------------------- /tests/functional_tests/python_test_utils/jet_test_pipeline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/python_test_utils/jet_test_pipeline.py -------------------------------------------------------------------------------- /tests/functional_tests/python_test_utils/multitest_ci_pipeline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/python_test_utils/multitest_ci_pipeline.py -------------------------------------------------------------------------------- /tests/functional_tests/python_test_utils/test_ci_pipeline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/python_test_utils/test_ci_pipeline.py -------------------------------------------------------------------------------- /tests/functional_tests/python_test_utils/test_fp8_ci_pipeline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/python_test_utils/test_fp8_ci_pipeline.py -------------------------------------------------------------------------------- /tests/functional_tests/python_test_utils/test_resume_checkpoint_pipeline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/python_test_utils/test_resume_checkpoint_pipeline.py -------------------------------------------------------------------------------- /tests/functional_tests/shell_test_utils/_run_local_training.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/shell_test_utils/_run_local_training.sh -------------------------------------------------------------------------------- /tests/functional_tests/shell_test_utils/restart_jet_log_jobs.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/shell_test_utils/restart_jet_log_jobs.sh -------------------------------------------------------------------------------- /tests/functional_tests/shell_test_utils/run_release_record.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/shell_test_utils/run_release_record.sh -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/bert_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/bert_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/bert_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_vp2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/bert_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_vp2.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/bert_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/bert_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/bert_345m_nightly_dgx_a100_1N8G_tp1_pp2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/bert_345m_nightly_dgx_a100_1N8G_tp1_pp2.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/bert_345m_nightly_dgx_a100_1N8G_tp4_pp1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/bert_345m_nightly_dgx_a100_1N8G_tp4_pp1.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/bert_mr_mcore_tp2_pp2_dgx_a100_1N8G.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/bert_mr_mcore_tp2_pp2_dgx_a100_1N8G.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/bert_mr_mcore_tp2_pp2_local_spec_dgx_a100_1N8G.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/bert_mr_mcore_tp2_pp2_local_spec_dgx_a100_1N8G.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/bert_mr_resume_tp1_pp2dgx_a100_1N8G_.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/bert_mr_resume_tp1_pp2dgx_a100_1N8G_.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/bert_mr_tp1_pp4_vp2_dgx_a100_1N8G.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/bert_mr_tp1_pp4_vp2_dgx_a100_1N8G.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/bert_mr_tp2_pp2_dgx_a100_1N8G.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/bert_mr_tp2_pp2_dgx_a100_1N8G.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_dist_optimizer_overlap_grad_reduce_param_gather.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_dist_optimizer_overlap_grad_reduce_param_gather.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_resume_torch_dist.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_resume_torch_dist.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_resume_torch_dist.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_resume_torch_dist.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_resume_torch_dist_te_2experts.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_resume_torch_dist_te_2experts.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_resume_torch_dist_te_4experts2parallel.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_resume_torch_dist_te_4experts2parallel.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_te_2experts.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_te_2experts.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_te_4experts2parallel.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_te_4experts2parallel.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp1_dist_optimizer_overlap_grad_reduce.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp1_dist_optimizer_overlap_grad_reduce.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp1_overlap_grad_reduce.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp1_overlap_grad_reduce.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp2.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp2_resume_torch.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp2_resume_torch.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp4.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp4.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp4_overlap_grad_reduce.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp4_overlap_grad_reduce.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp4_resume_torch.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp4_resume_torch.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp4_vp1_overlap_grad_reduce.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp4_vp1_overlap_grad_reduce.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/gpt3_345m_nightly_dgx_a100_1N8G_tp2_pp2_4experts.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/gpt3_345m_nightly_dgx_a100_1N8G_tp2_pp2_4experts.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/gpt3_345m_nightly_dgx_a100_1N8G_tp2_pp2_overlap_grad_reduce.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/gpt3_345m_nightly_dgx_a100_1N8G_tp2_pp2_overlap_grad_reduce.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/gpt3_345m_nightly_dgx_a100_1N8G_tp2_pp2_resume_torch_4experts.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/gpt3_345m_nightly_dgx_a100_1N8G_tp2_pp2_resume_torch_4experts.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/gpt3_345m_nightly_dgx_a100_1N8G_tp2_pp2_resume_torch_overlap_grad_reduce.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/gpt3_345m_nightly_dgx_a100_1N8G_tp2_pp2_resume_torch_overlap_grad_reduce.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/gpt3_345m_nightly_dgx_a100_1N8G_tp4_pp1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/gpt3_345m_nightly_dgx_a100_1N8G_tp4_pp1.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/gpt3_345m_nightly_dgx_a100_1N8G_tp4_pp1_overlap_grad_reduce.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/gpt3_345m_nightly_dgx_a100_1N8G_tp4_pp1_overlap_grad_reduce.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/gpt3_345m_nightly_dgx_a100_1N8G_tp4_pp1_resume_torch.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/gpt3_345m_nightly_dgx_a100_1N8G_tp4_pp1_resume_torch.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_dgx_a100_1N8G.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_dgx_a100_1N8G.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/gpt3_mr_mcore_te_tp1_pp1_uniform_full_recompute_dgx_a100_1N8G.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/gpt3_mr_mcore_te_tp1_pp1_uniform_full_recompute_dgx_a100_1N8G.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/gpt3_mr_mcore_te_tp1_pp2_rope_embeddings_dgx_a100_1N8G.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/gpt3_mr_mcore_te_tp1_pp2_rope_embeddings_dgx_a100_1N8G.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/gpt3_mr_mcore_te_tp1_pp2_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/gpt3_mr_mcore_te_tp1_pp2_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/gpt3_mr_mcore_te_tp1_pp4_disable_bias_linear_dgx_a100_1N8G.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/gpt3_mr_mcore_te_tp1_pp4_disable_bias_linear_dgx_a100_1N8G.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/gpt3_mr_mcore_te_tp1_pp4_sequence_parallel_dgx_a100_1N8G.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/gpt3_mr_mcore_te_tp1_pp4_sequence_parallel_dgx_a100_1N8G.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/gpt3_mr_mcore_te_tp1_pp4_swiglu_dgx_a100_1N8G.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/gpt3_mr_mcore_te_tp1_pp4_swiglu_dgx_a100_1N8G.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/gpt3_mr_mcore_te_tp1_pp4_untie_embeddings_and_outputs_dgx_a100_1N8G.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/gpt3_mr_mcore_te_tp1_pp4_untie_embeddings_and_outputs_dgx_a100_1N8G.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/gpt3_mr_mcore_te_tp1_pp4_vp1_calculate_per_token_loss_dgx_a100_1N8G.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/gpt3_mr_mcore_te_tp1_pp4_vp1_calculate_per_token_loss_dgx_a100_1N8G.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/gpt3_mr_mcore_te_tp1_pp4_vp1_decoupled_lr_dgx_a100_1N8G.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/gpt3_mr_mcore_te_tp1_pp4_vp1_decoupled_lr_dgx_a100_1N8G.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/gpt3_mr_mcore_te_tp2_pp1_cp2_nondeterministic_dgx_a100_1N8G.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/gpt3_mr_mcore_te_tp2_pp1_cp2_nondeterministic_dgx_a100_1N8G.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_dgx_a100_1N8G.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_dgx_a100_1N8G.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_groupedGEMM_dgx_a100_1N8G.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_groupedGEMM_dgx_a100_1N8G.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM_dgx_a100_1N8G.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM_dgx_a100_1N8G.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_top2router_dgx_a100_1N8G.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_top2router_dgx_a100_1N8G.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/gpt3_mr_mcore_te_tp2_pp2_ddp_average_in_collective_dgx_a100_1N8G.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/gpt3_mr_mcore_te_tp2_pp2_ddp_average_in_collective_dgx_a100_1N8G.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/gpt3_mr_mcore_te_tp2_pp2_defer_embedding_wgrad_compute_dgx_a100_1N8G.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/gpt3_mr_mcore_te_tp2_pp2_defer_embedding_wgrad_compute_dgx_a100_1N8G.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/gpt3_mr_mcore_te_tp2_pp2_dgx_a100_1N8G.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/gpt3_mr_mcore_te_tp2_pp2_dgx_a100_1N8G.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/gpt3_mr_mcore_te_tp2_pp2_no_create_attention_mask_in_dataloader_dgx_a100_1N8G.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/gpt3_mr_mcore_te_tp2_pp2_no_create_attention_mask_in_dataloader_dgx_a100_1N8G.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/gpt3_mr_mcore_te_tp2_pp2_no_mmap_bin_files_dgx_a100_1N8G.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/gpt3_mr_mcore_te_tp2_pp2_no_mmap_bin_files_dgx_a100_1N8G.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/gpt3_mr_mcore_te_tp4_pp1_qk_layernorm_test_mode_dgx_a100_1N8G.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/gpt3_mr_mcore_te_tp4_pp1_qk_layernorm_test_mode_dgx_a100_1N8G.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/gpt3_mr_resume_dgx_a100_1N8G_tp1_pp2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/gpt3_mr_resume_dgx_a100_1N8G_tp1_pp2.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/gpt3_mr_te_tp2_pp2_dgx_a100_1N8G.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/gpt3_mr_te_tp2_pp2_dgx_a100_1N8G.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/gpt3_mr_tp1_pp4_vp1_dgx_a100_1N8G.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/gpt3_mr_tp1_pp4_vp1_dgx_a100_1N8G.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/gpt3_mr_tp2_pp2_dgx_a100_1N8G.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/gpt3_mr_tp2_pp2_dgx_a100_1N8G.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/multimodal_llava_mr_mcore_te_tp1_pp1_dgx_a100_1N8G.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/multimodal_llava_mr_mcore_te_tp1_pp1_dgx_a100_1N8G.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/multimodal_llava_mr_mcore_te_tp2_pp4_dgx_a100_1N8G.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/multimodal_llava_mr_mcore_te_tp2_pp4_dgx_a100_1N8G.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/jet/t5_220m_mr_mcore_tp2_pp4_dgx_a100_1N8G.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_results/jet/t5_220m_mr_mcore_tp2_pp4_dgx_a100_1N8G.json -------------------------------------------------------------------------------- /tests/functional_tests/test_scripts/bert/pretrain_bert_distributed_test.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_scripts/bert/pretrain_bert_distributed_test.sh -------------------------------------------------------------------------------- /tests/functional_tests/test_scripts/gpt3/pretrain_gpt3_distributed_test.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_scripts/gpt3/pretrain_gpt3_distributed_test.sh -------------------------------------------------------------------------------- /tests/functional_tests/test_scripts/gpt3/pretrain_gpt3_nemo_test.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_scripts/gpt3/pretrain_gpt3_nemo_test.sh -------------------------------------------------------------------------------- /tests/functional_tests/test_scripts/multimodal/pretrain_llava_distributed_test.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_scripts/multimodal/pretrain_llava_distributed_test.sh -------------------------------------------------------------------------------- /tests/functional_tests/test_scripts/retro/pretrain_retro_distributed_test.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_scripts/retro/pretrain_retro_distributed_test.sh -------------------------------------------------------------------------------- /tests/functional_tests/test_scripts/t5/pretrain_t5_distributed_test.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/functional_tests/test_scripts/t5/pretrain_t5_distributed_test.sh -------------------------------------------------------------------------------- /tests/unit_tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/__init__.py -------------------------------------------------------------------------------- /tests/unit_tests/data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/unit_tests/data/test_bin_reader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/data/test_bin_reader.py -------------------------------------------------------------------------------- /tests/unit_tests/data/test_builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/data/test_builder.py -------------------------------------------------------------------------------- /tests/unit_tests/data/test_gpt_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/data/test_gpt_dataset.py -------------------------------------------------------------------------------- /tests/unit_tests/data/test_multimodal_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/data/test_multimodal_dataset.py -------------------------------------------------------------------------------- /tests/unit_tests/data/test_preprocess_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/data/test_preprocess_data.py -------------------------------------------------------------------------------- /tests/unit_tests/data/test_preprocess_mmdata.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/data/test_preprocess_mmdata.py -------------------------------------------------------------------------------- /tests/unit_tests/dist_checkpointing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/dist_checkpointing/__init__.py -------------------------------------------------------------------------------- /tests/unit_tests/dist_checkpointing/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/dist_checkpointing/conftest.py -------------------------------------------------------------------------------- /tests/unit_tests/dist_checkpointing/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/unit_tests/dist_checkpointing/models/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/dist_checkpointing/models/common.py -------------------------------------------------------------------------------- /tests/unit_tests/dist_checkpointing/models/test_bert_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/dist_checkpointing/models/test_bert_model.py -------------------------------------------------------------------------------- /tests/unit_tests/dist_checkpointing/models/test_gpt_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/dist_checkpointing/models/test_gpt_model.py -------------------------------------------------------------------------------- /tests/unit_tests/dist_checkpointing/models/test_grouped_mlp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/dist_checkpointing/models/test_grouped_mlp.py -------------------------------------------------------------------------------- /tests/unit_tests/dist_checkpointing/models/test_mlp_glu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/dist_checkpointing/models/test_mlp_glu.py -------------------------------------------------------------------------------- /tests/unit_tests/dist_checkpointing/models/test_retro_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/dist_checkpointing/models/test_retro_model.py -------------------------------------------------------------------------------- /tests/unit_tests/dist_checkpointing/models/test_sequential_mlp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/dist_checkpointing/models/test_sequential_mlp.py -------------------------------------------------------------------------------- /tests/unit_tests/dist_checkpointing/models/test_t5_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/dist_checkpointing/models/test_t5_model.py -------------------------------------------------------------------------------- /tests/unit_tests/dist_checkpointing/test_async_save.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/dist_checkpointing/test_async_save.py -------------------------------------------------------------------------------- /tests/unit_tests/dist_checkpointing/test_cached_metadata.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/dist_checkpointing/test_cached_metadata.py -------------------------------------------------------------------------------- /tests/unit_tests/dist_checkpointing/test_flattened_resharding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/dist_checkpointing/test_flattened_resharding.py -------------------------------------------------------------------------------- /tests/unit_tests/dist_checkpointing/test_fully_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/dist_checkpointing/test_fully_parallel.py -------------------------------------------------------------------------------- /tests/unit_tests/dist_checkpointing/test_mapping.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/dist_checkpointing/test_mapping.py -------------------------------------------------------------------------------- /tests/unit_tests/dist_checkpointing/test_optimizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/dist_checkpointing/test_optimizer.py -------------------------------------------------------------------------------- /tests/unit_tests/dist_checkpointing/test_serialization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/dist_checkpointing/test_serialization.py -------------------------------------------------------------------------------- /tests/unit_tests/distributed/test_param_and_grad_buffer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/distributed/test_param_and_grad_buffer.py -------------------------------------------------------------------------------- /tests/unit_tests/fusions/test_torch_softmax.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/fusions/test_torch_softmax.py -------------------------------------------------------------------------------- /tests/unit_tests/inference/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/unit_tests/inference/engines/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/unit_tests/inference/engines/test_mcore_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/inference/engines/test_mcore_engine.py -------------------------------------------------------------------------------- /tests/unit_tests/inference/model_inference_wrappers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/unit_tests/inference/model_inference_wrappers/gpt/test_gpt_inference_wrapper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/inference/model_inference_wrappers/gpt/test_gpt_inference_wrapper.py -------------------------------------------------------------------------------- /tests/unit_tests/inference/model_inference_wrappers/test_model_inference_wrapper_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/inference/model_inference_wrappers/test_model_inference_wrapper_config.py -------------------------------------------------------------------------------- /tests/unit_tests/inference/test_common_inference_params.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/inference/test_common_inference_params.py -------------------------------------------------------------------------------- /tests/unit_tests/inference/test_inference_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/inference/test_inference_utils.py -------------------------------------------------------------------------------- /tests/unit_tests/inference/test_modelopt_gpt_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/inference/test_modelopt_gpt_model.py -------------------------------------------------------------------------------- /tests/unit_tests/inference/test_scheduler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/inference/test_scheduler.py -------------------------------------------------------------------------------- /tests/unit_tests/inference/text_generation_controllers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/unit_tests/inference/text_generation_controllers/test_simple_text_generation_controller.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/inference/text_generation_controllers/test_simple_text_generation_controller.py -------------------------------------------------------------------------------- /tests/unit_tests/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/unit_tests/models/test_base_embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/models/test_base_embedding.py -------------------------------------------------------------------------------- /tests/unit_tests/models/test_bert_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/models/test_bert_model.py -------------------------------------------------------------------------------- /tests/unit_tests/models/test_clip_vit_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/models/test_clip_vit_model.py -------------------------------------------------------------------------------- /tests/unit_tests/models/test_gpt_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/models/test_gpt_model.py -------------------------------------------------------------------------------- /tests/unit_tests/models/test_llava_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/models/test_llava_model.py -------------------------------------------------------------------------------- /tests/unit_tests/models/test_mamba_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/models/test_mamba_model.py -------------------------------------------------------------------------------- /tests/unit_tests/models/test_multimodal_projector.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/models/test_multimodal_projector.py -------------------------------------------------------------------------------- /tests/unit_tests/models/test_t5_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/models/test_t5_model.py -------------------------------------------------------------------------------- /tests/unit_tests/pipeline_parallel/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/unit_tests/pipeline_parallel/test_schedules.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/pipeline_parallel/test_schedules.py -------------------------------------------------------------------------------- /tests/unit_tests/tensor_parallel/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/unit_tests/tensor_parallel/test_cross_entropy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/tensor_parallel/test_cross_entropy.py -------------------------------------------------------------------------------- /tests/unit_tests/tensor_parallel/test_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/tensor_parallel/test_data.py -------------------------------------------------------------------------------- /tests/unit_tests/tensor_parallel/test_initialization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/tensor_parallel/test_initialization.py -------------------------------------------------------------------------------- /tests/unit_tests/tensor_parallel/test_layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/tensor_parallel/test_layers.py -------------------------------------------------------------------------------- /tests/unit_tests/tensor_parallel/test_mappings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/tensor_parallel/test_mappings.py -------------------------------------------------------------------------------- /tests/unit_tests/tensor_parallel/test_random.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/tensor_parallel/test_random.py -------------------------------------------------------------------------------- /tests/unit_tests/tensor_parallel/test_tensor_parallel_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/tensor_parallel/test_tensor_parallel_utils.py -------------------------------------------------------------------------------- /tests/unit_tests/test_basic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/test_basic.py -------------------------------------------------------------------------------- /tests/unit_tests/test_imports.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/test_imports.py -------------------------------------------------------------------------------- /tests/unit_tests/test_local_multi_tensor_fns.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/test_local_multi_tensor_fns.py -------------------------------------------------------------------------------- /tests/unit_tests/test_num_microbatches_calculator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/test_num_microbatches_calculator.py -------------------------------------------------------------------------------- /tests/unit_tests/test_optimizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/test_optimizer.py -------------------------------------------------------------------------------- /tests/unit_tests/test_parallel_state.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/test_parallel_state.py -------------------------------------------------------------------------------- /tests/unit_tests/test_training.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/test_training.py -------------------------------------------------------------------------------- /tests/unit_tests/test_utilities.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/test_utilities.py -------------------------------------------------------------------------------- /tests/unit_tests/test_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/test_utils.py -------------------------------------------------------------------------------- /tests/unit_tests/transformer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/unit_tests/transformer/moe/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/unit_tests/transformer/moe/test_a2a_token_dispatcher.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/transformer/moe/test_a2a_token_dispatcher.py -------------------------------------------------------------------------------- /tests/unit_tests/transformer/moe/test_aux_loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/transformer/moe/test_aux_loss.py -------------------------------------------------------------------------------- /tests/unit_tests/transformer/moe/test_grouped_mlp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/transformer/moe/test_grouped_mlp.py -------------------------------------------------------------------------------- /tests/unit_tests/transformer/moe/test_routers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/transformer/moe/test_routers.py -------------------------------------------------------------------------------- /tests/unit_tests/transformer/moe/test_sequential_mlp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/transformer/moe/test_sequential_mlp.py -------------------------------------------------------------------------------- /tests/unit_tests/transformer/moe/test_token_dispatcher.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/transformer/moe/test_token_dispatcher.py -------------------------------------------------------------------------------- /tests/unit_tests/transformer/test_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/transformer/test_attention.py -------------------------------------------------------------------------------- /tests/unit_tests/transformer/test_attention_packed_seq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/transformer/test_attention_packed_seq.py -------------------------------------------------------------------------------- /tests/unit_tests/transformer/test_core_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/transformer/test_core_attention.py -------------------------------------------------------------------------------- /tests/unit_tests/transformer/test_mlp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/transformer/test_mlp.py -------------------------------------------------------------------------------- /tests/unit_tests/transformer/test_module.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/transformer/test_module.py -------------------------------------------------------------------------------- /tests/unit_tests/transformer/test_retro_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/transformer/test_retro_attention.py -------------------------------------------------------------------------------- /tests/unit_tests/transformer/test_spec_customization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/transformer/test_spec_customization.py -------------------------------------------------------------------------------- /tests/unit_tests/transformer/test_transformer_block.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/transformer/test_transformer_block.py -------------------------------------------------------------------------------- /tests/unit_tests/transformer/test_transformer_layer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tests/unit_tests/transformer/test_transformer_layer.py -------------------------------------------------------------------------------- /tools/autoformat.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tools/autoformat.sh -------------------------------------------------------------------------------- /tools/bert_embedding/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tools/bert_embedding/__init__.py -------------------------------------------------------------------------------- /tools/bert_embedding/dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tools/bert_embedding/dataset.py -------------------------------------------------------------------------------- /tools/bert_embedding/embed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tools/bert_embedding/embed.py -------------------------------------------------------------------------------- /tools/bert_embedding/external_libs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tools/bert_embedding/external_libs.py -------------------------------------------------------------------------------- /tools/bert_embedding/huggingface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tools/bert_embedding/huggingface.py -------------------------------------------------------------------------------- /tools/checkpoint/convert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tools/checkpoint/convert.py -------------------------------------------------------------------------------- /tools/checkpoint/hybrid_conversion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tools/checkpoint/hybrid_conversion.py -------------------------------------------------------------------------------- /tools/checkpoint/loader_llama_mistral.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tools/checkpoint/loader_llama_mistral.py -------------------------------------------------------------------------------- /tools/checkpoint/loader_mcore.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tools/checkpoint/loader_mcore.py -------------------------------------------------------------------------------- /tools/checkpoint/loader_megatron.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tools/checkpoint/loader_megatron.py -------------------------------------------------------------------------------- /tools/checkpoint/loader_mixtral_hf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tools/checkpoint/loader_mixtral_hf.py -------------------------------------------------------------------------------- /tools/checkpoint/saver_mcore.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tools/checkpoint/saver_mcore.py -------------------------------------------------------------------------------- /tools/checkpoint/saver_megatron.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tools/checkpoint/saver_megatron.py -------------------------------------------------------------------------------- /tools/checkpoint/setter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tools/checkpoint/setter.py -------------------------------------------------------------------------------- /tools/checkpoint/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tools/checkpoint/utils.py -------------------------------------------------------------------------------- /tools/linter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tools/linter.py -------------------------------------------------------------------------------- /tools/merge_datasets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tools/merge_datasets.py -------------------------------------------------------------------------------- /tools/openwebtext/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tools/openwebtext/README.md -------------------------------------------------------------------------------- /tools/openwebtext/add_id.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tools/openwebtext/add_id.py -------------------------------------------------------------------------------- /tools/openwebtext/blacklist_urls.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tools/openwebtext/blacklist_urls.py -------------------------------------------------------------------------------- /tools/openwebtext/cleanup_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tools/openwebtext/cleanup_dataset.py -------------------------------------------------------------------------------- /tools/openwebtext/cleanup_fix_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tools/openwebtext/cleanup_fix_dataset.py -------------------------------------------------------------------------------- /tools/openwebtext/filter_ngrams.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tools/openwebtext/filter_ngrams.py -------------------------------------------------------------------------------- /tools/openwebtext/find_duplicates.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tools/openwebtext/find_duplicates.py -------------------------------------------------------------------------------- /tools/openwebtext/group_duplicate_url.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tools/openwebtext/group_duplicate_url.py -------------------------------------------------------------------------------- /tools/openwebtext/merge_jsons.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tools/openwebtext/merge_jsons.py -------------------------------------------------------------------------------- /tools/openwebtext/remove_group_duplicates.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tools/openwebtext/remove_group_duplicates.py -------------------------------------------------------------------------------- /tools/preprocess_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tools/preprocess_data.py -------------------------------------------------------------------------------- /tools/preprocess_data_nmt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tools/preprocess_data_nmt.py -------------------------------------------------------------------------------- /tools/preprocess_mmdata.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tools/preprocess_mmdata.py -------------------------------------------------------------------------------- /tools/report_theoretical_memory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tools/report_theoretical_memory.py -------------------------------------------------------------------------------- /tools/retro/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tools/retro/README.md -------------------------------------------------------------------------------- /tools/retro/build_db.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tools/retro/build_db.md -------------------------------------------------------------------------------- /tools/retro/cli/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tools/retro/cli/__init__.py -------------------------------------------------------------------------------- /tools/retro/cli/__main__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tools/retro/cli/__main__.py -------------------------------------------------------------------------------- /tools/retro/cli/cli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tools/retro/cli/cli.py -------------------------------------------------------------------------------- /tools/retro/config_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tools/retro/config_utils.py -------------------------------------------------------------------------------- /tools/retro/docker/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tools/retro/docker/Dockerfile -------------------------------------------------------------------------------- /tools/retro/preprocess_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tools/retro/preprocess_data.py -------------------------------------------------------------------------------- /tools/retro/sft/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tools/retro/sft/README.md -------------------------------------------------------------------------------- /tools/retro/sft/dataset_conv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tools/retro/sft/dataset_conv.py -------------------------------------------------------------------------------- /tools/retro/sft/open_inst.sh: -------------------------------------------------------------------------------- 1 | DATA_BLEND="1.0 open_inst" 2 | -------------------------------------------------------------------------------- /tools/retro/sft/sft_retro.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tools/retro/sft/sft_retro.py -------------------------------------------------------------------------------- /tools/retro/sft/sft_retro_lm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tools/retro/sft/sft_retro_lm.sh -------------------------------------------------------------------------------- /tools/retro/text_generation/evaluate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tools/retro/text_generation/evaluate.py -------------------------------------------------------------------------------- /tools/retro/text_generation/metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tools/retro/text_generation/metrics.py -------------------------------------------------------------------------------- /tools/retro/text_generation/retro_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tools/retro/text_generation/retro_api.py -------------------------------------------------------------------------------- /tools/retro/text_generation/retro_generate.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tools/retro/text_generation/retro_generate.sh -------------------------------------------------------------------------------- /tools/retro/text_generation/retro_generation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tools/retro/text_generation/retro_generation.py -------------------------------------------------------------------------------- /tools/retro/text_generation/retro_text_generation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tools/retro/text_generation/retro_text_generation.py -------------------------------------------------------------------------------- /tools/run_mamba_text_generation_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tools/run_mamba_text_generation_server.py -------------------------------------------------------------------------------- /tools/run_text_generation_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tools/run_text_generation_server.py -------------------------------------------------------------------------------- /tools/run_vlm_text_generation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tools/run_vlm_text_generation.py -------------------------------------------------------------------------------- /tools/text_generation_cli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/VocabularyParallelism/HEAD/tools/text_generation_cli.py --------------------------------------------------------------------------------