├── .DS_Store ├── .idea ├── .gitignore ├── DivScene_release.iml ├── inspectionProfiles │ ├── Project_Default.xml │ └── profiles_settings.xml ├── jupyter-settings.xml ├── misc.xml ├── modules.xml └── vcs.xml ├── LICENSE ├── README.md ├── agent_inference ├── .DS_Store ├── hf_idefics2_gpt4o.py ├── hf_idefics2_metric.py ├── hf_idefics2_online_client.py ├── hf_idefics2_server.py ├── run_client.sh ├── run_server.sh └── utils.py ├── agent_training ├── .DS_Store ├── LICENSE ├── Megatron-LM-240424 │ ├── .DS_Store │ ├── CODEOWNERS │ ├── CONTRIBUTING.md │ ├── Dockerfile.ci │ ├── Dockerfile.test │ ├── LICENSE │ ├── MANIFEST.in │ ├── README.md │ ├── docs │ │ ├── llama2.md │ │ └── source │ │ │ ├── api-guide │ │ │ ├── context_parallel.rst │ │ │ ├── datasets.rst │ │ │ ├── dist_checkpointing.rst │ │ │ ├── dist_checkpointing.strategies.rst │ │ │ ├── distributed.rst │ │ │ ├── fusions.rst │ │ │ ├── index.rst │ │ │ ├── models.bert.rst │ │ │ ├── models.gpt.rst │ │ │ ├── models.rst │ │ │ ├── models.t5.rst │ │ │ ├── moe.rst │ │ │ ├── pipeline_parallel.rst │ │ │ ├── tensor_parallel.rst │ │ │ └── transformer.rst │ │ │ ├── distrib_optimizer.md │ │ │ ├── images │ │ │ ├── context_parallel │ │ │ │ ├── CP_overview.png │ │ │ │ └── CP_results.png │ │ │ └── distrib_optimizer │ │ │ │ ├── data_flow.png │ │ │ │ └── sharding_scheme.png │ │ │ ├── index.rst │ │ │ └── user-guide │ │ │ └── index.rst │ ├── examples │ │ ├── bert │ │ │ ├── README.md │ │ │ └── train_bert_340m_distributed.sh │ │ ├── detxoify_lm │ │ │ ├── README.md │ │ │ ├── annotations │ │ │ │ ├── filter-selfgeneration.py │ │ │ │ ├── perspective_api_annotate.py │ │ │ │ └── preprocess.sh │ │ │ ├── finetune_gpt.py │ │ │ ├── finetune_gpt_distributed-1.3b.sh │ │ │ ├── generate-1.3b.sh │ │ │ ├── generate_samples_gpt.py │ │ │ ├── perspective_api.py │ │ │ └── self_generation │ │ │ │ └── selfgenerate-1.3b-unconditional.sh │ │ ├── evaluate_retriever_nq.sh │ │ ├── evaluate_zeroshot_gpt.sh │ │ ├── finetune_mnli_distributed.sh │ │ ├── finetune_race_distributed.sh │ │ ├── finetune_retriever_distributed.sh │ │ ├── gpt3 │ │ │ ├── README.md │ │ │ ├── gpt_config.yaml │ │ │ └── train_gpt3_175b_distributed.sh │ │ ├── inference │ │ │ ├── README.md │ │ │ ├── ptq_trtllm_llama_7b.sh │ │ │ ├── ptq_trtllm_nemotron3_8b.sh │ │ │ ├── text_generation_ptq.py │ │ │ └── trtllm_text_generation.py │ │ ├── merge_mp_bert.sh │ │ ├── msdp │ │ │ ├── README.md │ │ │ ├── data_processing.sh │ │ │ ├── eval_knwl_generation.sh │ │ │ ├── eval_resp_generation.sh │ │ │ ├── prep_resp_gen.sh │ │ │ ├── prompt_knwl_gen.sh │ │ │ └── prompt_resp_gen.sh │ │ ├── pretrain_bert.sh │ │ ├── pretrain_bert_distributed.sh │ │ ├── pretrain_bert_distributed_with_mp.sh │ │ ├── pretrain_gpt.sh │ │ ├── pretrain_gpt3_175B.sh │ │ ├── pretrain_gpt_distributed.sh │ │ ├── pretrain_gpt_distributed_with_mp.sh │ │ ├── pretrain_ict.sh │ │ ├── pretrain_t5.sh │ │ ├── pretrain_t5_distributed.sh │ │ ├── pretrain_t5_distributed_with_mp.sh │ │ ├── pretrain_vision_classify.sh │ │ ├── pretrain_vision_dino.sh │ │ ├── pretrain_vision_inpaint.sh │ │ ├── pretrain_vlm.sh │ │ ├── retro │ │ │ ├── README.md │ │ │ ├── preprocess_data.sh │ │ │ └── train_retro_2b_distributed.sh │ │ ├── run_simple_mcore_train_loop.py │ │ ├── run_text_generation_server_345M.sh │ │ ├── run_text_generation_server_345M_8_tensor_parallel.sh │ │ ├── sc21 │ │ │ ├── CONFIG.sh │ │ │ ├── README.md │ │ │ ├── SBATCH.sh │ │ │ ├── SRUN.sh │ │ │ ├── run_figure_11.sh │ │ │ ├── run_figure_12.sh │ │ │ ├── run_figure_13.sh │ │ │ ├── run_figure_14.sh │ │ │ ├── run_figure_15.sh │ │ │ ├── run_figure_16.sh │ │ │ ├── run_figure_17.sh │ │ │ ├── run_figure_18.sh │ │ │ └── run_table_1.sh │ │ └── t5 │ │ │ ├── README.md │ │ │ ├── t5_mcore_train_curve.png │ │ │ └── train_t5_220m_distributed.sh │ ├── images │ │ ├── Achieved_petaFLOPs.png │ │ └── cases_april2021.png │ ├── jet-tests.yml │ ├── megatron │ │ ├── core │ │ │ ├── QuickStart.md │ │ │ ├── README.md │ │ │ ├── README_STRAGGLER.md │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-310.pyc │ │ │ │ ├── __init__.cpython-38.pyc │ │ │ │ ├── enums.cpython-310.pyc │ │ │ │ ├── enums.cpython-38.pyc │ │ │ │ ├── inference_params.cpython-310.pyc │ │ │ │ ├── inference_params.cpython-38.pyc │ │ │ │ ├── jit.cpython-310.pyc │ │ │ │ ├── jit.cpython-38.pyc │ │ │ │ ├── model_parallel_config.cpython-310.pyc │ │ │ │ ├── model_parallel_config.cpython-38.pyc │ │ │ │ ├── packed_seq_params.cpython-310.pyc │ │ │ │ ├── packed_seq_params.cpython-38.pyc │ │ │ │ ├── parallel_state.cpython-310.pyc │ │ │ │ ├── parallel_state.cpython-38.pyc │ │ │ │ ├── timers.cpython-310.pyc │ │ │ │ ├── timers.cpython-38.pyc │ │ │ │ ├── utils.cpython-310.pyc │ │ │ │ └── utils.cpython-38.pyc │ │ │ ├── datasets │ │ │ │ ├── Makefile │ │ │ │ ├── __init__.py │ │ │ │ ├── __pycache__ │ │ │ │ │ ├── __init__.cpython-310.pyc │ │ │ │ │ ├── __init__.cpython-38.pyc │ │ │ │ │ ├── megatron_tokenizer.cpython-310.pyc │ │ │ │ │ ├── megatron_tokenizer.cpython-38.pyc │ │ │ │ │ └── utils.cpython-310.pyc │ │ │ │ ├── bert_dataset.py │ │ │ │ ├── blended_dataset.py │ │ │ │ ├── blended_megatron_dataset_builder.py │ │ │ │ ├── blended_megatron_dataset_config.py │ │ │ │ ├── gpt_dataset.py │ │ │ │ ├── helpers.cpp │ │ │ │ ├── helpers.cpython-310-x86_64-linux-gnu.so │ │ │ │ ├── indexed_dataset.py │ │ │ │ ├── masked_dataset.py │ │ │ │ ├── megatron_dataset.py │ │ │ │ ├── megatron_tokenizer.py │ │ │ │ ├── multimodal_dataset.py │ │ │ │ ├── readme.md │ │ │ │ ├── retro │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── config │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── bert_embedders.py │ │ │ │ │ │ ├── config.py │ │ │ │ │ │ ├── gpt_chunk_datasets.py │ │ │ │ │ │ └── tokenizers.py │ │ │ │ │ ├── db │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── build.py │ │ │ │ │ │ ├── dataset.py │ │ │ │ │ │ └── utils.py │ │ │ │ │ ├── external_libs.py │ │ │ │ │ ├── index │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── build.py │ │ │ │ │ │ ├── factory.py │ │ │ │ │ │ ├── index.py │ │ │ │ │ │ ├── indexes │ │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ │ ├── faiss_base.py │ │ │ │ │ │ │ └── faiss_par_add.py │ │ │ │ │ │ ├── utils.py │ │ │ │ │ │ └── validate.py │ │ │ │ │ ├── query │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── gpt_chunk_dataset.py │ │ │ │ │ │ ├── multi_split_gpt_dataset.py │ │ │ │ │ │ ├── query.py │ │ │ │ │ │ ├── retro_dataset.py │ │ │ │ │ │ └── utils.py │ │ │ │ │ └── utils.py │ │ │ │ ├── t5_dataset.py │ │ │ │ └── utils.py │ │ │ ├── dist_checkpointing │ │ │ │ ├── __init__.py │ │ │ │ ├── __pycache__ │ │ │ │ │ ├── __init__.cpython-310.pyc │ │ │ │ │ ├── __init__.cpython-38.pyc │ │ │ │ │ ├── core.cpython-310.pyc │ │ │ │ │ ├── core.cpython-38.pyc │ │ │ │ │ ├── dict_utils.cpython-310.pyc │ │ │ │ │ ├── dict_utils.cpython-38.pyc │ │ │ │ │ ├── mapping.cpython-310.pyc │ │ │ │ │ ├── mapping.cpython-38.pyc │ │ │ │ │ ├── optimizer.cpython-310.pyc │ │ │ │ │ ├── serialization.cpython-310.pyc │ │ │ │ │ ├── serialization.cpython-38.pyc │ │ │ │ │ ├── utils.cpython-310.pyc │ │ │ │ │ └── utils.cpython-38.pyc │ │ │ │ ├── core.py │ │ │ │ ├── dict_utils.py │ │ │ │ ├── mapping.py │ │ │ │ ├── optimizer.py │ │ │ │ ├── serialization.py │ │ │ │ ├── strategies │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── __pycache__ │ │ │ │ │ │ ├── __init__.cpython-310.pyc │ │ │ │ │ │ ├── __init__.cpython-38.pyc │ │ │ │ │ │ ├── base.cpython-310.pyc │ │ │ │ │ │ └── base.cpython-38.pyc │ │ │ │ │ ├── base.py │ │ │ │ │ ├── filesystem_async.py │ │ │ │ │ ├── state_dict_saver.py │ │ │ │ │ ├── tensorstore.py │ │ │ │ │ ├── torch.py │ │ │ │ │ ├── two_stage.py │ │ │ │ │ └── zarr.py │ │ │ │ └── utils.py │ │ │ ├── distributed │ │ │ │ ├── __init__.py │ │ │ │ ├── __pycache__ │ │ │ │ │ ├── __init__.cpython-310.pyc │ │ │ │ │ ├── __init__.cpython-38.pyc │ │ │ │ │ ├── distributed_data_parallel.cpython-310.pyc │ │ │ │ │ ├── distributed_data_parallel.cpython-38.pyc │ │ │ │ │ ├── distributed_data_parallel_config.cpython-310.pyc │ │ │ │ │ ├── distributed_data_parallel_config.cpython-38.pyc │ │ │ │ │ ├── finalize_model_grads.cpython-310.pyc │ │ │ │ │ ├── finalize_model_grads.cpython-38.pyc │ │ │ │ │ ├── param_and_grad_buffer.cpython-310.pyc │ │ │ │ │ └── param_and_grad_buffer.cpython-38.pyc │ │ │ │ ├── distributed_data_parallel.py │ │ │ │ ├── distributed_data_parallel_config.py │ │ │ │ ├── finalize_model_grads.py │ │ │ │ └── param_and_grad_buffer.py │ │ │ ├── enums.py │ │ │ ├── fusions │ │ │ │ ├── __init__.py │ │ │ │ ├── __pycache__ │ │ │ │ │ ├── __init__.cpython-310.pyc │ │ │ │ │ ├── __init__.cpython-38.pyc │ │ │ │ │ ├── fused_bias_dropout.cpython-310.pyc │ │ │ │ │ ├── fused_bias_geglu.cpython-310.pyc │ │ │ │ │ ├── fused_bias_gelu.cpython-310.pyc │ │ │ │ │ ├── fused_bias_swiglu.cpython-310.pyc │ │ │ │ │ ├── fused_layer_norm.cpython-310.pyc │ │ │ │ │ ├── fused_layer_norm.cpython-38.pyc │ │ │ │ │ └── fused_softmax.cpython-310.pyc │ │ │ │ ├── fused_bias_dropout.py │ │ │ │ ├── fused_bias_geglu.py │ │ │ │ ├── fused_bias_gelu.py │ │ │ │ ├── fused_bias_swiglu.py │ │ │ │ ├── fused_layer_norm.py │ │ │ │ └── fused_softmax.py │ │ │ ├── inference │ │ │ │ ├── __init__.py │ │ │ │ └── gpt │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── model_specs.py │ │ │ │ │ └── state_dict_hooks.py │ │ │ ├── inference_params.py │ │ │ ├── jit.py │ │ │ ├── model_parallel_config.py │ │ │ ├── models │ │ │ │ ├── T5 │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── t5_model.py │ │ │ │ │ └── t5_spec.py │ │ │ │ ├── __init__.py │ │ │ │ ├── __pycache__ │ │ │ │ │ ├── __init__.cpython-310.pyc │ │ │ │ │ └── __init__.cpython-38.pyc │ │ │ │ ├── bert │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── bert_layer_specs.py │ │ │ │ │ ├── bert_lm_head.py │ │ │ │ │ ├── bert_model.py │ │ │ │ │ └── pooler.py │ │ │ │ ├── common │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── __pycache__ │ │ │ │ │ │ ├── __init__.cpython-310.pyc │ │ │ │ │ │ └── __init__.cpython-38.pyc │ │ │ │ │ ├── embeddings │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── __pycache__ │ │ │ │ │ │ │ ├── __init__.cpython-310.pyc │ │ │ │ │ │ │ ├── __init__.cpython-38.pyc │ │ │ │ │ │ │ ├── language_model_embedding.cpython-310.pyc │ │ │ │ │ │ │ ├── language_model_embedding.cpython-38.pyc │ │ │ │ │ │ │ ├── rotary_pos_embedding.cpython-310.pyc │ │ │ │ │ │ │ └── rotary_pos_embedding.cpython-38.pyc │ │ │ │ │ │ ├── language_model_embedding.py │ │ │ │ │ │ └── rotary_pos_embedding.py │ │ │ │ │ ├── language_module │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── __pycache__ │ │ │ │ │ │ │ ├── __init__.cpython-310.pyc │ │ │ │ │ │ │ ├── __init__.cpython-38.pyc │ │ │ │ │ │ │ ├── language_module.cpython-310.pyc │ │ │ │ │ │ │ └── language_module.cpython-38.pyc │ │ │ │ │ │ └── language_module.py │ │ │ │ │ └── vision_module │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── __pycache__ │ │ │ │ │ │ ├── __init__.cpython-310.pyc │ │ │ │ │ │ └── vision_module.cpython-310.pyc │ │ │ │ │ │ └── vision_module.py │ │ │ │ ├── gpt │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── __pycache__ │ │ │ │ │ │ ├── __init__.cpython-310.pyc │ │ │ │ │ │ ├── __init__.cpython-38.pyc │ │ │ │ │ │ ├── gpt_layer_specs.cpython-310.pyc │ │ │ │ │ │ ├── gpt_model.cpython-310.pyc │ │ │ │ │ │ └── gpt_model.cpython-38.pyc │ │ │ │ │ ├── gpt_layer_specs.py │ │ │ │ │ └── gpt_model.py │ │ │ │ ├── multimodal │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── llava_model.py │ │ │ │ ├── retro │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── __pycache__ │ │ │ │ │ │ ├── __init__.cpython-310.pyc │ │ │ │ │ │ ├── __init__.cpython-38.pyc │ │ │ │ │ │ ├── base_attention.cpython-310.pyc │ │ │ │ │ │ ├── config.cpython-310.pyc │ │ │ │ │ │ ├── config.cpython-38.pyc │ │ │ │ │ │ ├── decoder_attention.cpython-310.pyc │ │ │ │ │ │ ├── decoder_spec.cpython-310.pyc │ │ │ │ │ │ ├── decoder_spec.cpython-38.pyc │ │ │ │ │ │ ├── encoder_attention.cpython-310.pyc │ │ │ │ │ │ ├── encoder_spec.cpython-310.pyc │ │ │ │ │ │ ├── model.cpython-310.pyc │ │ │ │ │ │ └── utils.cpython-310.pyc │ │ │ │ │ ├── base_attention.py │ │ │ │ │ ├── config.py │ │ │ │ │ ├── decoder_attention.py │ │ │ │ │ ├── decoder_spec.py │ │ │ │ │ ├── encoder_attention.py │ │ │ │ │ ├── encoder_spec.py │ │ │ │ │ ├── model.py │ │ │ │ │ └── utils.py │ │ │ │ └── vision │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── __pycache__ │ │ │ │ │ ├── __init__.cpython-310.pyc │ │ │ │ │ ├── clip_vit_model.cpython-310.pyc │ │ │ │ │ ├── multimodal_projector.cpython-310.pyc │ │ │ │ │ └── vit_layer_specs.cpython-310.pyc │ │ │ │ │ ├── clip_vit_model.py │ │ │ │ │ ├── multimodal_projector.py │ │ │ │ │ └── vit_layer_specs.py │ │ │ ├── optimizer │ │ │ │ ├── __init__.py │ │ │ │ ├── __pycache__ │ │ │ │ │ ├── __init__.cpython-310.pyc │ │ │ │ │ ├── clip_grads.cpython-310.pyc │ │ │ │ │ ├── distrib_optimizer.cpython-310.pyc │ │ │ │ │ ├── grad_scaler.cpython-310.pyc │ │ │ │ │ ├── optimizer.cpython-310.pyc │ │ │ │ │ └── optimizer_config.cpython-310.pyc │ │ │ │ ├── clip_grads.py │ │ │ │ ├── distrib_optimizer.py │ │ │ │ ├── grad_scaler.py │ │ │ │ ├── optimizer.py │ │ │ │ └── optimizer_config.py │ │ │ ├── package_info.py │ │ │ ├── packed_seq_params.py │ │ │ ├── parallel_state.py │ │ │ ├── pipeline_parallel │ │ │ │ ├── __init__.py │ │ │ │ ├── __pycache__ │ │ │ │ │ ├── __init__.cpython-310.pyc │ │ │ │ │ ├── p2p_communication.cpython-310.pyc │ │ │ │ │ └── schedules.cpython-310.pyc │ │ │ │ ├── p2p_communication.py │ │ │ │ └── schedules.py │ │ │ ├── requirements.txt │ │ │ ├── tensor_parallel │ │ │ │ ├── __init__.py │ │ │ │ ├── __pycache__ │ │ │ │ │ ├── __init__.cpython-310.pyc │ │ │ │ │ ├── __init__.cpython-38.pyc │ │ │ │ │ ├── cross_entropy.cpython-310.pyc │ │ │ │ │ ├── cross_entropy.cpython-38.pyc │ │ │ │ │ ├── data.cpython-310.pyc │ │ │ │ │ ├── data.cpython-38.pyc │ │ │ │ │ ├── layers.cpython-310.pyc │ │ │ │ │ ├── layers.cpython-38.pyc │ │ │ │ │ ├── mappings.cpython-310.pyc │ │ │ │ │ ├── mappings.cpython-38.pyc │ │ │ │ │ ├── random.cpython-310.pyc │ │ │ │ │ ├── random.cpython-38.pyc │ │ │ │ │ ├── utils.cpython-310.pyc │ │ │ │ │ └── utils.cpython-38.pyc │ │ │ │ ├── cross_entropy.py │ │ │ │ ├── data.py │ │ │ │ ├── layers.py │ │ │ │ ├── mappings.py │ │ │ │ ├── random.py │ │ │ │ └── utils.py │ │ │ ├── timers.py │ │ │ ├── transformer │ │ │ │ ├── __init__.py │ │ │ │ ├── __pycache__ │ │ │ │ │ ├── __init__.cpython-310.pyc │ │ │ │ │ ├── __init__.cpython-38.pyc │ │ │ │ │ ├── attention.cpython-310.pyc │ │ │ │ │ ├── dot_product_attention.cpython-310.pyc │ │ │ │ │ ├── enums.cpython-310.pyc │ │ │ │ │ ├── enums.cpython-38.pyc │ │ │ │ │ ├── identity_op.cpython-310.pyc │ │ │ │ │ ├── identity_op.cpython-38.pyc │ │ │ │ │ ├── mlp.cpython-310.pyc │ │ │ │ │ ├── module.cpython-310.pyc │ │ │ │ │ ├── module.cpython-38.pyc │ │ │ │ │ ├── spec_utils.cpython-310.pyc │ │ │ │ │ ├── spec_utils.cpython-38.pyc │ │ │ │ │ ├── transformer_block.cpython-310.pyc │ │ │ │ │ ├── transformer_block.cpython-38.pyc │ │ │ │ │ ├── transformer_config.cpython-310.pyc │ │ │ │ │ ├── transformer_config.cpython-38.pyc │ │ │ │ │ ├── transformer_layer.cpython-310.pyc │ │ │ │ │ ├── transformer_layer.cpython-38.pyc │ │ │ │ │ ├── utils.cpython-310.pyc │ │ │ │ │ └── utils.cpython-38.pyc │ │ │ │ ├── attention.py │ │ │ │ ├── custom_layers │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── __pycache__ │ │ │ │ │ │ ├── __init__.cpython-310.pyc │ │ │ │ │ │ ├── __init__.cpython-38.pyc │ │ │ │ │ │ ├── transformer_engine.cpython-310.pyc │ │ │ │ │ │ └── transformer_engine.cpython-38.pyc │ │ │ │ │ └── transformer_engine.py │ │ │ │ ├── dot_product_attention.py │ │ │ │ ├── enums.py │ │ │ │ ├── identity_op.py │ │ │ │ ├── mlp.py │ │ │ │ ├── module.py │ │ │ │ ├── moe │ │ │ │ │ ├── README.md │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── __pycache__ │ │ │ │ │ │ ├── __init__.cpython-310.pyc │ │ │ │ │ │ ├── experts.cpython-310.pyc │ │ │ │ │ │ ├── grouped_gemm_util.cpython-310.pyc │ │ │ │ │ │ ├── moe_layer.cpython-310.pyc │ │ │ │ │ │ ├── moe_utils.cpython-310.pyc │ │ │ │ │ │ ├── router.cpython-310.pyc │ │ │ │ │ │ └── token_dispatcher.cpython-310.pyc │ │ │ │ │ ├── experts.py │ │ │ │ │ ├── grouped_gemm_util.py │ │ │ │ │ ├── moe_layer.py │ │ │ │ │ ├── moe_utils.py │ │ │ │ │ ├── router.py │ │ │ │ │ └── token_dispatcher.py │ │ │ │ ├── spec_utils.py │ │ │ │ ├── transformer_block.py │ │ │ │ ├── transformer_config.py │ │ │ │ ├── transformer_layer.py │ │ │ │ └── utils.py │ │ │ └── utils.py │ │ ├── inference │ │ │ ├── __init__.py │ │ │ ├── arguments.py │ │ │ ├── gpt │ │ │ │ ├── __init__.py │ │ │ │ └── model_provider.py │ │ │ ├── static │ │ │ │ └── index.html │ │ │ ├── text_generation │ │ │ │ ├── __init__.py │ │ │ │ ├── api.py │ │ │ │ ├── beam_utils.py │ │ │ │ ├── communication.py │ │ │ │ ├── forward_step.py │ │ │ │ ├── generation.py │ │ │ │ ├── sampling.py │ │ │ │ └── tokenization.py │ │ │ └── text_generation_server.py │ │ ├── legacy │ │ │ ├── data │ │ │ │ ├── __init__.py │ │ │ │ ├── __pycache__ │ │ │ │ │ ├── __init__.cpython-310.pyc │ │ │ │ │ ├── autoaugment.cpython-310.pyc │ │ │ │ │ ├── data_samplers.cpython-310.pyc │ │ │ │ │ ├── image_folder.cpython-310.pyc │ │ │ │ │ └── vit_dataset.cpython-310.pyc │ │ │ │ ├── autoaugment.py │ │ │ │ ├── biencoder_dataset_utils.py │ │ │ │ ├── data_samplers.py │ │ │ │ ├── dataset_utils.py │ │ │ │ ├── ict_dataset.py │ │ │ │ ├── image_folder.py │ │ │ │ ├── multimodal_dataset.py │ │ │ │ ├── orqa_wiki_dataset.py │ │ │ │ ├── realm_dataset_utils.py │ │ │ │ ├── realm_index.py │ │ │ │ └── vit_dataset.py │ │ │ ├── fp16_deprecated │ │ │ │ └── loss_scaler.py │ │ │ ├── fused_kernels │ │ │ │ ├── __init__.py │ │ │ │ ├── __pycache__ │ │ │ │ │ ├── __init__.cpython-310.pyc │ │ │ │ │ └── __init__.cpython-38.pyc │ │ │ │ ├── compat.h │ │ │ │ ├── tests │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── test_fused_kernels.py │ │ │ │ └── type_shim.h │ │ │ ├── indexer.py │ │ │ ├── model │ │ │ │ ├── __init__.py │ │ │ │ ├── __pycache__ │ │ │ │ │ ├── __init__.cpython-310.pyc │ │ │ │ │ ├── bert_model.cpython-310.pyc │ │ │ │ │ ├── enums.cpython-310.pyc │ │ │ │ │ ├── fused_bias_gelu.cpython-310.pyc │ │ │ │ │ ├── fused_layer_norm.cpython-310.pyc │ │ │ │ │ ├── fused_softmax.cpython-310.pyc │ │ │ │ │ ├── gpt_model.cpython-310.pyc │ │ │ │ │ ├── language_model.cpython-310.pyc │ │ │ │ │ ├── module.cpython-310.pyc │ │ │ │ │ ├── rms_norm.cpython-310.pyc │ │ │ │ │ ├── t5_model.cpython-310.pyc │ │ │ │ │ ├── transformer.cpython-310.pyc │ │ │ │ │ └── utils.cpython-310.pyc │ │ │ │ ├── bert_model.py │ │ │ │ ├── biencoder_model.py │ │ │ │ ├── classification.py │ │ │ │ ├── enums.py │ │ │ │ ├── fused_bias_gelu.py │ │ │ │ ├── fused_layer_norm.py │ │ │ │ ├── fused_softmax.py │ │ │ │ ├── gpt_model.py │ │ │ │ ├── language_model.py │ │ │ │ ├── module.py │ │ │ │ ├── multiple_choice.py │ │ │ │ ├── realm_model.py │ │ │ │ ├── rms_norm.py │ │ │ │ ├── t5_model.py │ │ │ │ ├── transformer.py │ │ │ │ ├── utils.py │ │ │ │ └── vision │ │ │ │ │ ├── __pycache__ │ │ │ │ │ └── knn_monitor.cpython-310.pyc │ │ │ │ │ ├── classification.py │ │ │ │ │ ├── dino.py │ │ │ │ │ ├── esvit_swin_backbone.py │ │ │ │ │ ├── inpainting.py │ │ │ │ │ ├── knn_monitor.py │ │ │ │ │ ├── mit_backbone.py │ │ │ │ │ ├── swin_backbone.py │ │ │ │ │ ├── utils.py │ │ │ │ │ └── vit_backbone.py │ │ │ └── mpu │ │ │ │ └── tests │ │ │ │ ├── __init__.py │ │ │ │ ├── commons.py │ │ │ │ ├── test_cross_entropy.py │ │ │ │ ├── test_data.py │ │ │ │ ├── test_initialize.py │ │ │ │ ├── test_layers.py │ │ │ │ └── test_random.py │ │ └── training │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── __init__.cpython-38.pyc │ │ │ ├── arguments.cpython-310.pyc │ │ │ ├── arguments.cpython-38.pyc │ │ │ ├── checkpointing.cpython-310.pyc │ │ │ ├── dist_signal_handler.cpython-310.pyc │ │ │ ├── dist_signal_handler.cpython-38.pyc │ │ │ ├── global_vars.cpython-310.pyc │ │ │ ├── global_vars.cpython-38.pyc │ │ │ ├── initialize.cpython-310.pyc │ │ │ ├── initialize.cpython-38.pyc │ │ │ ├── log_handler.cpython-310.pyc │ │ │ ├── microbatches.cpython-310.pyc │ │ │ ├── microbatches.cpython-38.pyc │ │ │ ├── optimizer_param_scheduler.cpython-310.pyc │ │ │ ├── theoretical_memory_usage.cpython-310.pyc │ │ │ ├── training.cpython-310.pyc │ │ │ ├── utils.cpython-310.pyc │ │ │ └── yaml_arguments.cpython-310.pyc │ │ │ ├── arguments.py │ │ │ ├── checkpointing.py │ │ │ ├── dist_signal_handler.py │ │ │ ├── global_vars.py │ │ │ ├── initialize.py │ │ │ ├── log_handler.py │ │ │ ├── microbatches.py │ │ │ ├── optimizer_param_scheduler.py │ │ │ ├── theoretical_memory_usage.py │ │ │ ├── tokenizer │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-310.pyc │ │ │ │ ├── __init__.cpython-38.pyc │ │ │ │ ├── bert_tokenization.cpython-310.pyc │ │ │ │ ├── bert_tokenization.cpython-38.pyc │ │ │ │ ├── gpt2_tokenization.cpython-310.pyc │ │ │ │ ├── gpt2_tokenization.cpython-38.pyc │ │ │ │ ├── tokenizer.cpython-310.pyc │ │ │ │ └── tokenizer.cpython-38.pyc │ │ │ ├── bert_tokenization.py │ │ │ ├── gpt2_tokenization.py │ │ │ └── tokenizer.py │ │ │ ├── training.py │ │ │ ├── utils.py │ │ │ └── yaml_arguments.py │ ├── pretrain_bert.py │ ├── pretrain_gpt.py │ ├── pretrain_ict.py │ ├── pretrain_retro.py │ ├── pretrain_t5.py │ ├── pretrain_vision_classify.py │ ├── pretrain_vision_dino.py │ ├── pretrain_vision_inpaint.py │ ├── pretrain_vlm.py │ ├── pyproject.toml │ ├── report_theoretical_memory.py │ ├── setup.py │ ├── tasks │ │ ├── data_utils.py │ │ ├── ensemble_classifier.py │ │ ├── eval_utils.py │ │ ├── finetune_utils.py │ │ ├── glue │ │ │ ├── data.py │ │ │ ├── finetune.py │ │ │ ├── mnli.py │ │ │ └── qqp.py │ │ ├── main.py │ │ ├── msdp │ │ │ ├── README.md │ │ │ ├── evaluate.py │ │ │ ├── main.py │ │ │ ├── metrics.py │ │ │ ├── preprocessing.py │ │ │ └── prompt.py │ │ ├── orqa │ │ │ ├── README.md │ │ │ ├── evaluate_orqa.py │ │ │ ├── evaluate_utils.py │ │ │ ├── supervised │ │ │ │ ├── data.py │ │ │ │ ├── eval_utils.py │ │ │ │ └── finetune.py │ │ │ └── unsupervised │ │ │ │ ├── nq.py │ │ │ │ ├── qa_utils.py │ │ │ │ └── tokenizers.py │ │ ├── race │ │ │ ├── data.py │ │ │ └── finetune.py │ │ ├── vision │ │ │ ├── classification │ │ │ │ ├── classification.py │ │ │ │ └── eval_utils.py │ │ │ ├── finetune_utils.py │ │ │ ├── main.py │ │ │ └── segmentation │ │ │ │ ├── cityscapes.py │ │ │ │ ├── data.py │ │ │ │ ├── finetune_segformer.py │ │ │ │ ├── finetune_setr.py │ │ │ │ ├── metrics.py │ │ │ │ ├── seg_heads.py │ │ │ │ ├── seg_models.py │ │ │ │ ├── transforms.py │ │ │ │ └── utils.py │ │ └── zeroshot_gpt │ │ │ ├── datasets.py │ │ │ ├── detokenizer.py │ │ │ └── evaluate.py │ ├── tests │ │ ├── __init__.py │ │ ├── functional_tests │ │ │ ├── __init__.py │ │ │ ├── jet_recipes │ │ │ │ ├── MR-bert.yaml │ │ │ │ ├── MR-gpt.yaml │ │ │ │ ├── MR-multimodal.yaml │ │ │ │ ├── MR-t5.yaml │ │ │ │ ├── build-pyt.yaml │ │ │ │ ├── local-generator.py │ │ │ │ ├── monthly-t5.yaml │ │ │ │ ├── nightly-bert.yaml │ │ │ │ ├── nightly-gpt.yaml │ │ │ │ └── weekly-gpt.yaml │ │ │ ├── python_test_utils │ │ │ │ ├── __init__.py │ │ │ │ ├── check_slurm_job_completion.py │ │ │ │ ├── common.py │ │ │ │ ├── get_test_results_from_tensorboard_logs.py │ │ │ │ ├── jet_test_pipeline.py │ │ │ │ ├── multitest_ci_pipeline.py │ │ │ │ ├── test_ci_pipeline.py │ │ │ │ ├── test_fp8_ci_pipeline.py │ │ │ │ └── test_resume_checkpoint_pipeline.py │ │ │ ├── shell_test_utils │ │ │ │ ├── jobwait.sh │ │ │ │ ├── run_selene_test_launcher_script.sh │ │ │ │ └── run_selene_test_resume_checkpoint_launcher_script.sh │ │ │ ├── test_results │ │ │ │ ├── bert │ │ │ │ │ ├── bert_tp1_pp2_1nodes_50steps.json │ │ │ │ │ ├── bert_tp1_pp2_1nodes_50steps_core_enabled.json │ │ │ │ │ ├── bert_tp1_pp2_1nodes_50steps_core_enabled_rope_embeddings.json │ │ │ │ │ ├── bert_tp1_pp2_1nodes_50steps_core_enabled_sequence_parallel.json │ │ │ │ │ ├── bert_tp1_pp4_1nodes_50steps.json │ │ │ │ │ ├── bert_tp1_pp4_interleaved_1nodes_50steps.json │ │ │ │ │ ├── bert_tp1_pp4_interleaved_1nodes_50steps_core_enabled.json │ │ │ │ │ ├── bert_tp2_pp2_1nodes_50steps.json │ │ │ │ │ ├── bert_tp2_pp2_1nodes_50steps_core_enabled.json │ │ │ │ │ ├── bert_tp2_pp2_1nodes_50steps_core_enabled_local_spec.json │ │ │ │ │ ├── bert_tp4_pp1_1nodes_50steps.json │ │ │ │ │ └── bert_tp4_pp1_1nodes_50steps_core_enabled.json │ │ │ │ ├── gpt3 │ │ │ │ │ ├── gpt3_tp1_pp1_1nodes_50steps_dist_optimizer.json │ │ │ │ │ ├── gpt3_tp1_pp1_1nodes_50steps_dist_optimizer_overlap_grad_reduce.json │ │ │ │ │ ├── gpt3_tp1_pp1_1nodes_50steps_dist_optimizer_overlap_grad_reduce_param_gather.json │ │ │ │ │ ├── gpt3_tp1_pp1_1nodes_50steps_overlap_grad_reduce.json │ │ │ │ │ ├── gpt3_tp1_pp2_1nodes_50steps.json │ │ │ │ │ ├── gpt3_tp1_pp2_1nodes_50steps_core_enabled.json │ │ │ │ │ ├── gpt3_tp1_pp2_1nodes_50steps_core_enabled_rope_embeddings.json │ │ │ │ │ ├── gpt3_tp1_pp4_1nodes_50steps.json │ │ │ │ │ ├── gpt3_tp1_pp4_1nodes_50steps_core_enabled.json │ │ │ │ │ ├── gpt3_tp1_pp4_1nodes_50steps_core_enabled_disable_bias_linear.json │ │ │ │ │ ├── gpt3_tp1_pp4_1nodes_50steps_core_enabled_sequence_parallel.json │ │ │ │ │ ├── gpt3_tp1_pp4_1nodes_50steps_core_enabled_swiglu.json │ │ │ │ │ ├── gpt3_tp1_pp4_1nodes_50steps_core_enabled_untie_embeddings_and_outputs.json │ │ │ │ │ ├── gpt3_tp1_pp4_1nodes_50steps_overlap_grad_reduce.json │ │ │ │ │ ├── gpt3_tp1_pp4_interleaved_1nodes_50steps.json │ │ │ │ │ ├── gpt3_tp1_pp4_interleaved_1nodes_50steps_core_enabled.json │ │ │ │ │ ├── gpt3_tp1_pp4_interleaved_1nodes_50steps_dist_optimizer_overlap_grad_reduce.json │ │ │ │ │ ├── gpt3_tp1_pp4_interleaved_1nodes_50steps_dist_optimizer_overlap_grad_reduce_param_gather.json │ │ │ │ │ ├── gpt3_tp1_pp4_interleaved_1nodes_50steps_overlap_grad_reduce.json │ │ │ │ │ ├── gpt3_tp2_pp1_1nodes_50steps_core_enabled_context_parallelism_cp2.json │ │ │ │ │ ├── gpt3_tp2_pp1_1nodes_50steps_core_enabled_te_8experts2parallel.json │ │ │ │ │ ├── gpt3_tp2_pp1_1nodes_50steps_core_enabled_te_8experts2parallel_groupedGEMM.json │ │ │ │ │ ├── gpt3_tp2_pp1_1nodes_50steps_core_enabled_te_8experts2parallel_top2router.json │ │ │ │ │ ├── gpt3_tp2_pp2_1nodes_50steps.json │ │ │ │ │ ├── gpt3_tp2_pp2_1nodes_50steps_4experts.json │ │ │ │ │ ├── gpt3_tp2_pp2_1nodes_50steps_core_enabled.json │ │ │ │ │ ├── gpt3_tp2_pp2_1nodes_50steps_core_enabled_context_parallelism_cp2.json │ │ │ │ │ ├── gpt3_tp2_pp2_1nodes_50steps_core_enabled_te_2experts.json │ │ │ │ │ ├── gpt3_tp2_pp2_1nodes_50steps_core_enabled_te_4experts2parallel.json │ │ │ │ │ ├── gpt3_tp2_pp2_1nodes_50steps_overlap_grad_reduce.json │ │ │ │ │ ├── gpt3_tp2_pp2_1nodes_50steps_te_enabled.json │ │ │ │ │ ├── gpt3_tp4_pp1_1nodes_50steps.json │ │ │ │ │ ├── gpt3_tp4_pp1_1nodes_50steps_core_enabled.json │ │ │ │ │ ├── gpt3_tp4_pp1_1nodes_50steps_dist_optimizer_overlap_grad_reduce.json │ │ │ │ │ ├── gpt3_tp4_pp1_1nodes_50steps_dist_optimizer_overlap_grad_reduce_param_gather.json │ │ │ │ │ └── gpt3_tp4_pp1_1nodes_50steps_overlap_grad_reduce.json │ │ │ │ ├── jet │ │ │ │ │ ├── bert-345m-merge-request-dgx-a100-1n8g-mcore-tp2-pp2-local-spec.json │ │ │ │ │ ├── bert-345m-merge-request-dgx-a100-1n8g-mcore-tp2-pp2.json │ │ │ │ │ ├── bert-345m-merge-request-dgx-a100-1n8g-tp1-pp4-vp2.json │ │ │ │ │ ├── bert-345m-merge-request-dgx-a100-1n8g-tp2-pp2.json │ │ │ │ │ ├── bert-345m-merge-request-resume-dgx-a100-1n8g-tp1-pp2.json │ │ │ │ │ ├── bert-345m-nightly-dgx-a100-1n8g-mcore-tp1-pp2.json │ │ │ │ │ ├── bert-345m-nightly-dgx-a100-1n8g-mcore-tp1-pp4-vp2.json │ │ │ │ │ ├── bert-345m-nightly-dgx-a100-1n8g-mcore-tp4-pp1.json │ │ │ │ │ ├── bert-345m-nightly-dgx-a100-1n8g-tp1-pp2.json │ │ │ │ │ ├── bert-345m-nightly-dgx-a100-1n8g-tp4-pp1.json │ │ │ │ │ ├── dgx_h100 │ │ │ │ │ │ ├── bert_345m_mcore-pyt_merge-request-resume_bf16_nodes-1_gpus-8_bs-128_steps-100_tp-1_pp-2_mcore-false_te-false.json │ │ │ │ │ │ ├── bert_345m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-128_steps-50_tp-1_pp-4_mcore-false_te-false_vp-2.json │ │ │ │ │ │ ├── bert_345m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-128_steps-50_tp-2_pp-2_args-local-spec_mcore-true_te-false.json │ │ │ │ │ │ ├── bert_345m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-128_steps-50_tp-2_pp-2_mcore-false_te-false.json │ │ │ │ │ │ ├── bert_345m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-128_steps-50_tp-2_pp-2_mcore-true_te-false.json │ │ │ │ │ │ ├── gpt3_345m_mcore-pyt_merge-request-resume_bf16_nodes-1_gpus-8_bs-32_steps-100_tp-1_pp-2_mcore-false_te-false.json │ │ │ │ │ │ ├── gpt3_345m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-1_pp-1_args--recompute-granularity-full-recompute-method-uniform-recompute-num-layers-1-_mcore-true_te-false.json │ │ │ │ │ │ ├── gpt3_345m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-1_pp-1_args-dist-optimizer-no-mmap-bin-files_mcore-true_te-false.json │ │ │ │ │ │ ├── gpt3_345m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-1_pp-1_args-dist-optimizer_mcore-true_te-false.json │ │ │ │ │ │ ├── gpt3_345m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-1_pp-1_args-uniform-full-recompute_mcore-true_te-false.json │ │ │ │ │ │ ├── gpt3_345m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-1_pp-2_args--position-embedding-type-rope-_mcore-true_te-false.json │ │ │ │ │ │ ├── gpt3_345m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-1_pp-2_args-rope-embeddings-interleaved-no-fusion_mcore-true_te-false.json │ │ │ │ │ │ ├── gpt3_345m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-1_pp-2_args-rope-embeddings_mcore-true_te-false.json │ │ │ │ │ │ ├── gpt3_345m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-1_pp-4_args--disable-bias-linear_mcore-true_te-false.json │ │ │ │ │ │ ├── gpt3_345m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-1_pp-4_args--sequence-parallel_mcore-true_te-false.json │ │ │ │ │ │ ├── gpt3_345m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-1_pp-4_args--swiglu_mcore-true_te-false.json │ │ │ │ │ │ ├── gpt3_345m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-1_pp-4_args--untie-embeddings-and-output-weights_mcore-true_te-false.json │ │ │ │ │ │ ├── gpt3_345m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-1_pp-4_args-disable-bias-linear_mcore-true_te-false.json │ │ │ │ │ │ ├── gpt3_345m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-1_pp-4_args-dist-optimizer-overlap-grad-reduce-param-gather_mcore-true_te-false_vp-1.json │ │ │ │ │ │ ├── gpt3_345m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-1_pp-4_args-dist-optimizer-overlap-grad-reduce-untied_mcore-true_te-false_vp-1.json │ │ │ │ │ │ ├── gpt3_345m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-1_pp-4_args-dist-optimizer-overlap-grad-reduce_mcore-true_te-false_vp-1.json │ │ │ │ │ │ ├── gpt3_345m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-1_pp-4_args-sequence-parallel_mcore-true_te-false.json │ │ │ │ │ │ ├── gpt3_345m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-1_pp-4_args-swiglu_mcore-true_te-false.json │ │ │ │ │ │ ├── gpt3_345m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-1_pp-4_args-untie-embeddings-and-outputs_mcore-true_te-false.json │ │ │ │ │ │ ├── gpt3_345m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-1_pp-4_mcore-false_te-false_vp-1.json │ │ │ │ │ │ ├── gpt3_345m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-1_pp-4_mcore-true_te-false_vp-1.json │ │ │ │ │ │ ├── gpt3_345m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-2_pp-1_args--sequence-parallel-num-experts-8-expert-model-parallel-size-2-_mcore-true_te-false.json │ │ │ │ │ │ ├── gpt3_345m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-2_pp-1_args-te-8experts2parallel-dist-optimizer_mcore-true_te-false.json │ │ │ │ │ │ ├── gpt3_345m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-2_pp-1_args-te-8experts2parallel-groupedgemm_mcore-true_te-false.json │ │ │ │ │ │ ├── gpt3_345m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-2_pp-1_args-te-8experts2parallel-top2router_mcore-true_te-false.json │ │ │ │ │ │ ├── gpt3_345m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-2_pp-1_args-te-8experts2parallel_mcore-true_te-false.json │ │ │ │ │ │ ├── gpt3_345m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-2_pp-2_args-no-mmap-bin-files_mcore-true_te-false.json │ │ │ │ │ │ ├── gpt3_345m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-2_pp-2_mcore-false_te-false.json │ │ │ │ │ │ ├── gpt3_345m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-2_pp-2_mcore-false_te-true.json │ │ │ │ │ │ ├── gpt3_345m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-2_pp-2_mcore-true_te-false.json │ │ │ │ │ │ ├── gpt3_345m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-4_pp-1_args-dist-optimizer-overlap-grad-reduce-param-gather_mcore-true_te-false.json │ │ │ │ │ │ ├── gpt3_345m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-4_pp-1_args-dist-optimizer-overlap-grad-reduce_mcore-true_te-false.json │ │ │ │ │ │ ├── gpt3_345m_mcore-pyt_nightly_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-1_pp-1_args--overlap-grad-reduce_mcore-false_te-false.json │ │ │ │ │ │ ├── gpt3_345m_mcore-pyt_nightly_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-1_pp-1_args--use-distributed-optimizer-overlap-grad-reduce-_mcore-false_te-false.json │ │ │ │ │ │ ├── gpt3_345m_mcore-pyt_nightly_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-1_pp-2_mcore-false_te-false.json │ │ │ │ │ │ ├── gpt3_345m_mcore-pyt_nightly_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-1_pp-2_mcore-true_te-false.json │ │ │ │ │ │ ├── gpt3_345m_mcore-pyt_nightly_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-1_pp-4_args--overlap-grad-reduce_mcore-false_te-false.json │ │ │ │ │ │ ├── gpt3_345m_mcore-pyt_nightly_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-1_pp-4_args--overlap-grad-reduce_mcore-false_te-false_vp-1.json │ │ │ │ │ │ ├── gpt3_345m_mcore-pyt_nightly_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-1_pp-4_mcore-false_te-false.json │ │ │ │ │ │ ├── gpt3_345m_mcore-pyt_nightly_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-1_pp-4_mcore-true_te-false.json │ │ │ │ │ │ ├── gpt3_345m_mcore-pyt_nightly_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-2_pp-2_args--num-experts-2-_mcore-true_te-false.json │ │ │ │ │ │ ├── gpt3_345m_mcore-pyt_nightly_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-2_pp-2_args--num-experts-4-_mcore-false_te-false.json │ │ │ │ │ │ ├── gpt3_345m_mcore-pyt_nightly_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-2_pp-2_args--overlap-grad-reduce_mcore-false_te-false.json │ │ │ │ │ │ ├── gpt3_345m_mcore-pyt_nightly_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-2_pp-2_args--sequence-parallel-num-experts-4-expert-model-parallel-size-2-_mcore-true_te-false.json │ │ │ │ │ │ ├── gpt3_345m_mcore-pyt_nightly_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-4_pp-1_args--overlap-grad-reduce_mcore-false_te-false.json │ │ │ │ │ │ ├── gpt3_345m_mcore-pyt_nightly_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-4_pp-1_mcore-false_te-false.json │ │ │ │ │ │ ├── gpt3_345m_mcore-pyt_nightly_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-4_pp-1_mcore-true_te-false.json │ │ │ │ │ │ └── t5_220m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-32_steps-100_tp-1_pp-1_mcore-true_te-true_vp-1.json │ │ │ │ │ ├── gpt3-345m-merge-request-dgx-a100-1n8g-mcore-tp1-pp1-dist-optimizer-no-mmap-bin-files.json │ │ │ │ │ ├── gpt3-345m-merge-request-dgx-a100-1n8g-mcore-tp1-pp1-dist-optimizer.json │ │ │ │ │ ├── gpt3-345m-merge-request-dgx-a100-1n8g-mcore-tp1-pp1-uniform-full-recompute.json │ │ │ │ │ ├── gpt3-345m-merge-request-dgx-a100-1n8g-mcore-tp1-pp2-rope-embeddings-interleaved-no-fusion.json │ │ │ │ │ ├── gpt3-345m-merge-request-dgx-a100-1n8g-mcore-tp1-pp2-rope-embeddings.json │ │ │ │ │ ├── gpt3-345m-merge-request-dgx-a100-1n8g-mcore-tp1-pp4-disable-bias-linear.json │ │ │ │ │ ├── gpt3-345m-merge-request-dgx-a100-1n8g-mcore-tp1-pp4-sequence-parallel.json │ │ │ │ │ ├── gpt3-345m-merge-request-dgx-a100-1n8g-mcore-tp1-pp4-swiglu.json │ │ │ │ │ ├── gpt3-345m-merge-request-dgx-a100-1n8g-mcore-tp1-pp4-untie-embeddings-and-outputs.json │ │ │ │ │ ├── gpt3-345m-merge-request-dgx-a100-1n8g-mcore-tp1-pp4-vp1-decoupled-lr.json │ │ │ │ │ ├── gpt3-345m-merge-request-dgx-a100-1n8g-mcore-tp1-pp4-vp1-dist-optimizer-overlap-grad-reduce-param-gather.json │ │ │ │ │ ├── gpt3-345m-merge-request-dgx-a100-1n8g-mcore-tp1-pp4-vp1-dist-optimizer-overlap-grad-reduce-untied.json │ │ │ │ │ ├── gpt3-345m-merge-request-dgx-a100-1n8g-mcore-tp1-pp4-vp1-dist-optimizer-overlap-grad-reduce.json │ │ │ │ │ ├── gpt3-345m-merge-request-dgx-a100-1n8g-mcore-tp1-pp4-vp1.json │ │ │ │ │ ├── gpt3-345m-merge-request-dgx-a100-1n8g-mcore-tp2-pp1-te-8experts2parallel-dist-optimizer.json │ │ │ │ │ ├── gpt3-345m-merge-request-dgx-a100-1n8g-mcore-tp2-pp1-te-8experts2parallel-groupedgemm.json │ │ │ │ │ ├── gpt3-345m-merge-request-dgx-a100-1n8g-mcore-tp2-pp1-te-8experts2parallel-overlap-grad-reduce-param-gather-groupedgemm.json │ │ │ │ │ ├── gpt3-345m-merge-request-dgx-a100-1n8g-mcore-tp2-pp1-te-8experts2parallel-top2router.json │ │ │ │ │ ├── gpt3-345m-merge-request-dgx-a100-1n8g-mcore-tp2-pp1-te-8experts2parallel.json │ │ │ │ │ ├── gpt3-345m-merge-request-dgx-a100-1n8g-mcore-tp2-pp2-no-create-attention-mask-in-dataloader.json │ │ │ │ │ ├── gpt3-345m-merge-request-dgx-a100-1n8g-mcore-tp2-pp2-no-mmap-bin-files.json │ │ │ │ │ ├── gpt3-345m-merge-request-dgx-a100-1n8g-mcore-tp2-pp2.json │ │ │ │ │ ├── gpt3-345m-merge-request-dgx-a100-1n8g-mcore-tp4-pp1-dist-optimizer-overlap-grad-reduce-param-gather.json │ │ │ │ │ ├── gpt3-345m-merge-request-dgx-a100-1n8g-mcore-tp4-pp1-dist-optimizer-overlap-grad-reduce.json │ │ │ │ │ ├── gpt3-345m-merge-request-dgx-a100-1n8g-mcore-tp4-pp1.json │ │ │ │ │ ├── gpt3-345m-merge-request-dgx-a100-1n8g-te-tp2-pp2.json │ │ │ │ │ ├── gpt3-345m-merge-request-dgx-a100-1n8g-tp1-pp4-vp1.json │ │ │ │ │ ├── gpt3-345m-merge-request-dgx-a100-1n8g-tp2-pp2.json │ │ │ │ │ ├── gpt3-345m-merge-request-resume-dgx-a100-1n8g-tp1-pp2.json │ │ │ │ │ ├── gpt3-345m-nightly-dgx-a100-1n8g-mcore-tp1-pp1-dist-optimizer-overlap-grad-reduce-param-gather.json │ │ │ │ │ ├── gpt3-345m-nightly-dgx-a100-1n8g-mcore-tp1-pp2.json │ │ │ │ │ ├── gpt3-345m-nightly-dgx-a100-1n8g-mcore-tp1-pp4.json │ │ │ │ │ ├── gpt3-345m-nightly-dgx-a100-1n8g-mcore-tp2-pp2-te-2experts.json │ │ │ │ │ ├── gpt3-345m-nightly-dgx-a100-1n8g-mcore-tp2-pp2-te-4experts2parallel.json │ │ │ │ │ ├── gpt3-345m-nightly-dgx-a100-1n8g-mcore-tp4-pp1.json │ │ │ │ │ ├── gpt3-345m-nightly-dgx-a100-1n8g-tp1-pp1-dist-optimizer-overlap-grad-reduce.json │ │ │ │ │ ├── gpt3-345m-nightly-dgx-a100-1n8g-tp1-pp1-overlap-grad-reduce.json │ │ │ │ │ ├── gpt3-345m-nightly-dgx-a100-1n8g-tp1-pp2.json │ │ │ │ │ ├── gpt3-345m-nightly-dgx-a100-1n8g-tp1-pp4-overlap-grad-reduce.json │ │ │ │ │ ├── gpt3-345m-nightly-dgx-a100-1n8g-tp1-pp4-vp1-overlap-grad-reduce.json │ │ │ │ │ ├── gpt3-345m-nightly-dgx-a100-1n8g-tp1-pp4.json │ │ │ │ │ ├── gpt3-345m-nightly-dgx-a100-1n8g-tp2-pp2-4experts.json │ │ │ │ │ ├── gpt3-345m-nightly-dgx-a100-1n8g-tp2-pp2-overlap-grad-reduce.json │ │ │ │ │ ├── gpt3-345m-nightly-dgx-a100-1n8g-tp4-pp1-overlap-grad-reduce.json │ │ │ │ │ ├── gpt3-345m-nightly-dgx-a100-1n8g-tp4-pp1.json │ │ │ │ │ ├── gpt3-345m-weekly-dgx-h100-1n8g-mcore-tp1-pp1-bf16-baseline.json │ │ │ │ │ ├── gpt3_345m_mcore-pyt_func-train_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-1_pp-4_args--sequence-parallel.json │ │ │ │ │ ├── multimodal-llava-merge-request-dgx-a100-1n8g-mcore-te-tp1-pp1.json │ │ │ │ │ └── t5-220m-merge-request-dgx-a100-1n8g-mcore-te-tp1-pp1-vp1.json │ │ │ │ ├── retro │ │ │ │ │ └── retro_tp1_pp1_1nodes_50steps_core_enabled.json │ │ │ │ └── t5 │ │ │ │ │ └── t5_tp1_pp1_interleaved_1nodes_100steps_te_enabled_core_enabled.json │ │ │ └── test_scripts │ │ │ │ ├── bert │ │ │ │ ├── pretrain_bert_distributed_test.sh │ │ │ │ ├── sbatch_bert_distributed_resume_checkpoint_test.sh │ │ │ │ └── sbatch_bert_distributed_test.sh │ │ │ │ ├── gpt3 │ │ │ │ ├── pretrain_gpt3_distributed_test.sh │ │ │ │ ├── sbatch_gpt3_distributed_resume_checkpoint_test.sh │ │ │ │ └── sbatch_gpt3_distributed_test.sh │ │ │ │ ├── multimodal │ │ │ │ └── pretrain_llava_distributed_test.sh │ │ │ │ ├── retro │ │ │ │ ├── pretrain_retro_distributed_test.sh │ │ │ │ ├── sbatch_retro_distributed_resume_checkpoint_test.sh │ │ │ │ └── sbatch_retro_distributed_test.sh │ │ │ │ └── t5 │ │ │ │ ├── pretrain_t5_distributed_test.sh │ │ │ │ ├── sbatch_t5_distributed_resume_checkpoint_test.sh │ │ │ │ └── sbatch_t5_distributed_test.sh │ │ └── unit_tests │ │ │ ├── __init__.py │ │ │ ├── data │ │ │ ├── __init__.py │ │ │ ├── test_builder.py │ │ │ ├── test_mock_gpt_dataset.py │ │ │ ├── test_multimodal_dataset.py │ │ │ ├── test_preprocess_data.py │ │ │ └── test_preprocess_mmdata.py │ │ │ ├── dist_checkpointing │ │ │ ├── __init__.py │ │ │ ├── conftest.py │ │ │ ├── models │ │ │ │ ├── __init__.py │ │ │ │ ├── common.py │ │ │ │ ├── test_bert_model.py │ │ │ │ ├── test_gpt_model.py │ │ │ │ ├── test_mlp_glu.py │ │ │ │ ├── test_retro_model.py │ │ │ │ ├── test_sequential_mlp.py │ │ │ │ └── test_t5_model.py │ │ │ ├── test_mapping.py │ │ │ ├── test_optimizer.py │ │ │ └── test_serialization.py │ │ │ ├── fusions │ │ │ └── test_torch_softmax.py │ │ │ ├── models │ │ │ ├── __init__.py │ │ │ ├── test_base_embedding.py │ │ │ ├── test_bert_model.py │ │ │ ├── test_clip_vit_model.py │ │ │ ├── test_gpt_model.py │ │ │ ├── test_llava_model.py │ │ │ ├── test_multimodal_projector.py │ │ │ └── test_t5_model.py │ │ │ ├── pipeline_parallel │ │ │ ├── __init__.py │ │ │ └── test_schedules.py │ │ │ ├── tensor_parallel │ │ │ ├── test_cross_entropy.py │ │ │ ├── test_data.py │ │ │ ├── test_initialization.py │ │ │ ├── test_mappings.py │ │ │ ├── test_random.py │ │ │ └── test_tensor_parallel_utils.py │ │ │ ├── test_basic.py │ │ │ ├── test_imports.py │ │ │ ├── test_parallel_state.py │ │ │ ├── test_training.py │ │ │ ├── test_utilities.py │ │ │ ├── test_utils.py │ │ │ └── transformer │ │ │ ├── __init__.py │ │ │ ├── moe │ │ │ ├── __init__.py │ │ │ ├── test_grouped_mlp.py │ │ │ ├── test_routers.py │ │ │ ├── test_sequential_mlp.py │ │ │ └── test_token_dispatcher.py │ │ │ ├── test_attention.py │ │ │ ├── test_attention_packed_seq.py │ │ │ ├── test_core_attention.py │ │ │ ├── test_mlp.py │ │ │ ├── test_module.py │ │ │ ├── test_retro_attention.py │ │ │ ├── test_spec_customization.py │ │ │ ├── test_transformer_block.py │ │ │ └── test_transformer_layer.py │ └── tools │ │ ├── autoformat.sh │ │ ├── bert_embedding │ │ ├── __init__.py │ │ ├── dataset.py │ │ ├── embed.py │ │ ├── external_libs.py │ │ └── huggingface.py │ │ ├── checkpoint │ │ ├── convert.py │ │ ├── loader_llama2.py │ │ ├── loader_llama2_hf.py │ │ ├── loader_mcore.py │ │ ├── loader_megatron.py │ │ ├── saver_mcore.py │ │ ├── saver_megatron.py │ │ ├── setter.py │ │ └── utils.py │ │ ├── linter.py │ │ ├── merge_datasets.py │ │ ├── openwebtext │ │ ├── README.md │ │ ├── add_id.py │ │ ├── blacklist_urls.py │ │ ├── cleanup_dataset.py │ │ ├── cleanup_fix_dataset.py │ │ ├── filter_ngrams.py │ │ ├── find_duplicates.py │ │ ├── group_duplicate_url.py │ │ ├── merge_jsons.py │ │ └── remove_group_duplicates.py │ │ ├── preprocess_data.py │ │ ├── preprocess_data_nmt.py │ │ ├── preprocess_mmdata.py │ │ ├── retro │ │ ├── README.md │ │ ├── build_db.md │ │ ├── cli │ │ │ ├── __init__.py │ │ │ ├── __main__.py │ │ │ └── cli.py │ │ ├── config_utils.py │ │ ├── docker │ │ │ └── Dockerfile │ │ ├── preprocess_data.py │ │ ├── sft │ │ │ ├── README.md │ │ │ ├── dataset_conv.py │ │ │ ├── open_inst.sh │ │ │ ├── sft_retro.py │ │ │ └── sft_retro_lm.sh │ │ └── text_generation │ │ │ ├── evaluate.py │ │ │ ├── metrics.py │ │ │ ├── retro_api.py │ │ │ ├── retro_generate.sh │ │ │ ├── retro_generation.py │ │ │ └── retro_text_generation.py │ │ ├── run_text_generation_server.py │ │ └── text_generation_cli.py ├── README.md ├── README_zh-CN.md ├── examples │ ├── .DS_Store │ └── idefics2 │ │ ├── .DS_Store │ │ ├── pretrain_megatron_idefics2.py │ │ ├── run_cot_cmd.sh │ │ └── train_llava_instruct_webdataset_cot.sh ├── megatron_patch │ ├── .DS_Store │ ├── __init__.py │ ├── arguments.py │ ├── data │ │ ├── .DS_Store │ │ ├── __init__.py │ │ ├── bloom.py │ │ ├── data_sampler.py │ │ ├── glm.py │ │ ├── idefics2 │ │ │ ├── constants.py │ │ │ ├── idefics2_image_processor.py │ │ │ └── mm_pretrain_dataset.py │ │ ├── llama.py │ │ ├── llava │ │ │ ├── constants.py │ │ │ ├── conversation.py │ │ │ ├── cvcuda_image_processing_clip.py │ │ │ ├── mm_pretrain_dataset.py │ │ │ └── mm_utils.py │ │ ├── qwen_vl.py │ │ ├── starcoder.py │ │ └── utils.py │ ├── finetune_utils.py │ ├── generation │ │ ├── api.py │ │ ├── generation.py │ │ ├── gpt_predictor.py │ │ ├── megatron.md │ │ └── tokenization.py │ ├── initialize.py │ ├── lm_evaluate.py │ ├── model │ │ ├── __init__.py │ │ ├── baichuan │ │ │ ├── __init__.py │ │ │ ├── gpt_model.py │ │ │ ├── language_model.py │ │ │ └── transformer.py │ │ ├── baichuan2 │ │ │ ├── gpt_model.py │ │ │ ├── language_model.py │ │ │ ├── layers.py │ │ │ └── transformer.py │ │ ├── bloom │ │ │ ├── __init__.py │ │ │ ├── gpt_model.py │ │ │ ├── language_model.py │ │ │ ├── layers.py │ │ │ ├── positional_embeddings.py │ │ │ └── transformer.py │ │ ├── chatglm │ │ │ ├── __init__.py │ │ │ ├── gpt_model.py │ │ │ ├── language_model.py │ │ │ ├── positional_embeddings.py │ │ │ └── transformer.py │ │ ├── falcon │ │ │ ├── __init__.py │ │ │ ├── gpt_model.py │ │ │ ├── language_model.py │ │ │ └── transformer.py │ │ ├── falcon40b │ │ │ ├── __init__.py │ │ │ ├── gpt_model.py │ │ │ ├── language_model.py │ │ │ └── transformer.py │ │ ├── galactica │ │ │ ├── __init__.py │ │ │ ├── gpt_model.py │ │ │ ├── language_model.py │ │ │ └── transformer.py │ │ ├── glm130b │ │ │ ├── __init__.py │ │ │ ├── gpt_model.py │ │ │ ├── language_model.py │ │ │ └── transformer.py │ │ ├── idefics2 │ │ │ ├── __init__.py │ │ │ ├── get_idefics2vit_layer_spec.py │ │ │ ├── gpt_model.py │ │ │ ├── idefics_vision_tower.py │ │ │ ├── idefics_vlm_model.py │ │ │ ├── language_model.py │ │ │ ├── language_model_llama3.py │ │ │ ├── perceiver_transformer.py │ │ │ ├── rotary_pos_embedding.py │ │ │ ├── rotary_pos_embedding_llama3.py │ │ │ └── transformer.py │ │ ├── llama │ │ │ ├── __init__.py │ │ │ ├── gpt_model.py │ │ │ ├── language_model.py │ │ │ ├── positional_embeddings.py │ │ │ └── transformer.py │ │ ├── llama2 │ │ │ ├── __init__.py │ │ │ ├── gpt_model.py │ │ │ ├── language_model.py │ │ │ ├── rotary_pos_embedding.py │ │ │ └── transformer.py │ │ ├── llama3 │ │ │ ├── gpt_model.py │ │ │ ├── language_model.py │ │ │ └── transformer.py │ │ ├── llava │ │ │ ├── __init__.py │ │ │ ├── clip_encoder.py │ │ │ ├── gpt_model.py │ │ │ ├── language_model.py │ │ │ ├── mm_projector_builder.py │ │ │ ├── rotary_pos_embedding.py │ │ │ ├── transformer.py │ │ │ └── vlm_model.py │ │ ├── mistral │ │ │ ├── __init__.py │ │ │ ├── gpt_model.py │ │ │ ├── language_model.py │ │ │ ├── modeling_attn_mask_utils.py │ │ │ ├── rotary_pos_embedding.py │ │ │ └── transformer.py │ │ ├── mixtral │ │ │ ├── __init__.py │ │ │ ├── layer_specs.py │ │ │ ├── model.py │ │ │ ├── moe │ │ │ │ ├── __init__.py │ │ │ │ ├── experts.py │ │ │ │ ├── grouped_gemm_util.py │ │ │ │ ├── moe_layer.py │ │ │ │ ├── moe_utils.py │ │ │ │ ├── router.py │ │ │ │ └── token_dispatcher.py │ │ │ ├── transformer │ │ │ │ ├── attention.py │ │ │ │ └── mlp.py │ │ │ └── transformer_config.py │ │ ├── qwen │ │ │ ├── __init__.py │ │ │ ├── gpt_model.py │ │ │ ├── language_model.py │ │ │ └── transformer.py │ │ ├── qwen1_5 │ │ │ ├── __init__.py │ │ │ ├── layer_specs.py │ │ │ ├── model.py │ │ │ ├── moe │ │ │ │ ├── __init__.py │ │ │ │ ├── experts.py │ │ │ │ ├── moe_layer.py │ │ │ │ ├── router.py │ │ │ │ └── token_dispatcher.py │ │ │ └── transformer │ │ │ │ ├── attention.py │ │ │ │ └── mlp.py │ │ ├── qwen1_5_megablocks │ │ │ ├── __init__.py │ │ │ ├── gpt_model.py │ │ │ ├── language_model.py │ │ │ ├── rotary_pos_embedding.py │ │ │ └── transformer.py │ │ ├── qwen_vl │ │ │ ├── __init__.py │ │ │ ├── gpt_model.py │ │ │ ├── language_model.py │ │ │ ├── transformer.py │ │ │ └── visual.py │ │ └── starcoder │ │ │ ├── __init__.py │ │ │ ├── enums.py │ │ │ ├── glu_activations.py │ │ │ ├── gpt_model.py │ │ │ ├── language_model.py │ │ │ └── transformer.py │ ├── tokenizer │ │ ├── __init__.py │ │ ├── icetk_glm130b_tokenizer.py │ │ ├── jiebabpe_tokenizer.py │ │ ├── tokenization_baichuan.py │ │ ├── tokenization_qwen_vl.py │ │ └── tokenization_yi.py │ └── training.py └── toolkits │ ├── .DS_Store │ ├── model_checkpoints_convertor │ ├── .DS_Store │ └── idefics2 │ │ ├── clip_convertor.py │ │ ├── idefics2_hf2mg.py │ │ ├── idefics2_hf2mg_llama3.py │ │ ├── megatron_2hf.sh │ │ ├── mg2hf.sh │ │ ├── mg2hf_base_idefics_instruct.sh │ │ ├── model_convertor.sh │ │ └── model_convertor_llama3.sh │ └── pretrain_data_preprocessing │ ├── .DS_Store │ └── move_bulk_data.py ├── method_figure.jpg ├── reformat_data ├── .DS_Store ├── convert_to_llava_format.sh ├── convert_to_llava_format_with_gold_label_diff.py ├── convert_to_llava_format_with_pos_cot.py ├── convert_to_llava_format_with_pos_diff_equation.py ├── diff_utils.py └── utils.py ├── requirement.txt └── sample_data ├── .DS_Store ├── __init__.py ├── gather_gpt4_prompt ├── .DS_Store ├── completion_utils.py ├── get_prompt_phrase.py └── room_constant.py ├── generate_trajectories.py ├── regenerate_init_position.py └── utils.py /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/.DS_Store -------------------------------------------------------------------------------- /.idea/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/.idea/.gitignore -------------------------------------------------------------------------------- /.idea/DivScene_release.iml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/.idea/DivScene_release.iml -------------------------------------------------------------------------------- /.idea/inspectionProfiles/Project_Default.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/.idea/inspectionProfiles/Project_Default.xml -------------------------------------------------------------------------------- /.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/.idea/inspectionProfiles/profiles_settings.xml -------------------------------------------------------------------------------- /.idea/jupyter-settings.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/.idea/jupyter-settings.xml -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/.idea/misc.xml -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/.idea/modules.xml -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/.idea/vcs.xml -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/README.md -------------------------------------------------------------------------------- /agent_inference/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_inference/.DS_Store -------------------------------------------------------------------------------- /agent_inference/hf_idefics2_gpt4o.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_inference/hf_idefics2_gpt4o.py -------------------------------------------------------------------------------- /agent_inference/hf_idefics2_metric.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_inference/hf_idefics2_metric.py -------------------------------------------------------------------------------- /agent_inference/hf_idefics2_online_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_inference/hf_idefics2_online_client.py -------------------------------------------------------------------------------- /agent_inference/hf_idefics2_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_inference/hf_idefics2_server.py -------------------------------------------------------------------------------- /agent_inference/run_client.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_inference/run_client.sh -------------------------------------------------------------------------------- /agent_inference/run_server.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_inference/run_server.sh -------------------------------------------------------------------------------- /agent_inference/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_inference/utils.py -------------------------------------------------------------------------------- /agent_training/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/.DS_Store -------------------------------------------------------------------------------- /agent_training/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/LICENSE -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/.DS_Store -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/CODEOWNERS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/CODEOWNERS -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/CONTRIBUTING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/CONTRIBUTING.md -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/Dockerfile.ci: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/Dockerfile.ci -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/Dockerfile.test: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/Dockerfile.test -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/LICENSE -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/MANIFEST.in: -------------------------------------------------------------------------------- 1 | include megatron/core/requirements.txt 2 | -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/README.md -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/docs/llama2.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/docs/llama2.md -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/docs/source/api-guide/context_parallel.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/docs/source/api-guide/context_parallel.rst -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/docs/source/api-guide/datasets.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/docs/source/api-guide/datasets.rst -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/docs/source/api-guide/dist_checkpointing.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/docs/source/api-guide/dist_checkpointing.rst -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/docs/source/api-guide/distributed.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/docs/source/api-guide/distributed.rst -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/docs/source/api-guide/fusions.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/docs/source/api-guide/fusions.rst -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/docs/source/api-guide/index.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/docs/source/api-guide/index.rst -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/docs/source/api-guide/models.bert.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/docs/source/api-guide/models.bert.rst -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/docs/source/api-guide/models.gpt.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/docs/source/api-guide/models.gpt.rst -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/docs/source/api-guide/models.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/docs/source/api-guide/models.rst -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/docs/source/api-guide/models.t5.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/docs/source/api-guide/models.t5.rst -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/docs/source/api-guide/moe.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/docs/source/api-guide/moe.rst -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/docs/source/api-guide/pipeline_parallel.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/docs/source/api-guide/pipeline_parallel.rst -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/docs/source/api-guide/tensor_parallel.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/docs/source/api-guide/tensor_parallel.rst -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/docs/source/api-guide/transformer.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/docs/source/api-guide/transformer.rst -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/docs/source/distrib_optimizer.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/docs/source/distrib_optimizer.md -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/docs/source/images/context_parallel/CP_overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/docs/source/images/context_parallel/CP_overview.png -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/docs/source/images/context_parallel/CP_results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/docs/source/images/context_parallel/CP_results.png -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/docs/source/images/distrib_optimizer/data_flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/docs/source/images/distrib_optimizer/data_flow.png -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/docs/source/index.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/docs/source/index.rst -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/docs/source/user-guide/index.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/docs/source/user-guide/index.rst -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/examples/bert/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/bert/README.md -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/examples/bert/train_bert_340m_distributed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/bert/train_bert_340m_distributed.sh -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/examples/detxoify_lm/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/detxoify_lm/README.md -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/examples/detxoify_lm/annotations/preprocess.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/detxoify_lm/annotations/preprocess.sh -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/examples/detxoify_lm/finetune_gpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/detxoify_lm/finetune_gpt.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/examples/detxoify_lm/generate-1.3b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/detxoify_lm/generate-1.3b.sh -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/examples/detxoify_lm/generate_samples_gpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/detxoify_lm/generate_samples_gpt.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/examples/detxoify_lm/perspective_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/detxoify_lm/perspective_api.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/examples/evaluate_retriever_nq.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/evaluate_retriever_nq.sh -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/examples/evaluate_zeroshot_gpt.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/evaluate_zeroshot_gpt.sh -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/examples/finetune_mnli_distributed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/finetune_mnli_distributed.sh -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/examples/finetune_race_distributed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/finetune_race_distributed.sh -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/examples/finetune_retriever_distributed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/finetune_retriever_distributed.sh -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/examples/gpt3/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/gpt3/README.md -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/examples/gpt3/gpt_config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/gpt3/gpt_config.yaml -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/examples/gpt3/train_gpt3_175b_distributed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/gpt3/train_gpt3_175b_distributed.sh -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/examples/inference/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/inference/README.md -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/examples/inference/ptq_trtllm_llama_7b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/inference/ptq_trtllm_llama_7b.sh -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/examples/inference/ptq_trtllm_nemotron3_8b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/inference/ptq_trtllm_nemotron3_8b.sh -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/examples/inference/text_generation_ptq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/inference/text_generation_ptq.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/examples/inference/trtllm_text_generation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/inference/trtllm_text_generation.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/examples/merge_mp_bert.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/merge_mp_bert.sh -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/examples/msdp/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/msdp/README.md -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/examples/msdp/data_processing.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/msdp/data_processing.sh -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/examples/msdp/eval_knwl_generation.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/msdp/eval_knwl_generation.sh -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/examples/msdp/eval_resp_generation.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/msdp/eval_resp_generation.sh -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/examples/msdp/prep_resp_gen.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/msdp/prep_resp_gen.sh -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/examples/msdp/prompt_knwl_gen.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/msdp/prompt_knwl_gen.sh -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/examples/msdp/prompt_resp_gen.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/msdp/prompt_resp_gen.sh -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/examples/pretrain_bert.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/pretrain_bert.sh -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/examples/pretrain_bert_distributed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/pretrain_bert_distributed.sh -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/examples/pretrain_bert_distributed_with_mp.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/pretrain_bert_distributed_with_mp.sh -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/examples/pretrain_gpt.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/pretrain_gpt.sh -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/examples/pretrain_gpt3_175B.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/pretrain_gpt3_175B.sh -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/examples/pretrain_gpt_distributed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/pretrain_gpt_distributed.sh -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/examples/pretrain_gpt_distributed_with_mp.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/pretrain_gpt_distributed_with_mp.sh -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/examples/pretrain_ict.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/pretrain_ict.sh -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/examples/pretrain_t5.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/pretrain_t5.sh -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/examples/pretrain_t5_distributed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/pretrain_t5_distributed.sh -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/examples/pretrain_t5_distributed_with_mp.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/pretrain_t5_distributed_with_mp.sh -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/examples/pretrain_vision_classify.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/pretrain_vision_classify.sh -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/examples/pretrain_vision_dino.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/pretrain_vision_dino.sh -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/examples/pretrain_vision_inpaint.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/pretrain_vision_inpaint.sh -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/examples/pretrain_vlm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/pretrain_vlm.sh -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/examples/retro/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/retro/README.md -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/examples/retro/preprocess_data.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/retro/preprocess_data.sh -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/examples/retro/train_retro_2b_distributed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/retro/train_retro_2b_distributed.sh -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/examples/run_simple_mcore_train_loop.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/run_simple_mcore_train_loop.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/examples/run_text_generation_server_345M.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/run_text_generation_server_345M.sh -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/examples/sc21/CONFIG.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/sc21/CONFIG.sh -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/examples/sc21/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/sc21/README.md -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/examples/sc21/SBATCH.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/sc21/SBATCH.sh -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/examples/sc21/SRUN.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/sc21/SRUN.sh -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/examples/sc21/run_figure_11.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/sc21/run_figure_11.sh -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/examples/sc21/run_figure_12.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/sc21/run_figure_12.sh -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/examples/sc21/run_figure_13.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/sc21/run_figure_13.sh -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/examples/sc21/run_figure_14.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/sc21/run_figure_14.sh -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/examples/sc21/run_figure_15.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/sc21/run_figure_15.sh -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/examples/sc21/run_figure_16.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/sc21/run_figure_16.sh -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/examples/sc21/run_figure_17.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/sc21/run_figure_17.sh -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/examples/sc21/run_figure_18.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/sc21/run_figure_18.sh -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/examples/sc21/run_table_1.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/sc21/run_table_1.sh -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/examples/t5/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/t5/README.md -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/examples/t5/t5_mcore_train_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/t5/t5_mcore_train_curve.png -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/examples/t5/train_t5_220m_distributed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/t5/train_t5_220m_distributed.sh -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/images/Achieved_petaFLOPs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/images/Achieved_petaFLOPs.png -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/images/cases_april2021.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/images/cases_april2021.png -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/jet-tests.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/jet-tests.yml -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/QuickStart.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/QuickStart.md -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/README.md -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/README_STRAGGLER.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/README_STRAGGLER.md -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/__init__.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/__pycache__/enums.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/__pycache__/enums.cpython-310.pyc -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/__pycache__/enums.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/__pycache__/enums.cpython-38.pyc -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/__pycache__/jit.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/__pycache__/jit.cpython-310.pyc -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/__pycache__/jit.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/__pycache__/jit.cpython-38.pyc -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/__pycache__/timers.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/__pycache__/timers.cpython-310.pyc -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/__pycache__/timers.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/__pycache__/timers.cpython-38.pyc -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/__pycache__/utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/__pycache__/utils.cpython-310.pyc -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/__pycache__/utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/__pycache__/utils.cpython-38.pyc -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/datasets/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/datasets/Makefile -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/datasets/bert_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/datasets/bert_dataset.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/datasets/blended_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/datasets/blended_dataset.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/datasets/gpt_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/datasets/gpt_dataset.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/datasets/helpers.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/datasets/helpers.cpp -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/datasets/indexed_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/datasets/indexed_dataset.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/datasets/masked_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/datasets/masked_dataset.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/datasets/megatron_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/datasets/megatron_dataset.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/datasets/megatron_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/datasets/megatron_tokenizer.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/datasets/multimodal_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/datasets/multimodal_dataset.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/datasets/readme.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/datasets/readme.md -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/datasets/retro/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/datasets/retro/__init__.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/datasets/retro/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/datasets/retro/config/__init__.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/datasets/retro/config/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/datasets/retro/config/config.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/datasets/retro/config/tokenizers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/datasets/retro/config/tokenizers.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/datasets/retro/db/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/datasets/retro/db/__init__.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/datasets/retro/db/build.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/datasets/retro/db/build.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/datasets/retro/db/dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/datasets/retro/db/dataset.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/datasets/retro/db/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/datasets/retro/db/utils.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/datasets/retro/external_libs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/datasets/retro/external_libs.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/datasets/retro/index/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/datasets/retro/index/__init__.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/datasets/retro/index/build.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/datasets/retro/index/build.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/datasets/retro/index/factory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/datasets/retro/index/factory.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/datasets/retro/index/index.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/datasets/retro/index/index.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/datasets/retro/index/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/datasets/retro/index/utils.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/datasets/retro/index/validate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/datasets/retro/index/validate.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/datasets/retro/query/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/datasets/retro/query/__init__.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/datasets/retro/query/query.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/datasets/retro/query/query.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/datasets/retro/query/retro_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/datasets/retro/query/retro_dataset.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/datasets/retro/query/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/datasets/retro/query/utils.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/datasets/retro/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/datasets/retro/utils.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/datasets/t5_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/datasets/t5_dataset.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/datasets/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/datasets/utils.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/dist_checkpointing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/dist_checkpointing/__init__.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/dist_checkpointing/core.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/dist_checkpointing/core.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/dist_checkpointing/dict_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/dist_checkpointing/dict_utils.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/dist_checkpointing/mapping.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/dist_checkpointing/mapping.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/dist_checkpointing/optimizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/dist_checkpointing/optimizer.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/dist_checkpointing/serialization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/dist_checkpointing/serialization.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/dist_checkpointing/strategies/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/dist_checkpointing/strategies/base.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/dist_checkpointing/strategies/torch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/dist_checkpointing/strategies/torch.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/dist_checkpointing/strategies/zarr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/dist_checkpointing/strategies/zarr.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/dist_checkpointing/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/dist_checkpointing/utils.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/distributed/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/distributed/__init__.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/distributed/finalize_model_grads.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/distributed/finalize_model_grads.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/distributed/param_and_grad_buffer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/distributed/param_and_grad_buffer.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/enums.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/enums.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/fusions/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/fusions/fused_bias_dropout.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/fusions/fused_bias_dropout.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/fusions/fused_bias_geglu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/fusions/fused_bias_geglu.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/fusions/fused_bias_gelu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/fusions/fused_bias_gelu.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/fusions/fused_bias_swiglu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/fusions/fused_bias_swiglu.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/fusions/fused_layer_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/fusions/fused_layer_norm.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/fusions/fused_softmax.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/fusions/fused_softmax.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/inference/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/inference/__init__.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/inference/gpt/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/inference/gpt/__init__.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/inference/gpt/model_specs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/inference/gpt/model_specs.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/inference/gpt/state_dict_hooks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/inference/gpt/state_dict_hooks.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/inference_params.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/inference_params.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/jit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/jit.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/model_parallel_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/model_parallel_config.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/models/T5/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/models/T5/__init__.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/models/T5/t5_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/models/T5/t5_model.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/models/T5/t5_spec.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/models/T5/t5_spec.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/models/bert/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/models/bert/bert_layer_specs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/models/bert/bert_layer_specs.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/models/bert/bert_lm_head.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/models/bert/bert_lm_head.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/models/bert/bert_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/models/bert/bert_model.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/models/bert/pooler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/models/bert/pooler.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/models/common/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/models/common/embeddings/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/models/common/language_module/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/models/common/vision_module/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/models/gpt/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/models/gpt/__init__.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/models/gpt/gpt_layer_specs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/models/gpt/gpt_layer_specs.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/models/gpt/gpt_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/models/gpt/gpt_model.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/models/multimodal/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/models/multimodal/llava_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/models/multimodal/llava_model.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/models/retro/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/models/retro/__init__.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/models/retro/base_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/models/retro/base_attention.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/models/retro/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/models/retro/config.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/models/retro/decoder_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/models/retro/decoder_attention.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/models/retro/decoder_spec.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/models/retro/decoder_spec.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/models/retro/encoder_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/models/retro/encoder_attention.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/models/retro/encoder_spec.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/models/retro/encoder_spec.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/models/retro/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/models/retro/model.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/models/retro/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/models/retro/utils.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/models/vision/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/models/vision/clip_vit_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/models/vision/clip_vit_model.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/models/vision/multimodal_projector.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/models/vision/multimodal_projector.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/models/vision/vit_layer_specs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/models/vision/vit_layer_specs.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/optimizer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/optimizer/__init__.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/optimizer/clip_grads.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/optimizer/clip_grads.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/optimizer/distrib_optimizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/optimizer/distrib_optimizer.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/optimizer/grad_scaler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/optimizer/grad_scaler.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/optimizer/optimizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/optimizer/optimizer.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/optimizer/optimizer_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/optimizer/optimizer_config.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/package_info.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/package_info.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/packed_seq_params.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/packed_seq_params.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/parallel_state.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/parallel_state.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/pipeline_parallel/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/pipeline_parallel/__init__.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/pipeline_parallel/p2p_communication.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/pipeline_parallel/p2p_communication.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/pipeline_parallel/schedules.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/pipeline_parallel/schedules.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/requirements.txt: -------------------------------------------------------------------------------- 1 | torch -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/tensor_parallel/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/tensor_parallel/__init__.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/tensor_parallel/cross_entropy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/tensor_parallel/cross_entropy.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/tensor_parallel/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/tensor_parallel/data.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/tensor_parallel/layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/tensor_parallel/layers.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/tensor_parallel/mappings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/tensor_parallel/mappings.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/tensor_parallel/random.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/tensor_parallel/random.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/tensor_parallel/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/tensor_parallel/utils.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/timers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/timers.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/transformer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/transformer/__init__.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/transformer/attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/transformer/attention.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/transformer/custom_layers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/transformer/dot_product_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/transformer/dot_product_attention.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/transformer/enums.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/transformer/enums.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/transformer/identity_op.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/transformer/identity_op.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/transformer/mlp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/transformer/mlp.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/transformer/module.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/transformer/module.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/transformer/moe/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/transformer/moe/README.md -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/transformer/moe/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/transformer/moe/experts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/transformer/moe/experts.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/transformer/moe/grouped_gemm_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/transformer/moe/grouped_gemm_util.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/transformer/moe/moe_layer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/transformer/moe/moe_layer.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/transformer/moe/moe_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/transformer/moe/moe_utils.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/transformer/moe/router.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/transformer/moe/router.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/transformer/moe/token_dispatcher.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/transformer/moe/token_dispatcher.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/transformer/spec_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/transformer/spec_utils.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/transformer/transformer_block.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/transformer/transformer_block.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/transformer/transformer_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/transformer/transformer_config.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/transformer/transformer_layer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/transformer/transformer_layer.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/transformer/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/transformer/utils.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/core/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/utils.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/inference/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/inference/__init__.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/inference/arguments.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/inference/arguments.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/inference/gpt/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/inference/gpt/__init__.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/inference/gpt/model_provider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/inference/gpt/model_provider.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/inference/static/index.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/inference/static/index.html -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/inference/text_generation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/inference/text_generation/__init__.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/inference/text_generation/api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/inference/text_generation/api.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/inference/text_generation/beam_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/inference/text_generation/beam_utils.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/inference/text_generation/communication.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/inference/text_generation/communication.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/inference/text_generation/forward_step.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/inference/text_generation/forward_step.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/inference/text_generation/generation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/inference/text_generation/generation.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/inference/text_generation/sampling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/inference/text_generation/sampling.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/inference/text_generation/tokenization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/inference/text_generation/tokenization.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/inference/text_generation_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/inference/text_generation_server.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/legacy/data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/legacy/data/autoaugment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/data/autoaugment.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/legacy/data/biencoder_dataset_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/data/biencoder_dataset_utils.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/legacy/data/data_samplers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/data/data_samplers.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/legacy/data/dataset_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/data/dataset_utils.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/legacy/data/ict_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/data/ict_dataset.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/legacy/data/image_folder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/data/image_folder.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/legacy/data/multimodal_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/data/multimodal_dataset.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/legacy/data/orqa_wiki_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/data/orqa_wiki_dataset.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/legacy/data/realm_dataset_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/data/realm_dataset_utils.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/legacy/data/realm_index.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/data/realm_index.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/legacy/data/vit_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/data/vit_dataset.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/legacy/fp16_deprecated/loss_scaler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/fp16_deprecated/loss_scaler.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/legacy/fused_kernels/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/fused_kernels/__init__.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/legacy/fused_kernels/compat.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/fused_kernels/compat.h -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/legacy/fused_kernels/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/legacy/fused_kernels/type_shim.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/fused_kernels/type_shim.h -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/legacy/indexer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/indexer.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/legacy/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/model/__init__.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/legacy/model/bert_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/model/bert_model.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/legacy/model/biencoder_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/model/biencoder_model.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/legacy/model/classification.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/model/classification.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/legacy/model/enums.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/model/enums.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/legacy/model/fused_bias_gelu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/model/fused_bias_gelu.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/legacy/model/fused_layer_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/model/fused_layer_norm.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/legacy/model/fused_softmax.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/model/fused_softmax.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/legacy/model/gpt_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/model/gpt_model.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/legacy/model/language_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/model/language_model.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/legacy/model/module.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/model/module.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/legacy/model/multiple_choice.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/model/multiple_choice.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/legacy/model/realm_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/model/realm_model.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/legacy/model/rms_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/model/rms_norm.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/legacy/model/t5_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/model/t5_model.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/legacy/model/transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/model/transformer.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/legacy/model/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/model/utils.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/legacy/model/vision/classification.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/model/vision/classification.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/legacy/model/vision/dino.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/model/vision/dino.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/legacy/model/vision/esvit_swin_backbone.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/model/vision/esvit_swin_backbone.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/legacy/model/vision/inpainting.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/model/vision/inpainting.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/legacy/model/vision/knn_monitor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/model/vision/knn_monitor.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/legacy/model/vision/mit_backbone.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/model/vision/mit_backbone.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/legacy/model/vision/swin_backbone.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/model/vision/swin_backbone.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/legacy/model/vision/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/model/vision/utils.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/legacy/model/vision/vit_backbone.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/model/vision/vit_backbone.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/legacy/mpu/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/legacy/mpu/tests/commons.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/mpu/tests/commons.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/legacy/mpu/tests/test_cross_entropy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/mpu/tests/test_cross_entropy.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/legacy/mpu/tests/test_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/mpu/tests/test_data.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/legacy/mpu/tests/test_initialize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/mpu/tests/test_initialize.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/legacy/mpu/tests/test_layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/mpu/tests/test_layers.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/legacy/mpu/tests/test_random.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/mpu/tests/test_random.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/training/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/training/__init__.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/training/__pycache__/utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/training/__pycache__/utils.cpython-310.pyc -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/training/arguments.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/training/arguments.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/training/checkpointing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/training/checkpointing.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/training/dist_signal_handler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/training/dist_signal_handler.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/training/global_vars.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/training/global_vars.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/training/initialize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/training/initialize.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/training/log_handler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/training/log_handler.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/training/microbatches.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/training/microbatches.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/training/optimizer_param_scheduler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/training/optimizer_param_scheduler.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/training/theoretical_memory_usage.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/training/theoretical_memory_usage.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/training/tokenizer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/training/tokenizer/__init__.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/training/tokenizer/bert_tokenization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/training/tokenizer/bert_tokenization.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/training/tokenizer/gpt2_tokenization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/training/tokenizer/gpt2_tokenization.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/training/tokenizer/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/training/tokenizer/tokenizer.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/training/training.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/training/training.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/training/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/training/utils.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/megatron/training/yaml_arguments.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/training/yaml_arguments.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/pretrain_bert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/pretrain_bert.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/pretrain_gpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/pretrain_gpt.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/pretrain_ict.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/pretrain_ict.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/pretrain_retro.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/pretrain_retro.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/pretrain_t5.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/pretrain_t5.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/pretrain_vision_classify.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/pretrain_vision_classify.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/pretrain_vision_dino.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/pretrain_vision_dino.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/pretrain_vision_inpaint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/pretrain_vision_inpaint.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/pretrain_vlm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/pretrain_vlm.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/pyproject.toml -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/report_theoretical_memory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/report_theoretical_memory.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/setup.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tasks/data_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/data_utils.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tasks/ensemble_classifier.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/ensemble_classifier.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tasks/eval_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/eval_utils.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tasks/finetune_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/finetune_utils.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tasks/glue/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/glue/data.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tasks/glue/finetune.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/glue/finetune.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tasks/glue/mnli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/glue/mnli.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tasks/glue/qqp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/glue/qqp.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tasks/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/main.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tasks/msdp/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/msdp/README.md -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tasks/msdp/evaluate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/msdp/evaluate.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tasks/msdp/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/msdp/main.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tasks/msdp/metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/msdp/metrics.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tasks/msdp/preprocessing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/msdp/preprocessing.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tasks/msdp/prompt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/msdp/prompt.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tasks/orqa/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/orqa/README.md -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tasks/orqa/evaluate_orqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/orqa/evaluate_orqa.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tasks/orqa/evaluate_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/orqa/evaluate_utils.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tasks/orqa/supervised/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/orqa/supervised/data.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tasks/orqa/supervised/eval_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/orqa/supervised/eval_utils.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tasks/orqa/supervised/finetune.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/orqa/supervised/finetune.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tasks/orqa/unsupervised/nq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/orqa/unsupervised/nq.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tasks/orqa/unsupervised/qa_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/orqa/unsupervised/qa_utils.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tasks/orqa/unsupervised/tokenizers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/orqa/unsupervised/tokenizers.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tasks/race/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/race/data.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tasks/race/finetune.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/race/finetune.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tasks/vision/classification/classification.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/vision/classification/classification.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tasks/vision/classification/eval_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/vision/classification/eval_utils.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tasks/vision/finetune_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/vision/finetune_utils.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tasks/vision/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/vision/main.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tasks/vision/segmentation/cityscapes.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/vision/segmentation/cityscapes.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tasks/vision/segmentation/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/vision/segmentation/data.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tasks/vision/segmentation/finetune_segformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/vision/segmentation/finetune_segformer.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tasks/vision/segmentation/finetune_setr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/vision/segmentation/finetune_setr.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tasks/vision/segmentation/metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/vision/segmentation/metrics.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tasks/vision/segmentation/seg_heads.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/vision/segmentation/seg_heads.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tasks/vision/segmentation/seg_models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/vision/segmentation/seg_models.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tasks/vision/segmentation/transforms.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/vision/segmentation/transforms.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tasks/vision/segmentation/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/vision/segmentation/utils.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tasks/zeroshot_gpt/datasets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/zeroshot_gpt/datasets.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tasks/zeroshot_gpt/detokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/zeroshot_gpt/detokenizer.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tasks/zeroshot_gpt/evaluate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/zeroshot_gpt/evaluate.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tests/functional_tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tests/functional_tests/jet_recipes/MR-bert.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/functional_tests/jet_recipes/MR-bert.yaml -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tests/functional_tests/jet_recipes/MR-gpt.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/functional_tests/jet_recipes/MR-gpt.yaml -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tests/functional_tests/jet_recipes/MR-t5.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/functional_tests/jet_recipes/MR-t5.yaml -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tests/functional_tests/jet_recipes/build-pyt.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/functional_tests/jet_recipes/build-pyt.yaml -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tests/functional_tests/jet_recipes/monthly-t5.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/functional_tests/jet_recipes/monthly-t5.yaml -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tests/functional_tests/jet_recipes/nightly-bert.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/functional_tests/jet_recipes/nightly-bert.yaml -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tests/functional_tests/jet_recipes/nightly-gpt.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/functional_tests/jet_recipes/nightly-gpt.yaml -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tests/functional_tests/jet_recipes/weekly-gpt.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/functional_tests/jet_recipes/weekly-gpt.yaml -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tests/functional_tests/python_test_utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tests/functional_tests/python_test_utils/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/functional_tests/python_test_utils/common.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tests/functional_tests/shell_test_utils/jobwait.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/functional_tests/shell_test_utils/jobwait.sh -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tests/unit_tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tests/unit_tests/data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tests/unit_tests/data/test_builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/unit_tests/data/test_builder.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tests/unit_tests/data/test_mock_gpt_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/unit_tests/data/test_mock_gpt_dataset.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tests/unit_tests/data/test_multimodal_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/unit_tests/data/test_multimodal_dataset.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tests/unit_tests/data/test_preprocess_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/unit_tests/data/test_preprocess_data.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tests/unit_tests/data/test_preprocess_mmdata.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/unit_tests/data/test_preprocess_mmdata.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tests/unit_tests/dist_checkpointing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/unit_tests/dist_checkpointing/__init__.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tests/unit_tests/dist_checkpointing/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/unit_tests/dist_checkpointing/conftest.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tests/unit_tests/dist_checkpointing/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tests/unit_tests/dist_checkpointing/models/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/unit_tests/dist_checkpointing/models/common.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tests/unit_tests/dist_checkpointing/test_mapping.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/unit_tests/dist_checkpointing/test_mapping.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tests/unit_tests/fusions/test_torch_softmax.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/unit_tests/fusions/test_torch_softmax.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tests/unit_tests/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tests/unit_tests/models/test_base_embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/unit_tests/models/test_base_embedding.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tests/unit_tests/models/test_bert_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/unit_tests/models/test_bert_model.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tests/unit_tests/models/test_clip_vit_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/unit_tests/models/test_clip_vit_model.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tests/unit_tests/models/test_gpt_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/unit_tests/models/test_gpt_model.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tests/unit_tests/models/test_llava_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/unit_tests/models/test_llava_model.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tests/unit_tests/models/test_multimodal_projector.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/unit_tests/models/test_multimodal_projector.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tests/unit_tests/models/test_t5_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/unit_tests/models/test_t5_model.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tests/unit_tests/pipeline_parallel/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tests/unit_tests/pipeline_parallel/test_schedules.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/unit_tests/pipeline_parallel/test_schedules.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tests/unit_tests/tensor_parallel/test_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/unit_tests/tensor_parallel/test_data.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tests/unit_tests/tensor_parallel/test_mappings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/unit_tests/tensor_parallel/test_mappings.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tests/unit_tests/tensor_parallel/test_random.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/unit_tests/tensor_parallel/test_random.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tests/unit_tests/test_basic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/unit_tests/test_basic.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tests/unit_tests/test_imports.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/unit_tests/test_imports.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tests/unit_tests/test_parallel_state.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/unit_tests/test_parallel_state.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tests/unit_tests/test_training.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/unit_tests/test_training.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tests/unit_tests/test_utilities.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/unit_tests/test_utilities.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tests/unit_tests/test_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/unit_tests/test_utils.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tests/unit_tests/transformer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tests/unit_tests/transformer/moe/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tests/unit_tests/transformer/moe/test_grouped_mlp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/unit_tests/transformer/moe/test_grouped_mlp.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tests/unit_tests/transformer/moe/test_routers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/unit_tests/transformer/moe/test_routers.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tests/unit_tests/transformer/test_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/unit_tests/transformer/test_attention.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tests/unit_tests/transformer/test_core_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/unit_tests/transformer/test_core_attention.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tests/unit_tests/transformer/test_mlp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/unit_tests/transformer/test_mlp.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tests/unit_tests/transformer/test_module.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/unit_tests/transformer/test_module.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tests/unit_tests/transformer/test_retro_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/unit_tests/transformer/test_retro_attention.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tools/autoformat.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/autoformat.sh -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tools/bert_embedding/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/bert_embedding/__init__.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tools/bert_embedding/dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/bert_embedding/dataset.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tools/bert_embedding/embed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/bert_embedding/embed.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tools/bert_embedding/external_libs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/bert_embedding/external_libs.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tools/bert_embedding/huggingface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/bert_embedding/huggingface.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tools/checkpoint/convert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/checkpoint/convert.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tools/checkpoint/loader_llama2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/checkpoint/loader_llama2.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tools/checkpoint/loader_llama2_hf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/checkpoint/loader_llama2_hf.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tools/checkpoint/loader_mcore.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/checkpoint/loader_mcore.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tools/checkpoint/loader_megatron.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/checkpoint/loader_megatron.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tools/checkpoint/saver_mcore.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/checkpoint/saver_mcore.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tools/checkpoint/saver_megatron.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/checkpoint/saver_megatron.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tools/checkpoint/setter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/checkpoint/setter.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tools/checkpoint/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/checkpoint/utils.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tools/linter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/linter.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tools/merge_datasets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/merge_datasets.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tools/openwebtext/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/openwebtext/README.md -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tools/openwebtext/add_id.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/openwebtext/add_id.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tools/openwebtext/blacklist_urls.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/openwebtext/blacklist_urls.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tools/openwebtext/cleanup_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/openwebtext/cleanup_dataset.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tools/openwebtext/cleanup_fix_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/openwebtext/cleanup_fix_dataset.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tools/openwebtext/filter_ngrams.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/openwebtext/filter_ngrams.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tools/openwebtext/find_duplicates.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/openwebtext/find_duplicates.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tools/openwebtext/group_duplicate_url.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/openwebtext/group_duplicate_url.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tools/openwebtext/merge_jsons.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/openwebtext/merge_jsons.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tools/openwebtext/remove_group_duplicates.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/openwebtext/remove_group_duplicates.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tools/preprocess_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/preprocess_data.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tools/preprocess_data_nmt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/preprocess_data_nmt.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tools/preprocess_mmdata.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/preprocess_mmdata.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tools/retro/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/retro/README.md -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tools/retro/build_db.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/retro/build_db.md -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tools/retro/cli/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/retro/cli/__init__.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tools/retro/cli/__main__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/retro/cli/__main__.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tools/retro/cli/cli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/retro/cli/cli.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tools/retro/config_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/retro/config_utils.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tools/retro/docker/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/retro/docker/Dockerfile -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tools/retro/preprocess_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/retro/preprocess_data.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tools/retro/sft/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/retro/sft/README.md -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tools/retro/sft/dataset_conv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/retro/sft/dataset_conv.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tools/retro/sft/open_inst.sh: -------------------------------------------------------------------------------- 1 | DATA_BLEND="1.0 open_inst" 2 | -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tools/retro/sft/sft_retro.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/retro/sft/sft_retro.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tools/retro/sft/sft_retro_lm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/retro/sft/sft_retro_lm.sh -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tools/retro/text_generation/evaluate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/retro/text_generation/evaluate.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tools/retro/text_generation/metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/retro/text_generation/metrics.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tools/retro/text_generation/retro_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/retro/text_generation/retro_api.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tools/retro/text_generation/retro_generate.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/retro/text_generation/retro_generate.sh -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tools/retro/text_generation/retro_generation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/retro/text_generation/retro_generation.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tools/run_text_generation_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/run_text_generation_server.py -------------------------------------------------------------------------------- /agent_training/Megatron-LM-240424/tools/text_generation_cli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/text_generation_cli.py -------------------------------------------------------------------------------- /agent_training/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/README.md -------------------------------------------------------------------------------- /agent_training/README_zh-CN.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/README_zh-CN.md -------------------------------------------------------------------------------- /agent_training/examples/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/examples/.DS_Store -------------------------------------------------------------------------------- /agent_training/examples/idefics2/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/examples/idefics2/.DS_Store -------------------------------------------------------------------------------- /agent_training/examples/idefics2/pretrain_megatron_idefics2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/examples/idefics2/pretrain_megatron_idefics2.py -------------------------------------------------------------------------------- /agent_training/examples/idefics2/run_cot_cmd.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/examples/idefics2/run_cot_cmd.sh -------------------------------------------------------------------------------- /agent_training/examples/idefics2/train_llava_instruct_webdataset_cot.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/examples/idefics2/train_llava_instruct_webdataset_cot.sh -------------------------------------------------------------------------------- /agent_training/megatron_patch/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/.DS_Store -------------------------------------------------------------------------------- /agent_training/megatron_patch/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/__init__.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/arguments.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/arguments.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/data/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/data/.DS_Store -------------------------------------------------------------------------------- /agent_training/megatron_patch/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/data/__init__.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/data/bloom.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/data/bloom.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/data/data_sampler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/data/data_sampler.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/data/glm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/data/glm.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/data/idefics2/constants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/data/idefics2/constants.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/data/idefics2/idefics2_image_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/data/idefics2/idefics2_image_processor.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/data/idefics2/mm_pretrain_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/data/idefics2/mm_pretrain_dataset.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/data/llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/data/llama.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/data/llava/constants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/data/llava/constants.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/data/llava/conversation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/data/llava/conversation.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/data/llava/cvcuda_image_processing_clip.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/data/llava/cvcuda_image_processing_clip.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/data/llava/mm_pretrain_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/data/llava/mm_pretrain_dataset.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/data/llava/mm_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/data/llava/mm_utils.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/data/qwen_vl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/data/qwen_vl.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/data/starcoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/data/starcoder.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/data/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/data/utils.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/finetune_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/finetune_utils.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/generation/api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/generation/api.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/generation/generation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/generation/generation.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/generation/gpt_predictor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/generation/gpt_predictor.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/generation/megatron.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/generation/megatron.md -------------------------------------------------------------------------------- /agent_training/megatron_patch/generation/tokenization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/generation/tokenization.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/initialize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/initialize.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/lm_evaluate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/lm_evaluate.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/__init__.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/baichuan/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/baichuan/__init__.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/baichuan/gpt_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/baichuan/gpt_model.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/baichuan/language_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/baichuan/language_model.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/baichuan/transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/baichuan/transformer.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/baichuan2/gpt_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/baichuan2/gpt_model.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/baichuan2/language_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/baichuan2/language_model.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/baichuan2/layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/baichuan2/layers.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/baichuan2/transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/baichuan2/transformer.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/bloom/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/bloom/__init__.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/bloom/gpt_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/bloom/gpt_model.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/bloom/language_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/bloom/language_model.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/bloom/layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/bloom/layers.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/bloom/positional_embeddings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/bloom/positional_embeddings.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/bloom/transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/bloom/transformer.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/chatglm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/chatglm/__init__.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/chatglm/gpt_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/chatglm/gpt_model.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/chatglm/language_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/chatglm/language_model.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/chatglm/positional_embeddings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/chatglm/positional_embeddings.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/chatglm/transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/chatglm/transformer.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/falcon/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/falcon/__init__.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/falcon/gpt_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/falcon/gpt_model.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/falcon/language_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/falcon/language_model.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/falcon/transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/falcon/transformer.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/falcon40b/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/falcon40b/__init__.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/falcon40b/gpt_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/falcon40b/gpt_model.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/falcon40b/language_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/falcon40b/language_model.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/falcon40b/transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/falcon40b/transformer.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/galactica/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/galactica/__init__.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/galactica/gpt_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/galactica/gpt_model.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/galactica/language_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/galactica/language_model.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/galactica/transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/galactica/transformer.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/glm130b/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/glm130b/__init__.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/glm130b/gpt_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/glm130b/gpt_model.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/glm130b/language_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/glm130b/language_model.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/glm130b/transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/glm130b/transformer.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/idefics2/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/idefics2/get_idefics2vit_layer_spec.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/idefics2/get_idefics2vit_layer_spec.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/idefics2/gpt_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/idefics2/gpt_model.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/idefics2/idefics_vision_tower.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/idefics2/idefics_vision_tower.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/idefics2/idefics_vlm_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/idefics2/idefics_vlm_model.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/idefics2/language_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/idefics2/language_model.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/idefics2/language_model_llama3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/idefics2/language_model_llama3.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/idefics2/perceiver_transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/idefics2/perceiver_transformer.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/idefics2/rotary_pos_embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/idefics2/rotary_pos_embedding.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/idefics2/rotary_pos_embedding_llama3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/idefics2/rotary_pos_embedding_llama3.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/idefics2/transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/idefics2/transformer.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/llama/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/llama/__init__.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/llama/gpt_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/llama/gpt_model.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/llama/language_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/llama/language_model.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/llama/positional_embeddings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/llama/positional_embeddings.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/llama/transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/llama/transformer.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/llama2/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/llama2/__init__.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/llama2/gpt_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/llama2/gpt_model.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/llama2/language_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/llama2/language_model.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/llama2/rotary_pos_embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/llama2/rotary_pos_embedding.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/llama2/transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/llama2/transformer.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/llama3/gpt_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/llama3/gpt_model.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/llama3/language_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/llama3/language_model.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/llama3/transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/llama3/transformer.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/llava/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/llava/__init__.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/llava/clip_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/llava/clip_encoder.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/llava/gpt_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/llava/gpt_model.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/llava/language_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/llava/language_model.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/llava/mm_projector_builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/llava/mm_projector_builder.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/llava/rotary_pos_embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/llava/rotary_pos_embedding.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/llava/transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/llava/transformer.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/llava/vlm_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/llava/vlm_model.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/mistral/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/mistral/__init__.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/mistral/gpt_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/mistral/gpt_model.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/mistral/language_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/mistral/language_model.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/mistral/modeling_attn_mask_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/mistral/modeling_attn_mask_utils.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/mistral/rotary_pos_embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/mistral/rotary_pos_embedding.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/mistral/transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/mistral/transformer.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/mixtral/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/mixtral/__init__.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/mixtral/layer_specs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/mixtral/layer_specs.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/mixtral/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/mixtral/model.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/mixtral/moe/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/mixtral/moe/experts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/mixtral/moe/experts.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/mixtral/moe/grouped_gemm_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/mixtral/moe/grouped_gemm_util.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/mixtral/moe/moe_layer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/mixtral/moe/moe_layer.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/mixtral/moe/moe_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/mixtral/moe/moe_utils.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/mixtral/moe/router.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/mixtral/moe/router.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/mixtral/moe/token_dispatcher.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/mixtral/moe/token_dispatcher.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/mixtral/transformer/attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/mixtral/transformer/attention.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/mixtral/transformer/mlp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/mixtral/transformer/mlp.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/mixtral/transformer_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/mixtral/transformer_config.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/qwen/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/qwen/__init__.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/qwen/gpt_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/qwen/gpt_model.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/qwen/language_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/qwen/language_model.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/qwen/transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/qwen/transformer.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/qwen1_5/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/qwen1_5/__init__.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/qwen1_5/layer_specs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/qwen1_5/layer_specs.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/qwen1_5/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/qwen1_5/model.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/qwen1_5/moe/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/qwen1_5/moe/experts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/qwen1_5/moe/experts.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/qwen1_5/moe/moe_layer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/qwen1_5/moe/moe_layer.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/qwen1_5/moe/router.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/qwen1_5/moe/router.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/qwen1_5/moe/token_dispatcher.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/qwen1_5/moe/token_dispatcher.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/qwen1_5/transformer/attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/qwen1_5/transformer/attention.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/qwen1_5/transformer/mlp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/qwen1_5/transformer/mlp.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/qwen1_5_megablocks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/qwen1_5_megablocks/__init__.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/qwen1_5_megablocks/gpt_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/qwen1_5_megablocks/gpt_model.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/qwen1_5_megablocks/language_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/qwen1_5_megablocks/language_model.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/qwen1_5_megablocks/rotary_pos_embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/qwen1_5_megablocks/rotary_pos_embedding.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/qwen1_5_megablocks/transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/qwen1_5_megablocks/transformer.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/qwen_vl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/qwen_vl/__init__.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/qwen_vl/gpt_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/qwen_vl/gpt_model.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/qwen_vl/language_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/qwen_vl/language_model.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/qwen_vl/transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/qwen_vl/transformer.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/qwen_vl/visual.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/qwen_vl/visual.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/starcoder/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/starcoder/__init__.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/starcoder/enums.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/starcoder/enums.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/starcoder/glu_activations.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/starcoder/glu_activations.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/starcoder/gpt_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/starcoder/gpt_model.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/starcoder/language_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/starcoder/language_model.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/model/starcoder/transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/starcoder/transformer.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/tokenizer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/tokenizer/__init__.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/tokenizer/icetk_glm130b_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/tokenizer/icetk_glm130b_tokenizer.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/tokenizer/jiebabpe_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/tokenizer/jiebabpe_tokenizer.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/tokenizer/tokenization_baichuan.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/tokenizer/tokenization_baichuan.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/tokenizer/tokenization_qwen_vl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/tokenizer/tokenization_qwen_vl.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/tokenizer/tokenization_yi.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/tokenizer/tokenization_yi.py -------------------------------------------------------------------------------- /agent_training/megatron_patch/training.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/training.py -------------------------------------------------------------------------------- /agent_training/toolkits/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/toolkits/.DS_Store -------------------------------------------------------------------------------- /agent_training/toolkits/model_checkpoints_convertor/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/toolkits/model_checkpoints_convertor/.DS_Store -------------------------------------------------------------------------------- /agent_training/toolkits/model_checkpoints_convertor/idefics2/clip_convertor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/toolkits/model_checkpoints_convertor/idefics2/clip_convertor.py -------------------------------------------------------------------------------- /agent_training/toolkits/model_checkpoints_convertor/idefics2/idefics2_hf2mg.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/toolkits/model_checkpoints_convertor/idefics2/idefics2_hf2mg.py -------------------------------------------------------------------------------- /agent_training/toolkits/model_checkpoints_convertor/idefics2/megatron_2hf.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/toolkits/model_checkpoints_convertor/idefics2/megatron_2hf.sh -------------------------------------------------------------------------------- /agent_training/toolkits/model_checkpoints_convertor/idefics2/mg2hf.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/toolkits/model_checkpoints_convertor/idefics2/mg2hf.sh -------------------------------------------------------------------------------- /agent_training/toolkits/model_checkpoints_convertor/idefics2/model_convertor.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/toolkits/model_checkpoints_convertor/idefics2/model_convertor.sh -------------------------------------------------------------------------------- /agent_training/toolkits/pretrain_data_preprocessing/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/toolkits/pretrain_data_preprocessing/.DS_Store -------------------------------------------------------------------------------- /agent_training/toolkits/pretrain_data_preprocessing/move_bulk_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/toolkits/pretrain_data_preprocessing/move_bulk_data.py -------------------------------------------------------------------------------- /method_figure.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/method_figure.jpg -------------------------------------------------------------------------------- /reformat_data/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/reformat_data/.DS_Store -------------------------------------------------------------------------------- /reformat_data/convert_to_llava_format.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/reformat_data/convert_to_llava_format.sh -------------------------------------------------------------------------------- /reformat_data/convert_to_llava_format_with_gold_label_diff.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/reformat_data/convert_to_llava_format_with_gold_label_diff.py -------------------------------------------------------------------------------- /reformat_data/convert_to_llava_format_with_pos_cot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/reformat_data/convert_to_llava_format_with_pos_cot.py -------------------------------------------------------------------------------- /reformat_data/convert_to_llava_format_with_pos_diff_equation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/reformat_data/convert_to_llava_format_with_pos_diff_equation.py -------------------------------------------------------------------------------- /reformat_data/diff_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/reformat_data/diff_utils.py -------------------------------------------------------------------------------- /reformat_data/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/reformat_data/utils.py -------------------------------------------------------------------------------- /requirement.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/requirement.txt -------------------------------------------------------------------------------- /sample_data/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/sample_data/.DS_Store -------------------------------------------------------------------------------- /sample_data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sample_data/gather_gpt4_prompt/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/sample_data/gather_gpt4_prompt/.DS_Store -------------------------------------------------------------------------------- /sample_data/gather_gpt4_prompt/completion_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/sample_data/gather_gpt4_prompt/completion_utils.py -------------------------------------------------------------------------------- /sample_data/gather_gpt4_prompt/get_prompt_phrase.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/sample_data/gather_gpt4_prompt/get_prompt_phrase.py -------------------------------------------------------------------------------- /sample_data/gather_gpt4_prompt/room_constant.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/sample_data/gather_gpt4_prompt/room_constant.py -------------------------------------------------------------------------------- /sample_data/generate_trajectories.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/sample_data/generate_trajectories.py -------------------------------------------------------------------------------- /sample_data/regenerate_init_position.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/sample_data/regenerate_init_position.py -------------------------------------------------------------------------------- /sample_data/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/sample_data/utils.py --------------------------------------------------------------------------------