├── .DS_Store
├── .idea
    ├── .gitignore
    ├── DivScene_release.iml
    ├── inspectionProfiles
    │   ├── Project_Default.xml
    │   └── profiles_settings.xml
    ├── jupyter-settings.xml
    ├── misc.xml
    ├── modules.xml
    └── vcs.xml
├── LICENSE
├── README.md
├── agent_inference
    ├── .DS_Store
    ├── hf_idefics2_gpt4o.py
    ├── hf_idefics2_metric.py
    ├── hf_idefics2_online_client.py
    ├── hf_idefics2_server.py
    ├── run_client.sh
    ├── run_server.sh
    └── utils.py
├── agent_training
    ├── .DS_Store
    ├── LICENSE
    ├── Megatron-LM-240424
    │   ├── .DS_Store
    │   ├── CODEOWNERS
    │   ├── CONTRIBUTING.md
    │   ├── Dockerfile.ci
    │   ├── Dockerfile.test
    │   ├── LICENSE
    │   ├── MANIFEST.in
    │   ├── README.md
    │   ├── docs
    │   │   ├── llama2.md
    │   │   └── source
    │   │   │   ├── api-guide
    │   │   │       ├── context_parallel.rst
    │   │   │       ├── datasets.rst
    │   │   │       ├── dist_checkpointing.rst
    │   │   │       ├── dist_checkpointing.strategies.rst
    │   │   │       ├── distributed.rst
    │   │   │       ├── fusions.rst
    │   │   │       ├── index.rst
    │   │   │       ├── models.bert.rst
    │   │   │       ├── models.gpt.rst
    │   │   │       ├── models.rst
    │   │   │       ├── models.t5.rst
    │   │   │       ├── moe.rst
    │   │   │       ├── pipeline_parallel.rst
    │   │   │       ├── tensor_parallel.rst
    │   │   │       └── transformer.rst
    │   │   │   ├── distrib_optimizer.md
    │   │   │   ├── images
    │   │   │       ├── context_parallel
    │   │   │       │   ├── CP_overview.png
    │   │   │       │   └── CP_results.png
    │   │   │       └── distrib_optimizer
    │   │   │       │   ├── data_flow.png
    │   │   │       │   └── sharding_scheme.png
    │   │   │   ├── index.rst
    │   │   │   └── user-guide
    │   │   │       └── index.rst
    │   ├── examples
    │   │   ├── bert
    │   │   │   ├── README.md
    │   │   │   └── train_bert_340m_distributed.sh
    │   │   ├── detxoify_lm
    │   │   │   ├── README.md
    │   │   │   ├── annotations
    │   │   │   │   ├── filter-selfgeneration.py
    │   │   │   │   ├── perspective_api_annotate.py
    │   │   │   │   └── preprocess.sh
    │   │   │   ├── finetune_gpt.py
    │   │   │   ├── finetune_gpt_distributed-1.3b.sh
    │   │   │   ├── generate-1.3b.sh
    │   │   │   ├── generate_samples_gpt.py
    │   │   │   ├── perspective_api.py
    │   │   │   └── self_generation
    │   │   │   │   └── selfgenerate-1.3b-unconditional.sh
    │   │   ├── evaluate_retriever_nq.sh
    │   │   ├── evaluate_zeroshot_gpt.sh
    │   │   ├── finetune_mnli_distributed.sh
    │   │   ├── finetune_race_distributed.sh
    │   │   ├── finetune_retriever_distributed.sh
    │   │   ├── gpt3
    │   │   │   ├── README.md
    │   │   │   ├── gpt_config.yaml
    │   │   │   └── train_gpt3_175b_distributed.sh
    │   │   ├── inference
    │   │   │   ├── README.md
    │   │   │   ├── ptq_trtllm_llama_7b.sh
    │   │   │   ├── ptq_trtllm_nemotron3_8b.sh
    │   │   │   ├── text_generation_ptq.py
    │   │   │   └── trtllm_text_generation.py
    │   │   ├── merge_mp_bert.sh
    │   │   ├── msdp
    │   │   │   ├── README.md
    │   │   │   ├── data_processing.sh
    │   │   │   ├── eval_knwl_generation.sh
    │   │   │   ├── eval_resp_generation.sh
    │   │   │   ├── prep_resp_gen.sh
    │   │   │   ├── prompt_knwl_gen.sh
    │   │   │   └── prompt_resp_gen.sh
    │   │   ├── pretrain_bert.sh
    │   │   ├── pretrain_bert_distributed.sh
    │   │   ├── pretrain_bert_distributed_with_mp.sh
    │   │   ├── pretrain_gpt.sh
    │   │   ├── pretrain_gpt3_175B.sh
    │   │   ├── pretrain_gpt_distributed.sh
    │   │   ├── pretrain_gpt_distributed_with_mp.sh
    │   │   ├── pretrain_ict.sh
    │   │   ├── pretrain_t5.sh
    │   │   ├── pretrain_t5_distributed.sh
    │   │   ├── pretrain_t5_distributed_with_mp.sh
    │   │   ├── pretrain_vision_classify.sh
    │   │   ├── pretrain_vision_dino.sh
    │   │   ├── pretrain_vision_inpaint.sh
    │   │   ├── pretrain_vlm.sh
    │   │   ├── retro
    │   │   │   ├── README.md
    │   │   │   ├── preprocess_data.sh
    │   │   │   └── train_retro_2b_distributed.sh
    │   │   ├── run_simple_mcore_train_loop.py
    │   │   ├── run_text_generation_server_345M.sh
    │   │   ├── run_text_generation_server_345M_8_tensor_parallel.sh
    │   │   ├── sc21
    │   │   │   ├── CONFIG.sh
    │   │   │   ├── README.md
    │   │   │   ├── SBATCH.sh
    │   │   │   ├── SRUN.sh
    │   │   │   ├── run_figure_11.sh
    │   │   │   ├── run_figure_12.sh
    │   │   │   ├── run_figure_13.sh
    │   │   │   ├── run_figure_14.sh
    │   │   │   ├── run_figure_15.sh
    │   │   │   ├── run_figure_16.sh
    │   │   │   ├── run_figure_17.sh
    │   │   │   ├── run_figure_18.sh
    │   │   │   └── run_table_1.sh
    │   │   └── t5
    │   │   │   ├── README.md
    │   │   │   ├── t5_mcore_train_curve.png
    │   │   │   └── train_t5_220m_distributed.sh
    │   ├── images
    │   │   ├── Achieved_petaFLOPs.png
    │   │   └── cases_april2021.png
    │   ├── jet-tests.yml
    │   ├── megatron
    │   │   ├── core
    │   │   │   ├── QuickStart.md
    │   │   │   ├── README.md
    │   │   │   ├── README_STRAGGLER.md
    │   │   │   ├── __init__.py
    │   │   │   ├── __pycache__
    │   │   │   │   ├── __init__.cpython-310.pyc
    │   │   │   │   ├── __init__.cpython-38.pyc
    │   │   │   │   ├── enums.cpython-310.pyc
    │   │   │   │   ├── enums.cpython-38.pyc
    │   │   │   │   ├── inference_params.cpython-310.pyc
    │   │   │   │   ├── inference_params.cpython-38.pyc
    │   │   │   │   ├── jit.cpython-310.pyc
    │   │   │   │   ├── jit.cpython-38.pyc
    │   │   │   │   ├── model_parallel_config.cpython-310.pyc
    │   │   │   │   ├── model_parallel_config.cpython-38.pyc
    │   │   │   │   ├── packed_seq_params.cpython-310.pyc
    │   │   │   │   ├── packed_seq_params.cpython-38.pyc
    │   │   │   │   ├── parallel_state.cpython-310.pyc
    │   │   │   │   ├── parallel_state.cpython-38.pyc
    │   │   │   │   ├── timers.cpython-310.pyc
    │   │   │   │   ├── timers.cpython-38.pyc
    │   │   │   │   ├── utils.cpython-310.pyc
    │   │   │   │   └── utils.cpython-38.pyc
    │   │   │   ├── datasets
    │   │   │   │   ├── Makefile
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── __pycache__
    │   │   │   │   │   ├── __init__.cpython-310.pyc
    │   │   │   │   │   ├── __init__.cpython-38.pyc
    │   │   │   │   │   ├── megatron_tokenizer.cpython-310.pyc
    │   │   │   │   │   ├── megatron_tokenizer.cpython-38.pyc
    │   │   │   │   │   └── utils.cpython-310.pyc
    │   │   │   │   ├── bert_dataset.py
    │   │   │   │   ├── blended_dataset.py
    │   │   │   │   ├── blended_megatron_dataset_builder.py
    │   │   │   │   ├── blended_megatron_dataset_config.py
    │   │   │   │   ├── gpt_dataset.py
    │   │   │   │   ├── helpers.cpp
    │   │   │   │   ├── helpers.cpython-310-x86_64-linux-gnu.so
    │   │   │   │   ├── indexed_dataset.py
    │   │   │   │   ├── masked_dataset.py
    │   │   │   │   ├── megatron_dataset.py
    │   │   │   │   ├── megatron_tokenizer.py
    │   │   │   │   ├── multimodal_dataset.py
    │   │   │   │   ├── readme.md
    │   │   │   │   ├── retro
    │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   ├── config
    │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   ├── bert_embedders.py
    │   │   │   │   │   │   ├── config.py
    │   │   │   │   │   │   ├── gpt_chunk_datasets.py
    │   │   │   │   │   │   └── tokenizers.py
    │   │   │   │   │   ├── db
    │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   ├── build.py
    │   │   │   │   │   │   ├── dataset.py
    │   │   │   │   │   │   └── utils.py
    │   │   │   │   │   ├── external_libs.py
    │   │   │   │   │   ├── index
    │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   ├── build.py
    │   │   │   │   │   │   ├── factory.py
    │   │   │   │   │   │   ├── index.py
    │   │   │   │   │   │   ├── indexes
    │   │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   │   ├── faiss_base.py
    │   │   │   │   │   │   │   └── faiss_par_add.py
    │   │   │   │   │   │   ├── utils.py
    │   │   │   │   │   │   └── validate.py
    │   │   │   │   │   ├── query
    │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   ├── gpt_chunk_dataset.py
    │   │   │   │   │   │   ├── multi_split_gpt_dataset.py
    │   │   │   │   │   │   ├── query.py
    │   │   │   │   │   │   ├── retro_dataset.py
    │   │   │   │   │   │   └── utils.py
    │   │   │   │   │   └── utils.py
    │   │   │   │   ├── t5_dataset.py
    │   │   │   │   └── utils.py
    │   │   │   ├── dist_checkpointing
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── __pycache__
    │   │   │   │   │   ├── __init__.cpython-310.pyc
    │   │   │   │   │   ├── __init__.cpython-38.pyc
    │   │   │   │   │   ├── core.cpython-310.pyc
    │   │   │   │   │   ├── core.cpython-38.pyc
    │   │   │   │   │   ├── dict_utils.cpython-310.pyc
    │   │   │   │   │   ├── dict_utils.cpython-38.pyc
    │   │   │   │   │   ├── mapping.cpython-310.pyc
    │   │   │   │   │   ├── mapping.cpython-38.pyc
    │   │   │   │   │   ├── optimizer.cpython-310.pyc
    │   │   │   │   │   ├── serialization.cpython-310.pyc
    │   │   │   │   │   ├── serialization.cpython-38.pyc
    │   │   │   │   │   ├── utils.cpython-310.pyc
    │   │   │   │   │   └── utils.cpython-38.pyc
    │   │   │   │   ├── core.py
    │   │   │   │   ├── dict_utils.py
    │   │   │   │   ├── mapping.py
    │   │   │   │   ├── optimizer.py
    │   │   │   │   ├── serialization.py
    │   │   │   │   ├── strategies
    │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   ├── __pycache__
    │   │   │   │   │   │   ├── __init__.cpython-310.pyc
    │   │   │   │   │   │   ├── __init__.cpython-38.pyc
    │   │   │   │   │   │   ├── base.cpython-310.pyc
    │   │   │   │   │   │   └── base.cpython-38.pyc
    │   │   │   │   │   ├── base.py
    │   │   │   │   │   ├── filesystem_async.py
    │   │   │   │   │   ├── state_dict_saver.py
    │   │   │   │   │   ├── tensorstore.py
    │   │   │   │   │   ├── torch.py
    │   │   │   │   │   ├── two_stage.py
    │   │   │   │   │   └── zarr.py
    │   │   │   │   └── utils.py
    │   │   │   ├── distributed
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── __pycache__
    │   │   │   │   │   ├── __init__.cpython-310.pyc
    │   │   │   │   │   ├── __init__.cpython-38.pyc
    │   │   │   │   │   ├── distributed_data_parallel.cpython-310.pyc
    │   │   │   │   │   ├── distributed_data_parallel.cpython-38.pyc
    │   │   │   │   │   ├── distributed_data_parallel_config.cpython-310.pyc
    │   │   │   │   │   ├── distributed_data_parallel_config.cpython-38.pyc
    │   │   │   │   │   ├── finalize_model_grads.cpython-310.pyc
    │   │   │   │   │   ├── finalize_model_grads.cpython-38.pyc
    │   │   │   │   │   ├── param_and_grad_buffer.cpython-310.pyc
    │   │   │   │   │   └── param_and_grad_buffer.cpython-38.pyc
    │   │   │   │   ├── distributed_data_parallel.py
    │   │   │   │   ├── distributed_data_parallel_config.py
    │   │   │   │   ├── finalize_model_grads.py
    │   │   │   │   └── param_and_grad_buffer.py
    │   │   │   ├── enums.py
    │   │   │   ├── fusions
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── __pycache__
    │   │   │   │   │   ├── __init__.cpython-310.pyc
    │   │   │   │   │   ├── __init__.cpython-38.pyc
    │   │   │   │   │   ├── fused_bias_dropout.cpython-310.pyc
    │   │   │   │   │   ├── fused_bias_geglu.cpython-310.pyc
    │   │   │   │   │   ├── fused_bias_gelu.cpython-310.pyc
    │   │   │   │   │   ├── fused_bias_swiglu.cpython-310.pyc
    │   │   │   │   │   ├── fused_layer_norm.cpython-310.pyc
    │   │   │   │   │   ├── fused_layer_norm.cpython-38.pyc
    │   │   │   │   │   └── fused_softmax.cpython-310.pyc
    │   │   │   │   ├── fused_bias_dropout.py
    │   │   │   │   ├── fused_bias_geglu.py
    │   │   │   │   ├── fused_bias_gelu.py
    │   │   │   │   ├── fused_bias_swiglu.py
    │   │   │   │   ├── fused_layer_norm.py
    │   │   │   │   └── fused_softmax.py
    │   │   │   ├── inference
    │   │   │   │   ├── __init__.py
    │   │   │   │   └── gpt
    │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   ├── model_specs.py
    │   │   │   │   │   └── state_dict_hooks.py
    │   │   │   ├── inference_params.py
    │   │   │   ├── jit.py
    │   │   │   ├── model_parallel_config.py
    │   │   │   ├── models
    │   │   │   │   ├── T5
    │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   ├── t5_model.py
    │   │   │   │   │   └── t5_spec.py
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── __pycache__
    │   │   │   │   │   ├── __init__.cpython-310.pyc
    │   │   │   │   │   └── __init__.cpython-38.pyc
    │   │   │   │   ├── bert
    │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   ├── bert_layer_specs.py
    │   │   │   │   │   ├── bert_lm_head.py
    │   │   │   │   │   ├── bert_model.py
    │   │   │   │   │   └── pooler.py
    │   │   │   │   ├── common
    │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   ├── __pycache__
    │   │   │   │   │   │   ├── __init__.cpython-310.pyc
    │   │   │   │   │   │   └── __init__.cpython-38.pyc
    │   │   │   │   │   ├── embeddings
    │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   ├── __pycache__
    │   │   │   │   │   │   │   ├── __init__.cpython-310.pyc
    │   │   │   │   │   │   │   ├── __init__.cpython-38.pyc
    │   │   │   │   │   │   │   ├── language_model_embedding.cpython-310.pyc
    │   │   │   │   │   │   │   ├── language_model_embedding.cpython-38.pyc
    │   │   │   │   │   │   │   ├── rotary_pos_embedding.cpython-310.pyc
    │   │   │   │   │   │   │   └── rotary_pos_embedding.cpython-38.pyc
    │   │   │   │   │   │   ├── language_model_embedding.py
    │   │   │   │   │   │   └── rotary_pos_embedding.py
    │   │   │   │   │   ├── language_module
    │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   ├── __pycache__
    │   │   │   │   │   │   │   ├── __init__.cpython-310.pyc
    │   │   │   │   │   │   │   ├── __init__.cpython-38.pyc
    │   │   │   │   │   │   │   ├── language_module.cpython-310.pyc
    │   │   │   │   │   │   │   └── language_module.cpython-38.pyc
    │   │   │   │   │   │   └── language_module.py
    │   │   │   │   │   └── vision_module
    │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   ├── __pycache__
    │   │   │   │   │   │       ├── __init__.cpython-310.pyc
    │   │   │   │   │   │       └── vision_module.cpython-310.pyc
    │   │   │   │   │   │   └── vision_module.py
    │   │   │   │   ├── gpt
    │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   ├── __pycache__
    │   │   │   │   │   │   ├── __init__.cpython-310.pyc
    │   │   │   │   │   │   ├── __init__.cpython-38.pyc
    │   │   │   │   │   │   ├── gpt_layer_specs.cpython-310.pyc
    │   │   │   │   │   │   ├── gpt_model.cpython-310.pyc
    │   │   │   │   │   │   └── gpt_model.cpython-38.pyc
    │   │   │   │   │   ├── gpt_layer_specs.py
    │   │   │   │   │   └── gpt_model.py
    │   │   │   │   ├── multimodal
    │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   └── llava_model.py
    │   │   │   │   ├── retro
    │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   ├── __pycache__
    │   │   │   │   │   │   ├── __init__.cpython-310.pyc
    │   │   │   │   │   │   ├── __init__.cpython-38.pyc
    │   │   │   │   │   │   ├── base_attention.cpython-310.pyc
    │   │   │   │   │   │   ├── config.cpython-310.pyc
    │   │   │   │   │   │   ├── config.cpython-38.pyc
    │   │   │   │   │   │   ├── decoder_attention.cpython-310.pyc
    │   │   │   │   │   │   ├── decoder_spec.cpython-310.pyc
    │   │   │   │   │   │   ├── decoder_spec.cpython-38.pyc
    │   │   │   │   │   │   ├── encoder_attention.cpython-310.pyc
    │   │   │   │   │   │   ├── encoder_spec.cpython-310.pyc
    │   │   │   │   │   │   ├── model.cpython-310.pyc
    │   │   │   │   │   │   └── utils.cpython-310.pyc
    │   │   │   │   │   ├── base_attention.py
    │   │   │   │   │   ├── config.py
    │   │   │   │   │   ├── decoder_attention.py
    │   │   │   │   │   ├── decoder_spec.py
    │   │   │   │   │   ├── encoder_attention.py
    │   │   │   │   │   ├── encoder_spec.py
    │   │   │   │   │   ├── model.py
    │   │   │   │   │   └── utils.py
    │   │   │   │   └── vision
    │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   ├── __pycache__
    │   │   │   │   │       ├── __init__.cpython-310.pyc
    │   │   │   │   │       ├── clip_vit_model.cpython-310.pyc
    │   │   │   │   │       ├── multimodal_projector.cpython-310.pyc
    │   │   │   │   │       └── vit_layer_specs.cpython-310.pyc
    │   │   │   │   │   ├── clip_vit_model.py
    │   │   │   │   │   ├── multimodal_projector.py
    │   │   │   │   │   └── vit_layer_specs.py
    │   │   │   ├── optimizer
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── __pycache__
    │   │   │   │   │   ├── __init__.cpython-310.pyc
    │   │   │   │   │   ├── clip_grads.cpython-310.pyc
    │   │   │   │   │   ├── distrib_optimizer.cpython-310.pyc
    │   │   │   │   │   ├── grad_scaler.cpython-310.pyc
    │   │   │   │   │   ├── optimizer.cpython-310.pyc
    │   │   │   │   │   └── optimizer_config.cpython-310.pyc
    │   │   │   │   ├── clip_grads.py
    │   │   │   │   ├── distrib_optimizer.py
    │   │   │   │   ├── grad_scaler.py
    │   │   │   │   ├── optimizer.py
    │   │   │   │   └── optimizer_config.py
    │   │   │   ├── package_info.py
    │   │   │   ├── packed_seq_params.py
    │   │   │   ├── parallel_state.py
    │   │   │   ├── pipeline_parallel
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── __pycache__
    │   │   │   │   │   ├── __init__.cpython-310.pyc
    │   │   │   │   │   ├── p2p_communication.cpython-310.pyc
    │   │   │   │   │   └── schedules.cpython-310.pyc
    │   │   │   │   ├── p2p_communication.py
    │   │   │   │   └── schedules.py
    │   │   │   ├── requirements.txt
    │   │   │   ├── tensor_parallel
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── __pycache__
    │   │   │   │   │   ├── __init__.cpython-310.pyc
    │   │   │   │   │   ├── __init__.cpython-38.pyc
    │   │   │   │   │   ├── cross_entropy.cpython-310.pyc
    │   │   │   │   │   ├── cross_entropy.cpython-38.pyc
    │   │   │   │   │   ├── data.cpython-310.pyc
    │   │   │   │   │   ├── data.cpython-38.pyc
    │   │   │   │   │   ├── layers.cpython-310.pyc
    │   │   │   │   │   ├── layers.cpython-38.pyc
    │   │   │   │   │   ├── mappings.cpython-310.pyc
    │   │   │   │   │   ├── mappings.cpython-38.pyc
    │   │   │   │   │   ├── random.cpython-310.pyc
    │   │   │   │   │   ├── random.cpython-38.pyc
    │   │   │   │   │   ├── utils.cpython-310.pyc
    │   │   │   │   │   └── utils.cpython-38.pyc
    │   │   │   │   ├── cross_entropy.py
    │   │   │   │   ├── data.py
    │   │   │   │   ├── layers.py
    │   │   │   │   ├── mappings.py
    │   │   │   │   ├── random.py
    │   │   │   │   └── utils.py
    │   │   │   ├── timers.py
    │   │   │   ├── transformer
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── __pycache__
    │   │   │   │   │   ├── __init__.cpython-310.pyc
    │   │   │   │   │   ├── __init__.cpython-38.pyc
    │   │   │   │   │   ├── attention.cpython-310.pyc
    │   │   │   │   │   ├── dot_product_attention.cpython-310.pyc
    │   │   │   │   │   ├── enums.cpython-310.pyc
    │   │   │   │   │   ├── enums.cpython-38.pyc
    │   │   │   │   │   ├── identity_op.cpython-310.pyc
    │   │   │   │   │   ├── identity_op.cpython-38.pyc
    │   │   │   │   │   ├── mlp.cpython-310.pyc
    │   │   │   │   │   ├── module.cpython-310.pyc
    │   │   │   │   │   ├── module.cpython-38.pyc
    │   │   │   │   │   ├── spec_utils.cpython-310.pyc
    │   │   │   │   │   ├── spec_utils.cpython-38.pyc
    │   │   │   │   │   ├── transformer_block.cpython-310.pyc
    │   │   │   │   │   ├── transformer_block.cpython-38.pyc
    │   │   │   │   │   ├── transformer_config.cpython-310.pyc
    │   │   │   │   │   ├── transformer_config.cpython-38.pyc
    │   │   │   │   │   ├── transformer_layer.cpython-310.pyc
    │   │   │   │   │   ├── transformer_layer.cpython-38.pyc
    │   │   │   │   │   ├── utils.cpython-310.pyc
    │   │   │   │   │   └── utils.cpython-38.pyc
    │   │   │   │   ├── attention.py
    │   │   │   │   ├── custom_layers
    │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   ├── __pycache__
    │   │   │   │   │   │   ├── __init__.cpython-310.pyc
    │   │   │   │   │   │   ├── __init__.cpython-38.pyc
    │   │   │   │   │   │   ├── transformer_engine.cpython-310.pyc
    │   │   │   │   │   │   └── transformer_engine.cpython-38.pyc
    │   │   │   │   │   └── transformer_engine.py
    │   │   │   │   ├── dot_product_attention.py
    │   │   │   │   ├── enums.py
    │   │   │   │   ├── identity_op.py
    │   │   │   │   ├── mlp.py
    │   │   │   │   ├── module.py
    │   │   │   │   ├── moe
    │   │   │   │   │   ├── README.md
    │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   ├── __pycache__
    │   │   │   │   │   │   ├── __init__.cpython-310.pyc
    │   │   │   │   │   │   ├── experts.cpython-310.pyc
    │   │   │   │   │   │   ├── grouped_gemm_util.cpython-310.pyc
    │   │   │   │   │   │   ├── moe_layer.cpython-310.pyc
    │   │   │   │   │   │   ├── moe_utils.cpython-310.pyc
    │   │   │   │   │   │   ├── router.cpython-310.pyc
    │   │   │   │   │   │   └── token_dispatcher.cpython-310.pyc
    │   │   │   │   │   ├── experts.py
    │   │   │   │   │   ├── grouped_gemm_util.py
    │   │   │   │   │   ├── moe_layer.py
    │   │   │   │   │   ├── moe_utils.py
    │   │   │   │   │   ├── router.py
    │   │   │   │   │   └── token_dispatcher.py
    │   │   │   │   ├── spec_utils.py
    │   │   │   │   ├── transformer_block.py
    │   │   │   │   ├── transformer_config.py
    │   │   │   │   ├── transformer_layer.py
    │   │   │   │   └── utils.py
    │   │   │   └── utils.py
    │   │   ├── inference
    │   │   │   ├── __init__.py
    │   │   │   ├── arguments.py
    │   │   │   ├── gpt
    │   │   │   │   ├── __init__.py
    │   │   │   │   └── model_provider.py
    │   │   │   ├── static
    │   │   │   │   └── index.html
    │   │   │   ├── text_generation
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── api.py
    │   │   │   │   ├── beam_utils.py
    │   │   │   │   ├── communication.py
    │   │   │   │   ├── forward_step.py
    │   │   │   │   ├── generation.py
    │   │   │   │   ├── sampling.py
    │   │   │   │   └── tokenization.py
    │   │   │   └── text_generation_server.py
    │   │   ├── legacy
    │   │   │   ├── data
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── __pycache__
    │   │   │   │   │   ├── __init__.cpython-310.pyc
    │   │   │   │   │   ├── autoaugment.cpython-310.pyc
    │   │   │   │   │   ├── data_samplers.cpython-310.pyc
    │   │   │   │   │   ├── image_folder.cpython-310.pyc
    │   │   │   │   │   └── vit_dataset.cpython-310.pyc
    │   │   │   │   ├── autoaugment.py
    │   │   │   │   ├── biencoder_dataset_utils.py
    │   │   │   │   ├── data_samplers.py
    │   │   │   │   ├── dataset_utils.py
    │   │   │   │   ├── ict_dataset.py
    │   │   │   │   ├── image_folder.py
    │   │   │   │   ├── multimodal_dataset.py
    │   │   │   │   ├── orqa_wiki_dataset.py
    │   │   │   │   ├── realm_dataset_utils.py
    │   │   │   │   ├── realm_index.py
    │   │   │   │   └── vit_dataset.py
    │   │   │   ├── fp16_deprecated
    │   │   │   │   └── loss_scaler.py
    │   │   │   ├── fused_kernels
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── __pycache__
    │   │   │   │   │   ├── __init__.cpython-310.pyc
    │   │   │   │   │   └── __init__.cpython-38.pyc
    │   │   │   │   ├── compat.h
    │   │   │   │   ├── tests
    │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   └── test_fused_kernels.py
    │   │   │   │   └── type_shim.h
    │   │   │   ├── indexer.py
    │   │   │   ├── model
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── __pycache__
    │   │   │   │   │   ├── __init__.cpython-310.pyc
    │   │   │   │   │   ├── bert_model.cpython-310.pyc
    │   │   │   │   │   ├── enums.cpython-310.pyc
    │   │   │   │   │   ├── fused_bias_gelu.cpython-310.pyc
    │   │   │   │   │   ├── fused_layer_norm.cpython-310.pyc
    │   │   │   │   │   ├── fused_softmax.cpython-310.pyc
    │   │   │   │   │   ├── gpt_model.cpython-310.pyc
    │   │   │   │   │   ├── language_model.cpython-310.pyc
    │   │   │   │   │   ├── module.cpython-310.pyc
    │   │   │   │   │   ├── rms_norm.cpython-310.pyc
    │   │   │   │   │   ├── t5_model.cpython-310.pyc
    │   │   │   │   │   ├── transformer.cpython-310.pyc
    │   │   │   │   │   └── utils.cpython-310.pyc
    │   │   │   │   ├── bert_model.py
    │   │   │   │   ├── biencoder_model.py
    │   │   │   │   ├── classification.py
    │   │   │   │   ├── enums.py
    │   │   │   │   ├── fused_bias_gelu.py
    │   │   │   │   ├── fused_layer_norm.py
    │   │   │   │   ├── fused_softmax.py
    │   │   │   │   ├── gpt_model.py
    │   │   │   │   ├── language_model.py
    │   │   │   │   ├── module.py
    │   │   │   │   ├── multiple_choice.py
    │   │   │   │   ├── realm_model.py
    │   │   │   │   ├── rms_norm.py
    │   │   │   │   ├── t5_model.py
    │   │   │   │   ├── transformer.py
    │   │   │   │   ├── utils.py
    │   │   │   │   └── vision
    │   │   │   │   │   ├── __pycache__
    │   │   │   │   │       └── knn_monitor.cpython-310.pyc
    │   │   │   │   │   ├── classification.py
    │   │   │   │   │   ├── dino.py
    │   │   │   │   │   ├── esvit_swin_backbone.py
    │   │   │   │   │   ├── inpainting.py
    │   │   │   │   │   ├── knn_monitor.py
    │   │   │   │   │   ├── mit_backbone.py
    │   │   │   │   │   ├── swin_backbone.py
    │   │   │   │   │   ├── utils.py
    │   │   │   │   │   └── vit_backbone.py
    │   │   │   └── mpu
    │   │   │   │   └── tests
    │   │   │   │       ├── __init__.py
    │   │   │   │       ├── commons.py
    │   │   │   │       ├── test_cross_entropy.py
    │   │   │   │       ├── test_data.py
    │   │   │   │       ├── test_initialize.py
    │   │   │   │       ├── test_layers.py
    │   │   │   │       └── test_random.py
    │   │   └── training
    │   │   │   ├── __init__.py
    │   │   │   ├── __pycache__
    │   │   │       ├── __init__.cpython-310.pyc
    │   │   │       ├── __init__.cpython-38.pyc
    │   │   │       ├── arguments.cpython-310.pyc
    │   │   │       ├── arguments.cpython-38.pyc
    │   │   │       ├── checkpointing.cpython-310.pyc
    │   │   │       ├── dist_signal_handler.cpython-310.pyc
    │   │   │       ├── dist_signal_handler.cpython-38.pyc
    │   │   │       ├── global_vars.cpython-310.pyc
    │   │   │       ├── global_vars.cpython-38.pyc
    │   │   │       ├── initialize.cpython-310.pyc
    │   │   │       ├── initialize.cpython-38.pyc
    │   │   │       ├── log_handler.cpython-310.pyc
    │   │   │       ├── microbatches.cpython-310.pyc
    │   │   │       ├── microbatches.cpython-38.pyc
    │   │   │       ├── optimizer_param_scheduler.cpython-310.pyc
    │   │   │       ├── theoretical_memory_usage.cpython-310.pyc
    │   │   │       ├── training.cpython-310.pyc
    │   │   │       ├── utils.cpython-310.pyc
    │   │   │       └── yaml_arguments.cpython-310.pyc
    │   │   │   ├── arguments.py
    │   │   │   ├── checkpointing.py
    │   │   │   ├── dist_signal_handler.py
    │   │   │   ├── global_vars.py
    │   │   │   ├── initialize.py
    │   │   │   ├── log_handler.py
    │   │   │   ├── microbatches.py
    │   │   │   ├── optimizer_param_scheduler.py
    │   │   │   ├── theoretical_memory_usage.py
    │   │   │   ├── tokenizer
    │   │   │       ├── __init__.py
    │   │   │       ├── __pycache__
    │   │   │       │   ├── __init__.cpython-310.pyc
    │   │   │       │   ├── __init__.cpython-38.pyc
    │   │   │       │   ├── bert_tokenization.cpython-310.pyc
    │   │   │       │   ├── bert_tokenization.cpython-38.pyc
    │   │   │       │   ├── gpt2_tokenization.cpython-310.pyc
    │   │   │       │   ├── gpt2_tokenization.cpython-38.pyc
    │   │   │       │   ├── tokenizer.cpython-310.pyc
    │   │   │       │   └── tokenizer.cpython-38.pyc
    │   │   │       ├── bert_tokenization.py
    │   │   │       ├── gpt2_tokenization.py
    │   │   │       └── tokenizer.py
    │   │   │   ├── training.py
    │   │   │   ├── utils.py
    │   │   │   └── yaml_arguments.py
    │   ├── pretrain_bert.py
    │   ├── pretrain_gpt.py
    │   ├── pretrain_ict.py
    │   ├── pretrain_retro.py
    │   ├── pretrain_t5.py
    │   ├── pretrain_vision_classify.py
    │   ├── pretrain_vision_dino.py
    │   ├── pretrain_vision_inpaint.py
    │   ├── pretrain_vlm.py
    │   ├── pyproject.toml
    │   ├── report_theoretical_memory.py
    │   ├── setup.py
    │   ├── tasks
    │   │   ├── data_utils.py
    │   │   ├── ensemble_classifier.py
    │   │   ├── eval_utils.py
    │   │   ├── finetune_utils.py
    │   │   ├── glue
    │   │   │   ├── data.py
    │   │   │   ├── finetune.py
    │   │   │   ├── mnli.py
    │   │   │   └── qqp.py
    │   │   ├── main.py
    │   │   ├── msdp
    │   │   │   ├── README.md
    │   │   │   ├── evaluate.py
    │   │   │   ├── main.py
    │   │   │   ├── metrics.py
    │   │   │   ├── preprocessing.py
    │   │   │   └── prompt.py
    │   │   ├── orqa
    │   │   │   ├── README.md
    │   │   │   ├── evaluate_orqa.py
    │   │   │   ├── evaluate_utils.py
    │   │   │   ├── supervised
    │   │   │   │   ├── data.py
    │   │   │   │   ├── eval_utils.py
    │   │   │   │   └── finetune.py
    │   │   │   └── unsupervised
    │   │   │   │   ├── nq.py
    │   │   │   │   ├── qa_utils.py
    │   │   │   │   └── tokenizers.py
    │   │   ├── race
    │   │   │   ├── data.py
    │   │   │   └── finetune.py
    │   │   ├── vision
    │   │   │   ├── classification
    │   │   │   │   ├── classification.py
    │   │   │   │   └── eval_utils.py
    │   │   │   ├── finetune_utils.py
    │   │   │   ├── main.py
    │   │   │   └── segmentation
    │   │   │   │   ├── cityscapes.py
    │   │   │   │   ├── data.py
    │   │   │   │   ├── finetune_segformer.py
    │   │   │   │   ├── finetune_setr.py
    │   │   │   │   ├── metrics.py
    │   │   │   │   ├── seg_heads.py
    │   │   │   │   ├── seg_models.py
    │   │   │   │   ├── transforms.py
    │   │   │   │   └── utils.py
    │   │   └── zeroshot_gpt
    │   │   │   ├── datasets.py
    │   │   │   ├── detokenizer.py
    │   │   │   └── evaluate.py
    │   ├── tests
    │   │   ├── __init__.py
    │   │   ├── functional_tests
    │   │   │   ├── __init__.py
    │   │   │   ├── jet_recipes
    │   │   │   │   ├── MR-bert.yaml
    │   │   │   │   ├── MR-gpt.yaml
    │   │   │   │   ├── MR-multimodal.yaml
    │   │   │   │   ├── MR-t5.yaml
    │   │   │   │   ├── build-pyt.yaml
    │   │   │   │   ├── local-generator.py
    │   │   │   │   ├── monthly-t5.yaml
    │   │   │   │   ├── nightly-bert.yaml
    │   │   │   │   ├── nightly-gpt.yaml
    │   │   │   │   └── weekly-gpt.yaml
    │   │   │   ├── python_test_utils
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── check_slurm_job_completion.py
    │   │   │   │   ├── common.py
    │   │   │   │   ├── get_test_results_from_tensorboard_logs.py
    │   │   │   │   ├── jet_test_pipeline.py
    │   │   │   │   ├── multitest_ci_pipeline.py
    │   │   │   │   ├── test_ci_pipeline.py
    │   │   │   │   ├── test_fp8_ci_pipeline.py
    │   │   │   │   └── test_resume_checkpoint_pipeline.py
    │   │   │   ├── shell_test_utils
    │   │   │   │   ├── jobwait.sh
    │   │   │   │   ├── run_selene_test_launcher_script.sh
    │   │   │   │   └── run_selene_test_resume_checkpoint_launcher_script.sh
    │   │   │   ├── test_results
    │   │   │   │   ├── bert
    │   │   │   │   │   ├── bert_tp1_pp2_1nodes_50steps.json
    │   │   │   │   │   ├── bert_tp1_pp2_1nodes_50steps_core_enabled.json
    │   │   │   │   │   ├── bert_tp1_pp2_1nodes_50steps_core_enabled_rope_embeddings.json
    │   │   │   │   │   ├── bert_tp1_pp2_1nodes_50steps_core_enabled_sequence_parallel.json
    │   │   │   │   │   ├── bert_tp1_pp4_1nodes_50steps.json
    │   │   │   │   │   ├── bert_tp1_pp4_interleaved_1nodes_50steps.json
    │   │   │   │   │   ├── bert_tp1_pp4_interleaved_1nodes_50steps_core_enabled.json
    │   │   │   │   │   ├── bert_tp2_pp2_1nodes_50steps.json
    │   │   │   │   │   ├── bert_tp2_pp2_1nodes_50steps_core_enabled.json
    │   │   │   │   │   ├── bert_tp2_pp2_1nodes_50steps_core_enabled_local_spec.json
    │   │   │   │   │   ├── bert_tp4_pp1_1nodes_50steps.json
    │   │   │   │   │   └── bert_tp4_pp1_1nodes_50steps_core_enabled.json
    │   │   │   │   ├── gpt3
    │   │   │   │   │   ├── gpt3_tp1_pp1_1nodes_50steps_dist_optimizer.json
    │   │   │   │   │   ├── gpt3_tp1_pp1_1nodes_50steps_dist_optimizer_overlap_grad_reduce.json
    │   │   │   │   │   ├── gpt3_tp1_pp1_1nodes_50steps_dist_optimizer_overlap_grad_reduce_param_gather.json
    │   │   │   │   │   ├── gpt3_tp1_pp1_1nodes_50steps_overlap_grad_reduce.json
    │   │   │   │   │   ├── gpt3_tp1_pp2_1nodes_50steps.json
    │   │   │   │   │   ├── gpt3_tp1_pp2_1nodes_50steps_core_enabled.json
    │   │   │   │   │   ├── gpt3_tp1_pp2_1nodes_50steps_core_enabled_rope_embeddings.json
    │   │   │   │   │   ├── gpt3_tp1_pp4_1nodes_50steps.json
    │   │   │   │   │   ├── gpt3_tp1_pp4_1nodes_50steps_core_enabled.json
    │   │   │   │   │   ├── gpt3_tp1_pp4_1nodes_50steps_core_enabled_disable_bias_linear.json
    │   │   │   │   │   ├── gpt3_tp1_pp4_1nodes_50steps_core_enabled_sequence_parallel.json
    │   │   │   │   │   ├── gpt3_tp1_pp4_1nodes_50steps_core_enabled_swiglu.json
    │   │   │   │   │   ├── gpt3_tp1_pp4_1nodes_50steps_core_enabled_untie_embeddings_and_outputs.json
    │   │   │   │   │   ├── gpt3_tp1_pp4_1nodes_50steps_overlap_grad_reduce.json
    │   │   │   │   │   ├── gpt3_tp1_pp4_interleaved_1nodes_50steps.json
    │   │   │   │   │   ├── gpt3_tp1_pp4_interleaved_1nodes_50steps_core_enabled.json
    │   │   │   │   │   ├── gpt3_tp1_pp4_interleaved_1nodes_50steps_dist_optimizer_overlap_grad_reduce.json
    │   │   │   │   │   ├── gpt3_tp1_pp4_interleaved_1nodes_50steps_dist_optimizer_overlap_grad_reduce_param_gather.json
    │   │   │   │   │   ├── gpt3_tp1_pp4_interleaved_1nodes_50steps_overlap_grad_reduce.json
    │   │   │   │   │   ├── gpt3_tp2_pp1_1nodes_50steps_core_enabled_context_parallelism_cp2.json
    │   │   │   │   │   ├── gpt3_tp2_pp1_1nodes_50steps_core_enabled_te_8experts2parallel.json
    │   │   │   │   │   ├── gpt3_tp2_pp1_1nodes_50steps_core_enabled_te_8experts2parallel_groupedGEMM.json
    │   │   │   │   │   ├── gpt3_tp2_pp1_1nodes_50steps_core_enabled_te_8experts2parallel_top2router.json
    │   │   │   │   │   ├── gpt3_tp2_pp2_1nodes_50steps.json
    │   │   │   │   │   ├── gpt3_tp2_pp2_1nodes_50steps_4experts.json
    │   │   │   │   │   ├── gpt3_tp2_pp2_1nodes_50steps_core_enabled.json
    │   │   │   │   │   ├── gpt3_tp2_pp2_1nodes_50steps_core_enabled_context_parallelism_cp2.json
    │   │   │   │   │   ├── gpt3_tp2_pp2_1nodes_50steps_core_enabled_te_2experts.json
    │   │   │   │   │   ├── gpt3_tp2_pp2_1nodes_50steps_core_enabled_te_4experts2parallel.json
    │   │   │   │   │   ├── gpt3_tp2_pp2_1nodes_50steps_overlap_grad_reduce.json
    │   │   │   │   │   ├── gpt3_tp2_pp2_1nodes_50steps_te_enabled.json
    │   │   │   │   │   ├── gpt3_tp4_pp1_1nodes_50steps.json
    │   │   │   │   │   ├── gpt3_tp4_pp1_1nodes_50steps_core_enabled.json
    │   │   │   │   │   ├── gpt3_tp4_pp1_1nodes_50steps_dist_optimizer_overlap_grad_reduce.json
    │   │   │   │   │   ├── gpt3_tp4_pp1_1nodes_50steps_dist_optimizer_overlap_grad_reduce_param_gather.json
    │   │   │   │   │   └── gpt3_tp4_pp1_1nodes_50steps_overlap_grad_reduce.json
    │   │   │   │   ├── jet
    │   │   │   │   │   ├── bert-345m-merge-request-dgx-a100-1n8g-mcore-tp2-pp2-local-spec.json
    │   │   │   │   │   ├── bert-345m-merge-request-dgx-a100-1n8g-mcore-tp2-pp2.json
    │   │   │   │   │   ├── bert-345m-merge-request-dgx-a100-1n8g-tp1-pp4-vp2.json
    │   │   │   │   │   ├── bert-345m-merge-request-dgx-a100-1n8g-tp2-pp2.json
    │   │   │   │   │   ├── bert-345m-merge-request-resume-dgx-a100-1n8g-tp1-pp2.json
    │   │   │   │   │   ├── bert-345m-nightly-dgx-a100-1n8g-mcore-tp1-pp2.json
    │   │   │   │   │   ├── bert-345m-nightly-dgx-a100-1n8g-mcore-tp1-pp4-vp2.json
    │   │   │   │   │   ├── bert-345m-nightly-dgx-a100-1n8g-mcore-tp4-pp1.json
    │   │   │   │   │   ├── bert-345m-nightly-dgx-a100-1n8g-tp1-pp2.json
    │   │   │   │   │   ├── bert-345m-nightly-dgx-a100-1n8g-tp4-pp1.json
    │   │   │   │   │   ├── dgx_h100
    │   │   │   │   │   │   ├── bert_345m_mcore-pyt_merge-request-resume_bf16_nodes-1_gpus-8_bs-128_steps-100_tp-1_pp-2_mcore-false_te-false.json
    │   │   │   │   │   │   ├── bert_345m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-128_steps-50_tp-1_pp-4_mcore-false_te-false_vp-2.json
    │   │   │   │   │   │   ├── bert_345m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-128_steps-50_tp-2_pp-2_args-local-spec_mcore-true_te-false.json
    │   │   │   │   │   │   ├── bert_345m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-128_steps-50_tp-2_pp-2_mcore-false_te-false.json
    │   │   │   │   │   │   ├── bert_345m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-128_steps-50_tp-2_pp-2_mcore-true_te-false.json
    │   │   │   │   │   │   ├── gpt3_345m_mcore-pyt_merge-request-resume_bf16_nodes-1_gpus-8_bs-32_steps-100_tp-1_pp-2_mcore-false_te-false.json
    │   │   │   │   │   │   ├── gpt3_345m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-1_pp-1_args--recompute-granularity-full-recompute-method-uniform-recompute-num-layers-1-_mcore-true_te-false.json
    │   │   │   │   │   │   ├── gpt3_345m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-1_pp-1_args-dist-optimizer-no-mmap-bin-files_mcore-true_te-false.json
    │   │   │   │   │   │   ├── gpt3_345m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-1_pp-1_args-dist-optimizer_mcore-true_te-false.json
    │   │   │   │   │   │   ├── gpt3_345m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-1_pp-1_args-uniform-full-recompute_mcore-true_te-false.json
    │   │   │   │   │   │   ├── gpt3_345m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-1_pp-2_args--position-embedding-type-rope-_mcore-true_te-false.json
    │   │   │   │   │   │   ├── gpt3_345m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-1_pp-2_args-rope-embeddings-interleaved-no-fusion_mcore-true_te-false.json
    │   │   │   │   │   │   ├── gpt3_345m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-1_pp-2_args-rope-embeddings_mcore-true_te-false.json
    │   │   │   │   │   │   ├── gpt3_345m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-1_pp-4_args--disable-bias-linear_mcore-true_te-false.json
    │   │   │   │   │   │   ├── gpt3_345m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-1_pp-4_args--sequence-parallel_mcore-true_te-false.json
    │   │   │   │   │   │   ├── gpt3_345m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-1_pp-4_args--swiglu_mcore-true_te-false.json
    │   │   │   │   │   │   ├── gpt3_345m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-1_pp-4_args--untie-embeddings-and-output-weights_mcore-true_te-false.json
    │   │   │   │   │   │   ├── gpt3_345m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-1_pp-4_args-disable-bias-linear_mcore-true_te-false.json
    │   │   │   │   │   │   ├── gpt3_345m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-1_pp-4_args-dist-optimizer-overlap-grad-reduce-param-gather_mcore-true_te-false_vp-1.json
    │   │   │   │   │   │   ├── gpt3_345m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-1_pp-4_args-dist-optimizer-overlap-grad-reduce-untied_mcore-true_te-false_vp-1.json
    │   │   │   │   │   │   ├── gpt3_345m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-1_pp-4_args-dist-optimizer-overlap-grad-reduce_mcore-true_te-false_vp-1.json
    │   │   │   │   │   │   ├── gpt3_345m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-1_pp-4_args-sequence-parallel_mcore-true_te-false.json
    │   │   │   │   │   │   ├── gpt3_345m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-1_pp-4_args-swiglu_mcore-true_te-false.json
    │   │   │   │   │   │   ├── gpt3_345m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-1_pp-4_args-untie-embeddings-and-outputs_mcore-true_te-false.json
    │   │   │   │   │   │   ├── gpt3_345m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-1_pp-4_mcore-false_te-false_vp-1.json
    │   │   │   │   │   │   ├── gpt3_345m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-1_pp-4_mcore-true_te-false_vp-1.json
    │   │   │   │   │   │   ├── gpt3_345m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-2_pp-1_args--sequence-parallel-num-experts-8-expert-model-parallel-size-2-_mcore-true_te-false.json
    │   │   │   │   │   │   ├── gpt3_345m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-2_pp-1_args-te-8experts2parallel-dist-optimizer_mcore-true_te-false.json
    │   │   │   │   │   │   ├── gpt3_345m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-2_pp-1_args-te-8experts2parallel-groupedgemm_mcore-true_te-false.json
    │   │   │   │   │   │   ├── gpt3_345m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-2_pp-1_args-te-8experts2parallel-top2router_mcore-true_te-false.json
    │   │   │   │   │   │   ├── gpt3_345m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-2_pp-1_args-te-8experts2parallel_mcore-true_te-false.json
    │   │   │   │   │   │   ├── gpt3_345m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-2_pp-2_args-no-mmap-bin-files_mcore-true_te-false.json
    │   │   │   │   │   │   ├── gpt3_345m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-2_pp-2_mcore-false_te-false.json
    │   │   │   │   │   │   ├── gpt3_345m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-2_pp-2_mcore-false_te-true.json
    │   │   │   │   │   │   ├── gpt3_345m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-2_pp-2_mcore-true_te-false.json
    │   │   │   │   │   │   ├── gpt3_345m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-4_pp-1_args-dist-optimizer-overlap-grad-reduce-param-gather_mcore-true_te-false.json
    │   │   │   │   │   │   ├── gpt3_345m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-4_pp-1_args-dist-optimizer-overlap-grad-reduce_mcore-true_te-false.json
    │   │   │   │   │   │   ├── gpt3_345m_mcore-pyt_nightly_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-1_pp-1_args--overlap-grad-reduce_mcore-false_te-false.json
    │   │   │   │   │   │   ├── gpt3_345m_mcore-pyt_nightly_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-1_pp-1_args--use-distributed-optimizer-overlap-grad-reduce-_mcore-false_te-false.json
    │   │   │   │   │   │   ├── gpt3_345m_mcore-pyt_nightly_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-1_pp-2_mcore-false_te-false.json
    │   │   │   │   │   │   ├── gpt3_345m_mcore-pyt_nightly_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-1_pp-2_mcore-true_te-false.json
    │   │   │   │   │   │   ├── gpt3_345m_mcore-pyt_nightly_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-1_pp-4_args--overlap-grad-reduce_mcore-false_te-false.json
    │   │   │   │   │   │   ├── gpt3_345m_mcore-pyt_nightly_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-1_pp-4_args--overlap-grad-reduce_mcore-false_te-false_vp-1.json
    │   │   │   │   │   │   ├── gpt3_345m_mcore-pyt_nightly_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-1_pp-4_mcore-false_te-false.json
    │   │   │   │   │   │   ├── gpt3_345m_mcore-pyt_nightly_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-1_pp-4_mcore-true_te-false.json
    │   │   │   │   │   │   ├── gpt3_345m_mcore-pyt_nightly_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-2_pp-2_args--num-experts-2-_mcore-true_te-false.json
    │   │   │   │   │   │   ├── gpt3_345m_mcore-pyt_nightly_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-2_pp-2_args--num-experts-4-_mcore-false_te-false.json
    │   │   │   │   │   │   ├── gpt3_345m_mcore-pyt_nightly_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-2_pp-2_args--overlap-grad-reduce_mcore-false_te-false.json
    │   │   │   │   │   │   ├── gpt3_345m_mcore-pyt_nightly_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-2_pp-2_args--sequence-parallel-num-experts-4-expert-model-parallel-size-2-_mcore-true_te-false.json
    │   │   │   │   │   │   ├── gpt3_345m_mcore-pyt_nightly_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-4_pp-1_args--overlap-grad-reduce_mcore-false_te-false.json
    │   │   │   │   │   │   ├── gpt3_345m_mcore-pyt_nightly_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-4_pp-1_mcore-false_te-false.json
    │   │   │   │   │   │   ├── gpt3_345m_mcore-pyt_nightly_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-4_pp-1_mcore-true_te-false.json
    │   │   │   │   │   │   └── t5_220m_mcore-pyt_merge-request_bf16_nodes-1_gpus-8_bs-32_steps-100_tp-1_pp-1_mcore-true_te-true_vp-1.json
    │   │   │   │   │   ├── gpt3-345m-merge-request-dgx-a100-1n8g-mcore-tp1-pp1-dist-optimizer-no-mmap-bin-files.json
    │   │   │   │   │   ├── gpt3-345m-merge-request-dgx-a100-1n8g-mcore-tp1-pp1-dist-optimizer.json
    │   │   │   │   │   ├── gpt3-345m-merge-request-dgx-a100-1n8g-mcore-tp1-pp1-uniform-full-recompute.json
    │   │   │   │   │   ├── gpt3-345m-merge-request-dgx-a100-1n8g-mcore-tp1-pp2-rope-embeddings-interleaved-no-fusion.json
    │   │   │   │   │   ├── gpt3-345m-merge-request-dgx-a100-1n8g-mcore-tp1-pp2-rope-embeddings.json
    │   │   │   │   │   ├── gpt3-345m-merge-request-dgx-a100-1n8g-mcore-tp1-pp4-disable-bias-linear.json
    │   │   │   │   │   ├── gpt3-345m-merge-request-dgx-a100-1n8g-mcore-tp1-pp4-sequence-parallel.json
    │   │   │   │   │   ├── gpt3-345m-merge-request-dgx-a100-1n8g-mcore-tp1-pp4-swiglu.json
    │   │   │   │   │   ├── gpt3-345m-merge-request-dgx-a100-1n8g-mcore-tp1-pp4-untie-embeddings-and-outputs.json
    │   │   │   │   │   ├── gpt3-345m-merge-request-dgx-a100-1n8g-mcore-tp1-pp4-vp1-decoupled-lr.json
    │   │   │   │   │   ├── gpt3-345m-merge-request-dgx-a100-1n8g-mcore-tp1-pp4-vp1-dist-optimizer-overlap-grad-reduce-param-gather.json
    │   │   │   │   │   ├── gpt3-345m-merge-request-dgx-a100-1n8g-mcore-tp1-pp4-vp1-dist-optimizer-overlap-grad-reduce-untied.json
    │   │   │   │   │   ├── gpt3-345m-merge-request-dgx-a100-1n8g-mcore-tp1-pp4-vp1-dist-optimizer-overlap-grad-reduce.json
    │   │   │   │   │   ├── gpt3-345m-merge-request-dgx-a100-1n8g-mcore-tp1-pp4-vp1.json
    │   │   │   │   │   ├── gpt3-345m-merge-request-dgx-a100-1n8g-mcore-tp2-pp1-te-8experts2parallel-dist-optimizer.json
    │   │   │   │   │   ├── gpt3-345m-merge-request-dgx-a100-1n8g-mcore-tp2-pp1-te-8experts2parallel-groupedgemm.json
    │   │   │   │   │   ├── gpt3-345m-merge-request-dgx-a100-1n8g-mcore-tp2-pp1-te-8experts2parallel-overlap-grad-reduce-param-gather-groupedgemm.json
    │   │   │   │   │   ├── gpt3-345m-merge-request-dgx-a100-1n8g-mcore-tp2-pp1-te-8experts2parallel-top2router.json
    │   │   │   │   │   ├── gpt3-345m-merge-request-dgx-a100-1n8g-mcore-tp2-pp1-te-8experts2parallel.json
    │   │   │   │   │   ├── gpt3-345m-merge-request-dgx-a100-1n8g-mcore-tp2-pp2-no-create-attention-mask-in-dataloader.json
    │   │   │   │   │   ├── gpt3-345m-merge-request-dgx-a100-1n8g-mcore-tp2-pp2-no-mmap-bin-files.json
    │   │   │   │   │   ├── gpt3-345m-merge-request-dgx-a100-1n8g-mcore-tp2-pp2.json
    │   │   │   │   │   ├── gpt3-345m-merge-request-dgx-a100-1n8g-mcore-tp4-pp1-dist-optimizer-overlap-grad-reduce-param-gather.json
    │   │   │   │   │   ├── gpt3-345m-merge-request-dgx-a100-1n8g-mcore-tp4-pp1-dist-optimizer-overlap-grad-reduce.json
    │   │   │   │   │   ├── gpt3-345m-merge-request-dgx-a100-1n8g-mcore-tp4-pp1.json
    │   │   │   │   │   ├── gpt3-345m-merge-request-dgx-a100-1n8g-te-tp2-pp2.json
    │   │   │   │   │   ├── gpt3-345m-merge-request-dgx-a100-1n8g-tp1-pp4-vp1.json
    │   │   │   │   │   ├── gpt3-345m-merge-request-dgx-a100-1n8g-tp2-pp2.json
    │   │   │   │   │   ├── gpt3-345m-merge-request-resume-dgx-a100-1n8g-tp1-pp2.json
    │   │   │   │   │   ├── gpt3-345m-nightly-dgx-a100-1n8g-mcore-tp1-pp1-dist-optimizer-overlap-grad-reduce-param-gather.json
    │   │   │   │   │   ├── gpt3-345m-nightly-dgx-a100-1n8g-mcore-tp1-pp2.json
    │   │   │   │   │   ├── gpt3-345m-nightly-dgx-a100-1n8g-mcore-tp1-pp4.json
    │   │   │   │   │   ├── gpt3-345m-nightly-dgx-a100-1n8g-mcore-tp2-pp2-te-2experts.json
    │   │   │   │   │   ├── gpt3-345m-nightly-dgx-a100-1n8g-mcore-tp2-pp2-te-4experts2parallel.json
    │   │   │   │   │   ├── gpt3-345m-nightly-dgx-a100-1n8g-mcore-tp4-pp1.json
    │   │   │   │   │   ├── gpt3-345m-nightly-dgx-a100-1n8g-tp1-pp1-dist-optimizer-overlap-grad-reduce.json
    │   │   │   │   │   ├── gpt3-345m-nightly-dgx-a100-1n8g-tp1-pp1-overlap-grad-reduce.json
    │   │   │   │   │   ├── gpt3-345m-nightly-dgx-a100-1n8g-tp1-pp2.json
    │   │   │   │   │   ├── gpt3-345m-nightly-dgx-a100-1n8g-tp1-pp4-overlap-grad-reduce.json
    │   │   │   │   │   ├── gpt3-345m-nightly-dgx-a100-1n8g-tp1-pp4-vp1-overlap-grad-reduce.json
    │   │   │   │   │   ├── gpt3-345m-nightly-dgx-a100-1n8g-tp1-pp4.json
    │   │   │   │   │   ├── gpt3-345m-nightly-dgx-a100-1n8g-tp2-pp2-4experts.json
    │   │   │   │   │   ├── gpt3-345m-nightly-dgx-a100-1n8g-tp2-pp2-overlap-grad-reduce.json
    │   │   │   │   │   ├── gpt3-345m-nightly-dgx-a100-1n8g-tp4-pp1-overlap-grad-reduce.json
    │   │   │   │   │   ├── gpt3-345m-nightly-dgx-a100-1n8g-tp4-pp1.json
    │   │   │   │   │   ├── gpt3-345m-weekly-dgx-h100-1n8g-mcore-tp1-pp1-bf16-baseline.json
    │   │   │   │   │   ├── gpt3_345m_mcore-pyt_func-train_bf16_nodes-1_gpus-8_bs-32_steps-50_tp-1_pp-4_args--sequence-parallel.json
    │   │   │   │   │   ├── multimodal-llava-merge-request-dgx-a100-1n8g-mcore-te-tp1-pp1.json
    │   │   │   │   │   └── t5-220m-merge-request-dgx-a100-1n8g-mcore-te-tp1-pp1-vp1.json
    │   │   │   │   ├── retro
    │   │   │   │   │   └── retro_tp1_pp1_1nodes_50steps_core_enabled.json
    │   │   │   │   └── t5
    │   │   │   │   │   └── t5_tp1_pp1_interleaved_1nodes_100steps_te_enabled_core_enabled.json
    │   │   │   └── test_scripts
    │   │   │   │   ├── bert
    │   │   │   │       ├── pretrain_bert_distributed_test.sh
    │   │   │   │       ├── sbatch_bert_distributed_resume_checkpoint_test.sh
    │   │   │   │       └── sbatch_bert_distributed_test.sh
    │   │   │   │   ├── gpt3
    │   │   │   │       ├── pretrain_gpt3_distributed_test.sh
    │   │   │   │       ├── sbatch_gpt3_distributed_resume_checkpoint_test.sh
    │   │   │   │       └── sbatch_gpt3_distributed_test.sh
    │   │   │   │   ├── multimodal
    │   │   │   │       └── pretrain_llava_distributed_test.sh
    │   │   │   │   ├── retro
    │   │   │   │       ├── pretrain_retro_distributed_test.sh
    │   │   │   │       ├── sbatch_retro_distributed_resume_checkpoint_test.sh
    │   │   │   │       └── sbatch_retro_distributed_test.sh
    │   │   │   │   └── t5
    │   │   │   │       ├── pretrain_t5_distributed_test.sh
    │   │   │   │       ├── sbatch_t5_distributed_resume_checkpoint_test.sh
    │   │   │   │       └── sbatch_t5_distributed_test.sh
    │   │   └── unit_tests
    │   │   │   ├── __init__.py
    │   │   │   ├── data
    │   │   │       ├── __init__.py
    │   │   │       ├── test_builder.py
    │   │   │       ├── test_mock_gpt_dataset.py
    │   │   │       ├── test_multimodal_dataset.py
    │   │   │       ├── test_preprocess_data.py
    │   │   │       └── test_preprocess_mmdata.py
    │   │   │   ├── dist_checkpointing
    │   │   │       ├── __init__.py
    │   │   │       ├── conftest.py
    │   │   │       ├── models
    │   │   │       │   ├── __init__.py
    │   │   │       │   ├── common.py
    │   │   │       │   ├── test_bert_model.py
    │   │   │       │   ├── test_gpt_model.py
    │   │   │       │   ├── test_mlp_glu.py
    │   │   │       │   ├── test_retro_model.py
    │   │   │       │   ├── test_sequential_mlp.py
    │   │   │       │   └── test_t5_model.py
    │   │   │       ├── test_mapping.py
    │   │   │       ├── test_optimizer.py
    │   │   │       └── test_serialization.py
    │   │   │   ├── fusions
    │   │   │       └── test_torch_softmax.py
    │   │   │   ├── models
    │   │   │       ├── __init__.py
    │   │   │       ├── test_base_embedding.py
    │   │   │       ├── test_bert_model.py
    │   │   │       ├── test_clip_vit_model.py
    │   │   │       ├── test_gpt_model.py
    │   │   │       ├── test_llava_model.py
    │   │   │       ├── test_multimodal_projector.py
    │   │   │       └── test_t5_model.py
    │   │   │   ├── pipeline_parallel
    │   │   │       ├── __init__.py
    │   │   │       └── test_schedules.py
    │   │   │   ├── tensor_parallel
    │   │   │       ├── test_cross_entropy.py
    │   │   │       ├── test_data.py
    │   │   │       ├── test_initialization.py
    │   │   │       ├── test_mappings.py
    │   │   │       ├── test_random.py
    │   │   │       └── test_tensor_parallel_utils.py
    │   │   │   ├── test_basic.py
    │   │   │   ├── test_imports.py
    │   │   │   ├── test_parallel_state.py
    │   │   │   ├── test_training.py
    │   │   │   ├── test_utilities.py
    │   │   │   ├── test_utils.py
    │   │   │   └── transformer
    │   │   │       ├── __init__.py
    │   │   │       ├── moe
    │   │   │           ├── __init__.py
    │   │   │           ├── test_grouped_mlp.py
    │   │   │           ├── test_routers.py
    │   │   │           ├── test_sequential_mlp.py
    │   │   │           └── test_token_dispatcher.py
    │   │   │       ├── test_attention.py
    │   │   │       ├── test_attention_packed_seq.py
    │   │   │       ├── test_core_attention.py
    │   │   │       ├── test_mlp.py
    │   │   │       ├── test_module.py
    │   │   │       ├── test_retro_attention.py
    │   │   │       ├── test_spec_customization.py
    │   │   │       ├── test_transformer_block.py
    │   │   │       └── test_transformer_layer.py
    │   └── tools
    │   │   ├── autoformat.sh
    │   │   ├── bert_embedding
    │   │       ├── __init__.py
    │   │       ├── dataset.py
    │   │       ├── embed.py
    │   │       ├── external_libs.py
    │   │       └── huggingface.py
    │   │   ├── checkpoint
    │   │       ├── convert.py
    │   │       ├── loader_llama2.py
    │   │       ├── loader_llama2_hf.py
    │   │       ├── loader_mcore.py
    │   │       ├── loader_megatron.py
    │   │       ├── saver_mcore.py
    │   │       ├── saver_megatron.py
    │   │       ├── setter.py
    │   │       └── utils.py
    │   │   ├── linter.py
    │   │   ├── merge_datasets.py
    │   │   ├── openwebtext
    │   │       ├── README.md
    │   │       ├── add_id.py
    │   │       ├── blacklist_urls.py
    │   │       ├── cleanup_dataset.py
    │   │       ├── cleanup_fix_dataset.py
    │   │       ├── filter_ngrams.py
    │   │       ├── find_duplicates.py
    │   │       ├── group_duplicate_url.py
    │   │       ├── merge_jsons.py
    │   │       └── remove_group_duplicates.py
    │   │   ├── preprocess_data.py
    │   │   ├── preprocess_data_nmt.py
    │   │   ├── preprocess_mmdata.py
    │   │   ├── retro
    │   │       ├── README.md
    │   │       ├── build_db.md
    │   │       ├── cli
    │   │       │   ├── __init__.py
    │   │       │   ├── __main__.py
    │   │       │   └── cli.py
    │   │       ├── config_utils.py
    │   │       ├── docker
    │   │       │   └── Dockerfile
    │   │       ├── preprocess_data.py
    │   │       ├── sft
    │   │       │   ├── README.md
    │   │       │   ├── dataset_conv.py
    │   │       │   ├── open_inst.sh
    │   │       │   ├── sft_retro.py
    │   │       │   └── sft_retro_lm.sh
    │   │       └── text_generation
    │   │       │   ├── evaluate.py
    │   │       │   ├── metrics.py
    │   │       │   ├── retro_api.py
    │   │       │   ├── retro_generate.sh
    │   │       │   ├── retro_generation.py
    │   │       │   └── retro_text_generation.py
    │   │   ├── run_text_generation_server.py
    │   │   └── text_generation_cli.py
    ├── README.md
    ├── README_zh-CN.md
    ├── examples
    │   ├── .DS_Store
    │   └── idefics2
    │   │   ├── .DS_Store
    │   │   ├── pretrain_megatron_idefics2.py
    │   │   ├── run_cot_cmd.sh
    │   │   └── train_llava_instruct_webdataset_cot.sh
    ├── megatron_patch
    │   ├── .DS_Store
    │   ├── __init__.py
    │   ├── arguments.py
    │   ├── data
    │   │   ├── .DS_Store
    │   │   ├── __init__.py
    │   │   ├── bloom.py
    │   │   ├── data_sampler.py
    │   │   ├── glm.py
    │   │   ├── idefics2
    │   │   │   ├── constants.py
    │   │   │   ├── idefics2_image_processor.py
    │   │   │   └── mm_pretrain_dataset.py
    │   │   ├── llama.py
    │   │   ├── llava
    │   │   │   ├── constants.py
    │   │   │   ├── conversation.py
    │   │   │   ├── cvcuda_image_processing_clip.py
    │   │   │   ├── mm_pretrain_dataset.py
    │   │   │   └── mm_utils.py
    │   │   ├── qwen_vl.py
    │   │   ├── starcoder.py
    │   │   └── utils.py
    │   ├── finetune_utils.py
    │   ├── generation
    │   │   ├── api.py
    │   │   ├── generation.py
    │   │   ├── gpt_predictor.py
    │   │   ├── megatron.md
    │   │   └── tokenization.py
    │   ├── initialize.py
    │   ├── lm_evaluate.py
    │   ├── model
    │   │   ├── __init__.py
    │   │   ├── baichuan
    │   │   │   ├── __init__.py
    │   │   │   ├── gpt_model.py
    │   │   │   ├── language_model.py
    │   │   │   └── transformer.py
    │   │   ├── baichuan2
    │   │   │   ├── gpt_model.py
    │   │   │   ├── language_model.py
    │   │   │   ├── layers.py
    │   │   │   └── transformer.py
    │   │   ├── bloom
    │   │   │   ├── __init__.py
    │   │   │   ├── gpt_model.py
    │   │   │   ├── language_model.py
    │   │   │   ├── layers.py
    │   │   │   ├── positional_embeddings.py
    │   │   │   └── transformer.py
    │   │   ├── chatglm
    │   │   │   ├── __init__.py
    │   │   │   ├── gpt_model.py
    │   │   │   ├── language_model.py
    │   │   │   ├── positional_embeddings.py
    │   │   │   └── transformer.py
    │   │   ├── falcon
    │   │   │   ├── __init__.py
    │   │   │   ├── gpt_model.py
    │   │   │   ├── language_model.py
    │   │   │   └── transformer.py
    │   │   ├── falcon40b
    │   │   │   ├── __init__.py
    │   │   │   ├── gpt_model.py
    │   │   │   ├── language_model.py
    │   │   │   └── transformer.py
    │   │   ├── galactica
    │   │   │   ├── __init__.py
    │   │   │   ├── gpt_model.py
    │   │   │   ├── language_model.py
    │   │   │   └── transformer.py
    │   │   ├── glm130b
    │   │   │   ├── __init__.py
    │   │   │   ├── gpt_model.py
    │   │   │   ├── language_model.py
    │   │   │   └── transformer.py
    │   │   ├── idefics2
    │   │   │   ├── __init__.py
    │   │   │   ├── get_idefics2vit_layer_spec.py
    │   │   │   ├── gpt_model.py
    │   │   │   ├── idefics_vision_tower.py
    │   │   │   ├── idefics_vlm_model.py
    │   │   │   ├── language_model.py
    │   │   │   ├── language_model_llama3.py
    │   │   │   ├── perceiver_transformer.py
    │   │   │   ├── rotary_pos_embedding.py
    │   │   │   ├── rotary_pos_embedding_llama3.py
    │   │   │   └── transformer.py
    │   │   ├── llama
    │   │   │   ├── __init__.py
    │   │   │   ├── gpt_model.py
    │   │   │   ├── language_model.py
    │   │   │   ├── positional_embeddings.py
    │   │   │   └── transformer.py
    │   │   ├── llama2
    │   │   │   ├── __init__.py
    │   │   │   ├── gpt_model.py
    │   │   │   ├── language_model.py
    │   │   │   ├── rotary_pos_embedding.py
    │   │   │   └── transformer.py
    │   │   ├── llama3
    │   │   │   ├── gpt_model.py
    │   │   │   ├── language_model.py
    │   │   │   └── transformer.py
    │   │   ├── llava
    │   │   │   ├── __init__.py
    │   │   │   ├── clip_encoder.py
    │   │   │   ├── gpt_model.py
    │   │   │   ├── language_model.py
    │   │   │   ├── mm_projector_builder.py
    │   │   │   ├── rotary_pos_embedding.py
    │   │   │   ├── transformer.py
    │   │   │   └── vlm_model.py
    │   │   ├── mistral
    │   │   │   ├── __init__.py
    │   │   │   ├── gpt_model.py
    │   │   │   ├── language_model.py
    │   │   │   ├── modeling_attn_mask_utils.py
    │   │   │   ├── rotary_pos_embedding.py
    │   │   │   └── transformer.py
    │   │   ├── mixtral
    │   │   │   ├── __init__.py
    │   │   │   ├── layer_specs.py
    │   │   │   ├── model.py
    │   │   │   ├── moe
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── experts.py
    │   │   │   │   ├── grouped_gemm_util.py
    │   │   │   │   ├── moe_layer.py
    │   │   │   │   ├── moe_utils.py
    │   │   │   │   ├── router.py
    │   │   │   │   └── token_dispatcher.py
    │   │   │   ├── transformer
    │   │   │   │   ├── attention.py
    │   │   │   │   └── mlp.py
    │   │   │   └── transformer_config.py
    │   │   ├── qwen
    │   │   │   ├── __init__.py
    │   │   │   ├── gpt_model.py
    │   │   │   ├── language_model.py
    │   │   │   └── transformer.py
    │   │   ├── qwen1_5
    │   │   │   ├── __init__.py
    │   │   │   ├── layer_specs.py
    │   │   │   ├── model.py
    │   │   │   ├── moe
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── experts.py
    │   │   │   │   ├── moe_layer.py
    │   │   │   │   ├── router.py
    │   │   │   │   └── token_dispatcher.py
    │   │   │   └── transformer
    │   │   │   │   ├── attention.py
    │   │   │   │   └── mlp.py
    │   │   ├── qwen1_5_megablocks
    │   │   │   ├── __init__.py
    │   │   │   ├── gpt_model.py
    │   │   │   ├── language_model.py
    │   │   │   ├── rotary_pos_embedding.py
    │   │   │   └── transformer.py
    │   │   ├── qwen_vl
    │   │   │   ├── __init__.py
    │   │   │   ├── gpt_model.py
    │   │   │   ├── language_model.py
    │   │   │   ├── transformer.py
    │   │   │   └── visual.py
    │   │   └── starcoder
    │   │   │   ├── __init__.py
    │   │   │   ├── enums.py
    │   │   │   ├── glu_activations.py
    │   │   │   ├── gpt_model.py
    │   │   │   ├── language_model.py
    │   │   │   └── transformer.py
    │   ├── tokenizer
    │   │   ├── __init__.py
    │   │   ├── icetk_glm130b_tokenizer.py
    │   │   ├── jiebabpe_tokenizer.py
    │   │   ├── tokenization_baichuan.py
    │   │   ├── tokenization_qwen_vl.py
    │   │   └── tokenization_yi.py
    │   └── training.py
    └── toolkits
    │   ├── .DS_Store
    │   ├── model_checkpoints_convertor
    │       ├── .DS_Store
    │       └── idefics2
    │       │   ├── clip_convertor.py
    │       │   ├── idefics2_hf2mg.py
    │       │   ├── idefics2_hf2mg_llama3.py
    │       │   ├── megatron_2hf.sh
    │       │   ├── mg2hf.sh
    │       │   ├── mg2hf_base_idefics_instruct.sh
    │       │   ├── model_convertor.sh
    │       │   └── model_convertor_llama3.sh
    │   └── pretrain_data_preprocessing
    │       ├── .DS_Store
    │       └── move_bulk_data.py
├── method_figure.jpg
├── reformat_data
    ├── .DS_Store
    ├── convert_to_llava_format.sh
    ├── convert_to_llava_format_with_gold_label_diff.py
    ├── convert_to_llava_format_with_pos_cot.py
    ├── convert_to_llava_format_with_pos_diff_equation.py
    ├── diff_utils.py
    └── utils.py
├── requirement.txt
└── sample_data
    ├── .DS_Store
    ├── __init__.py
    ├── gather_gpt4_prompt
        ├── .DS_Store
        ├── completion_utils.py
        ├── get_prompt_phrase.py
        └── room_constant.py
    ├── generate_trajectories.py
    ├── regenerate_init_position.py
    └── utils.py


/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/.DS_Store


--------------------------------------------------------------------------------
/.idea/.gitignore:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/.idea/.gitignore


--------------------------------------------------------------------------------
/.idea/DivScene_release.iml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/.idea/DivScene_release.iml


--------------------------------------------------------------------------------
/.idea/inspectionProfiles/Project_Default.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/.idea/inspectionProfiles/Project_Default.xml


--------------------------------------------------------------------------------
/.idea/inspectionProfiles/profiles_settings.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/.idea/inspectionProfiles/profiles_settings.xml


--------------------------------------------------------------------------------
/.idea/jupyter-settings.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/.idea/jupyter-settings.xml


--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/.idea/misc.xml


--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/.idea/modules.xml


--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/.idea/vcs.xml


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/LICENSE


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/README.md


--------------------------------------------------------------------------------
/agent_inference/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_inference/.DS_Store


--------------------------------------------------------------------------------
/agent_inference/hf_idefics2_gpt4o.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_inference/hf_idefics2_gpt4o.py


--------------------------------------------------------------------------------
/agent_inference/hf_idefics2_metric.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_inference/hf_idefics2_metric.py


--------------------------------------------------------------------------------
/agent_inference/hf_idefics2_online_client.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_inference/hf_idefics2_online_client.py


--------------------------------------------------------------------------------
/agent_inference/hf_idefics2_server.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_inference/hf_idefics2_server.py


--------------------------------------------------------------------------------
/agent_inference/run_client.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_inference/run_client.sh


--------------------------------------------------------------------------------
/agent_inference/run_server.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_inference/run_server.sh


--------------------------------------------------------------------------------
/agent_inference/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_inference/utils.py


--------------------------------------------------------------------------------
/agent_training/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/.DS_Store


--------------------------------------------------------------------------------
/agent_training/LICENSE:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/LICENSE


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/.DS_Store


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/CODEOWNERS:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/CODEOWNERS


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/CONTRIBUTING.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/CONTRIBUTING.md


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/Dockerfile.ci:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/Dockerfile.ci


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/Dockerfile.test:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/Dockerfile.test


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/LICENSE:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/LICENSE


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include megatron/core/requirements.txt
2 | 


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/README.md


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/docs/llama2.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/docs/llama2.md


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/docs/source/api-guide/context_parallel.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/docs/source/api-guide/context_parallel.rst


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/docs/source/api-guide/datasets.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/docs/source/api-guide/datasets.rst


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/docs/source/api-guide/dist_checkpointing.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/docs/source/api-guide/dist_checkpointing.rst


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/docs/source/api-guide/distributed.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/docs/source/api-guide/distributed.rst


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/docs/source/api-guide/fusions.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/docs/source/api-guide/fusions.rst


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/docs/source/api-guide/index.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/docs/source/api-guide/index.rst


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/docs/source/api-guide/models.bert.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/docs/source/api-guide/models.bert.rst


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/docs/source/api-guide/models.gpt.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/docs/source/api-guide/models.gpt.rst


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/docs/source/api-guide/models.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/docs/source/api-guide/models.rst


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/docs/source/api-guide/models.t5.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/docs/source/api-guide/models.t5.rst


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/docs/source/api-guide/moe.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/docs/source/api-guide/moe.rst


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/docs/source/api-guide/pipeline_parallel.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/docs/source/api-guide/pipeline_parallel.rst


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/docs/source/api-guide/tensor_parallel.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/docs/source/api-guide/tensor_parallel.rst


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/docs/source/api-guide/transformer.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/docs/source/api-guide/transformer.rst


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/docs/source/distrib_optimizer.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/docs/source/distrib_optimizer.md


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/docs/source/images/context_parallel/CP_overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/docs/source/images/context_parallel/CP_overview.png


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/docs/source/images/context_parallel/CP_results.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/docs/source/images/context_parallel/CP_results.png


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/docs/source/images/distrib_optimizer/data_flow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/docs/source/images/distrib_optimizer/data_flow.png


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/docs/source/index.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/docs/source/index.rst


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/docs/source/user-guide/index.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/docs/source/user-guide/index.rst


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/examples/bert/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/bert/README.md


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/examples/bert/train_bert_340m_distributed.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/bert/train_bert_340m_distributed.sh


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/examples/detxoify_lm/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/detxoify_lm/README.md


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/examples/detxoify_lm/annotations/preprocess.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/detxoify_lm/annotations/preprocess.sh


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/examples/detxoify_lm/finetune_gpt.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/detxoify_lm/finetune_gpt.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/examples/detxoify_lm/generate-1.3b.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/detxoify_lm/generate-1.3b.sh


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/examples/detxoify_lm/generate_samples_gpt.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/detxoify_lm/generate_samples_gpt.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/examples/detxoify_lm/perspective_api.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/detxoify_lm/perspective_api.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/examples/evaluate_retriever_nq.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/evaluate_retriever_nq.sh


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/examples/evaluate_zeroshot_gpt.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/evaluate_zeroshot_gpt.sh


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/examples/finetune_mnli_distributed.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/finetune_mnli_distributed.sh


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/examples/finetune_race_distributed.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/finetune_race_distributed.sh


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/examples/finetune_retriever_distributed.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/finetune_retriever_distributed.sh


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/examples/gpt3/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/gpt3/README.md


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/examples/gpt3/gpt_config.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/gpt3/gpt_config.yaml


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/examples/gpt3/train_gpt3_175b_distributed.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/gpt3/train_gpt3_175b_distributed.sh


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/examples/inference/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/inference/README.md


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/examples/inference/ptq_trtllm_llama_7b.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/inference/ptq_trtllm_llama_7b.sh


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/examples/inference/ptq_trtllm_nemotron3_8b.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/inference/ptq_trtllm_nemotron3_8b.sh


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/examples/inference/text_generation_ptq.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/inference/text_generation_ptq.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/examples/inference/trtllm_text_generation.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/inference/trtllm_text_generation.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/examples/merge_mp_bert.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/merge_mp_bert.sh


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/examples/msdp/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/msdp/README.md


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/examples/msdp/data_processing.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/msdp/data_processing.sh


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/examples/msdp/eval_knwl_generation.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/msdp/eval_knwl_generation.sh


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/examples/msdp/eval_resp_generation.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/msdp/eval_resp_generation.sh


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/examples/msdp/prep_resp_gen.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/msdp/prep_resp_gen.sh


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/examples/msdp/prompt_knwl_gen.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/msdp/prompt_knwl_gen.sh


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/examples/msdp/prompt_resp_gen.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/msdp/prompt_resp_gen.sh


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/examples/pretrain_bert.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/pretrain_bert.sh


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/examples/pretrain_bert_distributed.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/pretrain_bert_distributed.sh


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/examples/pretrain_bert_distributed_with_mp.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/pretrain_bert_distributed_with_mp.sh


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/examples/pretrain_gpt.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/pretrain_gpt.sh


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/examples/pretrain_gpt3_175B.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/pretrain_gpt3_175B.sh


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/examples/pretrain_gpt_distributed.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/pretrain_gpt_distributed.sh


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/examples/pretrain_gpt_distributed_with_mp.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/pretrain_gpt_distributed_with_mp.sh


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/examples/pretrain_ict.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/pretrain_ict.sh


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/examples/pretrain_t5.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/pretrain_t5.sh


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/examples/pretrain_t5_distributed.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/pretrain_t5_distributed.sh


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/examples/pretrain_t5_distributed_with_mp.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/pretrain_t5_distributed_with_mp.sh


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/examples/pretrain_vision_classify.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/pretrain_vision_classify.sh


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/examples/pretrain_vision_dino.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/pretrain_vision_dino.sh


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/examples/pretrain_vision_inpaint.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/pretrain_vision_inpaint.sh


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/examples/pretrain_vlm.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/pretrain_vlm.sh


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/examples/retro/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/retro/README.md


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/examples/retro/preprocess_data.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/retro/preprocess_data.sh


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/examples/retro/train_retro_2b_distributed.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/retro/train_retro_2b_distributed.sh


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/examples/run_simple_mcore_train_loop.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/run_simple_mcore_train_loop.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/examples/run_text_generation_server_345M.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/run_text_generation_server_345M.sh


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/examples/sc21/CONFIG.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/sc21/CONFIG.sh


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/examples/sc21/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/sc21/README.md


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/examples/sc21/SBATCH.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/sc21/SBATCH.sh


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/examples/sc21/SRUN.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/sc21/SRUN.sh


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/examples/sc21/run_figure_11.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/sc21/run_figure_11.sh


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/examples/sc21/run_figure_12.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/sc21/run_figure_12.sh


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/examples/sc21/run_figure_13.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/sc21/run_figure_13.sh


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/examples/sc21/run_figure_14.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/sc21/run_figure_14.sh


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/examples/sc21/run_figure_15.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/sc21/run_figure_15.sh


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/examples/sc21/run_figure_16.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/sc21/run_figure_16.sh


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/examples/sc21/run_figure_17.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/sc21/run_figure_17.sh


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/examples/sc21/run_figure_18.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/sc21/run_figure_18.sh


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/examples/sc21/run_table_1.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/sc21/run_table_1.sh


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/examples/t5/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/t5/README.md


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/examples/t5/t5_mcore_train_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/t5/t5_mcore_train_curve.png


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/examples/t5/train_t5_220m_distributed.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/examples/t5/train_t5_220m_distributed.sh


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/images/Achieved_petaFLOPs.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/images/Achieved_petaFLOPs.png


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/images/cases_april2021.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/images/cases_april2021.png


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/jet-tests.yml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/jet-tests.yml


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/QuickStart.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/QuickStart.md


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/README.md


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/README_STRAGGLER.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/README_STRAGGLER.md


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/__init__.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/__pycache__/__init__.cpython-38.pyc


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/__pycache__/enums.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/__pycache__/enums.cpython-310.pyc


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/__pycache__/enums.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/__pycache__/enums.cpython-38.pyc


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/__pycache__/jit.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/__pycache__/jit.cpython-310.pyc


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/__pycache__/jit.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/__pycache__/jit.cpython-38.pyc


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/__pycache__/timers.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/__pycache__/timers.cpython-310.pyc


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/__pycache__/timers.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/__pycache__/timers.cpython-38.pyc


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/__pycache__/utils.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/__pycache__/utils.cpython-38.pyc


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/datasets/Makefile:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/datasets/Makefile


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/datasets/bert_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/datasets/bert_dataset.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/datasets/blended_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/datasets/blended_dataset.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/datasets/gpt_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/datasets/gpt_dataset.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/datasets/helpers.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/datasets/helpers.cpp


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/datasets/indexed_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/datasets/indexed_dataset.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/datasets/masked_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/datasets/masked_dataset.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/datasets/megatron_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/datasets/megatron_dataset.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/datasets/megatron_tokenizer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/datasets/megatron_tokenizer.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/datasets/multimodal_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/datasets/multimodal_dataset.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/datasets/readme.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/datasets/readme.md


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/datasets/retro/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/datasets/retro/__init__.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/datasets/retro/config/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/datasets/retro/config/__init__.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/datasets/retro/config/config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/datasets/retro/config/config.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/datasets/retro/config/tokenizers.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/datasets/retro/config/tokenizers.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/datasets/retro/db/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/datasets/retro/db/__init__.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/datasets/retro/db/build.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/datasets/retro/db/build.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/datasets/retro/db/dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/datasets/retro/db/dataset.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/datasets/retro/db/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/datasets/retro/db/utils.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/datasets/retro/external_libs.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/datasets/retro/external_libs.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/datasets/retro/index/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/datasets/retro/index/__init__.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/datasets/retro/index/build.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/datasets/retro/index/build.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/datasets/retro/index/factory.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/datasets/retro/index/factory.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/datasets/retro/index/index.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/datasets/retro/index/index.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/datasets/retro/index/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/datasets/retro/index/utils.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/datasets/retro/index/validate.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/datasets/retro/index/validate.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/datasets/retro/query/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/datasets/retro/query/__init__.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/datasets/retro/query/query.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/datasets/retro/query/query.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/datasets/retro/query/retro_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/datasets/retro/query/retro_dataset.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/datasets/retro/query/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/datasets/retro/query/utils.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/datasets/retro/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/datasets/retro/utils.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/datasets/t5_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/datasets/t5_dataset.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/datasets/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/datasets/utils.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/dist_checkpointing/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/dist_checkpointing/__init__.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/dist_checkpointing/core.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/dist_checkpointing/core.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/dist_checkpointing/dict_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/dist_checkpointing/dict_utils.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/dist_checkpointing/mapping.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/dist_checkpointing/mapping.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/dist_checkpointing/optimizer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/dist_checkpointing/optimizer.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/dist_checkpointing/serialization.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/dist_checkpointing/serialization.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/dist_checkpointing/strategies/base.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/dist_checkpointing/strategies/base.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/dist_checkpointing/strategies/torch.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/dist_checkpointing/strategies/torch.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/dist_checkpointing/strategies/zarr.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/dist_checkpointing/strategies/zarr.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/dist_checkpointing/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/dist_checkpointing/utils.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/distributed/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/distributed/__init__.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/distributed/finalize_model_grads.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/distributed/finalize_model_grads.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/distributed/param_and_grad_buffer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/distributed/param_and_grad_buffer.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/enums.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/enums.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/fusions/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/fusions/fused_bias_dropout.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/fusions/fused_bias_dropout.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/fusions/fused_bias_geglu.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/fusions/fused_bias_geglu.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/fusions/fused_bias_gelu.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/fusions/fused_bias_gelu.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/fusions/fused_bias_swiglu.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/fusions/fused_bias_swiglu.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/fusions/fused_layer_norm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/fusions/fused_layer_norm.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/fusions/fused_softmax.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/fusions/fused_softmax.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/inference/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/inference/__init__.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/inference/gpt/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/inference/gpt/__init__.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/inference/gpt/model_specs.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/inference/gpt/model_specs.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/inference/gpt/state_dict_hooks.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/inference/gpt/state_dict_hooks.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/inference_params.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/inference_params.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/jit.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/jit.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/model_parallel_config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/model_parallel_config.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/models/T5/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/models/T5/__init__.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/models/T5/t5_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/models/T5/t5_model.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/models/T5/t5_spec.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/models/T5/t5_spec.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/models/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/models/bert/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/models/bert/bert_layer_specs.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/models/bert/bert_layer_specs.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/models/bert/bert_lm_head.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/models/bert/bert_lm_head.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/models/bert/bert_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/models/bert/bert_model.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/models/bert/pooler.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/models/bert/pooler.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/models/common/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/models/common/embeddings/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/models/common/language_module/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/models/common/vision_module/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/models/gpt/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/models/gpt/__init__.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/models/gpt/gpt_layer_specs.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/models/gpt/gpt_layer_specs.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/models/gpt/gpt_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/models/gpt/gpt_model.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/models/multimodal/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/models/multimodal/llava_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/models/multimodal/llava_model.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/models/retro/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/models/retro/__init__.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/models/retro/base_attention.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/models/retro/base_attention.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/models/retro/config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/models/retro/config.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/models/retro/decoder_attention.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/models/retro/decoder_attention.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/models/retro/decoder_spec.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/models/retro/decoder_spec.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/models/retro/encoder_attention.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/models/retro/encoder_attention.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/models/retro/encoder_spec.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/models/retro/encoder_spec.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/models/retro/model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/models/retro/model.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/models/retro/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/models/retro/utils.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/models/vision/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/models/vision/clip_vit_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/models/vision/clip_vit_model.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/models/vision/multimodal_projector.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/models/vision/multimodal_projector.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/models/vision/vit_layer_specs.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/models/vision/vit_layer_specs.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/optimizer/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/optimizer/__init__.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/optimizer/clip_grads.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/optimizer/clip_grads.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/optimizer/distrib_optimizer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/optimizer/distrib_optimizer.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/optimizer/grad_scaler.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/optimizer/grad_scaler.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/optimizer/optimizer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/optimizer/optimizer.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/optimizer/optimizer_config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/optimizer/optimizer_config.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/package_info.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/package_info.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/packed_seq_params.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/packed_seq_params.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/parallel_state.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/parallel_state.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/pipeline_parallel/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/pipeline_parallel/__init__.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/pipeline_parallel/p2p_communication.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/pipeline_parallel/p2p_communication.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/pipeline_parallel/schedules.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/pipeline_parallel/schedules.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/requirements.txt:
--------------------------------------------------------------------------------
1 | torch


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/tensor_parallel/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/tensor_parallel/__init__.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/tensor_parallel/cross_entropy.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/tensor_parallel/cross_entropy.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/tensor_parallel/data.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/tensor_parallel/data.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/tensor_parallel/layers.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/tensor_parallel/layers.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/tensor_parallel/mappings.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/tensor_parallel/mappings.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/tensor_parallel/random.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/tensor_parallel/random.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/tensor_parallel/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/tensor_parallel/utils.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/timers.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/timers.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/transformer/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/transformer/__init__.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/transformer/attention.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/transformer/attention.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/transformer/custom_layers/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/transformer/dot_product_attention.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/transformer/dot_product_attention.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/transformer/enums.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/transformer/enums.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/transformer/identity_op.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/transformer/identity_op.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/transformer/mlp.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/transformer/mlp.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/transformer/module.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/transformer/module.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/transformer/moe/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/transformer/moe/README.md


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/transformer/moe/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/transformer/moe/experts.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/transformer/moe/experts.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/transformer/moe/grouped_gemm_util.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/transformer/moe/grouped_gemm_util.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/transformer/moe/moe_layer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/transformer/moe/moe_layer.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/transformer/moe/moe_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/transformer/moe/moe_utils.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/transformer/moe/router.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/transformer/moe/router.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/transformer/moe/token_dispatcher.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/transformer/moe/token_dispatcher.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/transformer/spec_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/transformer/spec_utils.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/transformer/transformer_block.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/transformer/transformer_block.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/transformer/transformer_config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/transformer/transformer_config.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/transformer/transformer_layer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/transformer/transformer_layer.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/transformer/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/transformer/utils.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/core/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/core/utils.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/inference/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/inference/__init__.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/inference/arguments.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/inference/arguments.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/inference/gpt/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/inference/gpt/__init__.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/inference/gpt/model_provider.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/inference/gpt/model_provider.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/inference/static/index.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/inference/static/index.html


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/inference/text_generation/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/inference/text_generation/__init__.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/inference/text_generation/api.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/inference/text_generation/api.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/inference/text_generation/beam_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/inference/text_generation/beam_utils.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/inference/text_generation/communication.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/inference/text_generation/communication.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/inference/text_generation/forward_step.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/inference/text_generation/forward_step.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/inference/text_generation/generation.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/inference/text_generation/generation.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/inference/text_generation/sampling.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/inference/text_generation/sampling.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/inference/text_generation/tokenization.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/inference/text_generation/tokenization.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/inference/text_generation_server.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/inference/text_generation_server.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/legacy/data/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/legacy/data/autoaugment.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/data/autoaugment.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/legacy/data/biencoder_dataset_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/data/biencoder_dataset_utils.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/legacy/data/data_samplers.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/data/data_samplers.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/legacy/data/dataset_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/data/dataset_utils.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/legacy/data/ict_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/data/ict_dataset.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/legacy/data/image_folder.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/data/image_folder.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/legacy/data/multimodal_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/data/multimodal_dataset.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/legacy/data/orqa_wiki_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/data/orqa_wiki_dataset.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/legacy/data/realm_dataset_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/data/realm_dataset_utils.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/legacy/data/realm_index.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/data/realm_index.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/legacy/data/vit_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/data/vit_dataset.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/legacy/fp16_deprecated/loss_scaler.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/fp16_deprecated/loss_scaler.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/legacy/fused_kernels/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/fused_kernels/__init__.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/legacy/fused_kernels/compat.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/fused_kernels/compat.h


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/legacy/fused_kernels/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/legacy/fused_kernels/type_shim.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/fused_kernels/type_shim.h


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/legacy/indexer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/indexer.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/legacy/model/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/model/__init__.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/legacy/model/bert_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/model/bert_model.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/legacy/model/biencoder_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/model/biencoder_model.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/legacy/model/classification.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/model/classification.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/legacy/model/enums.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/model/enums.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/legacy/model/fused_bias_gelu.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/model/fused_bias_gelu.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/legacy/model/fused_layer_norm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/model/fused_layer_norm.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/legacy/model/fused_softmax.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/model/fused_softmax.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/legacy/model/gpt_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/model/gpt_model.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/legacy/model/language_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/model/language_model.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/legacy/model/module.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/model/module.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/legacy/model/multiple_choice.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/model/multiple_choice.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/legacy/model/realm_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/model/realm_model.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/legacy/model/rms_norm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/model/rms_norm.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/legacy/model/t5_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/model/t5_model.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/legacy/model/transformer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/model/transformer.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/legacy/model/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/model/utils.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/legacy/model/vision/classification.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/model/vision/classification.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/legacy/model/vision/dino.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/model/vision/dino.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/legacy/model/vision/esvit_swin_backbone.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/model/vision/esvit_swin_backbone.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/legacy/model/vision/inpainting.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/model/vision/inpainting.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/legacy/model/vision/knn_monitor.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/model/vision/knn_monitor.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/legacy/model/vision/mit_backbone.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/model/vision/mit_backbone.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/legacy/model/vision/swin_backbone.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/model/vision/swin_backbone.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/legacy/model/vision/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/model/vision/utils.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/legacy/model/vision/vit_backbone.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/model/vision/vit_backbone.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/legacy/mpu/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/legacy/mpu/tests/commons.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/mpu/tests/commons.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/legacy/mpu/tests/test_cross_entropy.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/mpu/tests/test_cross_entropy.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/legacy/mpu/tests/test_data.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/mpu/tests/test_data.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/legacy/mpu/tests/test_initialize.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/mpu/tests/test_initialize.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/legacy/mpu/tests/test_layers.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/mpu/tests/test_layers.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/legacy/mpu/tests/test_random.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/legacy/mpu/tests/test_random.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/training/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/training/__init__.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/training/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/training/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/training/arguments.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/training/arguments.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/training/checkpointing.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/training/checkpointing.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/training/dist_signal_handler.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/training/dist_signal_handler.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/training/global_vars.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/training/global_vars.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/training/initialize.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/training/initialize.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/training/log_handler.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/training/log_handler.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/training/microbatches.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/training/microbatches.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/training/optimizer_param_scheduler.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/training/optimizer_param_scheduler.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/training/theoretical_memory_usage.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/training/theoretical_memory_usage.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/training/tokenizer/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/training/tokenizer/__init__.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/training/tokenizer/bert_tokenization.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/training/tokenizer/bert_tokenization.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/training/tokenizer/gpt2_tokenization.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/training/tokenizer/gpt2_tokenization.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/training/tokenizer/tokenizer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/training/tokenizer/tokenizer.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/training/training.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/training/training.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/training/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/training/utils.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/megatron/training/yaml_arguments.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/megatron/training/yaml_arguments.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/pretrain_bert.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/pretrain_bert.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/pretrain_gpt.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/pretrain_gpt.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/pretrain_ict.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/pretrain_ict.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/pretrain_retro.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/pretrain_retro.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/pretrain_t5.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/pretrain_t5.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/pretrain_vision_classify.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/pretrain_vision_classify.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/pretrain_vision_dino.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/pretrain_vision_dino.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/pretrain_vision_inpaint.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/pretrain_vision_inpaint.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/pretrain_vlm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/pretrain_vlm.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/pyproject.toml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/pyproject.toml


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/report_theoretical_memory.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/report_theoretical_memory.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/setup.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/setup.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tasks/data_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/data_utils.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tasks/ensemble_classifier.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/ensemble_classifier.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tasks/eval_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/eval_utils.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tasks/finetune_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/finetune_utils.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tasks/glue/data.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/glue/data.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tasks/glue/finetune.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/glue/finetune.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tasks/glue/mnli.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/glue/mnli.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tasks/glue/qqp.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/glue/qqp.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tasks/main.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/main.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tasks/msdp/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/msdp/README.md


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tasks/msdp/evaluate.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/msdp/evaluate.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tasks/msdp/main.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/msdp/main.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tasks/msdp/metrics.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/msdp/metrics.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tasks/msdp/preprocessing.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/msdp/preprocessing.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tasks/msdp/prompt.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/msdp/prompt.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tasks/orqa/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/orqa/README.md


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tasks/orqa/evaluate_orqa.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/orqa/evaluate_orqa.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tasks/orqa/evaluate_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/orqa/evaluate_utils.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tasks/orqa/supervised/data.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/orqa/supervised/data.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tasks/orqa/supervised/eval_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/orqa/supervised/eval_utils.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tasks/orqa/supervised/finetune.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/orqa/supervised/finetune.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tasks/orqa/unsupervised/nq.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/orqa/unsupervised/nq.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tasks/orqa/unsupervised/qa_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/orqa/unsupervised/qa_utils.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tasks/orqa/unsupervised/tokenizers.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/orqa/unsupervised/tokenizers.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tasks/race/data.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/race/data.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tasks/race/finetune.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/race/finetune.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tasks/vision/classification/classification.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/vision/classification/classification.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tasks/vision/classification/eval_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/vision/classification/eval_utils.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tasks/vision/finetune_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/vision/finetune_utils.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tasks/vision/main.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/vision/main.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tasks/vision/segmentation/cityscapes.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/vision/segmentation/cityscapes.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tasks/vision/segmentation/data.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/vision/segmentation/data.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tasks/vision/segmentation/finetune_segformer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/vision/segmentation/finetune_segformer.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tasks/vision/segmentation/finetune_setr.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/vision/segmentation/finetune_setr.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tasks/vision/segmentation/metrics.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/vision/segmentation/metrics.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tasks/vision/segmentation/seg_heads.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/vision/segmentation/seg_heads.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tasks/vision/segmentation/seg_models.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/vision/segmentation/seg_models.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tasks/vision/segmentation/transforms.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/vision/segmentation/transforms.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tasks/vision/segmentation/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/vision/segmentation/utils.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tasks/zeroshot_gpt/datasets.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/zeroshot_gpt/datasets.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tasks/zeroshot_gpt/detokenizer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/zeroshot_gpt/detokenizer.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tasks/zeroshot_gpt/evaluate.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tasks/zeroshot_gpt/evaluate.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tests/functional_tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tests/functional_tests/jet_recipes/MR-bert.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/functional_tests/jet_recipes/MR-bert.yaml


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tests/functional_tests/jet_recipes/MR-gpt.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/functional_tests/jet_recipes/MR-gpt.yaml


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tests/functional_tests/jet_recipes/MR-t5.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/functional_tests/jet_recipes/MR-t5.yaml


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tests/functional_tests/jet_recipes/build-pyt.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/functional_tests/jet_recipes/build-pyt.yaml


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tests/functional_tests/jet_recipes/monthly-t5.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/functional_tests/jet_recipes/monthly-t5.yaml


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tests/functional_tests/jet_recipes/nightly-bert.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/functional_tests/jet_recipes/nightly-bert.yaml


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tests/functional_tests/jet_recipes/nightly-gpt.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/functional_tests/jet_recipes/nightly-gpt.yaml


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tests/functional_tests/jet_recipes/weekly-gpt.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/functional_tests/jet_recipes/weekly-gpt.yaml


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tests/functional_tests/python_test_utils/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tests/functional_tests/python_test_utils/common.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/functional_tests/python_test_utils/common.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tests/functional_tests/shell_test_utils/jobwait.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/functional_tests/shell_test_utils/jobwait.sh


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tests/unit_tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tests/unit_tests/data/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tests/unit_tests/data/test_builder.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/unit_tests/data/test_builder.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tests/unit_tests/data/test_mock_gpt_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/unit_tests/data/test_mock_gpt_dataset.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tests/unit_tests/data/test_multimodal_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/unit_tests/data/test_multimodal_dataset.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tests/unit_tests/data/test_preprocess_data.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/unit_tests/data/test_preprocess_data.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tests/unit_tests/data/test_preprocess_mmdata.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/unit_tests/data/test_preprocess_mmdata.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tests/unit_tests/dist_checkpointing/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/unit_tests/dist_checkpointing/__init__.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tests/unit_tests/dist_checkpointing/conftest.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/unit_tests/dist_checkpointing/conftest.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tests/unit_tests/dist_checkpointing/models/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tests/unit_tests/dist_checkpointing/models/common.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/unit_tests/dist_checkpointing/models/common.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tests/unit_tests/dist_checkpointing/test_mapping.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/unit_tests/dist_checkpointing/test_mapping.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tests/unit_tests/fusions/test_torch_softmax.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/unit_tests/fusions/test_torch_softmax.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tests/unit_tests/models/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tests/unit_tests/models/test_base_embedding.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/unit_tests/models/test_base_embedding.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tests/unit_tests/models/test_bert_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/unit_tests/models/test_bert_model.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tests/unit_tests/models/test_clip_vit_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/unit_tests/models/test_clip_vit_model.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tests/unit_tests/models/test_gpt_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/unit_tests/models/test_gpt_model.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tests/unit_tests/models/test_llava_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/unit_tests/models/test_llava_model.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tests/unit_tests/models/test_multimodal_projector.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/unit_tests/models/test_multimodal_projector.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tests/unit_tests/models/test_t5_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/unit_tests/models/test_t5_model.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tests/unit_tests/pipeline_parallel/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tests/unit_tests/pipeline_parallel/test_schedules.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/unit_tests/pipeline_parallel/test_schedules.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tests/unit_tests/tensor_parallel/test_data.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/unit_tests/tensor_parallel/test_data.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tests/unit_tests/tensor_parallel/test_mappings.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/unit_tests/tensor_parallel/test_mappings.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tests/unit_tests/tensor_parallel/test_random.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/unit_tests/tensor_parallel/test_random.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tests/unit_tests/test_basic.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/unit_tests/test_basic.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tests/unit_tests/test_imports.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/unit_tests/test_imports.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tests/unit_tests/test_parallel_state.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/unit_tests/test_parallel_state.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tests/unit_tests/test_training.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/unit_tests/test_training.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tests/unit_tests/test_utilities.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/unit_tests/test_utilities.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tests/unit_tests/test_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/unit_tests/test_utils.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tests/unit_tests/transformer/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tests/unit_tests/transformer/moe/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tests/unit_tests/transformer/moe/test_grouped_mlp.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/unit_tests/transformer/moe/test_grouped_mlp.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tests/unit_tests/transformer/moe/test_routers.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/unit_tests/transformer/moe/test_routers.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tests/unit_tests/transformer/test_attention.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/unit_tests/transformer/test_attention.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tests/unit_tests/transformer/test_core_attention.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/unit_tests/transformer/test_core_attention.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tests/unit_tests/transformer/test_mlp.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/unit_tests/transformer/test_mlp.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tests/unit_tests/transformer/test_module.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/unit_tests/transformer/test_module.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tests/unit_tests/transformer/test_retro_attention.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tests/unit_tests/transformer/test_retro_attention.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tools/autoformat.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/autoformat.sh


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tools/bert_embedding/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/bert_embedding/__init__.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tools/bert_embedding/dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/bert_embedding/dataset.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tools/bert_embedding/embed.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/bert_embedding/embed.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tools/bert_embedding/external_libs.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/bert_embedding/external_libs.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tools/bert_embedding/huggingface.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/bert_embedding/huggingface.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tools/checkpoint/convert.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/checkpoint/convert.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tools/checkpoint/loader_llama2.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/checkpoint/loader_llama2.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tools/checkpoint/loader_llama2_hf.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/checkpoint/loader_llama2_hf.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tools/checkpoint/loader_mcore.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/checkpoint/loader_mcore.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tools/checkpoint/loader_megatron.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/checkpoint/loader_megatron.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tools/checkpoint/saver_mcore.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/checkpoint/saver_mcore.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tools/checkpoint/saver_megatron.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/checkpoint/saver_megatron.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tools/checkpoint/setter.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/checkpoint/setter.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tools/checkpoint/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/checkpoint/utils.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tools/linter.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/linter.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tools/merge_datasets.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/merge_datasets.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tools/openwebtext/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/openwebtext/README.md


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tools/openwebtext/add_id.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/openwebtext/add_id.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tools/openwebtext/blacklist_urls.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/openwebtext/blacklist_urls.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tools/openwebtext/cleanup_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/openwebtext/cleanup_dataset.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tools/openwebtext/cleanup_fix_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/openwebtext/cleanup_fix_dataset.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tools/openwebtext/filter_ngrams.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/openwebtext/filter_ngrams.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tools/openwebtext/find_duplicates.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/openwebtext/find_duplicates.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tools/openwebtext/group_duplicate_url.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/openwebtext/group_duplicate_url.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tools/openwebtext/merge_jsons.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/openwebtext/merge_jsons.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tools/openwebtext/remove_group_duplicates.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/openwebtext/remove_group_duplicates.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tools/preprocess_data.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/preprocess_data.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tools/preprocess_data_nmt.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/preprocess_data_nmt.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tools/preprocess_mmdata.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/preprocess_mmdata.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tools/retro/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/retro/README.md


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tools/retro/build_db.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/retro/build_db.md


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tools/retro/cli/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/retro/cli/__init__.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tools/retro/cli/__main__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/retro/cli/__main__.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tools/retro/cli/cli.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/retro/cli/cli.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tools/retro/config_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/retro/config_utils.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tools/retro/docker/Dockerfile:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/retro/docker/Dockerfile


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tools/retro/preprocess_data.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/retro/preprocess_data.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tools/retro/sft/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/retro/sft/README.md


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tools/retro/sft/dataset_conv.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/retro/sft/dataset_conv.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tools/retro/sft/open_inst.sh:
--------------------------------------------------------------------------------
1 | DATA_BLEND="1.0 open_inst"
2 | 


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tools/retro/sft/sft_retro.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/retro/sft/sft_retro.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tools/retro/sft/sft_retro_lm.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/retro/sft/sft_retro_lm.sh


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tools/retro/text_generation/evaluate.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/retro/text_generation/evaluate.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tools/retro/text_generation/metrics.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/retro/text_generation/metrics.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tools/retro/text_generation/retro_api.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/retro/text_generation/retro_api.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tools/retro/text_generation/retro_generate.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/retro/text_generation/retro_generate.sh


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tools/retro/text_generation/retro_generation.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/retro/text_generation/retro_generation.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tools/run_text_generation_server.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/run_text_generation_server.py


--------------------------------------------------------------------------------
/agent_training/Megatron-LM-240424/tools/text_generation_cli.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/Megatron-LM-240424/tools/text_generation_cli.py


--------------------------------------------------------------------------------
/agent_training/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/README.md


--------------------------------------------------------------------------------
/agent_training/README_zh-CN.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/README_zh-CN.md


--------------------------------------------------------------------------------
/agent_training/examples/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/examples/.DS_Store


--------------------------------------------------------------------------------
/agent_training/examples/idefics2/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/examples/idefics2/.DS_Store


--------------------------------------------------------------------------------
/agent_training/examples/idefics2/pretrain_megatron_idefics2.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/examples/idefics2/pretrain_megatron_idefics2.py


--------------------------------------------------------------------------------
/agent_training/examples/idefics2/run_cot_cmd.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/examples/idefics2/run_cot_cmd.sh


--------------------------------------------------------------------------------
/agent_training/examples/idefics2/train_llava_instruct_webdataset_cot.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/examples/idefics2/train_llava_instruct_webdataset_cot.sh


--------------------------------------------------------------------------------
/agent_training/megatron_patch/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/.DS_Store


--------------------------------------------------------------------------------
/agent_training/megatron_patch/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/__init__.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/arguments.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/arguments.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/data/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/data/.DS_Store


--------------------------------------------------------------------------------
/agent_training/megatron_patch/data/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/data/__init__.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/data/bloom.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/data/bloom.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/data/data_sampler.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/data/data_sampler.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/data/glm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/data/glm.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/data/idefics2/constants.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/data/idefics2/constants.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/data/idefics2/idefics2_image_processor.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/data/idefics2/idefics2_image_processor.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/data/idefics2/mm_pretrain_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/data/idefics2/mm_pretrain_dataset.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/data/llama.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/data/llama.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/data/llava/constants.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/data/llava/constants.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/data/llava/conversation.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/data/llava/conversation.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/data/llava/cvcuda_image_processing_clip.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/data/llava/cvcuda_image_processing_clip.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/data/llava/mm_pretrain_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/data/llava/mm_pretrain_dataset.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/data/llava/mm_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/data/llava/mm_utils.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/data/qwen_vl.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/data/qwen_vl.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/data/starcoder.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/data/starcoder.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/data/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/data/utils.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/finetune_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/finetune_utils.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/generation/api.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/generation/api.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/generation/generation.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/generation/generation.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/generation/gpt_predictor.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/generation/gpt_predictor.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/generation/megatron.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/generation/megatron.md


--------------------------------------------------------------------------------
/agent_training/megatron_patch/generation/tokenization.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/generation/tokenization.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/initialize.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/initialize.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/lm_evaluate.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/lm_evaluate.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/__init__.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/baichuan/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/baichuan/__init__.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/baichuan/gpt_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/baichuan/gpt_model.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/baichuan/language_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/baichuan/language_model.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/baichuan/transformer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/baichuan/transformer.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/baichuan2/gpt_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/baichuan2/gpt_model.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/baichuan2/language_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/baichuan2/language_model.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/baichuan2/layers.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/baichuan2/layers.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/baichuan2/transformer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/baichuan2/transformer.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/bloom/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/bloom/__init__.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/bloom/gpt_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/bloom/gpt_model.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/bloom/language_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/bloom/language_model.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/bloom/layers.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/bloom/layers.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/bloom/positional_embeddings.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/bloom/positional_embeddings.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/bloom/transformer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/bloom/transformer.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/chatglm/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/chatglm/__init__.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/chatglm/gpt_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/chatglm/gpt_model.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/chatglm/language_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/chatglm/language_model.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/chatglm/positional_embeddings.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/chatglm/positional_embeddings.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/chatglm/transformer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/chatglm/transformer.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/falcon/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/falcon/__init__.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/falcon/gpt_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/falcon/gpt_model.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/falcon/language_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/falcon/language_model.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/falcon/transformer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/falcon/transformer.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/falcon40b/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/falcon40b/__init__.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/falcon40b/gpt_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/falcon40b/gpt_model.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/falcon40b/language_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/falcon40b/language_model.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/falcon40b/transformer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/falcon40b/transformer.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/galactica/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/galactica/__init__.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/galactica/gpt_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/galactica/gpt_model.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/galactica/language_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/galactica/language_model.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/galactica/transformer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/galactica/transformer.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/glm130b/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/glm130b/__init__.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/glm130b/gpt_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/glm130b/gpt_model.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/glm130b/language_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/glm130b/language_model.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/glm130b/transformer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/glm130b/transformer.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/idefics2/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/idefics2/get_idefics2vit_layer_spec.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/idefics2/get_idefics2vit_layer_spec.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/idefics2/gpt_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/idefics2/gpt_model.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/idefics2/idefics_vision_tower.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/idefics2/idefics_vision_tower.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/idefics2/idefics_vlm_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/idefics2/idefics_vlm_model.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/idefics2/language_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/idefics2/language_model.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/idefics2/language_model_llama3.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/idefics2/language_model_llama3.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/idefics2/perceiver_transformer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/idefics2/perceiver_transformer.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/idefics2/rotary_pos_embedding.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/idefics2/rotary_pos_embedding.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/idefics2/rotary_pos_embedding_llama3.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/idefics2/rotary_pos_embedding_llama3.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/idefics2/transformer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/idefics2/transformer.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/llama/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/llama/__init__.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/llama/gpt_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/llama/gpt_model.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/llama/language_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/llama/language_model.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/llama/positional_embeddings.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/llama/positional_embeddings.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/llama/transformer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/llama/transformer.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/llama2/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/llama2/__init__.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/llama2/gpt_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/llama2/gpt_model.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/llama2/language_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/llama2/language_model.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/llama2/rotary_pos_embedding.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/llama2/rotary_pos_embedding.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/llama2/transformer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/llama2/transformer.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/llama3/gpt_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/llama3/gpt_model.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/llama3/language_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/llama3/language_model.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/llama3/transformer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/llama3/transformer.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/llava/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/llava/__init__.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/llava/clip_encoder.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/llava/clip_encoder.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/llava/gpt_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/llava/gpt_model.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/llava/language_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/llava/language_model.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/llava/mm_projector_builder.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/llava/mm_projector_builder.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/llava/rotary_pos_embedding.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/llava/rotary_pos_embedding.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/llava/transformer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/llava/transformer.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/llava/vlm_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/llava/vlm_model.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/mistral/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/mistral/__init__.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/mistral/gpt_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/mistral/gpt_model.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/mistral/language_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/mistral/language_model.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/mistral/modeling_attn_mask_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/mistral/modeling_attn_mask_utils.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/mistral/rotary_pos_embedding.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/mistral/rotary_pos_embedding.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/mistral/transformer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/mistral/transformer.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/mixtral/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/mixtral/__init__.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/mixtral/layer_specs.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/mixtral/layer_specs.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/mixtral/model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/mixtral/model.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/mixtral/moe/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/mixtral/moe/experts.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/mixtral/moe/experts.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/mixtral/moe/grouped_gemm_util.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/mixtral/moe/grouped_gemm_util.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/mixtral/moe/moe_layer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/mixtral/moe/moe_layer.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/mixtral/moe/moe_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/mixtral/moe/moe_utils.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/mixtral/moe/router.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/mixtral/moe/router.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/mixtral/moe/token_dispatcher.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/mixtral/moe/token_dispatcher.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/mixtral/transformer/attention.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/mixtral/transformer/attention.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/mixtral/transformer/mlp.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/mixtral/transformer/mlp.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/mixtral/transformer_config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/mixtral/transformer_config.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/qwen/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/qwen/__init__.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/qwen/gpt_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/qwen/gpt_model.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/qwen/language_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/qwen/language_model.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/qwen/transformer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/qwen/transformer.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/qwen1_5/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/qwen1_5/__init__.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/qwen1_5/layer_specs.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/qwen1_5/layer_specs.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/qwen1_5/model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/qwen1_5/model.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/qwen1_5/moe/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/qwen1_5/moe/experts.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/qwen1_5/moe/experts.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/qwen1_5/moe/moe_layer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/qwen1_5/moe/moe_layer.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/qwen1_5/moe/router.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/qwen1_5/moe/router.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/qwen1_5/moe/token_dispatcher.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/qwen1_5/moe/token_dispatcher.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/qwen1_5/transformer/attention.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/qwen1_5/transformer/attention.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/qwen1_5/transformer/mlp.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/qwen1_5/transformer/mlp.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/qwen1_5_megablocks/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/qwen1_5_megablocks/__init__.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/qwen1_5_megablocks/gpt_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/qwen1_5_megablocks/gpt_model.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/qwen1_5_megablocks/language_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/qwen1_5_megablocks/language_model.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/qwen1_5_megablocks/rotary_pos_embedding.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/qwen1_5_megablocks/rotary_pos_embedding.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/qwen1_5_megablocks/transformer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/qwen1_5_megablocks/transformer.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/qwen_vl/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/qwen_vl/__init__.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/qwen_vl/gpt_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/qwen_vl/gpt_model.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/qwen_vl/language_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/qwen_vl/language_model.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/qwen_vl/transformer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/qwen_vl/transformer.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/qwen_vl/visual.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/qwen_vl/visual.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/starcoder/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/starcoder/__init__.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/starcoder/enums.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/starcoder/enums.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/starcoder/glu_activations.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/starcoder/glu_activations.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/starcoder/gpt_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/starcoder/gpt_model.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/starcoder/language_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/starcoder/language_model.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/model/starcoder/transformer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/model/starcoder/transformer.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/tokenizer/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/tokenizer/__init__.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/tokenizer/icetk_glm130b_tokenizer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/tokenizer/icetk_glm130b_tokenizer.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/tokenizer/jiebabpe_tokenizer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/tokenizer/jiebabpe_tokenizer.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/tokenizer/tokenization_baichuan.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/tokenizer/tokenization_baichuan.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/tokenizer/tokenization_qwen_vl.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/tokenizer/tokenization_qwen_vl.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/tokenizer/tokenization_yi.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/tokenizer/tokenization_yi.py


--------------------------------------------------------------------------------
/agent_training/megatron_patch/training.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/megatron_patch/training.py


--------------------------------------------------------------------------------
/agent_training/toolkits/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/toolkits/.DS_Store


--------------------------------------------------------------------------------
/agent_training/toolkits/model_checkpoints_convertor/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/toolkits/model_checkpoints_convertor/.DS_Store


--------------------------------------------------------------------------------
/agent_training/toolkits/model_checkpoints_convertor/idefics2/clip_convertor.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/toolkits/model_checkpoints_convertor/idefics2/clip_convertor.py


--------------------------------------------------------------------------------
/agent_training/toolkits/model_checkpoints_convertor/idefics2/idefics2_hf2mg.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/toolkits/model_checkpoints_convertor/idefics2/idefics2_hf2mg.py


--------------------------------------------------------------------------------
/agent_training/toolkits/model_checkpoints_convertor/idefics2/megatron_2hf.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/toolkits/model_checkpoints_convertor/idefics2/megatron_2hf.sh


--------------------------------------------------------------------------------
/agent_training/toolkits/model_checkpoints_convertor/idefics2/mg2hf.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/toolkits/model_checkpoints_convertor/idefics2/mg2hf.sh


--------------------------------------------------------------------------------
/agent_training/toolkits/model_checkpoints_convertor/idefics2/model_convertor.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/toolkits/model_checkpoints_convertor/idefics2/model_convertor.sh


--------------------------------------------------------------------------------
/agent_training/toolkits/pretrain_data_preprocessing/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/toolkits/pretrain_data_preprocessing/.DS_Store


--------------------------------------------------------------------------------
/agent_training/toolkits/pretrain_data_preprocessing/move_bulk_data.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/agent_training/toolkits/pretrain_data_preprocessing/move_bulk_data.py


--------------------------------------------------------------------------------
/method_figure.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/method_figure.jpg


--------------------------------------------------------------------------------
/reformat_data/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/reformat_data/.DS_Store


--------------------------------------------------------------------------------
/reformat_data/convert_to_llava_format.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/reformat_data/convert_to_llava_format.sh


--------------------------------------------------------------------------------
/reformat_data/convert_to_llava_format_with_gold_label_diff.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/reformat_data/convert_to_llava_format_with_gold_label_diff.py


--------------------------------------------------------------------------------
/reformat_data/convert_to_llava_format_with_pos_cot.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/reformat_data/convert_to_llava_format_with_pos_cot.py


--------------------------------------------------------------------------------
/reformat_data/convert_to_llava_format_with_pos_diff_equation.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/reformat_data/convert_to_llava_format_with_pos_diff_equation.py


--------------------------------------------------------------------------------
/reformat_data/diff_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/reformat_data/diff_utils.py


--------------------------------------------------------------------------------
/reformat_data/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/reformat_data/utils.py


--------------------------------------------------------------------------------
/requirement.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/requirement.txt


--------------------------------------------------------------------------------
/sample_data/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/sample_data/.DS_Store


--------------------------------------------------------------------------------
/sample_data/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/sample_data/gather_gpt4_prompt/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/sample_data/gather_gpt4_prompt/.DS_Store


--------------------------------------------------------------------------------
/sample_data/gather_gpt4_prompt/completion_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/sample_data/gather_gpt4_prompt/completion_utils.py


--------------------------------------------------------------------------------
/sample_data/gather_gpt4_prompt/get_prompt_phrase.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/sample_data/gather_gpt4_prompt/get_prompt_phrase.py


--------------------------------------------------------------------------------
/sample_data/gather_gpt4_prompt/room_constant.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/sample_data/gather_gpt4_prompt/room_constant.py


--------------------------------------------------------------------------------
/sample_data/generate_trajectories.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/sample_data/generate_trajectories.py


--------------------------------------------------------------------------------
/sample_data/regenerate_init_position.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/sample_data/regenerate_init_position.py


--------------------------------------------------------------------------------
/sample_data/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhaowei-wang-nlp/DivScene/HEAD/sample_data/utils.py


--------------------------------------------------------------------------------