├── LICENSE ├── README.md ├── modeling_dense.py ├── modeling_moe.py ├── modeling_mole.py ├── modeling_mole_rep.py ├── pretrain ├── README.md ├── megatron │ ├── core │ │ ├── QuickStart.md │ │ ├── README.md │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ └── package_info.cpython-311.pyc │ │ ├── datasets │ │ │ ├── Makefile │ │ │ ├── __init__.py │ │ │ ├── bert_dataset.py │ │ │ ├── blended_dataset.py │ │ │ ├── blended_megatron_dataset_builder.py │ │ │ ├── blended_megatron_dataset_config.py │ │ │ ├── gpt_dataset.py │ │ │ ├── helpers.cpp │ │ │ ├── indexed_dataset.py │ │ │ ├── masked_dataset.py │ │ │ ├── megatron_dataset.py │ │ │ ├── megatron_tokenizer.py │ │ │ ├── multimodal_dataset.py │ │ │ ├── readme.md │ │ │ ├── retro │ │ │ │ ├── __init__.py │ │ │ │ ├── config │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── bert_embedders.py │ │ │ │ │ ├── config.py │ │ │ │ │ ├── gpt_chunk_datasets.py │ │ │ │ │ └── tokenizers.py │ │ │ │ ├── db │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── build.py │ │ │ │ │ ├── dataset.py │ │ │ │ │ └── utils.py │ │ │ │ ├── external_libs.py │ │ │ │ ├── index │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── build.py │ │ │ │ │ ├── factory.py │ │ │ │ │ ├── index.py │ │ │ │ │ ├── indexes │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── faiss_base.py │ │ │ │ │ │ └── faiss_par_add.py │ │ │ │ │ ├── utils.py │ │ │ │ │ └── validate.py │ │ │ │ ├── query │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── gpt_chunk_dataset.py │ │ │ │ │ ├── multi_split_gpt_dataset.py │ │ │ │ │ ├── query.py │ │ │ │ │ ├── retro_dataset.py │ │ │ │ │ └── utils.py │ │ │ │ └── utils.py │ │ │ ├── t5_dataset.py │ │ │ └── utils.py │ │ ├── dist_checkpointing │ │ │ ├── __init__.py │ │ │ ├── core.py │ │ │ ├── dict_utils.py │ │ │ ├── mapping.py │ │ │ ├── optimizer.py │ │ │ ├── serialization.py │ │ │ ├── strategies │ │ │ │ ├── __init__.py │ │ │ │ ├── base.py │ │ │ │ ├── filesystem_async.py │ │ │ │ ├── state_dict_saver.py │ │ │ │ ├── tensorstore.py │ │ │ │ ├── torch.py │ │ │ │ ├── two_stage.py │ │ │ │ └── zarr.py │ │ │ └── utils.py │ │ ├── distributed │ │ │ ├── __init__.py │ │ │ ├── distributed_data_parallel.py │ │ │ ├── finalize_model_grads.py │ │ │ └── param_and_grad_buffer.py │ │ ├── enums.py │ │ ├── fusions │ │ │ ├── __init__.py │ │ │ ├── fused_bias_dropout.py │ │ │ ├── fused_bias_geglu.py │ │ │ ├── fused_bias_gelu.py │ │ │ ├── fused_bias_swiglu.py │ │ │ ├── fused_layer_norm.py │ │ │ └── fused_softmax.py │ │ ├── inference │ │ │ ├── __init__.py │ │ │ └── gpt │ │ │ │ ├── __init__.py │ │ │ │ ├── model_specs.py │ │ │ │ └── state_dict_hooks.py │ │ ├── inference_params.py │ │ ├── jit.py │ │ ├── model_parallel_config.py │ │ ├── models │ │ │ ├── T5 │ │ │ │ ├── __init__.py │ │ │ │ ├── t5_model.py │ │ │ │ └── t5_spec.py │ │ │ ├── __init__.py │ │ │ ├── bert │ │ │ │ ├── __init__.py │ │ │ │ ├── bert_layer_specs.py │ │ │ │ ├── bert_lm_head.py │ │ │ │ ├── bert_model.py │ │ │ │ └── pooler.py │ │ │ ├── common │ │ │ │ ├── __init__.py │ │ │ │ ├── embeddings │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── language_model_embedding.py │ │ │ │ │ └── rotary_pos_embedding.py │ │ │ │ ├── language_module │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── language_module.py │ │ │ │ └── vision_module │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── vision_module.py │ │ │ ├── gpt │ │ │ │ ├── __init__.py │ │ │ │ ├── gpt_layer_specs.py │ │ │ │ └── gpt_model.py │ │ │ ├── multimodal │ │ │ │ ├── __init__.py │ │ │ │ └── llava_model.py │ │ │ ├── retro │ │ │ │ ├── __init__.py │ │ │ │ ├── base_attention.py │ │ │ │ ├── config.py │ │ │ │ ├── decoder_attention.py │ │ │ │ ├── decoder_spec.py │ │ │ │ ├── encoder_attention.py │ │ │ │ ├── encoder_spec.py │ │ │ │ ├── model.py │ │ │ │ └── utils.py │ │ │ └── vision │ │ │ │ ├── __init__.py │ │ │ │ ├── clip_vit_model.py │ │ │ │ └── multimodal_projector.py │ │ ├── optimizer │ │ │ ├── __init__.py │ │ │ ├── clip_grads.py │ │ │ ├── distrib_optimizer.py │ │ │ ├── grad_scaler.py │ │ │ ├── optimizer.py │ │ │ └── optimizer_config.py │ │ ├── package_info.py │ │ ├── packed_seq_params.py │ │ ├── parallel_state.py │ │ ├── pipeline_parallel │ │ │ ├── __init__.py │ │ │ ├── p2p_communication.py │ │ │ └── schedules.py │ │ ├── requirements.txt │ │ ├── tensor_parallel │ │ │ ├── __init__.py │ │ │ ├── cross_entropy.py │ │ │ ├── data.py │ │ │ ├── layers.py │ │ │ ├── mappings.py │ │ │ ├── random.py │ │ │ └── utils.py │ │ ├── timers.py │ │ ├── transformer │ │ │ ├── __init__.py │ │ │ ├── attention.py │ │ │ ├── custom_layers │ │ │ │ ├── __init__.py │ │ │ │ └── transformer_engine.py │ │ │ ├── dot_product_attention.py │ │ │ ├── enums.py │ │ │ ├── identity_op.py │ │ │ ├── mlp.py │ │ │ ├── module.py │ │ │ ├── moe │ │ │ │ ├── README.md │ │ │ │ ├── __init__.py │ │ │ │ ├── experts.py │ │ │ │ ├── grouped_gemm_util.py │ │ │ │ ├── moe_layer.py │ │ │ │ ├── moe_utils.py │ │ │ │ ├── router.py │ │ │ │ └── token_dispatcher.py │ │ │ ├── spec_utils.py │ │ │ ├── transformer_block.py │ │ │ ├── transformer_config.py │ │ │ ├── transformer_layer.py │ │ │ ├── utils.py │ │ │ └── vanillamlp.py │ │ └── utils.py │ ├── inference │ │ ├── __init__.py │ │ ├── arguments.py │ │ ├── gpt │ │ │ ├── __init__.py │ │ │ └── model_provider.py │ │ ├── static │ │ │ └── index.html │ │ ├── text_generation │ │ │ ├── __init__.py │ │ │ ├── api.py │ │ │ ├── beam_utils.py │ │ │ ├── communication.py │ │ │ ├── forward_step.py │ │ │ ├── generation.py │ │ │ ├── sampling.py │ │ │ └── tokenization.py │ │ └── text_generation_server.py │ ├── legacy │ │ ├── data │ │ │ ├── __init__.py │ │ │ ├── autoaugment.py │ │ │ ├── biencoder_dataset_utils.py │ │ │ ├── data_samplers.py │ │ │ ├── dataset_utils.py │ │ │ ├── ict_dataset.py │ │ │ ├── image_folder.py │ │ │ ├── multimodal_dataset.py │ │ │ ├── orqa_wiki_dataset.py │ │ │ ├── realm_dataset_utils.py │ │ │ ├── realm_index.py │ │ │ └── vit_dataset.py │ │ ├── fp16_deprecated │ │ │ └── loss_scaler.py │ │ ├── fused_kernels │ │ │ ├── __init__.py │ │ │ ├── compat.h │ │ │ ├── tests │ │ │ │ ├── __init__.py │ │ │ │ └── test_fused_kernels.py │ │ │ └── type_shim.h │ │ ├── indexer.py │ │ ├── model │ │ │ ├── __init__.py │ │ │ ├── bert_model.py │ │ │ ├── biencoder_model.py │ │ │ ├── classification.py │ │ │ ├── enums.py │ │ │ ├── fused_bias_gelu.py │ │ │ ├── fused_layer_norm.py │ │ │ ├── fused_softmax.py │ │ │ ├── gpt_model.py │ │ │ ├── language_model.py │ │ │ ├── module.py │ │ │ ├── multiple_choice.py │ │ │ ├── realm_model.py │ │ │ ├── rms_norm.py │ │ │ ├── t5_model.py │ │ │ ├── transformer.py │ │ │ ├── utils.py │ │ │ └── vision │ │ │ │ ├── classification.py │ │ │ │ ├── dino.py │ │ │ │ ├── esvit_swin_backbone.py │ │ │ │ ├── inpainting.py │ │ │ │ ├── knn_monitor.py │ │ │ │ ├── mit_backbone.py │ │ │ │ ├── swin_backbone.py │ │ │ │ ├── utils.py │ │ │ │ └── vit_backbone.py │ │ └── mpu │ │ │ └── tests │ │ │ ├── __init__.py │ │ │ ├── commons.py │ │ │ ├── test_cross_entropy.py │ │ │ ├── test_data.py │ │ │ ├── test_initialize.py │ │ │ ├── test_layers.py │ │ │ └── test_random.py │ └── training │ │ ├── __init__.py │ │ ├── arguments.py │ │ ├── checkpointing.py │ │ ├── dist_signal_handler.py │ │ ├── global_vars.py │ │ ├── initialize.py │ │ ├── log_handler.py │ │ ├── microbatches.py │ │ ├── optimizer_param_scheduler.py │ │ ├── theoretical_memory_usage.py │ │ ├── tokenizer │ │ ├── __init__.py │ │ ├── bert_tokenization.py │ │ ├── gpt2_tokenization.py │ │ └── tokenizer.py │ │ ├── training.py │ │ ├── utils.py │ │ └── yaml_arguments.py ├── pretrain_gpt.py ├── pyproject.toml ├── setup.py └── tokenizer │ ├── special_tokens_map.json │ ├── tokenizer.json │ └── tokenizer_config.json └── reparameterize.py /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/README.md -------------------------------------------------------------------------------- /modeling_dense.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/modeling_dense.py -------------------------------------------------------------------------------- /modeling_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/modeling_moe.py -------------------------------------------------------------------------------- /modeling_mole.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/modeling_mole.py -------------------------------------------------------------------------------- /modeling_mole_rep.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/modeling_mole_rep.py -------------------------------------------------------------------------------- /pretrain/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/README.md -------------------------------------------------------------------------------- /pretrain/megatron/core/QuickStart.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/QuickStart.md -------------------------------------------------------------------------------- /pretrain/megatron/core/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/README.md -------------------------------------------------------------------------------- /pretrain/megatron/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/__init__.py -------------------------------------------------------------------------------- /pretrain/megatron/core/__pycache__/package_info.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/__pycache__/package_info.cpython-311.pyc -------------------------------------------------------------------------------- /pretrain/megatron/core/datasets/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/datasets/Makefile -------------------------------------------------------------------------------- /pretrain/megatron/core/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pretrain/megatron/core/datasets/bert_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/datasets/bert_dataset.py -------------------------------------------------------------------------------- /pretrain/megatron/core/datasets/blended_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/datasets/blended_dataset.py -------------------------------------------------------------------------------- /pretrain/megatron/core/datasets/blended_megatron_dataset_builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/datasets/blended_megatron_dataset_builder.py -------------------------------------------------------------------------------- /pretrain/megatron/core/datasets/blended_megatron_dataset_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/datasets/blended_megatron_dataset_config.py -------------------------------------------------------------------------------- /pretrain/megatron/core/datasets/gpt_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/datasets/gpt_dataset.py -------------------------------------------------------------------------------- /pretrain/megatron/core/datasets/helpers.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/datasets/helpers.cpp -------------------------------------------------------------------------------- /pretrain/megatron/core/datasets/indexed_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/datasets/indexed_dataset.py -------------------------------------------------------------------------------- /pretrain/megatron/core/datasets/masked_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/datasets/masked_dataset.py -------------------------------------------------------------------------------- /pretrain/megatron/core/datasets/megatron_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/datasets/megatron_dataset.py -------------------------------------------------------------------------------- /pretrain/megatron/core/datasets/megatron_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/datasets/megatron_tokenizer.py -------------------------------------------------------------------------------- /pretrain/megatron/core/datasets/multimodal_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/datasets/multimodal_dataset.py -------------------------------------------------------------------------------- /pretrain/megatron/core/datasets/readme.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/datasets/readme.md -------------------------------------------------------------------------------- /pretrain/megatron/core/datasets/retro/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/datasets/retro/__init__.py -------------------------------------------------------------------------------- /pretrain/megatron/core/datasets/retro/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/datasets/retro/config/__init__.py -------------------------------------------------------------------------------- /pretrain/megatron/core/datasets/retro/config/bert_embedders.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/datasets/retro/config/bert_embedders.py -------------------------------------------------------------------------------- /pretrain/megatron/core/datasets/retro/config/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/datasets/retro/config/config.py -------------------------------------------------------------------------------- /pretrain/megatron/core/datasets/retro/config/gpt_chunk_datasets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/datasets/retro/config/gpt_chunk_datasets.py -------------------------------------------------------------------------------- /pretrain/megatron/core/datasets/retro/config/tokenizers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/datasets/retro/config/tokenizers.py -------------------------------------------------------------------------------- /pretrain/megatron/core/datasets/retro/db/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/datasets/retro/db/__init__.py -------------------------------------------------------------------------------- /pretrain/megatron/core/datasets/retro/db/build.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/datasets/retro/db/build.py -------------------------------------------------------------------------------- /pretrain/megatron/core/datasets/retro/db/dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/datasets/retro/db/dataset.py -------------------------------------------------------------------------------- /pretrain/megatron/core/datasets/retro/db/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/datasets/retro/db/utils.py -------------------------------------------------------------------------------- /pretrain/megatron/core/datasets/retro/external_libs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/datasets/retro/external_libs.py -------------------------------------------------------------------------------- /pretrain/megatron/core/datasets/retro/index/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/datasets/retro/index/__init__.py -------------------------------------------------------------------------------- /pretrain/megatron/core/datasets/retro/index/build.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/datasets/retro/index/build.py -------------------------------------------------------------------------------- /pretrain/megatron/core/datasets/retro/index/factory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/datasets/retro/index/factory.py -------------------------------------------------------------------------------- /pretrain/megatron/core/datasets/retro/index/index.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/datasets/retro/index/index.py -------------------------------------------------------------------------------- /pretrain/megatron/core/datasets/retro/index/indexes/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/datasets/retro/index/indexes/__init__.py -------------------------------------------------------------------------------- /pretrain/megatron/core/datasets/retro/index/indexes/faiss_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/datasets/retro/index/indexes/faiss_base.py -------------------------------------------------------------------------------- /pretrain/megatron/core/datasets/retro/index/indexes/faiss_par_add.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/datasets/retro/index/indexes/faiss_par_add.py -------------------------------------------------------------------------------- /pretrain/megatron/core/datasets/retro/index/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/datasets/retro/index/utils.py -------------------------------------------------------------------------------- /pretrain/megatron/core/datasets/retro/index/validate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/datasets/retro/index/validate.py -------------------------------------------------------------------------------- /pretrain/megatron/core/datasets/retro/query/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/datasets/retro/query/__init__.py -------------------------------------------------------------------------------- /pretrain/megatron/core/datasets/retro/query/gpt_chunk_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/datasets/retro/query/gpt_chunk_dataset.py -------------------------------------------------------------------------------- /pretrain/megatron/core/datasets/retro/query/multi_split_gpt_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/datasets/retro/query/multi_split_gpt_dataset.py -------------------------------------------------------------------------------- /pretrain/megatron/core/datasets/retro/query/query.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/datasets/retro/query/query.py -------------------------------------------------------------------------------- /pretrain/megatron/core/datasets/retro/query/retro_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/datasets/retro/query/retro_dataset.py -------------------------------------------------------------------------------- /pretrain/megatron/core/datasets/retro/query/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/datasets/retro/query/utils.py -------------------------------------------------------------------------------- /pretrain/megatron/core/datasets/retro/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/datasets/retro/utils.py -------------------------------------------------------------------------------- /pretrain/megatron/core/datasets/t5_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/datasets/t5_dataset.py -------------------------------------------------------------------------------- /pretrain/megatron/core/datasets/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/datasets/utils.py -------------------------------------------------------------------------------- /pretrain/megatron/core/dist_checkpointing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/dist_checkpointing/__init__.py -------------------------------------------------------------------------------- /pretrain/megatron/core/dist_checkpointing/core.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/dist_checkpointing/core.py -------------------------------------------------------------------------------- /pretrain/megatron/core/dist_checkpointing/dict_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/dist_checkpointing/dict_utils.py -------------------------------------------------------------------------------- /pretrain/megatron/core/dist_checkpointing/mapping.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/dist_checkpointing/mapping.py -------------------------------------------------------------------------------- /pretrain/megatron/core/dist_checkpointing/optimizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/dist_checkpointing/optimizer.py -------------------------------------------------------------------------------- /pretrain/megatron/core/dist_checkpointing/serialization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/dist_checkpointing/serialization.py -------------------------------------------------------------------------------- /pretrain/megatron/core/dist_checkpointing/strategies/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/dist_checkpointing/strategies/__init__.py -------------------------------------------------------------------------------- /pretrain/megatron/core/dist_checkpointing/strategies/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/dist_checkpointing/strategies/base.py -------------------------------------------------------------------------------- /pretrain/megatron/core/dist_checkpointing/strategies/filesystem_async.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/dist_checkpointing/strategies/filesystem_async.py -------------------------------------------------------------------------------- /pretrain/megatron/core/dist_checkpointing/strategies/state_dict_saver.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/dist_checkpointing/strategies/state_dict_saver.py -------------------------------------------------------------------------------- /pretrain/megatron/core/dist_checkpointing/strategies/tensorstore.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/dist_checkpointing/strategies/tensorstore.py -------------------------------------------------------------------------------- /pretrain/megatron/core/dist_checkpointing/strategies/torch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/dist_checkpointing/strategies/torch.py -------------------------------------------------------------------------------- /pretrain/megatron/core/dist_checkpointing/strategies/two_stage.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/dist_checkpointing/strategies/two_stage.py -------------------------------------------------------------------------------- /pretrain/megatron/core/dist_checkpointing/strategies/zarr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/dist_checkpointing/strategies/zarr.py -------------------------------------------------------------------------------- /pretrain/megatron/core/dist_checkpointing/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/dist_checkpointing/utils.py -------------------------------------------------------------------------------- /pretrain/megatron/core/distributed/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/distributed/__init__.py -------------------------------------------------------------------------------- /pretrain/megatron/core/distributed/distributed_data_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/distributed/distributed_data_parallel.py -------------------------------------------------------------------------------- /pretrain/megatron/core/distributed/finalize_model_grads.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/distributed/finalize_model_grads.py -------------------------------------------------------------------------------- /pretrain/megatron/core/distributed/param_and_grad_buffer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/distributed/param_and_grad_buffer.py -------------------------------------------------------------------------------- /pretrain/megatron/core/enums.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/enums.py -------------------------------------------------------------------------------- /pretrain/megatron/core/fusions/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pretrain/megatron/core/fusions/fused_bias_dropout.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/fusions/fused_bias_dropout.py -------------------------------------------------------------------------------- /pretrain/megatron/core/fusions/fused_bias_geglu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/fusions/fused_bias_geglu.py -------------------------------------------------------------------------------- /pretrain/megatron/core/fusions/fused_bias_gelu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/fusions/fused_bias_gelu.py -------------------------------------------------------------------------------- /pretrain/megatron/core/fusions/fused_bias_swiglu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/fusions/fused_bias_swiglu.py -------------------------------------------------------------------------------- /pretrain/megatron/core/fusions/fused_layer_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/fusions/fused_layer_norm.py -------------------------------------------------------------------------------- /pretrain/megatron/core/fusions/fused_softmax.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/fusions/fused_softmax.py -------------------------------------------------------------------------------- /pretrain/megatron/core/inference/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/inference/__init__.py -------------------------------------------------------------------------------- /pretrain/megatron/core/inference/gpt/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/inference/gpt/__init__.py -------------------------------------------------------------------------------- /pretrain/megatron/core/inference/gpt/model_specs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/inference/gpt/model_specs.py -------------------------------------------------------------------------------- /pretrain/megatron/core/inference/gpt/state_dict_hooks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/inference/gpt/state_dict_hooks.py -------------------------------------------------------------------------------- /pretrain/megatron/core/inference_params.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/inference_params.py -------------------------------------------------------------------------------- /pretrain/megatron/core/jit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/jit.py -------------------------------------------------------------------------------- /pretrain/megatron/core/model_parallel_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/model_parallel_config.py -------------------------------------------------------------------------------- /pretrain/megatron/core/models/T5/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/models/T5/__init__.py -------------------------------------------------------------------------------- /pretrain/megatron/core/models/T5/t5_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/models/T5/t5_model.py -------------------------------------------------------------------------------- /pretrain/megatron/core/models/T5/t5_spec.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/models/T5/t5_spec.py -------------------------------------------------------------------------------- /pretrain/megatron/core/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pretrain/megatron/core/models/bert/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pretrain/megatron/core/models/bert/bert_layer_specs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/models/bert/bert_layer_specs.py -------------------------------------------------------------------------------- /pretrain/megatron/core/models/bert/bert_lm_head.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/models/bert/bert_lm_head.py -------------------------------------------------------------------------------- /pretrain/megatron/core/models/bert/bert_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/models/bert/bert_model.py -------------------------------------------------------------------------------- /pretrain/megatron/core/models/bert/pooler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/models/bert/pooler.py -------------------------------------------------------------------------------- /pretrain/megatron/core/models/common/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pretrain/megatron/core/models/common/embeddings/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pretrain/megatron/core/models/common/embeddings/language_model_embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/models/common/embeddings/language_model_embedding.py -------------------------------------------------------------------------------- /pretrain/megatron/core/models/common/embeddings/rotary_pos_embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/models/common/embeddings/rotary_pos_embedding.py -------------------------------------------------------------------------------- /pretrain/megatron/core/models/common/language_module/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pretrain/megatron/core/models/common/language_module/language_module.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/models/common/language_module/language_module.py -------------------------------------------------------------------------------- /pretrain/megatron/core/models/common/vision_module/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pretrain/megatron/core/models/common/vision_module/vision_module.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/models/common/vision_module/vision_module.py -------------------------------------------------------------------------------- /pretrain/megatron/core/models/gpt/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/models/gpt/__init__.py -------------------------------------------------------------------------------- /pretrain/megatron/core/models/gpt/gpt_layer_specs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/models/gpt/gpt_layer_specs.py -------------------------------------------------------------------------------- /pretrain/megatron/core/models/gpt/gpt_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/models/gpt/gpt_model.py -------------------------------------------------------------------------------- /pretrain/megatron/core/models/multimodal/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pretrain/megatron/core/models/multimodal/llava_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/models/multimodal/llava_model.py -------------------------------------------------------------------------------- /pretrain/megatron/core/models/retro/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/models/retro/__init__.py -------------------------------------------------------------------------------- /pretrain/megatron/core/models/retro/base_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/models/retro/base_attention.py -------------------------------------------------------------------------------- /pretrain/megatron/core/models/retro/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/models/retro/config.py -------------------------------------------------------------------------------- /pretrain/megatron/core/models/retro/decoder_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/models/retro/decoder_attention.py -------------------------------------------------------------------------------- /pretrain/megatron/core/models/retro/decoder_spec.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/models/retro/decoder_spec.py -------------------------------------------------------------------------------- /pretrain/megatron/core/models/retro/encoder_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/models/retro/encoder_attention.py -------------------------------------------------------------------------------- /pretrain/megatron/core/models/retro/encoder_spec.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/models/retro/encoder_spec.py -------------------------------------------------------------------------------- /pretrain/megatron/core/models/retro/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/models/retro/model.py -------------------------------------------------------------------------------- /pretrain/megatron/core/models/retro/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/models/retro/utils.py -------------------------------------------------------------------------------- /pretrain/megatron/core/models/vision/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pretrain/megatron/core/models/vision/clip_vit_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/models/vision/clip_vit_model.py -------------------------------------------------------------------------------- /pretrain/megatron/core/models/vision/multimodal_projector.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/models/vision/multimodal_projector.py -------------------------------------------------------------------------------- /pretrain/megatron/core/optimizer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/optimizer/__init__.py -------------------------------------------------------------------------------- /pretrain/megatron/core/optimizer/clip_grads.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/optimizer/clip_grads.py -------------------------------------------------------------------------------- /pretrain/megatron/core/optimizer/distrib_optimizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/optimizer/distrib_optimizer.py -------------------------------------------------------------------------------- /pretrain/megatron/core/optimizer/grad_scaler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/optimizer/grad_scaler.py -------------------------------------------------------------------------------- /pretrain/megatron/core/optimizer/optimizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/optimizer/optimizer.py -------------------------------------------------------------------------------- /pretrain/megatron/core/optimizer/optimizer_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/optimizer/optimizer_config.py -------------------------------------------------------------------------------- /pretrain/megatron/core/package_info.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/package_info.py -------------------------------------------------------------------------------- /pretrain/megatron/core/packed_seq_params.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/packed_seq_params.py -------------------------------------------------------------------------------- /pretrain/megatron/core/parallel_state.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/parallel_state.py -------------------------------------------------------------------------------- /pretrain/megatron/core/pipeline_parallel/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/pipeline_parallel/__init__.py -------------------------------------------------------------------------------- /pretrain/megatron/core/pipeline_parallel/p2p_communication.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/pipeline_parallel/p2p_communication.py -------------------------------------------------------------------------------- /pretrain/megatron/core/pipeline_parallel/schedules.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/pipeline_parallel/schedules.py -------------------------------------------------------------------------------- /pretrain/megatron/core/requirements.txt: -------------------------------------------------------------------------------- 1 | torch -------------------------------------------------------------------------------- /pretrain/megatron/core/tensor_parallel/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/tensor_parallel/__init__.py -------------------------------------------------------------------------------- /pretrain/megatron/core/tensor_parallel/cross_entropy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/tensor_parallel/cross_entropy.py -------------------------------------------------------------------------------- /pretrain/megatron/core/tensor_parallel/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/tensor_parallel/data.py -------------------------------------------------------------------------------- /pretrain/megatron/core/tensor_parallel/layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/tensor_parallel/layers.py -------------------------------------------------------------------------------- /pretrain/megatron/core/tensor_parallel/mappings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/tensor_parallel/mappings.py -------------------------------------------------------------------------------- /pretrain/megatron/core/tensor_parallel/random.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/tensor_parallel/random.py -------------------------------------------------------------------------------- /pretrain/megatron/core/tensor_parallel/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/tensor_parallel/utils.py -------------------------------------------------------------------------------- /pretrain/megatron/core/timers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/timers.py -------------------------------------------------------------------------------- /pretrain/megatron/core/transformer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/transformer/__init__.py -------------------------------------------------------------------------------- /pretrain/megatron/core/transformer/attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/transformer/attention.py -------------------------------------------------------------------------------- /pretrain/megatron/core/transformer/custom_layers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pretrain/megatron/core/transformer/custom_layers/transformer_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/transformer/custom_layers/transformer_engine.py -------------------------------------------------------------------------------- /pretrain/megatron/core/transformer/dot_product_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/transformer/dot_product_attention.py -------------------------------------------------------------------------------- /pretrain/megatron/core/transformer/enums.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/transformer/enums.py -------------------------------------------------------------------------------- /pretrain/megatron/core/transformer/identity_op.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/transformer/identity_op.py -------------------------------------------------------------------------------- /pretrain/megatron/core/transformer/mlp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/transformer/mlp.py -------------------------------------------------------------------------------- /pretrain/megatron/core/transformer/module.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/transformer/module.py -------------------------------------------------------------------------------- /pretrain/megatron/core/transformer/moe/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/transformer/moe/README.md -------------------------------------------------------------------------------- /pretrain/megatron/core/transformer/moe/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pretrain/megatron/core/transformer/moe/experts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/transformer/moe/experts.py -------------------------------------------------------------------------------- /pretrain/megatron/core/transformer/moe/grouped_gemm_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/transformer/moe/grouped_gemm_util.py -------------------------------------------------------------------------------- /pretrain/megatron/core/transformer/moe/moe_layer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/transformer/moe/moe_layer.py -------------------------------------------------------------------------------- /pretrain/megatron/core/transformer/moe/moe_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/transformer/moe/moe_utils.py -------------------------------------------------------------------------------- /pretrain/megatron/core/transformer/moe/router.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/transformer/moe/router.py -------------------------------------------------------------------------------- /pretrain/megatron/core/transformer/moe/token_dispatcher.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/transformer/moe/token_dispatcher.py -------------------------------------------------------------------------------- /pretrain/megatron/core/transformer/spec_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/transformer/spec_utils.py -------------------------------------------------------------------------------- /pretrain/megatron/core/transformer/transformer_block.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/transformer/transformer_block.py -------------------------------------------------------------------------------- /pretrain/megatron/core/transformer/transformer_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/transformer/transformer_config.py -------------------------------------------------------------------------------- /pretrain/megatron/core/transformer/transformer_layer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/transformer/transformer_layer.py -------------------------------------------------------------------------------- /pretrain/megatron/core/transformer/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/transformer/utils.py -------------------------------------------------------------------------------- /pretrain/megatron/core/transformer/vanillamlp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/transformer/vanillamlp.py -------------------------------------------------------------------------------- /pretrain/megatron/core/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/core/utils.py -------------------------------------------------------------------------------- /pretrain/megatron/inference/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/inference/__init__.py -------------------------------------------------------------------------------- /pretrain/megatron/inference/arguments.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/inference/arguments.py -------------------------------------------------------------------------------- /pretrain/megatron/inference/gpt/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/inference/gpt/__init__.py -------------------------------------------------------------------------------- /pretrain/megatron/inference/gpt/model_provider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/inference/gpt/model_provider.py -------------------------------------------------------------------------------- /pretrain/megatron/inference/static/index.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/inference/static/index.html -------------------------------------------------------------------------------- /pretrain/megatron/inference/text_generation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/inference/text_generation/__init__.py -------------------------------------------------------------------------------- /pretrain/megatron/inference/text_generation/api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/inference/text_generation/api.py -------------------------------------------------------------------------------- /pretrain/megatron/inference/text_generation/beam_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/inference/text_generation/beam_utils.py -------------------------------------------------------------------------------- /pretrain/megatron/inference/text_generation/communication.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/inference/text_generation/communication.py -------------------------------------------------------------------------------- /pretrain/megatron/inference/text_generation/forward_step.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/inference/text_generation/forward_step.py -------------------------------------------------------------------------------- /pretrain/megatron/inference/text_generation/generation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/inference/text_generation/generation.py -------------------------------------------------------------------------------- /pretrain/megatron/inference/text_generation/sampling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/inference/text_generation/sampling.py -------------------------------------------------------------------------------- /pretrain/megatron/inference/text_generation/tokenization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/inference/text_generation/tokenization.py -------------------------------------------------------------------------------- /pretrain/megatron/inference/text_generation_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/inference/text_generation_server.py -------------------------------------------------------------------------------- /pretrain/megatron/legacy/data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pretrain/megatron/legacy/data/autoaugment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/legacy/data/autoaugment.py -------------------------------------------------------------------------------- /pretrain/megatron/legacy/data/biencoder_dataset_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/legacy/data/biencoder_dataset_utils.py -------------------------------------------------------------------------------- /pretrain/megatron/legacy/data/data_samplers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/legacy/data/data_samplers.py -------------------------------------------------------------------------------- /pretrain/megatron/legacy/data/dataset_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/legacy/data/dataset_utils.py -------------------------------------------------------------------------------- /pretrain/megatron/legacy/data/ict_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/legacy/data/ict_dataset.py -------------------------------------------------------------------------------- /pretrain/megatron/legacy/data/image_folder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/legacy/data/image_folder.py -------------------------------------------------------------------------------- /pretrain/megatron/legacy/data/multimodal_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/legacy/data/multimodal_dataset.py -------------------------------------------------------------------------------- /pretrain/megatron/legacy/data/orqa_wiki_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/legacy/data/orqa_wiki_dataset.py -------------------------------------------------------------------------------- /pretrain/megatron/legacy/data/realm_dataset_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/legacy/data/realm_dataset_utils.py -------------------------------------------------------------------------------- /pretrain/megatron/legacy/data/realm_index.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/legacy/data/realm_index.py -------------------------------------------------------------------------------- /pretrain/megatron/legacy/data/vit_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/legacy/data/vit_dataset.py -------------------------------------------------------------------------------- /pretrain/megatron/legacy/fp16_deprecated/loss_scaler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/legacy/fp16_deprecated/loss_scaler.py -------------------------------------------------------------------------------- /pretrain/megatron/legacy/fused_kernels/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/legacy/fused_kernels/__init__.py -------------------------------------------------------------------------------- /pretrain/megatron/legacy/fused_kernels/compat.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/legacy/fused_kernels/compat.h -------------------------------------------------------------------------------- /pretrain/megatron/legacy/fused_kernels/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pretrain/megatron/legacy/fused_kernels/tests/test_fused_kernels.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/legacy/fused_kernels/tests/test_fused_kernels.py -------------------------------------------------------------------------------- /pretrain/megatron/legacy/fused_kernels/type_shim.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/legacy/fused_kernels/type_shim.h -------------------------------------------------------------------------------- /pretrain/megatron/legacy/indexer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/legacy/indexer.py -------------------------------------------------------------------------------- /pretrain/megatron/legacy/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/legacy/model/__init__.py -------------------------------------------------------------------------------- /pretrain/megatron/legacy/model/bert_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/legacy/model/bert_model.py -------------------------------------------------------------------------------- /pretrain/megatron/legacy/model/biencoder_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/legacy/model/biencoder_model.py -------------------------------------------------------------------------------- /pretrain/megatron/legacy/model/classification.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/legacy/model/classification.py -------------------------------------------------------------------------------- /pretrain/megatron/legacy/model/enums.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/legacy/model/enums.py -------------------------------------------------------------------------------- /pretrain/megatron/legacy/model/fused_bias_gelu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/legacy/model/fused_bias_gelu.py -------------------------------------------------------------------------------- /pretrain/megatron/legacy/model/fused_layer_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/legacy/model/fused_layer_norm.py -------------------------------------------------------------------------------- /pretrain/megatron/legacy/model/fused_softmax.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/legacy/model/fused_softmax.py -------------------------------------------------------------------------------- /pretrain/megatron/legacy/model/gpt_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/legacy/model/gpt_model.py -------------------------------------------------------------------------------- /pretrain/megatron/legacy/model/language_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/legacy/model/language_model.py -------------------------------------------------------------------------------- /pretrain/megatron/legacy/model/module.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/legacy/model/module.py -------------------------------------------------------------------------------- /pretrain/megatron/legacy/model/multiple_choice.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/legacy/model/multiple_choice.py -------------------------------------------------------------------------------- /pretrain/megatron/legacy/model/realm_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/legacy/model/realm_model.py -------------------------------------------------------------------------------- /pretrain/megatron/legacy/model/rms_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/legacy/model/rms_norm.py -------------------------------------------------------------------------------- /pretrain/megatron/legacy/model/t5_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/legacy/model/t5_model.py -------------------------------------------------------------------------------- /pretrain/megatron/legacy/model/transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/legacy/model/transformer.py -------------------------------------------------------------------------------- /pretrain/megatron/legacy/model/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/legacy/model/utils.py -------------------------------------------------------------------------------- /pretrain/megatron/legacy/model/vision/classification.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/legacy/model/vision/classification.py -------------------------------------------------------------------------------- /pretrain/megatron/legacy/model/vision/dino.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/legacy/model/vision/dino.py -------------------------------------------------------------------------------- /pretrain/megatron/legacy/model/vision/esvit_swin_backbone.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/legacy/model/vision/esvit_swin_backbone.py -------------------------------------------------------------------------------- /pretrain/megatron/legacy/model/vision/inpainting.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/legacy/model/vision/inpainting.py -------------------------------------------------------------------------------- /pretrain/megatron/legacy/model/vision/knn_monitor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/legacy/model/vision/knn_monitor.py -------------------------------------------------------------------------------- /pretrain/megatron/legacy/model/vision/mit_backbone.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/legacy/model/vision/mit_backbone.py -------------------------------------------------------------------------------- /pretrain/megatron/legacy/model/vision/swin_backbone.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/legacy/model/vision/swin_backbone.py -------------------------------------------------------------------------------- /pretrain/megatron/legacy/model/vision/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/legacy/model/vision/utils.py -------------------------------------------------------------------------------- /pretrain/megatron/legacy/model/vision/vit_backbone.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/legacy/model/vision/vit_backbone.py -------------------------------------------------------------------------------- /pretrain/megatron/legacy/mpu/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pretrain/megatron/legacy/mpu/tests/commons.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/legacy/mpu/tests/commons.py -------------------------------------------------------------------------------- /pretrain/megatron/legacy/mpu/tests/test_cross_entropy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/legacy/mpu/tests/test_cross_entropy.py -------------------------------------------------------------------------------- /pretrain/megatron/legacy/mpu/tests/test_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/legacy/mpu/tests/test_data.py -------------------------------------------------------------------------------- /pretrain/megatron/legacy/mpu/tests/test_initialize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/legacy/mpu/tests/test_initialize.py -------------------------------------------------------------------------------- /pretrain/megatron/legacy/mpu/tests/test_layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/legacy/mpu/tests/test_layers.py -------------------------------------------------------------------------------- /pretrain/megatron/legacy/mpu/tests/test_random.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/legacy/mpu/tests/test_random.py -------------------------------------------------------------------------------- /pretrain/megatron/training/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/training/__init__.py -------------------------------------------------------------------------------- /pretrain/megatron/training/arguments.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/training/arguments.py -------------------------------------------------------------------------------- /pretrain/megatron/training/checkpointing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/training/checkpointing.py -------------------------------------------------------------------------------- /pretrain/megatron/training/dist_signal_handler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/training/dist_signal_handler.py -------------------------------------------------------------------------------- /pretrain/megatron/training/global_vars.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/training/global_vars.py -------------------------------------------------------------------------------- /pretrain/megatron/training/initialize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/training/initialize.py -------------------------------------------------------------------------------- /pretrain/megatron/training/log_handler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/training/log_handler.py -------------------------------------------------------------------------------- /pretrain/megatron/training/microbatches.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/training/microbatches.py -------------------------------------------------------------------------------- /pretrain/megatron/training/optimizer_param_scheduler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/training/optimizer_param_scheduler.py -------------------------------------------------------------------------------- /pretrain/megatron/training/theoretical_memory_usage.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/training/theoretical_memory_usage.py -------------------------------------------------------------------------------- /pretrain/megatron/training/tokenizer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/training/tokenizer/__init__.py -------------------------------------------------------------------------------- /pretrain/megatron/training/tokenizer/bert_tokenization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/training/tokenizer/bert_tokenization.py -------------------------------------------------------------------------------- /pretrain/megatron/training/tokenizer/gpt2_tokenization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/training/tokenizer/gpt2_tokenization.py -------------------------------------------------------------------------------- /pretrain/megatron/training/tokenizer/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/training/tokenizer/tokenizer.py -------------------------------------------------------------------------------- /pretrain/megatron/training/training.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/training/training.py -------------------------------------------------------------------------------- /pretrain/megatron/training/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/training/utils.py -------------------------------------------------------------------------------- /pretrain/megatron/training/yaml_arguments.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/megatron/training/yaml_arguments.py -------------------------------------------------------------------------------- /pretrain/pretrain_gpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/pretrain_gpt.py -------------------------------------------------------------------------------- /pretrain/pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/pyproject.toml -------------------------------------------------------------------------------- /pretrain/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/setup.py -------------------------------------------------------------------------------- /pretrain/tokenizer/special_tokens_map.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/tokenizer/special_tokens_map.json -------------------------------------------------------------------------------- /pretrain/tokenizer/tokenizer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/tokenizer/tokenizer.json -------------------------------------------------------------------------------- /pretrain/tokenizer/tokenizer_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/pretrain/tokenizer/tokenizer_config.json -------------------------------------------------------------------------------- /reparameterize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JieShibo/MoLE/HEAD/reparameterize.py --------------------------------------------------------------------------------