├── CODEOWNERS ├── LICENSE ├── MANIFEST.in ├── README.md ├── SECURITY.md ├── dataset ├── README.md ├── download_books.sh ├── download_ckpt.sh └── download_vocab.sh ├── docs ├── distrib_optimizer.md └── images │ └── distrib_optimizer │ ├── data_flow.png │ └── sharding_scheme.png ├── examples ├── README.md ├── detxoify_lm │ ├── README.md │ ├── annotations │ │ ├── filter-selfgeneration.py │ │ ├── perspective_api_annotate.py │ │ └── preprocess.sh │ ├── finetune_gpt.py │ ├── finetune_gpt_distributed-1.3b.sh │ ├── generate-1.3b.sh │ ├── generate_samples_gpt.py │ ├── perspective_api.py │ └── self_generation │ │ └── selfgenerate-1.3b-unconditional.sh ├── evaluate_retriever_nq.sh ├── evaluate_zeroshot_gpt.sh ├── finetune_mnli_distributed.sh ├── finetune_race_distributed.sh ├── finetune_retriever_distributed.sh ├── merge_mp_bert.sh ├── msdp │ ├── README.md │ ├── data_processing.sh │ ├── eval_knwl_generation.sh │ ├── eval_resp_generation.sh │ ├── prep_resp_gen.sh │ ├── prompt_knwl_gen.sh │ └── prompt_resp_gen.sh ├── pretrain_bert.sh ├── pretrain_bert_distributed.sh ├── pretrain_bert_distributed_with_mp.sh ├── pretrain_gpt.sh ├── pretrain_gpt3_175B.sh ├── pretrain_gpt_distributed.sh ├── pretrain_gpt_distributed_with_mp.sh ├── pretrain_ict.sh ├── pretrain_t5.sh ├── pretrain_t5_distributed.sh ├── pretrain_t5_distributed_with_mp.sh ├── run_text_generation_server_345M.sh ├── run_text_generation_server_345M_8_tensor_parallel.sh └── sc21 │ ├── CONFIG.sh │ ├── README.md │ ├── SBATCH.sh │ ├── SRUN.sh │ ├── run_figure_11.sh │ ├── run_figure_12.sh │ ├── run_figure_13.sh │ ├── run_figure_14.sh │ ├── run_figure_15.sh │ ├── run_figure_16.sh │ ├── run_figure_17.sh │ ├── run_figure_18.sh │ └── run_table_1.sh ├── examples_deepspeed ├── MoE │ ├── ds_config_gpt_TEMPLATE.json │ ├── ds_config_gpt_Zero2_TEMPLATE.json │ ├── ds_evalharness.sh │ ├── ds_pretrain_gpt_1.3B_MoE128.sh │ ├── ds_pretrain_gpt_1.3B_PR-MoE64or128.sh │ ├── ds_pretrain_gpt_1.3B_PR-MoE64or128_MoS.sh │ ├── ds_pretrain_gpt_1.3B_dense.sh │ ├── ds_pretrain_gpt_1.3B_dense_cl.sh │ ├── ds_pretrain_gpt_125M_MoE64.sh │ ├── ds_pretrain_gpt_125M_dense_cl.sh │ ├── ds_pretrain_gpt_350M_MoE128.sh │ ├── ds_pretrain_gpt_350M_PR-MoE32or64.sh │ ├── ds_pretrain_gpt_350M_PR-MoE32or64_MoS.sh │ ├── ds_pretrain_gpt_350M_dense.sh │ ├── ds_pretrain_gpt_6.7B_dense.sh │ └── readme_evalharness.md ├── README.md ├── azure │ ├── README.md │ ├── run-175b.sh │ ├── run-1t.sh │ └── run-benchmark-model.sh ├── azureml │ ├── Dockerfile.dockerfile │ ├── README.md │ ├── aml_submit.py │ └── prepare_dataset.py ├── bert_with_pile │ ├── README.md │ ├── ds_config_bert_TEMPLATE.json │ ├── ds_finetune_bert_mnli.sh │ ├── ds_finetune_bert_qqp.sh │ ├── ds_finetune_bert_race.sh │ ├── ds_pretrain_bert.sh │ └── prepare_pile_data.py ├── compression │ ├── 125M-Int8-test-64gpu-distilled-group48.sh │ ├── 125M-L10-Int8-test-64gpu-distilled-group48.sh │ ├── 125M-L12-Int8-test-64gpu-distilled-group48.sh │ ├── ds_config_gpt_TEMPLATE.json │ ├── ds_config_gpt_TEMPLATE_compression.json │ ├── ds_evalharness.sh │ ├── ds_pretrain_gpt_1.3B_dense_cl_kd.sh │ ├── ds_pretrain_gpt_125M_dense_cl_kd.sh │ ├── ds_pretrain_gpt_125M_dense_kd.sh │ └── ds_pretrain_gpt_350M_dense_kd.sh ├── curriculum_learning │ ├── README.md │ ├── ds_config_gpt_slw_TEMPLATE.json │ ├── ds_pretrain_gpt2.sh │ ├── ds_pretrain_gpt_1.3B_rope_slw.sh │ ├── ds_train.sh │ ├── ds_zero_stage_1_config_baseline.json │ └── ds_zero_stage_1_config_curriculum_fixed_linear.json ├── data_efficiency │ ├── README.md │ ├── analyze_data.py │ ├── bert │ │ ├── ds_analyze_bert_data_map.sh │ │ ├── ds_analyze_bert_data_reduce.sh │ │ ├── finetune │ │ │ ├── ds_config_bert_TEMPLATE.json │ │ │ ├── ds_finetune_bert_mnli.sh │ │ │ ├── ds_finetune_bert_qqp.sh │ │ │ ├── ds_finetune_bert_race.sh │ │ │ └── ds_finetune_gather_result.py │ │ ├── finetune_glue │ │ │ ├── ds_config_bert_TEMPLATE.json │ │ │ ├── ds_finetune_bert_glue.sh │ │ │ ├── ds_finetune_bert_glue_run.sh │ │ │ └── ds_finetune_gather_result.py │ │ ├── pile_data_download_preprocess.py │ │ └── pretrain │ │ │ ├── ds_config_bert_1clmetric_TEMPLATE.json │ │ │ ├── ds_config_bert_2clmetrics_TEMPLATE.json │ │ │ ├── ds_pretrain_bert_336M_base_script.sh │ │ │ └── ds_pretrain_bert_336M_run.sh │ └── gpt │ │ ├── ds_analyze_gpt_data_map.sh │ │ ├── ds_analyze_gpt_data_reduce.sh │ │ ├── eval │ │ ├── ds_config_eval_dummy.json │ │ ├── ds_evalharness_1gpu.sh │ │ ├── ds_evalharness_gather_result.py │ │ ├── ds_evalharness_parallel_run.sh │ │ └── ds_evalharness_parallel_run_10shot.sh │ │ └── pretrain │ │ ├── ds_config_gpt_1clmetric_TEMPLATE.json │ │ ├── ds_config_gpt_2clmetrics_TEMPLATE.json │ │ ├── ds_pretrain_gpt_1.3B_dense_base_script.sh │ │ └── ds_pretrain_gpt_1.3B_dense_run.sh ├── deepspeed4science │ └── megatron_long_seq_support │ │ ├── README.md │ │ ├── ds_config_gpt_TEMPLATE.json │ │ ├── host_file │ │ ├── pretrain_gpt_1.3B_seq_parallel.sh │ │ └── pretrain_gpt_30B_seq_parallel.sh ├── finetune_hf_llama │ ├── README.md │ ├── ds_config.json │ └── finetune_llama.sh ├── generate_text.sh ├── offload_pp │ ├── README.md │ ├── ds_config_gpt_TEMPLATE.json │ ├── ds_pretrain_gpt_350M.sh │ └── twin-offload.png ├── pretrain_llama2_distributed.sh ├── pretrain_llama_distributed.sh ├── rebase │ ├── README.md │ ├── ds_config_gpt_TEMPLATE.json │ ├── ds_config_gpt_slw_TEMPLATE.json │ ├── ds_pretrain_gpt_1.3B.sh │ ├── ds_pretrain_gpt_1.3B_megatron_checkpointing.sh │ ├── ds_pretrain_gpt_1.3B_rope.sh │ ├── ds_pretrain_gpt_1.3B_rope_slw.sh │ ├── ds_pretrain_gpt_125M.sh │ ├── ds_pretrain_gpt_125M_flashattn.sh │ └── ds_pretrain_gpt_13B.sh ├── run_deepspeed_example.sh ├── sequence_parallel │ ├── README.md │ ├── ds_config_gpt_TEMPLATE.json │ ├── ds_pretrain_gpt_1.3B_seq_parallel_32k.sh │ └── ds_pretrain_gpt_30B_seq_parallel_32k.sh └── universal_checkpointing │ ├── README.md │ ├── assets │ └── image │ │ ├── uc_char_training_loss.png │ │ └── uc_char_validation_loss.png │ ├── ds_config.json │ ├── run_bf16.sh │ ├── run_fp16.sh │ ├── run_tb_analysis.sh │ ├── run_universal_bf16.sh │ ├── run_universal_fp16.sh │ └── tb_analysis │ ├── abstract_analysis.py │ ├── arguments.py │ ├── tb_analysis_script.py │ ├── uc_analysis.py │ └── utils.py ├── experimental ├── __pycache__ │ ├── fp4.cpython-310.pyc │ ├── fp8_linear.cpython-310.pyc │ └── layers.cpython-310.pyc ├── fp4.py └── layers.py ├── finetune_llama.py ├── images ├── Achieved_petaFLOPs.png └── cases_april2021.png ├── megatron ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-310.pyc │ ├── arguments.cpython-310.pyc │ ├── checkpointing.cpython-310.pyc │ ├── dist_signal_handler.cpython-310.pyc │ ├── global_vars.cpython-310.pyc │ ├── initialize.cpython-310.pyc │ ├── memory.cpython-310.pyc │ ├── microbatches.cpython-310.pyc │ ├── optimizer_param_scheduler.cpython-310.pyc │ ├── profiler.cpython-310.pyc │ ├── timers.cpython-310.pyc │ ├── training.cpython-310.pyc │ └── utils.cpython-310.pyc ├── arguments.py ├── checkpointing.py ├── core │ ├── README.md │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── enums.cpython-310.pyc │ │ ├── model_parallel_config.cpython-310.pyc │ │ ├── parallel_state.cpython-310.pyc │ │ └── utils.cpython-310.pyc │ ├── enums.py │ ├── fusions │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ └── fused_softmax.cpython-310.pyc │ │ ├── fused_bias_dropout.py │ │ ├── fused_bias_gelu.py │ │ ├── fused_layer_norm.py │ │ └── fused_softmax.py │ ├── model_parallel_config.py │ ├── models │ │ ├── __init__.py │ │ └── gpt │ │ │ ├── __init__.py │ │ │ ├── gpt_embedding.py │ │ │ └── gpt_model.py │ ├── package_info.py │ ├── parallel_state.py │ ├── pipeline_parallel │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── p2p_communication.cpython-310.pyc │ │ │ └── schedules.cpython-310.pyc │ │ ├── p2p_communication.py │ │ └── schedules.py │ ├── requirements.txt │ ├── sequence_parallel │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ └── cross_entropy.cpython-310.pyc │ │ └── cross_entropy.py │ ├── tensor_parallel │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── cross_entropy.cpython-310.pyc │ │ │ ├── data.cpython-310.pyc │ │ │ ├── layers.cpython-310.pyc │ │ │ ├── mappings.cpython-310.pyc │ │ │ ├── random.cpython-310.pyc │ │ │ └── utils.cpython-310.pyc │ │ ├── cross_entropy.py │ │ ├── data.py │ │ ├── layers.py │ │ ├── mappings.py │ │ ├── random.py │ │ └── utils.py │ ├── transformer │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── core_attention.cpython-310.pyc │ │ │ ├── enums.cpython-310.pyc │ │ │ ├── module.cpython-310.pyc │ │ │ ├── transformer_config.cpython-310.pyc │ │ │ └── utils.cpython-310.pyc │ │ ├── attention.py │ │ ├── core_attention.py │ │ ├── custom_layers │ │ │ └── transformer_engine.py │ │ ├── enums.py │ │ ├── mlp.py │ │ ├── module.py │ │ ├── transformer_block.py │ │ ├── transformer_config.py │ │ ├── transformer_layer.py │ │ └── utils.py │ └── utils.py ├── data │ ├── Makefile │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── autoaugment.cpython-310.pyc │ │ ├── blendable_dataset.cpython-310.pyc │ │ ├── data_samplers.cpython-310.pyc │ │ ├── dataset_utils.cpython-310.pyc │ │ ├── gpt_dataset.cpython-310.pyc │ │ ├── image_folder.cpython-310.pyc │ │ ├── indexed_dataset.cpython-310.pyc │ │ └── vit_dataset.cpython-310.pyc │ ├── autoaugment.py │ ├── bert_dataset.py │ ├── biencoder_dataset_utils.py │ ├── blendable_dataset.py │ ├── data_samplers.py │ ├── dataset_utils.py │ ├── gpt_dataset.py │ ├── helpers.cpp │ ├── helpers.cpython-310-x86_64-linux-gnu.so │ ├── ict_dataset.py │ ├── image_folder.py │ ├── indexed_dataset.py │ ├── orqa_wiki_dataset.py │ ├── prompt_dataset.py │ ├── realm_dataset_utils.py │ ├── realm_index.py │ ├── t5_dataset.py │ ├── test │ │ ├── test_indexed_dataset.py │ │ └── test_preprocess_data.sh │ └── vit_dataset.py ├── dist_signal_handler.py ├── enums.py ├── fp16_deprecated │ └── loss_scaler.py ├── fused_kernels │ ├── __init__.py │ ├── __pycache__ │ │ └── __init__.cpython-310.pyc │ ├── compat.h │ ├── scaled_masked_softmax.cpp │ ├── scaled_masked_softmax.h │ ├── scaled_masked_softmax_cuda.cu │ ├── scaled_softmax.cpp │ ├── scaled_softmax_cuda.cu │ ├── scaled_upper_triang_masked_softmax.cpp │ ├── scaled_upper_triang_masked_softmax.h │ ├── scaled_upper_triang_masked_softmax_cuda.cu │ ├── tests │ │ ├── __init__.py │ │ └── test_fused_kernels.py │ └── type_shim.h ├── global_vars.py ├── indexer.py ├── initialize.py ├── memory.py ├── microbatches.py ├── model │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── bert_model.cpython-310.pyc │ │ ├── distributed.cpython-310.pyc │ │ ├── enums.cpython-310.pyc │ │ ├── fused_bias_gelu.cpython-310.pyc │ │ ├── fused_layer_norm.cpython-310.pyc │ │ ├── fused_softmax.cpython-310.pyc │ │ ├── gpt_model.cpython-310.pyc │ │ ├── language_model.cpython-310.pyc │ │ ├── module.cpython-310.pyc │ │ ├── rmsnorm.cpython-310.pyc │ │ ├── rotary_pos_embedding.cpython-310.pyc │ │ ├── t5_model.cpython-310.pyc │ │ ├── transformer.cpython-310.pyc │ │ └── utils.cpython-310.pyc │ ├── bert_model.py │ ├── biencoder_model.py │ ├── classification.py │ ├── distributed.py │ ├── enums.py │ ├── fused_bias_gelu.py │ ├── fused_layer_norm.py │ ├── fused_softmax.py │ ├── gpt_model.py │ ├── language_model.py │ ├── module.py │ ├── multiple_choice.py │ ├── realm_model.py │ ├── rmsnorm.py │ ├── rmsnorm_apex.py │ ├── rotary_pos_embedding.py │ ├── t5_model.py │ ├── transformer.py │ ├── utils.py │ └── vision │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ └── knn_monitor.cpython-310.pyc │ │ ├── classification.py │ │ ├── dino.py │ │ ├── esvit_swin_backbone.py │ │ ├── inpainting.py │ │ ├── knn_monitor.py │ │ ├── mit_backbone.py │ │ ├── swin_backbone.py │ │ ├── utils.py │ │ └── vit_backbone.py ├── mpu │ └── tests │ │ ├── __init__.py │ │ ├── commons.py │ │ ├── test_cross_entropy.py │ │ ├── test_data.py │ │ ├── test_initialize.py │ │ ├── test_layers.py │ │ └── test_random.py ├── optimizer │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── clip_grads.cpython-310.pyc │ │ ├── distrib_optimizer.cpython-310.pyc │ │ ├── grad_scaler.cpython-310.pyc │ │ └── optimizer.cpython-310.pyc │ ├── clip_grads.py │ ├── distrib_optimizer.py │ ├── grad_scaler.py │ └── optimizer.py ├── optimizer_param_scheduler.py ├── p2p_communication.py ├── profiler.py ├── static │ └── index.html ├── text_generation │ ├── __init__.py │ ├── api.py │ ├── beam_utils.py │ ├── communication.py │ ├── forward_step.py │ ├── generation.py │ ├── sampling.py │ └── tokenization.py ├── text_generation_server.py ├── text_generation_utils.py ├── timers.py ├── tokenizer │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── bert_tokenization.cpython-310.pyc │ │ ├── gpt2_tokenization.cpython-310.pyc │ │ └── tokenizer.cpython-310.pyc │ ├── bert_tokenization.py │ ├── gpt2_tokenization.py │ └── tokenizer.py ├── training.py └── utils.py ├── pretrain_bert.py ├── pretrain_gpt.py ├── pretrain_gpt_core.py ├── pretrain_ict.py ├── pretrain_retro.py ├── pretrain_t5.py ├── pretrain_vision_classify.py ├── pretrain_vision_dino.py ├── pretrain_vision_inpaint.py ├── scripts ├── convert_ds_to_universal.sh ├── hostsfile ├── run_llama.sh ├── run_llama2_7b_fp4.sh └── run_mixtral.sh ├── setup.py ├── tasks ├── ckp_utils.py ├── data_utils.py ├── ensemble_classifier.py ├── eval_harness │ ├── README.md │ ├── download.py │ ├── evaluate.py │ └── report-to-csv.py ├── eval_utils.py ├── finetune_utils.py ├── glue │ ├── cola.py │ ├── data.py │ ├── finetune.py │ ├── mnli.py │ ├── mrpc.py │ ├── qnli.py │ ├── qqp.py │ ├── rte.py │ ├── sst2.py │ └── stsb.py ├── main.py ├── main_3d.py ├── msdp │ ├── README.md │ ├── evaluate.py │ ├── main.py │ ├── metrics.py │ ├── preprocessing.py │ └── prompt.py ├── orqa │ ├── README.md │ ├── evaluate_orqa.py │ ├── evaluate_utils.py │ ├── supervised │ │ ├── data.py │ │ ├── eval_utils.py │ │ └── finetune.py │ └── unsupervised │ │ ├── nq.py │ │ ├── qa_utils.py │ │ └── tokenizers.py ├── race │ ├── data.py │ └── finetune.py ├── vision │ ├── classification │ │ ├── classification.py │ │ └── eval_utils.py │ ├── finetune_utils.py │ ├── main.py │ └── segmentation │ │ ├── cityscapes.py │ │ ├── data.py │ │ ├── finetune_segformer.py │ │ ├── finetune_setr.py │ │ ├── metrics.py │ │ ├── seg_heads.py │ │ ├── seg_models.py │ │ ├── transforms.py │ │ └── utils.py └── zeroshot_gpt │ ├── datasets.py │ ├── detokenizer.py │ └── evaluate.py ├── tests ├── __init__.py ├── conftest.py ├── functional_tests │ ├── __init__.py │ ├── python_test_utils │ │ ├── __init__.py │ │ ├── check_slurm_job_completion.py │ │ ├── get_test_results_from_tensorboard_logs.py │ │ ├── test_ci_pipeline.py │ │ └── test_resume_checkpoint_pipeline.py │ ├── shell_test_utils │ │ └── jobwait.sh │ ├── test_results │ │ ├── bert │ │ │ ├── bert_tp1_pp2_1nodes_50steps.json │ │ │ ├── bert_tp1_pp4_1nodes_50steps.json │ │ │ ├── bert_tp2_pp2_1nodes_50steps.json │ │ │ └── bert_tp4_pp1_1nodes_50steps.json │ │ └── gpt3 │ │ │ ├── gpt3_tp1_pp2_1nodes_50steps.json │ │ │ ├── gpt3_tp1_pp4_1nodes_50steps.json │ │ │ ├── gpt3_tp2_pp2_1nodes_50steps.json │ │ │ └── gpt3_tp4_pp1_1nodes_50steps.json │ └── test_scripts │ │ ├── bert │ │ ├── pretrain_bert_distributed_resume_checkpoint_test.sh │ │ ├── pretrain_bert_distributed_test.sh │ │ ├── sbatch_bert_distributed_resume_checkpoint_test.sh │ │ └── sbatch_bert_distributed_test.sh │ │ └── gpt3 │ │ ├── pretrain_gpt3_distributed_resume_checkpoint_test.sh │ │ ├── pretrain_gpt3_distributed_test.sh │ │ ├── sbatch_gpt3_distributed_resume_checkpoint_test.sh │ │ └── sbatch_gpt3_distributed_test.sh ├── models │ ├── __init__.py │ ├── test_gpt_embedding.py │ └── test_gpt_model.py ├── old_tests │ ├── ds_config_bf16.json │ ├── test_checkpoints.py │ ├── test_training.py │ └── testing_utils.py ├── pipeline_parallel │ ├── __init__.py │ └── test_schedules.py ├── run_megatron.py ├── tensor_parallel │ └── __int__.py ├── test_megatron.py ├── transformer │ ├── __init__.py │ ├── test_core_attention.py │ ├── test_module.py │ ├── test_parallel_attention.py │ ├── test_parallel_mlp.py │ ├── test_parallel_transformer_block.py │ ├── test_parallel_transformer_layer.py │ └── test_transformer_config.py └── unit_tests │ ├── __init__.py │ ├── tensor_parallel │ ├── test_cross_entropy.py │ ├── test_data.py │ ├── test_mappings.py │ ├── test_random.py │ └── test_tensor_parallel_utils.py │ ├── test_basic.py │ ├── test_parallel_state.py │ ├── test_utilities.py │ └── test_utils.py └── tools ├── __init__.py ├── __pycache__ ├── __init__.cpython-310.pyc └── verify_checkpoint_non_tp_consistency.cpython-310.pyc ├── bert_embedding ├── __init__.py ├── dataset.py ├── embed.py ├── external_libs.py ├── huggingface.py └── utils.py ├── checkpoint_loader_megatron.py ├── checkpoint_saver_megatron.py ├── checkpoint_util.py ├── convert_checkpoint ├── README.md ├── deepspeed_checkpoint.py ├── deepspeed_to_megatron.py ├── deepspeed_to_transformers.py ├── inspect_checkpoint.py ├── inspect_deepspeed_checkpoint.py ├── json │ ├── mds_to_hf_llama_13b.json │ ├── mds_to_hf_llama_70b.json │ ├── mds_to_hf_llama_7b.json │ └── mds_to_hf_llama_7b_full_names.json └── mds_universal_to_huggingface.py ├── generate_samples_gpt.py ├── hf2megads_weight_converter.py ├── linter.py ├── merge_datasets.py ├── openwebtext ├── README.md ├── add_id.py ├── blacklist_urls.py ├── cleanup_dataset.py ├── cleanup_fix_dataset.py ├── filter_ngrams.py ├── find_duplicates.py ├── group_duplicate_url.py ├── merge_jsons.py └── remove_group_duplicates.py ├── preprocess_data.py ├── preprocess_data_nmt.py ├── retro ├── README.md ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-310.pyc │ └── utils.cpython-310.pyc ├── cli │ ├── __init__.py │ ├── __main__.py │ └── cli.py ├── db │ ├── __init__.py │ ├── build.py │ ├── dataset.py │ └── utils.py ├── examples │ ├── get_dataset_configs.sh │ ├── get_preprocess_cmd.sh │ ├── preprocess_data.sh │ └── pretrain_model.sh ├── external_libs.py ├── index │ ├── __init__.py │ ├── build.py │ ├── factory.py │ ├── index.py │ ├── indexes │ │ ├── __init__.py │ │ ├── faiss_base.py │ │ └── faiss_par_add.py │ └── utils.py ├── main.py ├── query │ ├── __init__.py │ ├── chunk_dataset.py │ ├── query.py │ ├── retro_dataset.py │ └── utils.py └── utils.py ├── run_text_generation_server.py ├── text_generation_cli.py └── verify_checkpoint_non_tp_consistency.py /CODEOWNERS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/CODEOWNERS -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/LICENSE -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/MANIFEST.in -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/README.md -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/SECURITY.md -------------------------------------------------------------------------------- /dataset/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/dataset/README.md -------------------------------------------------------------------------------- /dataset/download_books.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/dataset/download_books.sh -------------------------------------------------------------------------------- /dataset/download_ckpt.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/dataset/download_ckpt.sh -------------------------------------------------------------------------------- /dataset/download_vocab.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/dataset/download_vocab.sh -------------------------------------------------------------------------------- /docs/distrib_optimizer.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/docs/distrib_optimizer.md -------------------------------------------------------------------------------- /docs/images/distrib_optimizer/data_flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/docs/images/distrib_optimizer/data_flow.png -------------------------------------------------------------------------------- /docs/images/distrib_optimizer/sharding_scheme.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/docs/images/distrib_optimizer/sharding_scheme.png -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples/README.md -------------------------------------------------------------------------------- /examples/detxoify_lm/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples/detxoify_lm/README.md -------------------------------------------------------------------------------- /examples/detxoify_lm/annotations/filter-selfgeneration.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples/detxoify_lm/annotations/filter-selfgeneration.py -------------------------------------------------------------------------------- /examples/detxoify_lm/annotations/perspective_api_annotate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples/detxoify_lm/annotations/perspective_api_annotate.py -------------------------------------------------------------------------------- /examples/detxoify_lm/annotations/preprocess.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples/detxoify_lm/annotations/preprocess.sh -------------------------------------------------------------------------------- /examples/detxoify_lm/finetune_gpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples/detxoify_lm/finetune_gpt.py -------------------------------------------------------------------------------- /examples/detxoify_lm/finetune_gpt_distributed-1.3b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples/detxoify_lm/finetune_gpt_distributed-1.3b.sh -------------------------------------------------------------------------------- /examples/detxoify_lm/generate-1.3b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples/detxoify_lm/generate-1.3b.sh -------------------------------------------------------------------------------- /examples/detxoify_lm/generate_samples_gpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples/detxoify_lm/generate_samples_gpt.py -------------------------------------------------------------------------------- /examples/detxoify_lm/perspective_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples/detxoify_lm/perspective_api.py -------------------------------------------------------------------------------- /examples/detxoify_lm/self_generation/selfgenerate-1.3b-unconditional.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples/detxoify_lm/self_generation/selfgenerate-1.3b-unconditional.sh -------------------------------------------------------------------------------- /examples/evaluate_retriever_nq.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples/evaluate_retriever_nq.sh -------------------------------------------------------------------------------- /examples/evaluate_zeroshot_gpt.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples/evaluate_zeroshot_gpt.sh -------------------------------------------------------------------------------- /examples/finetune_mnli_distributed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples/finetune_mnli_distributed.sh -------------------------------------------------------------------------------- /examples/finetune_race_distributed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples/finetune_race_distributed.sh -------------------------------------------------------------------------------- /examples/finetune_retriever_distributed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples/finetune_retriever_distributed.sh -------------------------------------------------------------------------------- /examples/merge_mp_bert.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples/merge_mp_bert.sh -------------------------------------------------------------------------------- /examples/msdp/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples/msdp/README.md -------------------------------------------------------------------------------- /examples/msdp/data_processing.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples/msdp/data_processing.sh -------------------------------------------------------------------------------- /examples/msdp/eval_knwl_generation.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples/msdp/eval_knwl_generation.sh -------------------------------------------------------------------------------- /examples/msdp/eval_resp_generation.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples/msdp/eval_resp_generation.sh -------------------------------------------------------------------------------- /examples/msdp/prep_resp_gen.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples/msdp/prep_resp_gen.sh -------------------------------------------------------------------------------- /examples/msdp/prompt_knwl_gen.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples/msdp/prompt_knwl_gen.sh -------------------------------------------------------------------------------- /examples/msdp/prompt_resp_gen.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples/msdp/prompt_resp_gen.sh -------------------------------------------------------------------------------- /examples/pretrain_bert.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples/pretrain_bert.sh -------------------------------------------------------------------------------- /examples/pretrain_bert_distributed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples/pretrain_bert_distributed.sh -------------------------------------------------------------------------------- /examples/pretrain_bert_distributed_with_mp.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples/pretrain_bert_distributed_with_mp.sh -------------------------------------------------------------------------------- /examples/pretrain_gpt.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples/pretrain_gpt.sh -------------------------------------------------------------------------------- /examples/pretrain_gpt3_175B.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples/pretrain_gpt3_175B.sh -------------------------------------------------------------------------------- /examples/pretrain_gpt_distributed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples/pretrain_gpt_distributed.sh -------------------------------------------------------------------------------- /examples/pretrain_gpt_distributed_with_mp.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples/pretrain_gpt_distributed_with_mp.sh -------------------------------------------------------------------------------- /examples/pretrain_ict.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples/pretrain_ict.sh -------------------------------------------------------------------------------- /examples/pretrain_t5.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples/pretrain_t5.sh -------------------------------------------------------------------------------- /examples/pretrain_t5_distributed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples/pretrain_t5_distributed.sh -------------------------------------------------------------------------------- /examples/pretrain_t5_distributed_with_mp.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples/pretrain_t5_distributed_with_mp.sh -------------------------------------------------------------------------------- /examples/run_text_generation_server_345M.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples/run_text_generation_server_345M.sh -------------------------------------------------------------------------------- /examples/run_text_generation_server_345M_8_tensor_parallel.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples/run_text_generation_server_345M_8_tensor_parallel.sh -------------------------------------------------------------------------------- /examples/sc21/CONFIG.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples/sc21/CONFIG.sh -------------------------------------------------------------------------------- /examples/sc21/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples/sc21/README.md -------------------------------------------------------------------------------- /examples/sc21/SBATCH.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples/sc21/SBATCH.sh -------------------------------------------------------------------------------- /examples/sc21/SRUN.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples/sc21/SRUN.sh -------------------------------------------------------------------------------- /examples/sc21/run_figure_11.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples/sc21/run_figure_11.sh -------------------------------------------------------------------------------- /examples/sc21/run_figure_12.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples/sc21/run_figure_12.sh -------------------------------------------------------------------------------- /examples/sc21/run_figure_13.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples/sc21/run_figure_13.sh -------------------------------------------------------------------------------- /examples/sc21/run_figure_14.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples/sc21/run_figure_14.sh -------------------------------------------------------------------------------- /examples/sc21/run_figure_15.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples/sc21/run_figure_15.sh -------------------------------------------------------------------------------- /examples/sc21/run_figure_16.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples/sc21/run_figure_16.sh -------------------------------------------------------------------------------- /examples/sc21/run_figure_17.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples/sc21/run_figure_17.sh -------------------------------------------------------------------------------- /examples/sc21/run_figure_18.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples/sc21/run_figure_18.sh -------------------------------------------------------------------------------- /examples/sc21/run_table_1.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples/sc21/run_table_1.sh -------------------------------------------------------------------------------- /examples_deepspeed/MoE/ds_config_gpt_TEMPLATE.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/MoE/ds_config_gpt_TEMPLATE.json -------------------------------------------------------------------------------- /examples_deepspeed/MoE/ds_config_gpt_Zero2_TEMPLATE.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/MoE/ds_config_gpt_Zero2_TEMPLATE.json -------------------------------------------------------------------------------- /examples_deepspeed/MoE/ds_evalharness.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/MoE/ds_evalharness.sh -------------------------------------------------------------------------------- /examples_deepspeed/MoE/ds_pretrain_gpt_1.3B_MoE128.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/MoE/ds_pretrain_gpt_1.3B_MoE128.sh -------------------------------------------------------------------------------- /examples_deepspeed/MoE/ds_pretrain_gpt_1.3B_PR-MoE64or128.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/MoE/ds_pretrain_gpt_1.3B_PR-MoE64or128.sh -------------------------------------------------------------------------------- /examples_deepspeed/MoE/ds_pretrain_gpt_1.3B_PR-MoE64or128_MoS.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/MoE/ds_pretrain_gpt_1.3B_PR-MoE64or128_MoS.sh -------------------------------------------------------------------------------- /examples_deepspeed/MoE/ds_pretrain_gpt_1.3B_dense.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/MoE/ds_pretrain_gpt_1.3B_dense.sh -------------------------------------------------------------------------------- /examples_deepspeed/MoE/ds_pretrain_gpt_1.3B_dense_cl.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/MoE/ds_pretrain_gpt_1.3B_dense_cl.sh -------------------------------------------------------------------------------- /examples_deepspeed/MoE/ds_pretrain_gpt_125M_MoE64.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/MoE/ds_pretrain_gpt_125M_MoE64.sh -------------------------------------------------------------------------------- /examples_deepspeed/MoE/ds_pretrain_gpt_125M_dense_cl.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/MoE/ds_pretrain_gpt_125M_dense_cl.sh -------------------------------------------------------------------------------- /examples_deepspeed/MoE/ds_pretrain_gpt_350M_MoE128.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/MoE/ds_pretrain_gpt_350M_MoE128.sh -------------------------------------------------------------------------------- /examples_deepspeed/MoE/ds_pretrain_gpt_350M_PR-MoE32or64.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/MoE/ds_pretrain_gpt_350M_PR-MoE32or64.sh -------------------------------------------------------------------------------- /examples_deepspeed/MoE/ds_pretrain_gpt_350M_PR-MoE32or64_MoS.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/MoE/ds_pretrain_gpt_350M_PR-MoE32or64_MoS.sh -------------------------------------------------------------------------------- /examples_deepspeed/MoE/ds_pretrain_gpt_350M_dense.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/MoE/ds_pretrain_gpt_350M_dense.sh -------------------------------------------------------------------------------- /examples_deepspeed/MoE/ds_pretrain_gpt_6.7B_dense.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/MoE/ds_pretrain_gpt_6.7B_dense.sh -------------------------------------------------------------------------------- /examples_deepspeed/MoE/readme_evalharness.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/MoE/readme_evalharness.md -------------------------------------------------------------------------------- /examples_deepspeed/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/README.md -------------------------------------------------------------------------------- /examples_deepspeed/azure/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/azure/README.md -------------------------------------------------------------------------------- /examples_deepspeed/azure/run-175b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/azure/run-175b.sh -------------------------------------------------------------------------------- /examples_deepspeed/azure/run-1t.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/azure/run-1t.sh -------------------------------------------------------------------------------- /examples_deepspeed/azure/run-benchmark-model.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/azure/run-benchmark-model.sh -------------------------------------------------------------------------------- /examples_deepspeed/azureml/Dockerfile.dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/azureml/Dockerfile.dockerfile -------------------------------------------------------------------------------- /examples_deepspeed/azureml/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/azureml/README.md -------------------------------------------------------------------------------- /examples_deepspeed/azureml/aml_submit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/azureml/aml_submit.py -------------------------------------------------------------------------------- /examples_deepspeed/azureml/prepare_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/azureml/prepare_dataset.py -------------------------------------------------------------------------------- /examples_deepspeed/bert_with_pile/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/bert_with_pile/README.md -------------------------------------------------------------------------------- /examples_deepspeed/bert_with_pile/ds_config_bert_TEMPLATE.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/bert_with_pile/ds_config_bert_TEMPLATE.json -------------------------------------------------------------------------------- /examples_deepspeed/bert_with_pile/ds_finetune_bert_mnli.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/bert_with_pile/ds_finetune_bert_mnli.sh -------------------------------------------------------------------------------- /examples_deepspeed/bert_with_pile/ds_finetune_bert_qqp.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/bert_with_pile/ds_finetune_bert_qqp.sh -------------------------------------------------------------------------------- /examples_deepspeed/bert_with_pile/ds_finetune_bert_race.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/bert_with_pile/ds_finetune_bert_race.sh -------------------------------------------------------------------------------- /examples_deepspeed/bert_with_pile/ds_pretrain_bert.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/bert_with_pile/ds_pretrain_bert.sh -------------------------------------------------------------------------------- /examples_deepspeed/bert_with_pile/prepare_pile_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/bert_with_pile/prepare_pile_data.py -------------------------------------------------------------------------------- /examples_deepspeed/compression/125M-Int8-test-64gpu-distilled-group48.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/compression/125M-Int8-test-64gpu-distilled-group48.sh -------------------------------------------------------------------------------- /examples_deepspeed/compression/125M-L10-Int8-test-64gpu-distilled-group48.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/compression/125M-L10-Int8-test-64gpu-distilled-group48.sh -------------------------------------------------------------------------------- /examples_deepspeed/compression/125M-L12-Int8-test-64gpu-distilled-group48.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/compression/125M-L12-Int8-test-64gpu-distilled-group48.sh -------------------------------------------------------------------------------- /examples_deepspeed/compression/ds_config_gpt_TEMPLATE.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/compression/ds_config_gpt_TEMPLATE.json -------------------------------------------------------------------------------- /examples_deepspeed/compression/ds_config_gpt_TEMPLATE_compression.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/compression/ds_config_gpt_TEMPLATE_compression.json -------------------------------------------------------------------------------- /examples_deepspeed/compression/ds_evalharness.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/compression/ds_evalharness.sh -------------------------------------------------------------------------------- /examples_deepspeed/compression/ds_pretrain_gpt_1.3B_dense_cl_kd.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/compression/ds_pretrain_gpt_1.3B_dense_cl_kd.sh -------------------------------------------------------------------------------- /examples_deepspeed/compression/ds_pretrain_gpt_125M_dense_cl_kd.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/compression/ds_pretrain_gpt_125M_dense_cl_kd.sh -------------------------------------------------------------------------------- /examples_deepspeed/compression/ds_pretrain_gpt_125M_dense_kd.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/compression/ds_pretrain_gpt_125M_dense_kd.sh -------------------------------------------------------------------------------- /examples_deepspeed/compression/ds_pretrain_gpt_350M_dense_kd.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/compression/ds_pretrain_gpt_350M_dense_kd.sh -------------------------------------------------------------------------------- /examples_deepspeed/curriculum_learning/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/curriculum_learning/README.md -------------------------------------------------------------------------------- /examples_deepspeed/curriculum_learning/ds_config_gpt_slw_TEMPLATE.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/curriculum_learning/ds_config_gpt_slw_TEMPLATE.json -------------------------------------------------------------------------------- /examples_deepspeed/curriculum_learning/ds_pretrain_gpt2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/curriculum_learning/ds_pretrain_gpt2.sh -------------------------------------------------------------------------------- /examples_deepspeed/curriculum_learning/ds_pretrain_gpt_1.3B_rope_slw.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/curriculum_learning/ds_pretrain_gpt_1.3B_rope_slw.sh -------------------------------------------------------------------------------- /examples_deepspeed/curriculum_learning/ds_train.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/curriculum_learning/ds_train.sh -------------------------------------------------------------------------------- /examples_deepspeed/curriculum_learning/ds_zero_stage_1_config_baseline.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/curriculum_learning/ds_zero_stage_1_config_baseline.json -------------------------------------------------------------------------------- /examples_deepspeed/curriculum_learning/ds_zero_stage_1_config_curriculum_fixed_linear.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/curriculum_learning/ds_zero_stage_1_config_curriculum_fixed_linear.json -------------------------------------------------------------------------------- /examples_deepspeed/data_efficiency/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/data_efficiency/README.md -------------------------------------------------------------------------------- /examples_deepspeed/data_efficiency/analyze_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/data_efficiency/analyze_data.py -------------------------------------------------------------------------------- /examples_deepspeed/data_efficiency/bert/ds_analyze_bert_data_map.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/data_efficiency/bert/ds_analyze_bert_data_map.sh -------------------------------------------------------------------------------- /examples_deepspeed/data_efficiency/bert/ds_analyze_bert_data_reduce.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/data_efficiency/bert/ds_analyze_bert_data_reduce.sh -------------------------------------------------------------------------------- /examples_deepspeed/data_efficiency/bert/finetune/ds_config_bert_TEMPLATE.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/data_efficiency/bert/finetune/ds_config_bert_TEMPLATE.json -------------------------------------------------------------------------------- /examples_deepspeed/data_efficiency/bert/finetune/ds_finetune_bert_mnli.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/data_efficiency/bert/finetune/ds_finetune_bert_mnli.sh -------------------------------------------------------------------------------- /examples_deepspeed/data_efficiency/bert/finetune/ds_finetune_bert_qqp.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/data_efficiency/bert/finetune/ds_finetune_bert_qqp.sh -------------------------------------------------------------------------------- /examples_deepspeed/data_efficiency/bert/finetune/ds_finetune_bert_race.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/data_efficiency/bert/finetune/ds_finetune_bert_race.sh -------------------------------------------------------------------------------- /examples_deepspeed/data_efficiency/bert/finetune/ds_finetune_gather_result.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/data_efficiency/bert/finetune/ds_finetune_gather_result.py -------------------------------------------------------------------------------- /examples_deepspeed/data_efficiency/bert/finetune_glue/ds_config_bert_TEMPLATE.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/data_efficiency/bert/finetune_glue/ds_config_bert_TEMPLATE.json -------------------------------------------------------------------------------- /examples_deepspeed/data_efficiency/bert/finetune_glue/ds_finetune_bert_glue.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/data_efficiency/bert/finetune_glue/ds_finetune_bert_glue.sh -------------------------------------------------------------------------------- /examples_deepspeed/data_efficiency/bert/finetune_glue/ds_finetune_bert_glue_run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/data_efficiency/bert/finetune_glue/ds_finetune_bert_glue_run.sh -------------------------------------------------------------------------------- /examples_deepspeed/data_efficiency/bert/finetune_glue/ds_finetune_gather_result.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/data_efficiency/bert/finetune_glue/ds_finetune_gather_result.py -------------------------------------------------------------------------------- /examples_deepspeed/data_efficiency/bert/pile_data_download_preprocess.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/data_efficiency/bert/pile_data_download_preprocess.py -------------------------------------------------------------------------------- /examples_deepspeed/data_efficiency/bert/pretrain/ds_config_bert_1clmetric_TEMPLATE.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/data_efficiency/bert/pretrain/ds_config_bert_1clmetric_TEMPLATE.json -------------------------------------------------------------------------------- /examples_deepspeed/data_efficiency/bert/pretrain/ds_config_bert_2clmetrics_TEMPLATE.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/data_efficiency/bert/pretrain/ds_config_bert_2clmetrics_TEMPLATE.json -------------------------------------------------------------------------------- /examples_deepspeed/data_efficiency/bert/pretrain/ds_pretrain_bert_336M_base_script.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/data_efficiency/bert/pretrain/ds_pretrain_bert_336M_base_script.sh -------------------------------------------------------------------------------- /examples_deepspeed/data_efficiency/bert/pretrain/ds_pretrain_bert_336M_run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/data_efficiency/bert/pretrain/ds_pretrain_bert_336M_run.sh -------------------------------------------------------------------------------- /examples_deepspeed/data_efficiency/gpt/ds_analyze_gpt_data_map.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/data_efficiency/gpt/ds_analyze_gpt_data_map.sh -------------------------------------------------------------------------------- /examples_deepspeed/data_efficiency/gpt/ds_analyze_gpt_data_reduce.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/data_efficiency/gpt/ds_analyze_gpt_data_reduce.sh -------------------------------------------------------------------------------- /examples_deepspeed/data_efficiency/gpt/eval/ds_config_eval_dummy.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/data_efficiency/gpt/eval/ds_config_eval_dummy.json -------------------------------------------------------------------------------- /examples_deepspeed/data_efficiency/gpt/eval/ds_evalharness_1gpu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/data_efficiency/gpt/eval/ds_evalharness_1gpu.sh -------------------------------------------------------------------------------- /examples_deepspeed/data_efficiency/gpt/eval/ds_evalharness_gather_result.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/data_efficiency/gpt/eval/ds_evalharness_gather_result.py -------------------------------------------------------------------------------- /examples_deepspeed/data_efficiency/gpt/eval/ds_evalharness_parallel_run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/data_efficiency/gpt/eval/ds_evalharness_parallel_run.sh -------------------------------------------------------------------------------- /examples_deepspeed/data_efficiency/gpt/eval/ds_evalharness_parallel_run_10shot.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/data_efficiency/gpt/eval/ds_evalharness_parallel_run_10shot.sh -------------------------------------------------------------------------------- /examples_deepspeed/data_efficiency/gpt/pretrain/ds_config_gpt_1clmetric_TEMPLATE.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/data_efficiency/gpt/pretrain/ds_config_gpt_1clmetric_TEMPLATE.json -------------------------------------------------------------------------------- /examples_deepspeed/data_efficiency/gpt/pretrain/ds_config_gpt_2clmetrics_TEMPLATE.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/data_efficiency/gpt/pretrain/ds_config_gpt_2clmetrics_TEMPLATE.json -------------------------------------------------------------------------------- /examples_deepspeed/data_efficiency/gpt/pretrain/ds_pretrain_gpt_1.3B_dense_base_script.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/data_efficiency/gpt/pretrain/ds_pretrain_gpt_1.3B_dense_base_script.sh -------------------------------------------------------------------------------- /examples_deepspeed/data_efficiency/gpt/pretrain/ds_pretrain_gpt_1.3B_dense_run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/data_efficiency/gpt/pretrain/ds_pretrain_gpt_1.3B_dense_run.sh -------------------------------------------------------------------------------- /examples_deepspeed/deepspeed4science/megatron_long_seq_support/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/deepspeed4science/megatron_long_seq_support/README.md -------------------------------------------------------------------------------- /examples_deepspeed/deepspeed4science/megatron_long_seq_support/ds_config_gpt_TEMPLATE.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/deepspeed4science/megatron_long_seq_support/ds_config_gpt_TEMPLATE.json -------------------------------------------------------------------------------- /examples_deepspeed/deepspeed4science/megatron_long_seq_support/host_file: -------------------------------------------------------------------------------- 1 | worker-1 slots=4 2 | -------------------------------------------------------------------------------- /examples_deepspeed/deepspeed4science/megatron_long_seq_support/pretrain_gpt_1.3B_seq_parallel.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/deepspeed4science/megatron_long_seq_support/pretrain_gpt_1.3B_seq_parallel.sh -------------------------------------------------------------------------------- /examples_deepspeed/deepspeed4science/megatron_long_seq_support/pretrain_gpt_30B_seq_parallel.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/deepspeed4science/megatron_long_seq_support/pretrain_gpt_30B_seq_parallel.sh -------------------------------------------------------------------------------- /examples_deepspeed/finetune_hf_llama/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/finetune_hf_llama/README.md -------------------------------------------------------------------------------- /examples_deepspeed/finetune_hf_llama/ds_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/finetune_hf_llama/ds_config.json -------------------------------------------------------------------------------- /examples_deepspeed/finetune_hf_llama/finetune_llama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/finetune_hf_llama/finetune_llama.sh -------------------------------------------------------------------------------- /examples_deepspeed/generate_text.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/generate_text.sh -------------------------------------------------------------------------------- /examples_deepspeed/offload_pp/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/offload_pp/README.md -------------------------------------------------------------------------------- /examples_deepspeed/offload_pp/ds_config_gpt_TEMPLATE.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/offload_pp/ds_config_gpt_TEMPLATE.json -------------------------------------------------------------------------------- /examples_deepspeed/offload_pp/ds_pretrain_gpt_350M.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/offload_pp/ds_pretrain_gpt_350M.sh -------------------------------------------------------------------------------- /examples_deepspeed/offload_pp/twin-offload.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/offload_pp/twin-offload.png -------------------------------------------------------------------------------- /examples_deepspeed/pretrain_llama2_distributed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/pretrain_llama2_distributed.sh -------------------------------------------------------------------------------- /examples_deepspeed/pretrain_llama_distributed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/pretrain_llama_distributed.sh -------------------------------------------------------------------------------- /examples_deepspeed/rebase/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/rebase/README.md -------------------------------------------------------------------------------- /examples_deepspeed/rebase/ds_config_gpt_TEMPLATE.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/rebase/ds_config_gpt_TEMPLATE.json -------------------------------------------------------------------------------- /examples_deepspeed/rebase/ds_config_gpt_slw_TEMPLATE.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/rebase/ds_config_gpt_slw_TEMPLATE.json -------------------------------------------------------------------------------- /examples_deepspeed/rebase/ds_pretrain_gpt_1.3B.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/rebase/ds_pretrain_gpt_1.3B.sh -------------------------------------------------------------------------------- /examples_deepspeed/rebase/ds_pretrain_gpt_1.3B_megatron_checkpointing.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/rebase/ds_pretrain_gpt_1.3B_megatron_checkpointing.sh -------------------------------------------------------------------------------- /examples_deepspeed/rebase/ds_pretrain_gpt_1.3B_rope.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/rebase/ds_pretrain_gpt_1.3B_rope.sh -------------------------------------------------------------------------------- /examples_deepspeed/rebase/ds_pretrain_gpt_1.3B_rope_slw.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/rebase/ds_pretrain_gpt_1.3B_rope_slw.sh -------------------------------------------------------------------------------- /examples_deepspeed/rebase/ds_pretrain_gpt_125M.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/rebase/ds_pretrain_gpt_125M.sh -------------------------------------------------------------------------------- /examples_deepspeed/rebase/ds_pretrain_gpt_125M_flashattn.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/rebase/ds_pretrain_gpt_125M_flashattn.sh -------------------------------------------------------------------------------- /examples_deepspeed/rebase/ds_pretrain_gpt_13B.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/rebase/ds_pretrain_gpt_13B.sh -------------------------------------------------------------------------------- /examples_deepspeed/run_deepspeed_example.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/run_deepspeed_example.sh -------------------------------------------------------------------------------- /examples_deepspeed/sequence_parallel/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/sequence_parallel/README.md -------------------------------------------------------------------------------- /examples_deepspeed/sequence_parallel/ds_config_gpt_TEMPLATE.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/sequence_parallel/ds_config_gpt_TEMPLATE.json -------------------------------------------------------------------------------- /examples_deepspeed/sequence_parallel/ds_pretrain_gpt_1.3B_seq_parallel_32k.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/sequence_parallel/ds_pretrain_gpt_1.3B_seq_parallel_32k.sh -------------------------------------------------------------------------------- /examples_deepspeed/sequence_parallel/ds_pretrain_gpt_30B_seq_parallel_32k.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/sequence_parallel/ds_pretrain_gpt_30B_seq_parallel_32k.sh -------------------------------------------------------------------------------- /examples_deepspeed/universal_checkpointing/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/universal_checkpointing/README.md -------------------------------------------------------------------------------- /examples_deepspeed/universal_checkpointing/assets/image/uc_char_training_loss.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/universal_checkpointing/assets/image/uc_char_training_loss.png -------------------------------------------------------------------------------- /examples_deepspeed/universal_checkpointing/assets/image/uc_char_validation_loss.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/universal_checkpointing/assets/image/uc_char_validation_loss.png -------------------------------------------------------------------------------- /examples_deepspeed/universal_checkpointing/ds_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/universal_checkpointing/ds_config.json -------------------------------------------------------------------------------- /examples_deepspeed/universal_checkpointing/run_bf16.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/universal_checkpointing/run_bf16.sh -------------------------------------------------------------------------------- /examples_deepspeed/universal_checkpointing/run_fp16.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/universal_checkpointing/run_fp16.sh -------------------------------------------------------------------------------- /examples_deepspeed/universal_checkpointing/run_tb_analysis.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/universal_checkpointing/run_tb_analysis.sh -------------------------------------------------------------------------------- /examples_deepspeed/universal_checkpointing/run_universal_bf16.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/universal_checkpointing/run_universal_bf16.sh -------------------------------------------------------------------------------- /examples_deepspeed/universal_checkpointing/run_universal_fp16.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/universal_checkpointing/run_universal_fp16.sh -------------------------------------------------------------------------------- /examples_deepspeed/universal_checkpointing/tb_analysis/abstract_analysis.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/universal_checkpointing/tb_analysis/abstract_analysis.py -------------------------------------------------------------------------------- /examples_deepspeed/universal_checkpointing/tb_analysis/arguments.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/universal_checkpointing/tb_analysis/arguments.py -------------------------------------------------------------------------------- /examples_deepspeed/universal_checkpointing/tb_analysis/tb_analysis_script.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/universal_checkpointing/tb_analysis/tb_analysis_script.py -------------------------------------------------------------------------------- /examples_deepspeed/universal_checkpointing/tb_analysis/uc_analysis.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/universal_checkpointing/tb_analysis/uc_analysis.py -------------------------------------------------------------------------------- /examples_deepspeed/universal_checkpointing/tb_analysis/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/examples_deepspeed/universal_checkpointing/tb_analysis/utils.py -------------------------------------------------------------------------------- /experimental/__pycache__/fp4.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/experimental/__pycache__/fp4.cpython-310.pyc -------------------------------------------------------------------------------- /experimental/__pycache__/fp8_linear.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/experimental/__pycache__/fp8_linear.cpython-310.pyc -------------------------------------------------------------------------------- /experimental/__pycache__/layers.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/experimental/__pycache__/layers.cpython-310.pyc -------------------------------------------------------------------------------- /experimental/fp4.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/experimental/fp4.py -------------------------------------------------------------------------------- /experimental/layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/experimental/layers.py -------------------------------------------------------------------------------- /finetune_llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/finetune_llama.py -------------------------------------------------------------------------------- /images/Achieved_petaFLOPs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/images/Achieved_petaFLOPs.png -------------------------------------------------------------------------------- /images/cases_april2021.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/images/cases_april2021.png -------------------------------------------------------------------------------- /megatron/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/__init__.py -------------------------------------------------------------------------------- /megatron/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/__pycache__/arguments.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/__pycache__/arguments.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/__pycache__/checkpointing.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/__pycache__/checkpointing.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/__pycache__/dist_signal_handler.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/__pycache__/dist_signal_handler.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/__pycache__/global_vars.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/__pycache__/global_vars.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/__pycache__/initialize.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/__pycache__/initialize.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/__pycache__/memory.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/__pycache__/memory.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/__pycache__/microbatches.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/__pycache__/microbatches.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/__pycache__/optimizer_param_scheduler.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/__pycache__/optimizer_param_scheduler.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/__pycache__/profiler.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/__pycache__/profiler.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/__pycache__/timers.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/__pycache__/timers.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/__pycache__/training.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/__pycache__/training.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/__pycache__/utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/__pycache__/utils.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/arguments.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/arguments.py -------------------------------------------------------------------------------- /megatron/checkpointing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/checkpointing.py -------------------------------------------------------------------------------- /megatron/core/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/core/README.md -------------------------------------------------------------------------------- /megatron/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/core/__init__.py -------------------------------------------------------------------------------- /megatron/core/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/core/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/core/__pycache__/enums.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/core/__pycache__/enums.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/core/__pycache__/model_parallel_config.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/core/__pycache__/model_parallel_config.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/core/__pycache__/parallel_state.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/core/__pycache__/parallel_state.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/core/__pycache__/utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/core/__pycache__/utils.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/core/enums.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/core/enums.py -------------------------------------------------------------------------------- /megatron/core/fusions/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /megatron/core/fusions/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/core/fusions/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/core/fusions/__pycache__/fused_softmax.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/core/fusions/__pycache__/fused_softmax.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/core/fusions/fused_bias_dropout.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/core/fusions/fused_bias_dropout.py -------------------------------------------------------------------------------- /megatron/core/fusions/fused_bias_gelu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/core/fusions/fused_bias_gelu.py -------------------------------------------------------------------------------- /megatron/core/fusions/fused_layer_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/core/fusions/fused_layer_norm.py -------------------------------------------------------------------------------- /megatron/core/fusions/fused_softmax.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/core/fusions/fused_softmax.py -------------------------------------------------------------------------------- /megatron/core/model_parallel_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/core/model_parallel_config.py -------------------------------------------------------------------------------- /megatron/core/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /megatron/core/models/gpt/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/core/models/gpt/__init__.py -------------------------------------------------------------------------------- /megatron/core/models/gpt/gpt_embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/core/models/gpt/gpt_embedding.py -------------------------------------------------------------------------------- /megatron/core/models/gpt/gpt_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/core/models/gpt/gpt_model.py -------------------------------------------------------------------------------- /megatron/core/package_info.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/core/package_info.py -------------------------------------------------------------------------------- /megatron/core/parallel_state.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/core/parallel_state.py -------------------------------------------------------------------------------- /megatron/core/pipeline_parallel/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/core/pipeline_parallel/__init__.py -------------------------------------------------------------------------------- /megatron/core/pipeline_parallel/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/core/pipeline_parallel/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/core/pipeline_parallel/__pycache__/p2p_communication.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/core/pipeline_parallel/__pycache__/p2p_communication.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/core/pipeline_parallel/__pycache__/schedules.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/core/pipeline_parallel/__pycache__/schedules.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/core/pipeline_parallel/p2p_communication.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/core/pipeline_parallel/p2p_communication.py -------------------------------------------------------------------------------- /megatron/core/pipeline_parallel/schedules.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/core/pipeline_parallel/schedules.py -------------------------------------------------------------------------------- /megatron/core/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/core/requirements.txt -------------------------------------------------------------------------------- /megatron/core/sequence_parallel/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/core/sequence_parallel/__init__.py -------------------------------------------------------------------------------- /megatron/core/sequence_parallel/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/core/sequence_parallel/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/core/sequence_parallel/__pycache__/cross_entropy.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/core/sequence_parallel/__pycache__/cross_entropy.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/core/sequence_parallel/cross_entropy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/core/sequence_parallel/cross_entropy.py -------------------------------------------------------------------------------- /megatron/core/tensor_parallel/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/core/tensor_parallel/__init__.py -------------------------------------------------------------------------------- /megatron/core/tensor_parallel/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/core/tensor_parallel/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/core/tensor_parallel/__pycache__/cross_entropy.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/core/tensor_parallel/__pycache__/cross_entropy.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/core/tensor_parallel/__pycache__/data.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/core/tensor_parallel/__pycache__/data.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/core/tensor_parallel/__pycache__/layers.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/core/tensor_parallel/__pycache__/layers.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/core/tensor_parallel/__pycache__/mappings.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/core/tensor_parallel/__pycache__/mappings.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/core/tensor_parallel/__pycache__/random.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/core/tensor_parallel/__pycache__/random.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/core/tensor_parallel/__pycache__/utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/core/tensor_parallel/__pycache__/utils.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/core/tensor_parallel/cross_entropy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/core/tensor_parallel/cross_entropy.py -------------------------------------------------------------------------------- /megatron/core/tensor_parallel/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/core/tensor_parallel/data.py -------------------------------------------------------------------------------- /megatron/core/tensor_parallel/layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/core/tensor_parallel/layers.py -------------------------------------------------------------------------------- /megatron/core/tensor_parallel/mappings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/core/tensor_parallel/mappings.py -------------------------------------------------------------------------------- /megatron/core/tensor_parallel/random.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/core/tensor_parallel/random.py -------------------------------------------------------------------------------- /megatron/core/tensor_parallel/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/core/tensor_parallel/utils.py -------------------------------------------------------------------------------- /megatron/core/transformer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/core/transformer/__init__.py -------------------------------------------------------------------------------- /megatron/core/transformer/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/core/transformer/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/core/transformer/__pycache__/core_attention.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/core/transformer/__pycache__/core_attention.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/core/transformer/__pycache__/enums.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/core/transformer/__pycache__/enums.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/core/transformer/__pycache__/module.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/core/transformer/__pycache__/module.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/core/transformer/__pycache__/transformer_config.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/core/transformer/__pycache__/transformer_config.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/core/transformer/__pycache__/utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/core/transformer/__pycache__/utils.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/core/transformer/attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/core/transformer/attention.py -------------------------------------------------------------------------------- /megatron/core/transformer/core_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/core/transformer/core_attention.py -------------------------------------------------------------------------------- /megatron/core/transformer/custom_layers/transformer_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/core/transformer/custom_layers/transformer_engine.py -------------------------------------------------------------------------------- /megatron/core/transformer/enums.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/core/transformer/enums.py -------------------------------------------------------------------------------- /megatron/core/transformer/mlp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/core/transformer/mlp.py -------------------------------------------------------------------------------- /megatron/core/transformer/module.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/core/transformer/module.py -------------------------------------------------------------------------------- /megatron/core/transformer/transformer_block.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/core/transformer/transformer_block.py -------------------------------------------------------------------------------- /megatron/core/transformer/transformer_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/core/transformer/transformer_config.py -------------------------------------------------------------------------------- /megatron/core/transformer/transformer_layer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/core/transformer/transformer_layer.py -------------------------------------------------------------------------------- /megatron/core/transformer/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/core/transformer/utils.py -------------------------------------------------------------------------------- /megatron/core/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/core/utils.py -------------------------------------------------------------------------------- /megatron/data/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/data/Makefile -------------------------------------------------------------------------------- /megatron/data/__init__.py: -------------------------------------------------------------------------------- 1 | from . import indexed_dataset 2 | -------------------------------------------------------------------------------- /megatron/data/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/data/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/data/__pycache__/autoaugment.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/data/__pycache__/autoaugment.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/data/__pycache__/blendable_dataset.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/data/__pycache__/blendable_dataset.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/data/__pycache__/data_samplers.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/data/__pycache__/data_samplers.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/data/__pycache__/dataset_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/data/__pycache__/dataset_utils.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/data/__pycache__/gpt_dataset.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/data/__pycache__/gpt_dataset.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/data/__pycache__/image_folder.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/data/__pycache__/image_folder.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/data/__pycache__/indexed_dataset.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/data/__pycache__/indexed_dataset.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/data/__pycache__/vit_dataset.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/data/__pycache__/vit_dataset.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/data/autoaugment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/data/autoaugment.py -------------------------------------------------------------------------------- /megatron/data/bert_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/data/bert_dataset.py -------------------------------------------------------------------------------- /megatron/data/biencoder_dataset_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/data/biencoder_dataset_utils.py -------------------------------------------------------------------------------- /megatron/data/blendable_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/data/blendable_dataset.py -------------------------------------------------------------------------------- /megatron/data/data_samplers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/data/data_samplers.py -------------------------------------------------------------------------------- /megatron/data/dataset_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/data/dataset_utils.py -------------------------------------------------------------------------------- /megatron/data/gpt_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/data/gpt_dataset.py -------------------------------------------------------------------------------- /megatron/data/helpers.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/data/helpers.cpp -------------------------------------------------------------------------------- /megatron/data/helpers.cpython-310-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/data/helpers.cpython-310-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /megatron/data/ict_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/data/ict_dataset.py -------------------------------------------------------------------------------- /megatron/data/image_folder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/data/image_folder.py -------------------------------------------------------------------------------- /megatron/data/indexed_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/data/indexed_dataset.py -------------------------------------------------------------------------------- /megatron/data/orqa_wiki_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/data/orqa_wiki_dataset.py -------------------------------------------------------------------------------- /megatron/data/prompt_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/data/prompt_dataset.py -------------------------------------------------------------------------------- /megatron/data/realm_dataset_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/data/realm_dataset_utils.py -------------------------------------------------------------------------------- /megatron/data/realm_index.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/data/realm_index.py -------------------------------------------------------------------------------- /megatron/data/t5_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/data/t5_dataset.py -------------------------------------------------------------------------------- /megatron/data/test/test_indexed_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/data/test/test_indexed_dataset.py -------------------------------------------------------------------------------- /megatron/data/test/test_preprocess_data.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/data/test/test_preprocess_data.sh -------------------------------------------------------------------------------- /megatron/data/vit_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/data/vit_dataset.py -------------------------------------------------------------------------------- /megatron/dist_signal_handler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/dist_signal_handler.py -------------------------------------------------------------------------------- /megatron/enums.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/enums.py -------------------------------------------------------------------------------- /megatron/fp16_deprecated/loss_scaler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/fp16_deprecated/loss_scaler.py -------------------------------------------------------------------------------- /megatron/fused_kernels/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/fused_kernels/__init__.py -------------------------------------------------------------------------------- /megatron/fused_kernels/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/fused_kernels/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/fused_kernels/compat.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/fused_kernels/compat.h -------------------------------------------------------------------------------- /megatron/fused_kernels/scaled_masked_softmax.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/fused_kernels/scaled_masked_softmax.cpp -------------------------------------------------------------------------------- /megatron/fused_kernels/scaled_masked_softmax.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/fused_kernels/scaled_masked_softmax.h -------------------------------------------------------------------------------- /megatron/fused_kernels/scaled_masked_softmax_cuda.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/fused_kernels/scaled_masked_softmax_cuda.cu -------------------------------------------------------------------------------- /megatron/fused_kernels/scaled_softmax.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/fused_kernels/scaled_softmax.cpp -------------------------------------------------------------------------------- /megatron/fused_kernels/scaled_softmax_cuda.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/fused_kernels/scaled_softmax_cuda.cu -------------------------------------------------------------------------------- /megatron/fused_kernels/scaled_upper_triang_masked_softmax.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/fused_kernels/scaled_upper_triang_masked_softmax.cpp -------------------------------------------------------------------------------- /megatron/fused_kernels/scaled_upper_triang_masked_softmax.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h -------------------------------------------------------------------------------- /megatron/fused_kernels/scaled_upper_triang_masked_softmax_cuda.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/fused_kernels/scaled_upper_triang_masked_softmax_cuda.cu -------------------------------------------------------------------------------- /megatron/fused_kernels/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /megatron/fused_kernels/tests/test_fused_kernels.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/fused_kernels/tests/test_fused_kernels.py -------------------------------------------------------------------------------- /megatron/fused_kernels/type_shim.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/fused_kernels/type_shim.h -------------------------------------------------------------------------------- /megatron/global_vars.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/global_vars.py -------------------------------------------------------------------------------- /megatron/indexer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/indexer.py -------------------------------------------------------------------------------- /megatron/initialize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/initialize.py -------------------------------------------------------------------------------- /megatron/memory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/memory.py -------------------------------------------------------------------------------- /megatron/microbatches.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/microbatches.py -------------------------------------------------------------------------------- /megatron/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/model/__init__.py -------------------------------------------------------------------------------- /megatron/model/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/model/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/model/__pycache__/bert_model.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/model/__pycache__/bert_model.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/model/__pycache__/distributed.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/model/__pycache__/distributed.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/model/__pycache__/enums.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/model/__pycache__/enums.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/model/__pycache__/fused_bias_gelu.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/model/__pycache__/fused_bias_gelu.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/model/__pycache__/fused_layer_norm.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/model/__pycache__/fused_layer_norm.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/model/__pycache__/fused_softmax.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/model/__pycache__/fused_softmax.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/model/__pycache__/gpt_model.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/model/__pycache__/gpt_model.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/model/__pycache__/language_model.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/model/__pycache__/language_model.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/model/__pycache__/module.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/model/__pycache__/module.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/model/__pycache__/rmsnorm.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/model/__pycache__/rmsnorm.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/model/__pycache__/rotary_pos_embedding.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/model/__pycache__/rotary_pos_embedding.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/model/__pycache__/t5_model.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/model/__pycache__/t5_model.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/model/__pycache__/transformer.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/model/__pycache__/transformer.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/model/__pycache__/utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/model/__pycache__/utils.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/model/bert_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/model/bert_model.py -------------------------------------------------------------------------------- /megatron/model/biencoder_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/model/biencoder_model.py -------------------------------------------------------------------------------- /megatron/model/classification.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/model/classification.py -------------------------------------------------------------------------------- /megatron/model/distributed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/model/distributed.py -------------------------------------------------------------------------------- /megatron/model/enums.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/model/enums.py -------------------------------------------------------------------------------- /megatron/model/fused_bias_gelu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/model/fused_bias_gelu.py -------------------------------------------------------------------------------- /megatron/model/fused_layer_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/model/fused_layer_norm.py -------------------------------------------------------------------------------- /megatron/model/fused_softmax.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/model/fused_softmax.py -------------------------------------------------------------------------------- /megatron/model/gpt_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/model/gpt_model.py -------------------------------------------------------------------------------- /megatron/model/language_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/model/language_model.py -------------------------------------------------------------------------------- /megatron/model/module.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/model/module.py -------------------------------------------------------------------------------- /megatron/model/multiple_choice.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/model/multiple_choice.py -------------------------------------------------------------------------------- /megatron/model/realm_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/model/realm_model.py -------------------------------------------------------------------------------- /megatron/model/rmsnorm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/model/rmsnorm.py -------------------------------------------------------------------------------- /megatron/model/rmsnorm_apex.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/model/rmsnorm_apex.py -------------------------------------------------------------------------------- /megatron/model/rotary_pos_embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/model/rotary_pos_embedding.py -------------------------------------------------------------------------------- /megatron/model/t5_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/model/t5_model.py -------------------------------------------------------------------------------- /megatron/model/transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/model/transformer.py -------------------------------------------------------------------------------- /megatron/model/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/model/utils.py -------------------------------------------------------------------------------- /megatron/model/vision/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /megatron/model/vision/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/model/vision/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/model/vision/__pycache__/knn_monitor.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/model/vision/__pycache__/knn_monitor.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/model/vision/classification.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/model/vision/classification.py -------------------------------------------------------------------------------- /megatron/model/vision/dino.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/model/vision/dino.py -------------------------------------------------------------------------------- /megatron/model/vision/esvit_swin_backbone.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/model/vision/esvit_swin_backbone.py -------------------------------------------------------------------------------- /megatron/model/vision/inpainting.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/model/vision/inpainting.py -------------------------------------------------------------------------------- /megatron/model/vision/knn_monitor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/model/vision/knn_monitor.py -------------------------------------------------------------------------------- /megatron/model/vision/mit_backbone.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/model/vision/mit_backbone.py -------------------------------------------------------------------------------- /megatron/model/vision/swin_backbone.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/model/vision/swin_backbone.py -------------------------------------------------------------------------------- /megatron/model/vision/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/model/vision/utils.py -------------------------------------------------------------------------------- /megatron/model/vision/vit_backbone.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/model/vision/vit_backbone.py -------------------------------------------------------------------------------- /megatron/mpu/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /megatron/mpu/tests/commons.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/mpu/tests/commons.py -------------------------------------------------------------------------------- /megatron/mpu/tests/test_cross_entropy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/mpu/tests/test_cross_entropy.py -------------------------------------------------------------------------------- /megatron/mpu/tests/test_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/mpu/tests/test_data.py -------------------------------------------------------------------------------- /megatron/mpu/tests/test_initialize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/mpu/tests/test_initialize.py -------------------------------------------------------------------------------- /megatron/mpu/tests/test_layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/mpu/tests/test_layers.py -------------------------------------------------------------------------------- /megatron/mpu/tests/test_random.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/mpu/tests/test_random.py -------------------------------------------------------------------------------- /megatron/optimizer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/optimizer/__init__.py -------------------------------------------------------------------------------- /megatron/optimizer/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/optimizer/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/optimizer/__pycache__/clip_grads.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/optimizer/__pycache__/clip_grads.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/optimizer/__pycache__/distrib_optimizer.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/optimizer/__pycache__/distrib_optimizer.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/optimizer/__pycache__/grad_scaler.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/optimizer/__pycache__/grad_scaler.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/optimizer/__pycache__/optimizer.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/optimizer/__pycache__/optimizer.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/optimizer/clip_grads.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/optimizer/clip_grads.py -------------------------------------------------------------------------------- /megatron/optimizer/distrib_optimizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/optimizer/distrib_optimizer.py -------------------------------------------------------------------------------- /megatron/optimizer/grad_scaler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/optimizer/grad_scaler.py -------------------------------------------------------------------------------- /megatron/optimizer/optimizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/optimizer/optimizer.py -------------------------------------------------------------------------------- /megatron/optimizer_param_scheduler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/optimizer_param_scheduler.py -------------------------------------------------------------------------------- /megatron/p2p_communication.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/p2p_communication.py -------------------------------------------------------------------------------- /megatron/profiler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/profiler.py -------------------------------------------------------------------------------- /megatron/static/index.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/static/index.html -------------------------------------------------------------------------------- /megatron/text_generation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/text_generation/__init__.py -------------------------------------------------------------------------------- /megatron/text_generation/api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/text_generation/api.py -------------------------------------------------------------------------------- /megatron/text_generation/beam_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/text_generation/beam_utils.py -------------------------------------------------------------------------------- /megatron/text_generation/communication.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/text_generation/communication.py -------------------------------------------------------------------------------- /megatron/text_generation/forward_step.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/text_generation/forward_step.py -------------------------------------------------------------------------------- /megatron/text_generation/generation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/text_generation/generation.py -------------------------------------------------------------------------------- /megatron/text_generation/sampling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/text_generation/sampling.py -------------------------------------------------------------------------------- /megatron/text_generation/tokenization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/text_generation/tokenization.py -------------------------------------------------------------------------------- /megatron/text_generation_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/text_generation_server.py -------------------------------------------------------------------------------- /megatron/text_generation_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/text_generation_utils.py -------------------------------------------------------------------------------- /megatron/timers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/timers.py -------------------------------------------------------------------------------- /megatron/tokenizer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/tokenizer/__init__.py -------------------------------------------------------------------------------- /megatron/tokenizer/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/tokenizer/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/tokenizer/__pycache__/bert_tokenization.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/tokenizer/__pycache__/bert_tokenization.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/tokenizer/__pycache__/gpt2_tokenization.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/tokenizer/__pycache__/gpt2_tokenization.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/tokenizer/__pycache__/tokenizer.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/tokenizer/__pycache__/tokenizer.cpython-310.pyc -------------------------------------------------------------------------------- /megatron/tokenizer/bert_tokenization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/tokenizer/bert_tokenization.py -------------------------------------------------------------------------------- /megatron/tokenizer/gpt2_tokenization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/tokenizer/gpt2_tokenization.py -------------------------------------------------------------------------------- /megatron/tokenizer/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/tokenizer/tokenizer.py -------------------------------------------------------------------------------- /megatron/training.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/training.py -------------------------------------------------------------------------------- /megatron/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/megatron/utils.py -------------------------------------------------------------------------------- /pretrain_bert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/pretrain_bert.py -------------------------------------------------------------------------------- /pretrain_gpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/pretrain_gpt.py -------------------------------------------------------------------------------- /pretrain_gpt_core.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/pretrain_gpt_core.py -------------------------------------------------------------------------------- /pretrain_ict.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/pretrain_ict.py -------------------------------------------------------------------------------- /pretrain_retro.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/pretrain_retro.py -------------------------------------------------------------------------------- /pretrain_t5.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/pretrain_t5.py -------------------------------------------------------------------------------- /pretrain_vision_classify.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/pretrain_vision_classify.py -------------------------------------------------------------------------------- /pretrain_vision_dino.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/pretrain_vision_dino.py -------------------------------------------------------------------------------- /pretrain_vision_inpaint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/pretrain_vision_inpaint.py -------------------------------------------------------------------------------- /scripts/convert_ds_to_universal.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/scripts/convert_ds_to_universal.sh -------------------------------------------------------------------------------- /scripts/hostsfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/scripts/hostsfile -------------------------------------------------------------------------------- /scripts/run_llama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/scripts/run_llama.sh -------------------------------------------------------------------------------- /scripts/run_llama2_7b_fp4.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/scripts/run_llama2_7b_fp4.sh -------------------------------------------------------------------------------- /scripts/run_mixtral.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/scripts/run_mixtral.sh -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/setup.py -------------------------------------------------------------------------------- /tasks/ckp_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tasks/ckp_utils.py -------------------------------------------------------------------------------- /tasks/data_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tasks/data_utils.py -------------------------------------------------------------------------------- /tasks/ensemble_classifier.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tasks/ensemble_classifier.py -------------------------------------------------------------------------------- /tasks/eval_harness/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tasks/eval_harness/README.md -------------------------------------------------------------------------------- /tasks/eval_harness/download.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tasks/eval_harness/download.py -------------------------------------------------------------------------------- /tasks/eval_harness/evaluate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tasks/eval_harness/evaluate.py -------------------------------------------------------------------------------- /tasks/eval_harness/report-to-csv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tasks/eval_harness/report-to-csv.py -------------------------------------------------------------------------------- /tasks/eval_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tasks/eval_utils.py -------------------------------------------------------------------------------- /tasks/finetune_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tasks/finetune_utils.py -------------------------------------------------------------------------------- /tasks/glue/cola.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tasks/glue/cola.py -------------------------------------------------------------------------------- /tasks/glue/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tasks/glue/data.py -------------------------------------------------------------------------------- /tasks/glue/finetune.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tasks/glue/finetune.py -------------------------------------------------------------------------------- /tasks/glue/mnli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tasks/glue/mnli.py -------------------------------------------------------------------------------- /tasks/glue/mrpc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tasks/glue/mrpc.py -------------------------------------------------------------------------------- /tasks/glue/qnli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tasks/glue/qnli.py -------------------------------------------------------------------------------- /tasks/glue/qqp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tasks/glue/qqp.py -------------------------------------------------------------------------------- /tasks/glue/rte.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tasks/glue/rte.py -------------------------------------------------------------------------------- /tasks/glue/sst2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tasks/glue/sst2.py -------------------------------------------------------------------------------- /tasks/glue/stsb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tasks/glue/stsb.py -------------------------------------------------------------------------------- /tasks/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tasks/main.py -------------------------------------------------------------------------------- /tasks/main_3d.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tasks/main_3d.py -------------------------------------------------------------------------------- /tasks/msdp/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tasks/msdp/README.md -------------------------------------------------------------------------------- /tasks/msdp/evaluate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tasks/msdp/evaluate.py -------------------------------------------------------------------------------- /tasks/msdp/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tasks/msdp/main.py -------------------------------------------------------------------------------- /tasks/msdp/metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tasks/msdp/metrics.py -------------------------------------------------------------------------------- /tasks/msdp/preprocessing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tasks/msdp/preprocessing.py -------------------------------------------------------------------------------- /tasks/msdp/prompt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tasks/msdp/prompt.py -------------------------------------------------------------------------------- /tasks/orqa/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tasks/orqa/README.md -------------------------------------------------------------------------------- /tasks/orqa/evaluate_orqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tasks/orqa/evaluate_orqa.py -------------------------------------------------------------------------------- /tasks/orqa/evaluate_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tasks/orqa/evaluate_utils.py -------------------------------------------------------------------------------- /tasks/orqa/supervised/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tasks/orqa/supervised/data.py -------------------------------------------------------------------------------- /tasks/orqa/supervised/eval_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tasks/orqa/supervised/eval_utils.py -------------------------------------------------------------------------------- /tasks/orqa/supervised/finetune.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tasks/orqa/supervised/finetune.py -------------------------------------------------------------------------------- /tasks/orqa/unsupervised/nq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tasks/orqa/unsupervised/nq.py -------------------------------------------------------------------------------- /tasks/orqa/unsupervised/qa_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tasks/orqa/unsupervised/qa_utils.py -------------------------------------------------------------------------------- /tasks/orqa/unsupervised/tokenizers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tasks/orqa/unsupervised/tokenizers.py -------------------------------------------------------------------------------- /tasks/race/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tasks/race/data.py -------------------------------------------------------------------------------- /tasks/race/finetune.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tasks/race/finetune.py -------------------------------------------------------------------------------- /tasks/vision/classification/classification.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tasks/vision/classification/classification.py -------------------------------------------------------------------------------- /tasks/vision/classification/eval_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tasks/vision/classification/eval_utils.py -------------------------------------------------------------------------------- /tasks/vision/finetune_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tasks/vision/finetune_utils.py -------------------------------------------------------------------------------- /tasks/vision/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tasks/vision/main.py -------------------------------------------------------------------------------- /tasks/vision/segmentation/cityscapes.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tasks/vision/segmentation/cityscapes.py -------------------------------------------------------------------------------- /tasks/vision/segmentation/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tasks/vision/segmentation/data.py -------------------------------------------------------------------------------- /tasks/vision/segmentation/finetune_segformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tasks/vision/segmentation/finetune_segformer.py -------------------------------------------------------------------------------- /tasks/vision/segmentation/finetune_setr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tasks/vision/segmentation/finetune_setr.py -------------------------------------------------------------------------------- /tasks/vision/segmentation/metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tasks/vision/segmentation/metrics.py -------------------------------------------------------------------------------- /tasks/vision/segmentation/seg_heads.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tasks/vision/segmentation/seg_heads.py -------------------------------------------------------------------------------- /tasks/vision/segmentation/seg_models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tasks/vision/segmentation/seg_models.py -------------------------------------------------------------------------------- /tasks/vision/segmentation/transforms.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tasks/vision/segmentation/transforms.py -------------------------------------------------------------------------------- /tasks/vision/segmentation/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tasks/vision/segmentation/utils.py -------------------------------------------------------------------------------- /tasks/zeroshot_gpt/datasets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tasks/zeroshot_gpt/datasets.py -------------------------------------------------------------------------------- /tasks/zeroshot_gpt/detokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tasks/zeroshot_gpt/detokenizer.py -------------------------------------------------------------------------------- /tasks/zeroshot_gpt/evaluate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tasks/zeroshot_gpt/evaluate.py -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tests/conftest.py -------------------------------------------------------------------------------- /tests/functional_tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/functional_tests/python_test_utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/functional_tests/python_test_utils/check_slurm_job_completion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tests/functional_tests/python_test_utils/check_slurm_job_completion.py -------------------------------------------------------------------------------- /tests/functional_tests/python_test_utils/get_test_results_from_tensorboard_logs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tests/functional_tests/python_test_utils/get_test_results_from_tensorboard_logs.py -------------------------------------------------------------------------------- /tests/functional_tests/python_test_utils/test_ci_pipeline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tests/functional_tests/python_test_utils/test_ci_pipeline.py -------------------------------------------------------------------------------- /tests/functional_tests/python_test_utils/test_resume_checkpoint_pipeline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tests/functional_tests/python_test_utils/test_resume_checkpoint_pipeline.py -------------------------------------------------------------------------------- /tests/functional_tests/shell_test_utils/jobwait.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tests/functional_tests/shell_test_utils/jobwait.sh -------------------------------------------------------------------------------- /tests/functional_tests/test_results/bert/bert_tp1_pp2_1nodes_50steps.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tests/functional_tests/test_results/bert/bert_tp1_pp2_1nodes_50steps.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/bert/bert_tp1_pp4_1nodes_50steps.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tests/functional_tests/test_results/bert/bert_tp1_pp4_1nodes_50steps.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/bert/bert_tp2_pp2_1nodes_50steps.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tests/functional_tests/test_results/bert/bert_tp2_pp2_1nodes_50steps.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/bert/bert_tp4_pp1_1nodes_50steps.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tests/functional_tests/test_results/bert/bert_tp4_pp1_1nodes_50steps.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/gpt3/gpt3_tp1_pp2_1nodes_50steps.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tests/functional_tests/test_results/gpt3/gpt3_tp1_pp2_1nodes_50steps.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/gpt3/gpt3_tp1_pp4_1nodes_50steps.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tests/functional_tests/test_results/gpt3/gpt3_tp1_pp4_1nodes_50steps.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/gpt3/gpt3_tp2_pp2_1nodes_50steps.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tests/functional_tests/test_results/gpt3/gpt3_tp2_pp2_1nodes_50steps.json -------------------------------------------------------------------------------- /tests/functional_tests/test_results/gpt3/gpt3_tp4_pp1_1nodes_50steps.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tests/functional_tests/test_results/gpt3/gpt3_tp4_pp1_1nodes_50steps.json -------------------------------------------------------------------------------- /tests/functional_tests/test_scripts/bert/pretrain_bert_distributed_resume_checkpoint_test.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tests/functional_tests/test_scripts/bert/pretrain_bert_distributed_resume_checkpoint_test.sh -------------------------------------------------------------------------------- /tests/functional_tests/test_scripts/bert/pretrain_bert_distributed_test.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tests/functional_tests/test_scripts/bert/pretrain_bert_distributed_test.sh -------------------------------------------------------------------------------- /tests/functional_tests/test_scripts/bert/sbatch_bert_distributed_resume_checkpoint_test.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tests/functional_tests/test_scripts/bert/sbatch_bert_distributed_resume_checkpoint_test.sh -------------------------------------------------------------------------------- /tests/functional_tests/test_scripts/bert/sbatch_bert_distributed_test.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tests/functional_tests/test_scripts/bert/sbatch_bert_distributed_test.sh -------------------------------------------------------------------------------- /tests/functional_tests/test_scripts/gpt3/pretrain_gpt3_distributed_resume_checkpoint_test.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tests/functional_tests/test_scripts/gpt3/pretrain_gpt3_distributed_resume_checkpoint_test.sh -------------------------------------------------------------------------------- /tests/functional_tests/test_scripts/gpt3/pretrain_gpt3_distributed_test.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tests/functional_tests/test_scripts/gpt3/pretrain_gpt3_distributed_test.sh -------------------------------------------------------------------------------- /tests/functional_tests/test_scripts/gpt3/sbatch_gpt3_distributed_resume_checkpoint_test.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tests/functional_tests/test_scripts/gpt3/sbatch_gpt3_distributed_resume_checkpoint_test.sh -------------------------------------------------------------------------------- /tests/functional_tests/test_scripts/gpt3/sbatch_gpt3_distributed_test.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tests/functional_tests/test_scripts/gpt3/sbatch_gpt3_distributed_test.sh -------------------------------------------------------------------------------- /tests/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/models/test_gpt_embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tests/models/test_gpt_embedding.py -------------------------------------------------------------------------------- /tests/models/test_gpt_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tests/models/test_gpt_model.py -------------------------------------------------------------------------------- /tests/old_tests/ds_config_bf16.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tests/old_tests/ds_config_bf16.json -------------------------------------------------------------------------------- /tests/old_tests/test_checkpoints.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tests/old_tests/test_checkpoints.py -------------------------------------------------------------------------------- /tests/old_tests/test_training.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tests/old_tests/test_training.py -------------------------------------------------------------------------------- /tests/old_tests/testing_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tests/old_tests/testing_utils.py -------------------------------------------------------------------------------- /tests/pipeline_parallel/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/pipeline_parallel/test_schedules.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tests/pipeline_parallel/test_schedules.py -------------------------------------------------------------------------------- /tests/run_megatron.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tests/run_megatron.py -------------------------------------------------------------------------------- /tests/tensor_parallel/__int__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/test_megatron.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tests/test_megatron.py -------------------------------------------------------------------------------- /tests/transformer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/transformer/test_core_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tests/transformer/test_core_attention.py -------------------------------------------------------------------------------- /tests/transformer/test_module.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tests/transformer/test_module.py -------------------------------------------------------------------------------- /tests/transformer/test_parallel_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tests/transformer/test_parallel_attention.py -------------------------------------------------------------------------------- /tests/transformer/test_parallel_mlp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tests/transformer/test_parallel_mlp.py -------------------------------------------------------------------------------- /tests/transformer/test_parallel_transformer_block.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tests/transformer/test_parallel_transformer_block.py -------------------------------------------------------------------------------- /tests/transformer/test_parallel_transformer_layer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tests/transformer/test_parallel_transformer_layer.py -------------------------------------------------------------------------------- /tests/transformer/test_transformer_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tests/transformer/test_transformer_config.py -------------------------------------------------------------------------------- /tests/unit_tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/unit_tests/tensor_parallel/test_cross_entropy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tests/unit_tests/tensor_parallel/test_cross_entropy.py -------------------------------------------------------------------------------- /tests/unit_tests/tensor_parallel/test_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tests/unit_tests/tensor_parallel/test_data.py -------------------------------------------------------------------------------- /tests/unit_tests/tensor_parallel/test_mappings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tests/unit_tests/tensor_parallel/test_mappings.py -------------------------------------------------------------------------------- /tests/unit_tests/tensor_parallel/test_random.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tests/unit_tests/tensor_parallel/test_random.py -------------------------------------------------------------------------------- /tests/unit_tests/tensor_parallel/test_tensor_parallel_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tests/unit_tests/tensor_parallel/test_tensor_parallel_utils.py -------------------------------------------------------------------------------- /tests/unit_tests/test_basic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tests/unit_tests/test_basic.py -------------------------------------------------------------------------------- /tests/unit_tests/test_parallel_state.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tests/unit_tests/test_parallel_state.py -------------------------------------------------------------------------------- /tests/unit_tests/test_utilities.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tests/unit_tests/test_utilities.py -------------------------------------------------------------------------------- /tests/unit_tests/test_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tests/unit_tests/test_utils.py -------------------------------------------------------------------------------- /tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/__init__.py -------------------------------------------------------------------------------- /tools/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /tools/__pycache__/verify_checkpoint_non_tp_consistency.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/__pycache__/verify_checkpoint_non_tp_consistency.cpython-310.pyc -------------------------------------------------------------------------------- /tools/bert_embedding/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/bert_embedding/__init__.py -------------------------------------------------------------------------------- /tools/bert_embedding/dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/bert_embedding/dataset.py -------------------------------------------------------------------------------- /tools/bert_embedding/embed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/bert_embedding/embed.py -------------------------------------------------------------------------------- /tools/bert_embedding/external_libs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/bert_embedding/external_libs.py -------------------------------------------------------------------------------- /tools/bert_embedding/huggingface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/bert_embedding/huggingface.py -------------------------------------------------------------------------------- /tools/bert_embedding/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/bert_embedding/utils.py -------------------------------------------------------------------------------- /tools/checkpoint_loader_megatron.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/checkpoint_loader_megatron.py -------------------------------------------------------------------------------- /tools/checkpoint_saver_megatron.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/checkpoint_saver_megatron.py -------------------------------------------------------------------------------- /tools/checkpoint_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/checkpoint_util.py -------------------------------------------------------------------------------- /tools/convert_checkpoint/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/convert_checkpoint/README.md -------------------------------------------------------------------------------- /tools/convert_checkpoint/deepspeed_checkpoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/convert_checkpoint/deepspeed_checkpoint.py -------------------------------------------------------------------------------- /tools/convert_checkpoint/deepspeed_to_megatron.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/convert_checkpoint/deepspeed_to_megatron.py -------------------------------------------------------------------------------- /tools/convert_checkpoint/deepspeed_to_transformers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/convert_checkpoint/deepspeed_to_transformers.py -------------------------------------------------------------------------------- /tools/convert_checkpoint/inspect_checkpoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/convert_checkpoint/inspect_checkpoint.py -------------------------------------------------------------------------------- /tools/convert_checkpoint/inspect_deepspeed_checkpoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/convert_checkpoint/inspect_deepspeed_checkpoint.py -------------------------------------------------------------------------------- /tools/convert_checkpoint/json/mds_to_hf_llama_13b.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/convert_checkpoint/json/mds_to_hf_llama_13b.json -------------------------------------------------------------------------------- /tools/convert_checkpoint/json/mds_to_hf_llama_70b.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/convert_checkpoint/json/mds_to_hf_llama_70b.json -------------------------------------------------------------------------------- /tools/convert_checkpoint/json/mds_to_hf_llama_7b.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/convert_checkpoint/json/mds_to_hf_llama_7b.json -------------------------------------------------------------------------------- /tools/convert_checkpoint/json/mds_to_hf_llama_7b_full_names.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/convert_checkpoint/json/mds_to_hf_llama_7b_full_names.json -------------------------------------------------------------------------------- /tools/convert_checkpoint/mds_universal_to_huggingface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/convert_checkpoint/mds_universal_to_huggingface.py -------------------------------------------------------------------------------- /tools/generate_samples_gpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/generate_samples_gpt.py -------------------------------------------------------------------------------- /tools/hf2megads_weight_converter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/hf2megads_weight_converter.py -------------------------------------------------------------------------------- /tools/linter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/linter.py -------------------------------------------------------------------------------- /tools/merge_datasets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/merge_datasets.py -------------------------------------------------------------------------------- /tools/openwebtext/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/openwebtext/README.md -------------------------------------------------------------------------------- /tools/openwebtext/add_id.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/openwebtext/add_id.py -------------------------------------------------------------------------------- /tools/openwebtext/blacklist_urls.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/openwebtext/blacklist_urls.py -------------------------------------------------------------------------------- /tools/openwebtext/cleanup_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/openwebtext/cleanup_dataset.py -------------------------------------------------------------------------------- /tools/openwebtext/cleanup_fix_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/openwebtext/cleanup_fix_dataset.py -------------------------------------------------------------------------------- /tools/openwebtext/filter_ngrams.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/openwebtext/filter_ngrams.py -------------------------------------------------------------------------------- /tools/openwebtext/find_duplicates.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/openwebtext/find_duplicates.py -------------------------------------------------------------------------------- /tools/openwebtext/group_duplicate_url.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/openwebtext/group_duplicate_url.py -------------------------------------------------------------------------------- /tools/openwebtext/merge_jsons.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/openwebtext/merge_jsons.py -------------------------------------------------------------------------------- /tools/openwebtext/remove_group_duplicates.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/openwebtext/remove_group_duplicates.py -------------------------------------------------------------------------------- /tools/preprocess_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/preprocess_data.py -------------------------------------------------------------------------------- /tools/preprocess_data_nmt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/preprocess_data_nmt.py -------------------------------------------------------------------------------- /tools/retro/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/retro/README.md -------------------------------------------------------------------------------- /tools/retro/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tools/retro/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/retro/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /tools/retro/__pycache__/utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/retro/__pycache__/utils.cpython-310.pyc -------------------------------------------------------------------------------- /tools/retro/cli/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/retro/cli/__init__.py -------------------------------------------------------------------------------- /tools/retro/cli/__main__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/retro/cli/__main__.py -------------------------------------------------------------------------------- /tools/retro/cli/cli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/retro/cli/cli.py -------------------------------------------------------------------------------- /tools/retro/db/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/retro/db/__init__.py -------------------------------------------------------------------------------- /tools/retro/db/build.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/retro/db/build.py -------------------------------------------------------------------------------- /tools/retro/db/dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/retro/db/dataset.py -------------------------------------------------------------------------------- /tools/retro/db/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/retro/db/utils.py -------------------------------------------------------------------------------- /tools/retro/examples/get_dataset_configs.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/retro/examples/get_dataset_configs.sh -------------------------------------------------------------------------------- /tools/retro/examples/get_preprocess_cmd.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/retro/examples/get_preprocess_cmd.sh -------------------------------------------------------------------------------- /tools/retro/examples/preprocess_data.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/retro/examples/preprocess_data.sh -------------------------------------------------------------------------------- /tools/retro/examples/pretrain_model.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/retro/examples/pretrain_model.sh -------------------------------------------------------------------------------- /tools/retro/external_libs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/retro/external_libs.py -------------------------------------------------------------------------------- /tools/retro/index/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/retro/index/__init__.py -------------------------------------------------------------------------------- /tools/retro/index/build.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/retro/index/build.py -------------------------------------------------------------------------------- /tools/retro/index/factory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/retro/index/factory.py -------------------------------------------------------------------------------- /tools/retro/index/index.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/retro/index/index.py -------------------------------------------------------------------------------- /tools/retro/index/indexes/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/retro/index/indexes/__init__.py -------------------------------------------------------------------------------- /tools/retro/index/indexes/faiss_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/retro/index/indexes/faiss_base.py -------------------------------------------------------------------------------- /tools/retro/index/indexes/faiss_par_add.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/retro/index/indexes/faiss_par_add.py -------------------------------------------------------------------------------- /tools/retro/index/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/retro/index/utils.py -------------------------------------------------------------------------------- /tools/retro/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/retro/main.py -------------------------------------------------------------------------------- /tools/retro/query/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/retro/query/__init__.py -------------------------------------------------------------------------------- /tools/retro/query/chunk_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/retro/query/chunk_dataset.py -------------------------------------------------------------------------------- /tools/retro/query/query.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/retro/query/query.py -------------------------------------------------------------------------------- /tools/retro/query/retro_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/retro/query/retro_dataset.py -------------------------------------------------------------------------------- /tools/retro/query/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/retro/query/utils.py -------------------------------------------------------------------------------- /tools/retro/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/retro/utils.py -------------------------------------------------------------------------------- /tools/run_text_generation_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/run_text_generation_server.py -------------------------------------------------------------------------------- /tools/text_generation_cli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/text_generation_cli.py -------------------------------------------------------------------------------- /tools/verify_checkpoint_non_tp_consistency.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anonymous1252022/fp4-all-the-way/HEAD/tools/verify_checkpoint_non_tp_consistency.py --------------------------------------------------------------------------------