├── .gitignore ├── CONTRIBUTING.md ├── LICENSE ├── NOTICE ├── README.md ├── README.zh_CN.md ├── VERSION ├── byte_infer_perf ├── general_perf │ ├── README.md │ ├── README.zh_CN.md │ ├── __init__.py │ ├── backends │ │ ├── CPU │ │ │ ├── CPU.json │ │ │ ├── calculate_cpu_diff.py │ │ │ ├── calculate_cpu_diff.sh │ │ │ ├── compile_backend_cpu.py │ │ │ ├── requirements.txt │ │ │ └── runtime_backend_cpu.py │ │ ├── HPU │ │ │ ├── HPU.json │ │ │ ├── README.md │ │ │ ├── bert │ │ │ │ ├── bf16.txt │ │ │ │ └── fp32.txt │ │ │ ├── compile_backend_hpu.py │ │ │ ├── gaudi.png │ │ │ ├── gaudi2.png │ │ │ ├── habana-white_intel_logo.png │ │ │ ├── requirements.txt │ │ │ └── runtime_backend_hpu.py │ │ ├── IPU │ │ │ ├── .gitignore │ │ │ ├── Graphcore-Chinese-Wordmark-Horizontal.svg │ │ │ ├── IPU.json │ │ │ ├── README.md │ │ │ ├── README.zh_CN.md │ │ │ ├── __init__.py │ │ │ ├── compile_backend_ipu.py │ │ │ ├── engine.py │ │ │ ├── engine_poprt.py │ │ │ ├── interact_infos │ │ │ │ ├── albert-torch-fp32.json │ │ │ │ ├── bert-torch-fp32.json │ │ │ │ ├── clip-onnx-fp32.json │ │ │ │ ├── conformer-encoder-onnx-fp32.json │ │ │ │ ├── deberta-torch-fp32.json │ │ │ │ ├── resnet50-torch-fp32.json │ │ │ │ ├── roberta-torch-fp32.json │ │ │ │ ├── roformer-tf-fp32.json │ │ │ │ ├── swin-large-torch-fp32.json │ │ │ │ ├── videobert-onnx-fp32.json │ │ │ │ └── widedeep-tf-fp32.json │ │ │ ├── passes │ │ │ │ ├── __init__.py │ │ │ │ ├── custom_final_check.py │ │ │ │ └── deberta_pack.py │ │ │ ├── requirements.txt │ │ │ └── runtime_backend_ipu.py │ │ ├── SPU │ │ │ ├── README.md │ │ │ ├── base_compile.py │ │ │ ├── compile_backend_spu.py │ │ │ ├── interact_info │ │ │ │ ├── albert-torch-fp32.json │ │ │ │ ├── bert-torch-fp32.json │ │ │ │ ├── conformer-encoder-onnx-fp32.json │ │ │ │ ├── resnet50-torch-fp32.json │ │ │ │ └── roberta-torch-fp32.json │ │ │ ├── requirements.txt │ │ │ ├── runtime_backend_spu.py │ │ │ └── spu.json │ │ ├── STC │ │ │ ├── README.md │ │ │ ├── STC.jpg │ │ │ ├── STC.json │ │ │ ├── compile_backend_stc.py │ │ │ ├── requirements.txt │ │ │ └── runtime_backend_stc.py │ │ ├── compile_backend.py │ │ └── runtime_backend.py │ ├── core │ │ ├── __init__.py │ │ ├── configs │ │ │ ├── __init__.py │ │ │ ├── backend_store.py │ │ │ ├── dataset_store.py │ │ │ └── workload_store.py │ │ └── perf_engine.py │ ├── datasets │ │ ├── data_loader.py │ │ ├── fake_dataset │ │ │ ├── data_loader.py │ │ │ └── test_accuracy.py │ │ ├── open_cail2019 │ │ │ ├── data_loader.py │ │ │ ├── pre_process_data.py │ │ │ └── test_accuracy.py │ │ ├── open_cifar │ │ │ ├── data_loader.py │ │ │ └── test_accuracy.py │ │ ├── open_criteo_kaggle │ │ │ ├── data_loader.py │ │ │ ├── preprocess_dataset.py │ │ │ └── test_accuracy.py │ │ ├── open_imagenet │ │ │ ├── data_loader.py │ │ │ └── test_accuracy.py │ │ ├── open_squad │ │ │ ├── bert │ │ │ │ ├── accuracy_squad.py │ │ │ │ └── evaluate.py │ │ │ ├── create_squad_data.py │ │ │ ├── data_loader.py │ │ │ └── test_accuracy.py │ │ └── test_accuracy.py │ ├── launch.py │ ├── model_zoo │ │ ├── albert-torch-fp32.json │ │ ├── bert-tf-fp32.json │ │ ├── bert-torch-fp32.json │ │ ├── clip-onnx-fp32.json │ │ ├── conformer-encoder-onnx-fp32.json │ │ ├── deberta-torch-fp32.json │ │ ├── resnet50-tf-fp32.json │ │ ├── resnet50-torch-fp32.json │ │ ├── roberta-torch-fp32.json │ │ ├── roformer-tf-fp32.json │ │ ├── swin-large-torch-fp32.json │ │ ├── unet-onnx-fp32.json │ │ ├── vae-decoder-onnx-fp32.json │ │ ├── vae-encoder-onnx-fp32.json │ │ ├── videobert-onnx-fp32.json │ │ ├── widedeep-tf-fp32.json │ │ └── yolov5-onnx-fp32.json │ ├── prepare_model_and_dataset.sh │ ├── reports │ │ ├── HPU │ │ │ ├── albert-torch-fp32 │ │ │ │ ├── albert-torch-fp32-to-fp32.png │ │ │ │ └── result-fp32.json │ │ │ ├── bert-torch-fp32 │ │ │ │ ├── bert-torch-fp32-to-fp32.png │ │ │ │ └── result-fp32.json │ │ │ ├── deberta-torch-fp32 │ │ │ │ ├── deberta-torch-fp32-to-fp32.png │ │ │ │ └── result-fp32.json │ │ │ ├── resnet50-torch-fp32 │ │ │ │ ├── resnet50-torch-fp32-to-fp32.png │ │ │ │ └── result-fp32.json │ │ │ └── swin-large-torch-fp32 │ │ │ │ ├── result-fp32.json │ │ │ │ └── swin-large-torch-fp32-to-fp32.png │ │ ├── IPU │ │ │ ├── albert-torch-fp32 │ │ │ │ ├── albert-torch-fp32-to-fp16.png │ │ │ │ └── result-fp16.json │ │ │ ├── bert-torch-fp32 │ │ │ │ ├── bert-torch-fp32-to-fp16.png │ │ │ │ ├── bert-torch-fp32-to-fp8.png │ │ │ │ ├── result-fp16.json │ │ │ │ └── result-fp8.json │ │ │ ├── clip-onnx-fp32 │ │ │ │ ├── clip-onnx-fp32-to-fp16.png │ │ │ │ └── result-fp16.json │ │ │ ├── conformer-encoder-onnx-fp32 │ │ │ │ ├── conformer-encoder-onnx-fp32-to-fp16.png │ │ │ │ └── result-fp16.json │ │ │ ├── deberta-torch-fp32 │ │ │ │ ├── deberta-torch-fp32-to-fp16.png │ │ │ │ └── result-fp16.json │ │ │ ├── resnet50-torch-fp32 │ │ │ │ ├── resnet50-torch-fp32-to-fp16.png │ │ │ │ ├── resnet50-torch-fp32-to-fp8.png │ │ │ │ ├── result-fp16.json │ │ │ │ └── result-fp8.json │ │ │ ├── roberta-torch-fp32 │ │ │ │ ├── result-fp16.json │ │ │ │ └── roberta-torch-fp32-to-fp16.png │ │ │ ├── roformer-tf-fp32 │ │ │ │ ├── result-fp16.json │ │ │ │ └── roformer-tf-fp32-to-fp16.png │ │ │ ├── swin-large-torch-fp32 │ │ │ │ ├── result-fp16.json │ │ │ │ ├── result-fp8.json │ │ │ │ ├── swin-large-torch-fp32-to-fp16.png │ │ │ │ └── swin-large-torch-fp32-to-fp8.png │ │ │ ├── videobert-onnx-fp32 │ │ │ │ ├── result-fp16.json │ │ │ │ └── videobert-onnx-fp32-to-fp16.png │ │ │ └── widedeep-tf-fp32 │ │ │ │ ├── result-fp16.json │ │ │ │ └── widedeep-tf-fp32-to-fp16.png │ │ ├── README │ │ ├── SPU │ │ │ ├── albert-torch-fp32 │ │ │ │ ├── albert-torch-fp32.png │ │ │ │ └── result.json │ │ │ ├── bert-torch-fp32 │ │ │ │ ├── bert-torch-fp32.png │ │ │ │ └── result.json │ │ │ ├── conformer-encoder-onnx-fp32 │ │ │ │ ├── conformer-encoder-onnx-fp32.png │ │ │ │ └── result.json │ │ │ ├── resnet50-torch-fp32 │ │ │ │ ├── resnet50-torch-fp32.png │ │ │ │ └── result.json │ │ │ └── roberta-torch-fp32 │ │ │ │ ├── result.json │ │ │ │ └── roberta-torch-fp32.png │ │ ├── STC │ │ │ ├── albert-torch-fp32 │ │ │ │ ├── albert-torch-fp32.png │ │ │ │ └── result.json │ │ │ ├── bert-tf-fp32 │ │ │ │ ├── bert-tf-fp32.png │ │ │ │ └── result.json │ │ │ ├── bert-torch-fp32 │ │ │ │ ├── bert-torch-fp32.png │ │ │ │ └── result.json │ │ │ ├── resnet50-tf-fp32 │ │ │ │ ├── resnet50-tf-fp32.png │ │ │ │ └── result.json │ │ │ ├── roberta-torch-fp32 │ │ │ │ ├── result.json │ │ │ │ └── roberta-torch-fp32.png │ │ │ └── widedeep-tf-fp32 │ │ │ │ ├── result.json │ │ │ │ └── widedeep-tf-fp32.png │ │ ├── reports_summary.png │ │ └── reports_summary.py │ ├── requirements.txt │ ├── tools │ │ ├── build_pdf.py │ │ ├── convert.sh │ │ ├── frozen_to_saved.py │ │ ├── h5_to_frozen.py │ │ ├── model_trt_convert.py │ │ ├── mxnet_to_onnx.py │ │ ├── onnx_utils.py │ │ ├── requirements.txt │ │ ├── saved_to_frozen.py │ │ ├── saved_to_onnx.py │ │ ├── tf_fp32_to_fp16.py │ │ ├── tf_utils.py │ │ └── torch_to_onnx.py │ ├── version.py │ └── workloads │ │ ├── albert-torch-fp32.json │ │ ├── bert-tf-fp32.json │ │ ├── bert-torch-fp32.json │ │ ├── clip-onnx-fp32.json │ │ ├── conformer-encoder-onnx-fp32.json │ │ ├── deberta-torch-fp32.json │ │ ├── resnet50-tf-fp32.json │ │ ├── resnet50-torch-fp32.json │ │ ├── roberta-torch-fp32.json │ │ ├── roformer-tf-fp32.json │ │ ├── swin-large-torch-fp32.json │ │ ├── unet-onnx-fp32.json │ │ ├── vae-decoder-onnx-fp32.json │ │ ├── vae-encoder-onnx-fp32.json │ │ ├── videobert-onnx-fp32.json │ │ ├── widedeep-tf-fp32.json │ │ └── yolov5-onnx-fp32.json └── llm_perf │ ├── README.md │ ├── backends │ └── GPU │ │ ├── gpu_ckpt_loader.py │ │ ├── gpu_inferencer.py │ │ ├── gpu_mp_engine.py │ │ ├── gpu_sampler.py │ │ ├── gpu_scheduler.py │ │ ├── model_impl │ │ ├── __init__.py │ │ ├── gpu_chatglm2.py │ │ ├── gpu_falcon.py │ │ ├── gpu_llama3.py │ │ ├── gpu_mixtral.py │ │ ├── modeling_chatglm2.py │ │ ├── modeling_falcon.py │ │ ├── modeling_llama3.py │ │ ├── modeling_mixtral.py │ │ ├── split_falcon.py │ │ ├── split_llama.py │ │ └── split_mixtral.py │ │ └── setup.py │ ├── bench_model.py │ ├── benchmark │ ├── bench.py │ └── tensorrt-llm │ │ ├── README.md │ │ └── bench_engine.py │ ├── core │ ├── ckpt_loader.py │ ├── generation.py │ ├── inferencer.py │ ├── mp_engine.py │ ├── sampler.py │ └── scheduler.py │ ├── datasets │ ├── merged_52_test.csv │ └── test_mini.csv │ ├── launch.py │ ├── model_zoo │ ├── README.md │ ├── chatglm2-torch-fp16-6b.json │ ├── falcon-torch-bf16-180b.json │ ├── llama3-torch-bf16-70b.json │ └── mixtral-torch-bf16-8x22b.json │ ├── prepare_model.py │ ├── requirements.txt │ ├── script │ ├── extra_datasets.py │ ├── lint.sh │ ├── proto.sh │ └── single_query.py │ ├── server │ ├── endpoint.py │ ├── launch_server.py │ ├── pb.py │ ├── server.proto │ ├── server_pb2.py │ └── server_pb2_grpc.py │ ├── utils │ ├── dist_utils.py │ ├── logger.py │ ├── ps_utils.py │ └── reporter.py │ └── workloads │ ├── chatglm2-torch-fp16-6b.json │ ├── falcon-torch-bf16-180b.json │ ├── llama3-torch-bf16-70b.json │ └── mixtral-torch-bf16-8x22b.json ├── byte_micro_perf ├── .gitignore ├── README.md ├── backends │ └── GPU │ │ ├── backend_gpu.py │ │ ├── ops │ │ ├── add.py │ │ ├── add_rms_norm_dynamic_quant.py │ │ ├── all_gather.py │ │ ├── all_reduce.py │ │ ├── all_to_all.py │ │ ├── broadcast.py │ │ ├── cast.py │ │ ├── cos.py │ │ ├── device2device.py │ │ ├── device2host.py │ │ ├── div.py │ │ ├── embedding.py │ │ ├── exp.py │ │ ├── flash_attention.py │ │ ├── flash_attention_session_cache.py │ │ ├── flash_decoding.py │ │ ├── gather.py │ │ ├── gelu.py │ │ ├── gemm.py │ │ ├── head_rms_norm.py │ │ ├── host2device.py │ │ ├── index_add.py │ │ ├── index_select.py │ │ ├── layer_norm.py │ │ ├── log.py │ │ ├── moe_dispatch_tokens.py │ │ ├── moe_gather.py │ │ ├── moe_gating_gemm.py │ │ ├── moe_quant_group_gemm.py │ │ ├── moe_quant_matmul.py │ │ ├── moe_scatter_dynamic_quant.py │ │ ├── moe_softmax_topk.py │ │ ├── moe_swiglu_dynamic_quant.py │ │ ├── mul.py │ │ ├── p2p.py │ │ ├── reduce_max.py │ │ ├── reduce_min.py │ │ ├── reduce_scatter.py │ │ ├── reduce_sum.py │ │ ├── rms_norm.py │ │ ├── rotary_embedding.py │ │ ├── scale_dynamic_quant.py │ │ ├── scatter.py │ │ ├── silu.py │ │ ├── sin.py │ │ ├── softmax.py │ │ ├── sqrt.py │ │ ├── store_kv_cache.py │ │ ├── sub.py │ │ └── topk.py │ │ ├── projects │ │ ├── accuracy_test │ │ │ ├── ccl_reduce_test.py │ │ │ ├── common.py │ │ │ └── test_float_add.py │ │ └── torch_profiler │ │ │ └── perf.py │ │ └── provider_gpu.py ├── core │ ├── README.md │ ├── backend.py │ ├── creators.py │ ├── op.py │ ├── ops │ │ ├── llm_ops.py │ │ ├── tensor_gemm_ops.py │ │ ├── vector_activation_ops.py │ │ ├── vector_index_ops.py │ │ ├── vector_linear_ops.py │ │ ├── vector_norm_ops.py │ │ ├── vector_reduction_ops.py │ │ ├── vector_sfu_ops.py │ │ └── xccl_ops.py │ ├── perf_engine.py │ ├── scheduler.py │ └── utils.py ├── launch.py ├── requirements.txt └── workloads │ ├── basic │ ├── tensor_gemm_ops │ │ └── gemm.json │ ├── vector_activation_ops │ │ ├── gelu.json │ │ └── silu.json │ ├── vector_index_ops │ │ ├── embedding.json │ │ ├── gather.json │ │ ├── index_add.json │ │ ├── index_select.json │ │ └── scatter.json │ ├── vector_linear_ops │ │ ├── add.json │ │ ├── cast.json │ │ ├── mul.json │ │ └── sub.json │ ├── vector_norm_ops │ │ ├── layer_norm.json │ │ ├── rms_norm.json │ │ └── softmax.json │ ├── vector_reduction_ops │ │ ├── reduce_max.json │ │ ├── reduce_min.json │ │ ├── reduce_sum.json │ │ └── topk.json │ ├── vector_sfu_ops │ │ ├── cos.json │ │ ├── div.json │ │ ├── exp.json │ │ ├── log.json │ │ ├── sin.json │ │ └── sqrt.json │ └── xccl_ops │ │ ├── README.md │ │ ├── all_gather.json │ │ ├── all_reduce.json │ │ ├── all_to_all.json │ │ ├── broadcast.json │ │ ├── device2device.json │ │ ├── device2host.json │ │ ├── host2device.json │ │ ├── p2p.json │ │ └── reduce_scatter.json │ ├── llm │ ├── README.md │ └── flash_attention.json │ └── mocked_model │ ├── README.md │ ├── TP8_EP8 │ ├── add_rms_norm_dynamic_quant.json │ ├── all_reduce.json │ ├── flash_attention.json │ ├── head_rms_norm.json │ ├── moe_gather.json │ ├── moe_gating_gemm.json │ ├── moe_quant_group_gemm.json │ ├── moe_quant_matmul.json │ ├── moe_scatter_dynamic_quant.json │ ├── moe_softmax_topk.json │ ├── moe_swiglu_dynamic_quant.json │ ├── rms_norm.json │ ├── rotary_embedding.json │ ├── scale_dynamic_quant.json │ └── store_kv_cache.json │ ├── add_rms_norm_dynamic_quant.json │ ├── all_reduce.json │ ├── flash_attention.json │ ├── flash_decoding.json │ ├── head_rms_norm.json │ ├── moe_gather.json │ ├── moe_gating_gemm.json │ ├── moe_quant_group_gemm.json │ ├── moe_quant_matmul.json │ ├── moe_scatter_dynamic_quant.json │ ├── moe_softmax_topk.json │ ├── moe_swiglu_dynamic_quant.json │ ├── rms_norm.json │ ├── rotary_embedding.json │ ├── scale_dynamic_quant.json │ └── store_kv_cache.json ├── byte_train_perf ├── Megatron-LM │ ├── .coveragerc │ ├── .flake8 │ ├── .github │ │ ├── ISSUE_TEMPLATE │ │ │ ├── bug.md │ │ │ ├── enhancement.md │ │ │ ├── question.md │ │ │ └── regression.md │ │ └── workflows │ │ │ └── stale.yml │ ├── .gitignore │ ├── .gitlab-ci.yml │ ├── .gitlab │ │ ├── labeler-config.yml │ │ └── stages │ │ │ ├── 00.pre.yml │ │ │ ├── 01.test.yml │ │ │ ├── 02.functional-tests.yml │ │ │ └── 03.publish.yml │ ├── .pylintrc │ ├── CHANGELOG.md │ ├── CODEOWNERS │ ├── CONTRIBUTING.md │ ├── Dockerfile.ci.dev │ ├── Dockerfile.ci.lts │ ├── Dockerfile.linting │ ├── LICENSE │ ├── MANIFEST.in │ ├── README.md │ ├── docs │ │ ├── llama_mistral.md │ │ └── source │ │ │ ├── api-guide │ │ │ ├── context_parallel.rst │ │ │ ├── datasets.rst │ │ │ ├── dist_checkpointing.rst │ │ │ ├── dist_checkpointing.strategies.rst │ │ │ ├── dist_optimizer.md │ │ │ ├── distributed.rst │ │ │ ├── encoder_decoder_parallelism.rst │ │ │ ├── fusions.rst │ │ │ ├── index.rst │ │ │ ├── models.bert.rst │ │ │ ├── models.gpt.rst │ │ │ ├── models.rst │ │ │ ├── models.t5.rst │ │ │ ├── moe.rst │ │ │ ├── num_microbatches_calculator.rst │ │ │ ├── optimizer_param_scheduler.rst │ │ │ ├── pipeline_parallel.rst │ │ │ ├── tensor_parallel.rst │ │ │ └── transformer.rst │ │ │ ├── images │ │ │ ├── context_parallel │ │ │ │ ├── CP_overview.png │ │ │ │ └── CP_results.png │ │ │ ├── distrib_optimizer │ │ │ │ ├── data_flow.png │ │ │ │ └── sharding_scheme.png │ │ │ └── moe │ │ │ │ └── token_drop.png │ │ │ ├── index.rst │ │ │ └── user-guide │ │ │ └── index.rst │ ├── examples │ │ ├── bert │ │ │ ├── README.md │ │ │ └── train_bert_340m_distributed.sh │ │ ├── debug_to.py │ │ ├── ds_like │ │ │ ├── Llama2tokenizer.model │ │ │ ├── run2script.sh │ │ │ ├── run4script.sh │ │ │ ├── train_ds_like_2node.sh │ │ │ └── train_ds_like_4node.sh │ │ ├── export │ │ │ ├── README.md │ │ │ ├── knowledge_distillation │ │ │ │ └── pretrain_gpt_modelopt.py │ │ │ ├── ptq_and_trtllm_export │ │ │ │ ├── README.md │ │ │ │ ├── ptq_trtllm_llama2_7b.sh │ │ │ │ ├── ptq_trtllm_llama3_1_8b.sh │ │ │ │ ├── ptq_trtllm_llama3_8b.sh │ │ │ │ ├── ptq_trtllm_minitron_8b.sh │ │ │ │ ├── ptq_trtllm_mistral_12b.sh │ │ │ │ ├── ptq_trtllm_mixtral_8x7b.sh │ │ │ │ ├── text_generation_ptq.py │ │ │ │ └── trtllm_text_generation.py │ │ │ └── trtllm_export │ │ │ │ ├── README.md │ │ │ │ ├── distributed_export │ │ │ │ └── gpt_distributed_gpu_export.py │ │ │ │ └── single_device_export │ │ │ │ └── gpt_single_device_cpu_export.py │ │ ├── gpt3 │ │ │ ├── README.md │ │ │ ├── gpt_config.yaml │ │ │ ├── run_345M.sh │ │ │ ├── run_345M_memory.sh │ │ │ ├── run_345M_variable_memory.sh │ │ │ ├── run_857M.sh │ │ │ ├── train_gpt3_175b_distributed.sh │ │ │ ├── train_gpt3_345M_distributed.sh │ │ │ ├── train_gpt3_345M_distributed_memory.sh │ │ │ ├── train_gpt3_345M_distributed_variable_memory.sh │ │ │ └── train_gpt3_857M_distributed.sh │ │ ├── inference │ │ │ ├── README.md │ │ │ ├── gpt │ │ │ │ └── gpt_batch_inference.py │ │ │ ├── llama_mistral │ │ │ │ ├── huggingface_reference.py │ │ │ │ ├── run_text_generation_llama3.1.sh │ │ │ │ ├── run_text_generation_llama3.sh │ │ │ │ └── run_text_generation_mistral.sh │ │ │ ├── run_text_generation_server_345M.sh │ │ │ ├── run_text_generation_server_345M_8_tensor_parallel.sh │ │ │ └── t5 │ │ │ │ └── simple_t5_batch_inference.py │ │ ├── mamba │ │ │ ├── .gitignore │ │ │ ├── Dockerfile │ │ │ ├── README.md │ │ │ ├── run_text_gen_server_8b.sh │ │ │ ├── run_text_gen_server_8b_gpt3.sh │ │ │ └── train.sh │ │ ├── mixtral │ │ │ ├── Llama2tokenizer.model │ │ │ ├── README.md │ │ │ ├── run_moe.sh │ │ │ ├── train_mixtral_8x7b_distributed.sh │ │ │ └── train_moe.sh │ │ ├── multimodal │ │ │ ├── Dockerfile │ │ │ ├── README.md │ │ │ ├── assets │ │ │ │ └── pretrain_curves.png │ │ │ ├── combine_lm_vision_checkpoints.sh │ │ │ ├── combine_state_dicts.py │ │ │ ├── config.py │ │ │ ├── convert_llava_pretrain_to_wds.py │ │ │ ├── dataloader_provider.py │ │ │ ├── dataset_helpers.py │ │ │ ├── evaluation │ │ │ │ ├── evaluate_ai2d.py │ │ │ │ ├── evaluate_chartqa.py │ │ │ │ ├── evaluate_coco.py │ │ │ │ ├── evaluate_mathvista.py │ │ │ │ ├── evaluate_mmmu.py │ │ │ │ ├── evaluate_ocrbench.py │ │ │ │ ├── evaluate_textvqa.py │ │ │ │ ├── evaluate_vqav2.py │ │ │ │ └── evaluation_datasets.py │ │ │ ├── image_processing.py │ │ │ ├── layer_specs.py │ │ │ ├── manual_prompts.json │ │ │ ├── model.py │ │ │ ├── model_converter │ │ │ │ ├── clip_converter.py │ │ │ │ ├── internvit_converter.py │ │ │ │ ├── siglip_converter.py │ │ │ │ └── vision_model_tester.py │ │ │ ├── multimodal_args.py │ │ │ ├── nvlm │ │ │ │ ├── README.md │ │ │ │ ├── internvit.py │ │ │ │ ├── nvlm_prompts.json │ │ │ │ ├── pp_checkpoint_converter.py │ │ │ │ ├── pretrain_blend.yaml │ │ │ │ ├── pretrain_qwen20_72b_internvit_6b.sh │ │ │ │ ├── pretrain_yi_34b_internvit_6b.sh │ │ │ │ ├── run_text_generation_qwen20_72b_internvit_6b.sh │ │ │ │ ├── run_text_generation_qwen25_7b_siglip.sh │ │ │ │ ├── run_text_generation_yi_34b_internvit_6b.sh │ │ │ │ ├── sft_34b_internvit.sh │ │ │ │ ├── sft_blend.yaml │ │ │ │ └── sft_qwen20_72b_internvit_6b.sh │ │ │ ├── pretrain_dataset.yaml │ │ │ ├── pretrain_mistral_clip.sh │ │ │ ├── run_text_generation.py │ │ │ ├── sft_dataset.yaml │ │ │ ├── sft_mistral_clip.sh │ │ │ ├── text_generation_mistral_clip.sh │ │ │ └── train.py │ │ ├── retro │ │ │ ├── README.md │ │ │ ├── preprocess_data.sh │ │ │ └── train_retro_2b_distributed.sh │ │ ├── run_simple_mcore_train_loop.py │ │ └── t5 │ │ │ ├── README.md │ │ │ ├── t5_mcore_train_curve.png │ │ │ └── train_t5_220m_distributed.sh │ ├── gpt2-merges.txt │ ├── gpt2-vocab.json │ ├── images │ │ ├── model_table.png │ │ ├── strong_scaling.png │ │ └── weak_scaling.png │ ├── log │ ├── megatron │ │ ├── core │ │ │ ├── QuickStart.md │ │ │ ├── README.md │ │ │ ├── README_STRAGGLER.md │ │ │ ├── __init__.py │ │ │ ├── config_logger.py │ │ │ ├── datasets │ │ │ │ ├── Makefile │ │ │ │ ├── __init__.py │ │ │ │ ├── bert_dataset.py │ │ │ │ ├── blended_dataset.py │ │ │ │ ├── blended_megatron_dataset_builder.py │ │ │ │ ├── blended_megatron_dataset_config.py │ │ │ │ ├── gpt_dataset.py │ │ │ │ ├── helpers.cpp │ │ │ │ ├── helpers.py │ │ │ │ ├── indexed_dataset.py │ │ │ │ ├── masked_dataset.py │ │ │ │ ├── megatron_dataset.py │ │ │ │ ├── megatron_tokenizer.py │ │ │ │ ├── multimodal_dataset.py │ │ │ │ ├── readme.md │ │ │ │ ├── retro │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── config │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── bert_embedders.py │ │ │ │ │ │ ├── config.py │ │ │ │ │ │ ├── gpt_chunk_datasets.py │ │ │ │ │ │ └── tokenizers.py │ │ │ │ │ ├── db │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── build.py │ │ │ │ │ │ ├── dataset.py │ │ │ │ │ │ └── utils.py │ │ │ │ │ ├── external_libs.py │ │ │ │ │ ├── index │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── build.py │ │ │ │ │ │ ├── factory.py │ │ │ │ │ │ ├── index.py │ │ │ │ │ │ ├── indexes │ │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ │ ├── faiss_base.py │ │ │ │ │ │ │ └── faiss_par_add.py │ │ │ │ │ │ ├── utils.py │ │ │ │ │ │ └── validate.py │ │ │ │ │ ├── query │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── gpt_chunk_dataset.py │ │ │ │ │ │ ├── multi_split_gpt_dataset.py │ │ │ │ │ │ ├── query.py │ │ │ │ │ │ ├── retro_dataset.py │ │ │ │ │ │ └── utils.py │ │ │ │ │ └── utils.py │ │ │ │ ├── t5_dataset.py │ │ │ │ ├── utils.py │ │ │ │ └── utils_s3.py │ │ │ ├── dist_checkpointing │ │ │ │ ├── __init__.py │ │ │ │ ├── core.py │ │ │ │ ├── dict_utils.py │ │ │ │ ├── exchange_utils.py │ │ │ │ ├── mapping.py │ │ │ │ ├── optimizer.py │ │ │ │ ├── serialization.py │ │ │ │ ├── state_dict_transformation.py │ │ │ │ ├── strategies │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── async_utils.py │ │ │ │ │ ├── base.py │ │ │ │ │ ├── common.py │ │ │ │ │ ├── filesystem_async.py │ │ │ │ │ ├── fully_parallel.py │ │ │ │ │ ├── resharding.py │ │ │ │ │ ├── state_dict_saver.py │ │ │ │ │ ├── tensorstore.py │ │ │ │ │ ├── torch.py │ │ │ │ │ ├── two_stage.py │ │ │ │ │ └── zarr.py │ │ │ │ ├── utils.py │ │ │ │ └── validation.py │ │ │ ├── distributed │ │ │ │ ├── README.md │ │ │ │ ├── __init__.py │ │ │ │ ├── data_parallel_base.py │ │ │ │ ├── distributed_data_parallel.py │ │ │ │ ├── distributed_data_parallel_config.py │ │ │ │ ├── finalize_model_grads.py │ │ │ │ ├── param_and_grad_buffer.py │ │ │ │ └── torch_fully_sharded_data_parallel.py │ │ │ ├── enums.py │ │ │ ├── export │ │ │ │ ├── __init__.py │ │ │ │ ├── data_type.py │ │ │ │ ├── export_config.py │ │ │ │ ├── model_type.py │ │ │ │ └── trtllm │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── engine_builder │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── trtllm_engine_builder.py │ │ │ │ │ ├── model_to_trllm_mapping │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── default_conversion_dict.py │ │ │ │ │ ├── trt_model_config.py │ │ │ │ │ ├── trt_model_type.py │ │ │ │ │ ├── trtllm_helper.py │ │ │ │ │ ├── trtllm_layers.py │ │ │ │ │ └── trtllm_weights_converter │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── distributed_trtllm_model_weights_converter.py │ │ │ │ │ └── single_device_trtllm_model_weights_converter.py │ │ │ ├── extensions │ │ │ │ ├── __init__.py │ │ │ │ └── transformer_engine.py │ │ │ ├── fusions │ │ │ │ ├── __init__.py │ │ │ │ ├── fused_bias_dropout.py │ │ │ │ ├── fused_bias_geglu.py │ │ │ │ ├── fused_bias_gelu.py │ │ │ │ ├── fused_bias_swiglu.py │ │ │ │ ├── fused_cross_entropy.py │ │ │ │ ├── fused_layer_norm.py │ │ │ │ └── fused_softmax.py │ │ │ ├── inference │ │ │ │ ├── __init__.py │ │ │ │ ├── ammo_support │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── gpt │ │ │ │ │ │ ├── model_specs.py │ │ │ │ │ │ └── state_dict_hooks.py │ │ │ │ ├── common_inference_params.py │ │ │ │ ├── communication_utils.py │ │ │ │ ├── engines │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── abstract_engine.py │ │ │ │ │ └── mcore_engine.py │ │ │ │ ├── inference_request.py │ │ │ │ ├── model_inference_wrappers │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── abstract_model_inference_wrapper.py │ │ │ │ │ ├── gpt │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ └── gpt_inference_wrapper.py │ │ │ │ │ ├── inference_wrapper_config.py │ │ │ │ │ └── t5 │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ └── t5_inference_wrapper.py │ │ │ │ ├── modelopt_support │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── gpt │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── model_specs.py │ │ │ │ │ │ └── state_dict_hooks.py │ │ │ │ ├── sampling_params.py │ │ │ │ ├── scheduler.py │ │ │ │ ├── text_generation_controllers │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── encoder_decoder_text_generation_controller.py │ │ │ │ │ ├── simple_text_generation_controller.py │ │ │ │ │ └── text_generation_controller.py │ │ │ │ └── utils.py │ │ │ ├── inference_params.py │ │ │ ├── jit.py │ │ │ ├── model_parallel_config.py │ │ │ ├── models │ │ │ │ ├── T5 │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── t5_model.py │ │ │ │ │ └── t5_spec.py │ │ │ │ ├── __init__.py │ │ │ │ ├── bert │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── bert_layer_specs.py │ │ │ │ │ ├── bert_lm_head.py │ │ │ │ │ ├── bert_model.py │ │ │ │ │ └── pooler.py │ │ │ │ ├── common │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── embeddings │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── language_model_embedding.py │ │ │ │ │ │ ├── relative_pos_embedding.py │ │ │ │ │ │ ├── rope_utils.py │ │ │ │ │ │ ├── rotary_pos_embedding.py │ │ │ │ │ │ └── yarn_rotary_pos_embedding.py │ │ │ │ │ ├── language_module │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ └── language_module.py │ │ │ │ │ └── vision_module │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ └── vision_module.py │ │ │ │ ├── deepseek │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── deepseek_layer_specs.py │ │ │ │ │ └── deepseek_model.py │ │ │ │ ├── gpt │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── gpt_layer_specs.py │ │ │ │ │ ├── gpt_model.py │ │ │ │ │ └── moe_module_specs.py │ │ │ │ ├── mamba │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── mamba_layer_specs.py │ │ │ │ │ └── mamba_model.py │ │ │ │ ├── multimodal │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── llava_model.py │ │ │ │ │ └── llava_spec.py │ │ │ │ ├── retro │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── base_attention.py │ │ │ │ │ ├── config.py │ │ │ │ │ ├── decoder_attention.py │ │ │ │ │ ├── decoder_spec.py │ │ │ │ │ ├── encoder_attention.py │ │ │ │ │ ├── encoder_spec.py │ │ │ │ │ ├── model.py │ │ │ │ │ └── utils.py │ │ │ │ └── vision │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── clip_vit_model.py │ │ │ │ │ ├── multimodal_projector.py │ │ │ │ │ └── vit_layer_specs.py │ │ │ ├── num_microbatches_calculator.py │ │ │ ├── optimizer │ │ │ │ ├── __init__.py │ │ │ │ ├── clip_grads.py │ │ │ │ ├── distrib_optimizer.py │ │ │ │ ├── grad_scaler.py │ │ │ │ ├── optimizer.py │ │ │ │ └── optimizer_config.py │ │ │ ├── optimizer_param_scheduler.py │ │ │ ├── package_info.py │ │ │ ├── packed_seq_params.py │ │ │ ├── parallel_state.py │ │ │ ├── pipeline_parallel │ │ │ │ ├── __init__.py │ │ │ │ ├── p2p_communication.py │ │ │ │ └── schedules.py │ │ │ ├── requirements.txt │ │ │ ├── rerun_state_machine.py │ │ │ ├── ssm │ │ │ │ ├── __init__.py │ │ │ │ ├── mamba_block.py │ │ │ │ ├── mamba_hybrid_layer_allocation.py │ │ │ │ ├── mamba_layer.py │ │ │ │ ├── mamba_mixer.py │ │ │ │ └── triton_cache_manager.py │ │ │ ├── tensor_parallel │ │ │ │ ├── __init__.py │ │ │ │ ├── cross_entropy.py │ │ │ │ ├── data.py │ │ │ │ ├── layers.py │ │ │ │ ├── mappings.py │ │ │ │ ├── random.py │ │ │ │ └── utils.py │ │ │ ├── timers.py │ │ │ ├── transformer │ │ │ │ ├── __init__.py │ │ │ │ ├── attention.py │ │ │ │ ├── cuda_graphs.py │ │ │ │ ├── custom_layers │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── transformer_engine.py │ │ │ │ ├── deepseek_transformer_block.py │ │ │ │ ├── deepseek_transformer_layer.py │ │ │ │ ├── dot_product_attention.py │ │ │ │ ├── enums.py │ │ │ │ ├── identity_op.py │ │ │ │ ├── mlp.py │ │ │ │ ├── module.py │ │ │ │ ├── moe │ │ │ │ │ ├── README.md │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── experts.py │ │ │ │ │ ├── grouped_gemm_util.py │ │ │ │ │ ├── legacy_a2a_token_dispatcher.py │ │ │ │ │ ├── moe_layer.py │ │ │ │ │ ├── moe_utils.py │ │ │ │ │ ├── router.py │ │ │ │ │ ├── shared_experts.py │ │ │ │ │ ├── token_dispatcher.py │ │ │ │ │ └── upcycling_utils.py │ │ │ │ ├── multi_latent_attention.py │ │ │ │ ├── spec_utils.py │ │ │ │ ├── torch_layer_norm.py │ │ │ │ ├── torch_norm.py │ │ │ │ ├── transformer_block.py │ │ │ │ ├── transformer_config.py │ │ │ │ ├── transformer_layer.py │ │ │ │ └── utils.py │ │ │ └── utils.py │ │ ├── inference │ │ │ ├── __init__.py │ │ │ ├── algos │ │ │ │ ├── __init__.py │ │ │ │ └── distillation.py │ │ │ ├── arguments.py │ │ │ ├── checkpointing.py │ │ │ ├── docs │ │ │ │ └── distillation.md │ │ │ ├── endpoints │ │ │ │ ├── common.py │ │ │ │ └── completions.py │ │ │ ├── gpt │ │ │ │ ├── __init__.py │ │ │ │ ├── loss_func.py │ │ │ │ └── model_provider.py │ │ │ ├── static │ │ │ │ └── index.html │ │ │ ├── text_generation │ │ │ │ ├── __init__.py │ │ │ │ ├── api.py │ │ │ │ ├── beam_utils.py │ │ │ │ ├── communication.py │ │ │ │ ├── forward_step.py │ │ │ │ ├── generation.py │ │ │ │ ├── sampling.py │ │ │ │ └── tokenization.py │ │ │ └── text_generation_server.py │ │ ├── legacy │ │ │ ├── data │ │ │ │ ├── __init__.py │ │ │ │ ├── autoaugment.py │ │ │ │ ├── biencoder_dataset_utils.py │ │ │ │ ├── data_samplers.py │ │ │ │ ├── dataset_utils.py │ │ │ │ ├── ict_dataset.py │ │ │ │ ├── image_folder.py │ │ │ │ ├── multimodal_dataset.py │ │ │ │ ├── orqa_wiki_dataset.py │ │ │ │ ├── realm_dataset_utils.py │ │ │ │ ├── realm_index.py │ │ │ │ └── vit_dataset.py │ │ │ ├── fp16_deprecated │ │ │ │ └── loss_scaler.py │ │ │ ├── fused_kernels │ │ │ │ ├── __init__.py │ │ │ │ ├── compat.h │ │ │ │ ├── tests │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── test_fused_kernels.py │ │ │ │ └── type_shim.h │ │ │ ├── indexer.py │ │ │ ├── model │ │ │ │ ├── __init__.py │ │ │ │ ├── bert_model.py │ │ │ │ ├── biencoder_model.py │ │ │ │ ├── classification.py │ │ │ │ ├── enums.py │ │ │ │ ├── fused_bias_gelu.py │ │ │ │ ├── fused_layer_norm.py │ │ │ │ ├── fused_softmax.py │ │ │ │ ├── gpt_model.py │ │ │ │ ├── language_model.py │ │ │ │ ├── module.py │ │ │ │ ├── multiple_choice.py │ │ │ │ ├── realm_model.py │ │ │ │ ├── rms_norm.py │ │ │ │ ├── t5_model.py │ │ │ │ ├── transformer.py │ │ │ │ ├── utils.py │ │ │ │ └── vision │ │ │ │ │ ├── classification.py │ │ │ │ │ ├── dino.py │ │ │ │ │ ├── esvit_swin_backbone.py │ │ │ │ │ ├── inpainting.py │ │ │ │ │ ├── knn_monitor.py │ │ │ │ │ ├── mit_backbone.py │ │ │ │ │ ├── swin_backbone.py │ │ │ │ │ ├── utils.py │ │ │ │ │ └── vit_backbone.py │ │ │ └── mpu │ │ │ │ └── tests │ │ │ │ ├── __init__.py │ │ │ │ ├── commons.py │ │ │ │ ├── test_cross_entropy.py │ │ │ │ ├── test_data.py │ │ │ │ ├── test_initialize.py │ │ │ │ ├── test_layers.py │ │ │ │ └── test_random.py │ │ └── training │ │ │ ├── __init__.py │ │ │ ├── activations.py │ │ │ ├── arguments.py │ │ │ ├── async_utils.py │ │ │ ├── checkpointing.py │ │ │ ├── dist_signal_handler.py │ │ │ ├── ft_integration.py │ │ │ ├── global_vars.py │ │ │ ├── initialize.py │ │ │ ├── log_handler.py │ │ │ ├── one_logger_utils.py │ │ │ ├── theoretical_memory_usage.py │ │ │ ├── tokenizer │ │ │ ├── __init__.py │ │ │ ├── bert_tokenization.py │ │ │ ├── gpt2_tokenization.py │ │ │ ├── multimodal_tokenizer.py │ │ │ └── tokenizer.py │ │ │ ├── training.py │ │ │ ├── utils.py │ │ │ └── yaml_arguments.py │ ├── mypy.ini │ ├── output.log │ ├── pretrain_bert.py │ ├── pretrain_gpt.py │ ├── pretrain_ict.py │ ├── pretrain_mamba.py │ ├── pretrain_retro.py │ ├── pretrain_t5.py │ ├── pretrain_vision_classify.py │ ├── pretrain_vision_dino.py │ ├── pretrain_vision_inpaint.py │ ├── pretrain_vlm.py │ ├── pyproject.toml │ ├── pytest.ini │ ├── requirements │ │ ├── pytorch_24.01 │ │ │ └── requirements.txt │ │ ├── pytorch_24.07 │ │ │ └── requirements.txt │ │ └── pytorch_24.10 │ │ │ └── requirements.txt │ ├── setup.py │ ├── tasks │ │ ├── data_utils.py │ │ ├── ensemble_classifier.py │ │ ├── eval_utils.py │ │ ├── finetune_utils.py │ │ ├── glue │ │ │ ├── data.py │ │ │ ├── finetune.py │ │ │ ├── mnli.py │ │ │ └── qqp.py │ │ ├── main.py │ │ ├── msdp │ │ │ ├── README.md │ │ │ ├── evaluate.py │ │ │ ├── main.py │ │ │ ├── metrics.py │ │ │ ├── preprocessing.py │ │ │ └── prompt.py │ │ ├── orqa │ │ │ ├── README.md │ │ │ ├── evaluate_orqa.py │ │ │ ├── evaluate_utils.py │ │ │ ├── supervised │ │ │ │ ├── data.py │ │ │ │ ├── eval_utils.py │ │ │ │ └── finetune.py │ │ │ └── unsupervised │ │ │ │ ├── nq.py │ │ │ │ ├── qa_utils.py │ │ │ │ └── tokenizers.py │ │ ├── quantize │ │ │ └── calibrate_gpt.py │ │ ├── race │ │ │ ├── data.py │ │ │ └── finetune.py │ │ ├── vision │ │ │ ├── classification │ │ │ │ ├── classification.py │ │ │ │ └── eval_utils.py │ │ │ ├── finetune_utils.py │ │ │ ├── main.py │ │ │ └── segmentation │ │ │ │ ├── cityscapes.py │ │ │ │ ├── data.py │ │ │ │ ├── finetune_segformer.py │ │ │ │ ├── finetune_setr.py │ │ │ │ ├── metrics.py │ │ │ │ ├── seg_heads.py │ │ │ │ ├── seg_models.py │ │ │ │ ├── transforms.py │ │ │ │ └── utils.py │ │ └── zeroshot_gpt │ │ │ ├── datasets.py │ │ │ ├── detokenizer.py │ │ │ └── evaluate.py │ ├── tests │ │ ├── __init__.py │ │ ├── functional_tests │ │ │ ├── __init__.py │ │ │ ├── python_test_utils │ │ │ │ ├── __init__.py │ │ │ │ ├── common.py │ │ │ │ ├── conftest.py │ │ │ │ ├── get_test_results_from_tensorboard_logs.py │ │ │ │ ├── test_regular_pipeline.py │ │ │ │ └── test_resume_checkpoint_pipeline.py │ │ │ ├── shell_test_utils │ │ │ │ ├── _run_training.sh │ │ │ │ └── run_ci_test.sh │ │ │ └── test_cases │ │ │ │ ├── bert │ │ │ │ ├── bert_mr_mcore_tp2_pp2_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── bert_mr_mcore_tp2_pp2_frozen_resume_torch_dist_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── bert_mr_mcore_tp2_pp2_local_spec_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── bert_mr_mcore_tp2_pp2_resume_torch_dist_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── bert_mr_mcore_tp2_pp2_resume_torch_dist_local_spec_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── bert_mr_tp1_pp4_vp2_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── bert_mr_tp1_pp4_vp2_resume_torch_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── bert_mr_tp2_pp2_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── bert_mr_tp2_pp2_resume_torch_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── bert_nightly_dgx_a100_1N8G_mcore_tp1_pp2 │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── bert_nightly_dgx_a100_1N8G_mcore_tp1_pp4_vp2 │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── bert_nightly_dgx_a100_1N8G_mcore_tp4_pp1 │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── bert_nightly_dgx_a100_1N8G_tp1_pp2 │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── bert_nightly_dgx_a100_1N8G_tp4_pp1 │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ └── bert_release │ │ │ │ │ ├── golden_values_0.9.0.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── common │ │ │ │ └── ckpt_converter │ │ │ │ │ ├── __main__.py │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt-nemo │ │ │ │ ├── gpt3-nemo_126m_mr_mbs1_gbs8_mcore_te_8experts_tp2_ep2_pp1_dgx_a100_1N8G │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3-nemo_126m_mr_mbs1_gbs8_mcore_te_tp2_pp4_vp3_seq_par_overlap_p2p_dgx_a100_1N8G │ │ │ │ │ └── model_config.yaml │ │ │ │ └── gpt3-nemo_126m_mr_mbs4_gbs64_mcore_te_tp1_pp1_dgx_a100_1N8G │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt │ │ │ │ ├── gpt3_15b_8t_release │ │ │ │ │ ├── golden_values_0.8.0.json │ │ │ │ │ ├── golden_values_0.9.0.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_15b_8t_release_sm │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_dist_optimizer_overlap_grad_reduce_param_gather │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_fsdp2_resume_torch_dist_te │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2 │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_fp16 │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_resume_torch_dist │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4 │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_resume_torch_dist │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_resume_torch_dist_te_4experts2parallel │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_resume_torch_dist_te_2experts │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1 │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp1_dist_optimizer_overlap_grad_reduce │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp1_overlap_grad_reduce │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp2 │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp2_resume_torch │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp4 │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp4_overlap_grad_reduce │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp4_resume_torch │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp4_vp1_overlap_grad_reduce │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_345m_nightly_dgx_a100_1N8G_tp2_pp2_overlap_grad_reduce │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_345m_nightly_dgx_a100_1N8G_tp2_pp2_resume_torch_4experts │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_345m_nightly_dgx_a100_1N8G_tp2_pp2_resume_torch_overlap_grad_reduce │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_345m_nightly_dgx_a100_1N8G_tp4_pp1 │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_345m_nightly_dgx_a100_1N8G_tp4_pp1_overlap_grad_reduce │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_345m_nightly_dgx_a100_1N8G_tp4_pp1_resume_torch │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_345m_weekly_dgx_h100_1N8G_mcore_nondet_tp1_pp1_fp8_no_model_parallel │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp1_pp1_bf16_baseline │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp1_pp1_fp8_no_model_parallel │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp1_pp2_fp8_pp │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_fp8_tp_pp │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_fp8_tp_pp_sp │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_native_fp8_tp_pp_sp │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_pp2_fp8_tp_pp │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_7b_mr_dgx_a100_1N8G_tp1_pp4_memory_speed │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_7b_mr_dgx_a100_1N8G_tp4_pp1_memory_speed │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp1_pp1_uniform_full_recompute_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp1_pp2_cp4_a2a_p2p_nondeterministic_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_cp4_a2a_p2p_nondeterministic_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp1_pp2_rope_embeddings_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp1_pp2_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp1_pp4_disable_bias_linear_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_sequence_parallel_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_swiglu_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp1_pp4_sequence_parallel_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp1_pp4_swiglu_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp1_pp4_untie_embeddings_and_outputs_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp1_pp4_vp1_calculate_per_token_loss_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp1_pp4_vp1_decoupled_lr_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp1_pp4_vp1_tunable_overlap_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp2_pp1_cp2_nondeterministic_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp2_pp1_fsdp2_resume_torch_dist_dgx_a100_1N8G │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp2_pp1_multi_dist_optimizer_instances_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_groupedGEMM_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_top2router_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_top2router_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp2_pp1_te_8experts_etp1_ep4_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp2_pp2_ddp_average_in_collective_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp2_pp2_defer_embedding_wgrad_compute_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp2_pp2_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp2_pp2_no_create_attention_mask_in_dataloader_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp2_pp2_no_mmap_bin_files_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_mmap_bin_files_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp4_pp1_qk_layernorm_test_mode_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_tp2_pp2_resume_torch_dist_uninstall_te_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_mcore_tp2_pp2_uninstall_te_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_te_tp2_pp2_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_te_tp2_pp2_resume_torch_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_tp1_pp4_vp1_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_tp1_pp4_vp1_resume_torch_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_tp2_pp2_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── gpt3_mr_tp2_pp2_resume_torch_dgx_a100_1N8G │ │ │ │ │ └── model_config.yaml │ │ │ │ └── gpt3_nightly_mcore_te_tp2_pp1_modelopt_distill_resume │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── mixtral │ │ │ │ ├── mixtral_8x22b_tp2pp8ep8vpp1_release │ │ │ │ │ ├── golden_values_0.9.0.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── mixtral_8x7b_alltoall_tp2pp4ep4_release │ │ │ │ │ ├── golden_values_0.8.0.json │ │ │ │ │ ├── golden_values_0.9.0.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── mixtral_8x7b_alltoall_tp2pp4ep4_release_sm │ │ │ │ │ └── model_config.yaml │ │ │ │ └── mixtral_8x7b_tp1pp4ep8vpp8_release │ │ │ │ │ ├── golden_values_0.9.0.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── multimodal-llava │ │ │ │ ├── multimodal_llava_mr_mcore_te_tp1_pp1_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── multimodal_llava_mr_mcore_te_tp2_pp3_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── multimodal_llava_mr_mcore_te_tp4_pp1_etp3_dgx_a100_1N7G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── multimodal_llava_mr_mcore_te_tp4_pp1_freeze_vit_freeze_lm_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ ├── multimodal_llava_mr_mcore_te_tp4_pp1_freeze_vit_freeze_lm_dist_opt_dgx_a100_1N8G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ └── multimodal_llava_mr_mcore_te_tp4_pp1_resume_torch_etp3_dgx_a100_1N7G │ │ │ │ │ ├── golden_values_dev.json │ │ │ │ │ ├── golden_values_lts.json │ │ │ │ │ └── model_config.yaml │ │ │ │ └── t5 │ │ │ │ ├── t5_220m_mr_mcore_te_tp2_pp2_dgx_a100_1N8G │ │ │ │ ├── golden_values_dev.json │ │ │ │ ├── golden_values_lts.json │ │ │ │ └── model_config.yaml │ │ │ │ ├── t5_220m_mr_mcore_te_tp2_pp2_frozen_resume_torch_dgx_a100_1N8G │ │ │ │ ├── golden_values_dev.json │ │ │ │ ├── golden_values_lts.json │ │ │ │ └── model_config.yaml │ │ │ │ ├── t5_220m_mr_mcore_te_tp2_pp2_resume_torch_dgx_a100_1N8G │ │ │ │ ├── golden_values_dev.json │ │ │ │ ├── golden_values_lts.json │ │ │ │ └── model_config.yaml │ │ │ │ ├── t5_220m_mr_mcore_te_tp4_pp1_dgx_a100_1N8G │ │ │ │ ├── golden_values_dev.json │ │ │ │ ├── golden_values_lts.json │ │ │ │ └── model_config.yaml │ │ │ │ ├── t5_220m_mr_mcore_te_tp4_pp1_resume_torch_dist_dgx_a100_1N8G │ │ │ │ ├── golden_values_dev.json │ │ │ │ ├── golden_values_lts.json │ │ │ │ └── model_config.yaml │ │ │ │ ├── t5_220m_mr_mcore_tp2_pp2_dgx_a100_1N8G │ │ │ │ ├── golden_values_dev.json │ │ │ │ ├── golden_values_lts.json │ │ │ │ └── model_config.yaml │ │ │ │ ├── t5_220m_mr_mcore_tp2_pp2_resume_torch_dgx_a100_1N8G │ │ │ │ ├── golden_values_dev.json │ │ │ │ ├── golden_values_lts.json │ │ │ │ └── model_config.yaml │ │ │ │ ├── t5_220m_mr_mcore_tp4_pp1_dgx_a100_1N8G │ │ │ │ ├── golden_values_dev.json │ │ │ │ ├── golden_values_lts.json │ │ │ │ └── model_config.yaml │ │ │ │ ├── t5_220m_mr_mcore_tp4_pp1_resume_torch_dist_dgx_a100_1N8G │ │ │ │ ├── golden_values_dev.json │ │ │ │ ├── golden_values_lts.json │ │ │ │ └── model_config.yaml │ │ │ │ ├── t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp1_pp1_vp1_resume_torch │ │ │ │ ├── golden_values_dev.json │ │ │ │ ├── golden_values_lts.json │ │ │ │ └── model_config.yaml │ │ │ │ ├── t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1 │ │ │ │ ├── golden_values_dev.json │ │ │ │ ├── golden_values_lts.json │ │ │ │ └── model_config.yaml │ │ │ │ ├── t5_220m_nightly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel │ │ │ │ ├── golden_values_dev.json │ │ │ │ ├── golden_values_lts.json │ │ │ │ └── model_config.yaml │ │ │ │ ├── t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1 │ │ │ │ ├── golden_values_dev.json │ │ │ │ ├── golden_values_lts.json │ │ │ │ └── model_config.yaml │ │ │ │ ├── t5_220m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_vp1_resume_torch │ │ │ │ ├── golden_values_dev.json │ │ │ │ ├── golden_values_lts.json │ │ │ │ └── model_config.yaml │ │ │ │ ├── t5_220m_nightly_dgx_a100_1N8G_mcore_tp2_pp1_vp1 │ │ │ │ ├── golden_values_dev.json │ │ │ │ ├── golden_values_lts.json │ │ │ │ └── model_config.yaml │ │ │ │ ├── t5_220m_weekly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1 │ │ │ │ └── golden_values_lts.json │ │ │ │ ├── t5_220m_weekly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel │ │ │ │ └── golden_values_lts.json │ │ │ │ └── t5_release │ │ │ │ ├── golden_values_0.9.0.json │ │ │ │ └── model_config.yaml │ │ ├── test_utils │ │ │ ├── python_scripts │ │ │ │ ├── common.py │ │ │ │ ├── download_golden_values.py │ │ │ │ ├── generate_jet_trigger_job.py │ │ │ │ ├── generate_local_jobs.py │ │ │ │ └── launch_jet_workload.py │ │ │ ├── recipes │ │ │ │ ├── _build-mcore-dev.yaml │ │ │ │ ├── _build-mcore-lts.yaml │ │ │ │ ├── _build-nemo.yaml │ │ │ │ ├── bert.yaml │ │ │ │ ├── gpt-modelopt.yaml │ │ │ │ ├── gpt-nemo.yaml │ │ │ │ ├── gpt.yaml │ │ │ │ ├── multimodal-llava.yaml │ │ │ │ ├── t5.yaml │ │ │ │ └── unit-tests.yaml │ │ │ └── shell_scripts │ │ │ │ └── notify.sh │ │ └── unit_tests │ │ │ ├── __init__.py │ │ │ ├── conftest.py │ │ │ ├── data │ │ │ ├── __init__.py │ │ │ ├── test_bin_reader.py │ │ │ ├── test_builder.py │ │ │ ├── test_gpt_dataset.py │ │ │ ├── test_multimodal_dataset.py │ │ │ ├── test_preprocess_data.py │ │ │ └── test_preprocess_mmdata.py │ │ │ ├── dist_checkpointing │ │ │ ├── __init__.py │ │ │ ├── conftest.py │ │ │ ├── models │ │ │ │ ├── __init__.py │ │ │ │ ├── common.py │ │ │ │ ├── test_bert_model.py │ │ │ │ ├── test_gpt_model.py │ │ │ │ ├── test_mamba.py │ │ │ │ ├── test_mlp_glu.py │ │ │ │ ├── test_moe_experts.py │ │ │ │ ├── test_retro_model.py │ │ │ │ └── test_t5_model.py │ │ │ ├── test_async_save.py │ │ │ ├── test_cached_metadata.py │ │ │ ├── test_flattened_resharding.py │ │ │ ├── test_fp8.py │ │ │ ├── test_fully_parallel.py │ │ │ ├── test_local.py │ │ │ ├── test_mapping.py │ │ │ ├── test_nonpersistent.py │ │ │ ├── test_optimizer.py │ │ │ ├── test_serialization.py │ │ │ └── utils.py │ │ │ ├── distributed │ │ │ ├── test_grad_reduce_for_replicated_embedder.py │ │ │ ├── test_grad_sync_with_expert_parallel.py │ │ │ └── test_param_and_grad_buffer.py │ │ │ ├── export │ │ │ └── trtllm │ │ │ │ ├── __init__.py │ │ │ │ ├── test_distributed_fp8.py │ │ │ │ ├── test_single_device_fp8.py │ │ │ │ ├── test_trtllm_distributed_gpu_converter.py │ │ │ │ ├── test_trtllm_helper.py │ │ │ │ ├── test_trtllm_layers.py │ │ │ │ └── test_trtllm_single_device_converter.py │ │ │ ├── fusions │ │ │ └── test_torch_softmax.py │ │ │ ├── inference │ │ │ ├── __init__.py │ │ │ ├── engines │ │ │ │ ├── __init__.py │ │ │ │ └── test_mcore_engine.py │ │ │ ├── model_inference_wrappers │ │ │ │ ├── __init__.py │ │ │ │ ├── gpt │ │ │ │ │ └── test_gpt_inference_wrapper.py │ │ │ │ ├── t5 │ │ │ │ │ └── test_t5_inference_wrapper.py │ │ │ │ └── test_model_inference_wrapper_config.py │ │ │ ├── test_common_inference_params.py │ │ │ ├── test_flash_decode.py │ │ │ ├── test_inference_utils.py │ │ │ ├── test_modelopt_gpt_model.py │ │ │ ├── test_scheduler.py │ │ │ └── text_generation_controllers │ │ │ │ ├── __init__.py │ │ │ │ ├── test_encoder_decoder_text_generation_controller.py │ │ │ │ └── test_simple_text_generation_controller.py │ │ │ ├── models │ │ │ ├── __init__.py │ │ │ ├── test_base_embedding.py │ │ │ ├── test_bert_model.py │ │ │ ├── test_clip_vit_model.py │ │ │ ├── test_gpt_model.py │ │ │ ├── test_llava_model.py │ │ │ ├── test_mamba_model.py │ │ │ ├── test_multimodal_projector.py │ │ │ └── test_t5_model.py │ │ │ ├── pipeline_parallel │ │ │ ├── __init__.py │ │ │ ├── test_helpers.py │ │ │ └── test_schedules.py │ │ │ ├── ssm │ │ │ ├── test_mamba_block.py │ │ │ ├── test_mamba_hybrid_layer_allocation.py │ │ │ ├── test_mamba_layer.py │ │ │ └── test_mamba_mixer.py │ │ │ ├── tensor_parallel │ │ │ ├── __init__.py │ │ │ ├── test_cross_entropy.py │ │ │ ├── test_data.py │ │ │ ├── test_initialization.py │ │ │ ├── test_layers.py │ │ │ ├── test_mappings.py │ │ │ ├── test_random.py │ │ │ └── test_tensor_parallel_utils.py │ │ │ ├── test_basic.py │ │ │ ├── test_imports.py │ │ │ ├── test_inference.py │ │ │ ├── test_local_multi_tensor_fns.py │ │ │ ├── test_model_configs.py │ │ │ ├── test_num_microbatches_calculator.py │ │ │ ├── test_optimizer.py │ │ │ ├── test_optimizer_param_scheduler.py │ │ │ ├── test_parallel_state.py │ │ │ ├── test_tokenizer.py │ │ │ ├── test_training.py │ │ │ ├── test_utilities.py │ │ │ ├── test_utils.py │ │ │ └── transformer │ │ │ ├── __init__.py │ │ │ ├── moe │ │ │ ├── __init__.py │ │ │ ├── conftest.py │ │ │ ├── test_a2a_token_dispatcher.py │ │ │ ├── test_aux_loss.py │ │ │ ├── test_grouped_mlp.py │ │ │ ├── test_moe_layer.py │ │ │ ├── test_routers.py │ │ │ ├── test_sequential_mlp.py │ │ │ ├── test_shared_experts.py │ │ │ ├── test_token_dispatcher.py │ │ │ └── test_upcycling.py │ │ │ ├── test_attention.py │ │ │ ├── test_attention_packed_seq.py │ │ │ ├── test_core_attention.py │ │ │ ├── test_mlp.py │ │ │ ├── test_module.py │ │ │ ├── test_multi_latent_attention.py │ │ │ ├── test_relative_attention.py │ │ │ ├── test_retro_attention.py │ │ │ ├── test_rope.py │ │ │ ├── test_spec_customization.py │ │ │ ├── test_transformer_block.py │ │ │ └── test_transformer_layer.py │ ├── tools │ │ ├── autoformat.sh │ │ ├── bert_embedding │ │ │ ├── __init__.py │ │ │ ├── dataset.py │ │ │ ├── embed.py │ │ │ ├── external_libs.py │ │ │ └── huggingface.py │ │ ├── checkpoint │ │ │ ├── convert.py │ │ │ ├── hybrid_conversion.py │ │ │ ├── loader_llama_mistral.py │ │ │ ├── loader_mcore.py │ │ │ ├── loader_megatron.py │ │ │ ├── loader_mixtral_hf.py │ │ │ ├── saver_mcore.py │ │ │ ├── saver_megatron.py │ │ │ ├── schema_base.py │ │ │ ├── schema_mcore.py │ │ │ └── utils.py │ │ ├── copyright.sh │ │ ├── linter.py │ │ ├── merge_datasets.py │ │ ├── openwebtext │ │ │ ├── README.md │ │ │ ├── add_id.py │ │ │ ├── blacklist_urls.py │ │ │ ├── cleanup_dataset.py │ │ │ ├── cleanup_fix_dataset.py │ │ │ ├── filter_ngrams.py │ │ │ ├── find_duplicates.py │ │ │ ├── group_duplicate_url.py │ │ │ ├── merge_jsons.py │ │ │ └── remove_group_duplicates.py │ │ ├── preprocess_data.py │ │ ├── preprocess_data_nmt.py │ │ ├── preprocess_mmdata.py │ │ ├── report_theoretical_memory.py │ │ ├── retro │ │ │ ├── README.md │ │ │ ├── build_db.md │ │ │ ├── cli │ │ │ │ ├── __init__.py │ │ │ │ ├── __main__.py │ │ │ │ └── cli.py │ │ │ ├── config_utils.py │ │ │ ├── docker │ │ │ │ └── Dockerfile │ │ │ ├── preprocess_data.py │ │ │ ├── sft │ │ │ │ ├── README.md │ │ │ │ ├── dataset_conv.py │ │ │ │ ├── open_inst.sh │ │ │ │ ├── sft_retro.py │ │ │ │ └── sft_retro_lm.sh │ │ │ └── text_generation │ │ │ │ ├── evaluate.py │ │ │ │ ├── metrics.py │ │ │ │ ├── retro_api.py │ │ │ │ ├── retro_generate.sh │ │ │ │ ├── retro_generation.py │ │ │ │ └── retro_text_generation.py │ │ ├── run_mamba_text_generation_server.py │ │ ├── run_text_generation_server.py │ │ ├── run_vlm_text_generation.py │ │ └── text_generation_cli.py │ └── wandb │ │ ├── debug-cli.root.log │ │ └── settings └── README.md ├── docs └── images │ ├── flowchat.png │ └── icon.png ├── pylint.conf └── vendor_zoo ├── AWS ├── Inferentia.json ├── Inferentia2.json ├── Trainium.json ├── mem_16.png └── mem_17.png ├── Graphcore ├── IPU C600.json ├── image_12.png └── mem_12.png ├── Habana ├── Gaudi2.json ├── image_14.png ├── mem_14.png └── pe_14.png ├── Moffett ├── S10.json ├── S30.json ├── S4.json ├── image_7.png ├── image_8.png ├── image_9.png ├── mem_7.png ├── mem_8.png ├── mem_9.png ├── pe_7.png └── pe_9.png ├── NVIDIA ├── A10-PCIe.json ├── A100-PCIe.json ├── A100-SXM4.json ├── A30-PCIe.json ├── H100-PCIe.json ├── H100-SXM5.json ├── T4.json ├── image_0.png ├── image_1.png ├── image_2.png ├── image_3.png ├── image_4.png ├── image_5.png └── image_6.png ├── QUALCOMM ├── AIC100.json ├── image_10.png ├── mem_10.png └── pe_10.png └── Stream ├── STC920.json └── image_13.png /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/.gitignore -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/CONTRIBUTING.md -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/LICENSE -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | ByteMLPerf 2 | Copyright 2023 ByteDance Ltd. and/or its affiliates. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/README.md -------------------------------------------------------------------------------- /README.zh_CN.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/README.zh_CN.md -------------------------------------------------------------------------------- /VERSION: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/VERSION -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/README.md -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/README.zh_CN.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/README.zh_CN.md -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/__init__.py -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/backends/CPU/CPU.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/backends/CPU/CPU.json -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/backends/CPU/calculate_cpu_diff.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/backends/CPU/calculate_cpu_diff.py -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/backends/CPU/calculate_cpu_diff.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/backends/CPU/calculate_cpu_diff.sh -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/backends/CPU/compile_backend_cpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/backends/CPU/compile_backend_cpu.py -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/backends/CPU/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/backends/CPU/requirements.txt -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/backends/CPU/runtime_backend_cpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/backends/CPU/runtime_backend_cpu.py -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/backends/HPU/HPU.json: -------------------------------------------------------------------------------- 1 | [ 2 | ] -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/backends/HPU/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/backends/HPU/README.md -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/backends/HPU/bert/bf16.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/backends/HPU/bert/bf16.txt -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/backends/HPU/bert/fp32.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/backends/HPU/bert/fp32.txt -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/backends/HPU/compile_backend_hpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/backends/HPU/compile_backend_hpu.py -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/backends/HPU/gaudi.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/backends/HPU/gaudi.png -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/backends/HPU/gaudi2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/backends/HPU/gaudi2.png -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/backends/HPU/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/backends/HPU/requirements.txt -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/backends/HPU/runtime_backend_hpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/backends/HPU/runtime_backend_hpu.py -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/backends/IPU/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/backends/IPU/.gitignore -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/backends/IPU/IPU.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/backends/IPU/IPU.json -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/backends/IPU/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/backends/IPU/README.md -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/backends/IPU/README.zh_CN.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/backends/IPU/README.zh_CN.md -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/backends/IPU/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/backends/IPU/__init__.py -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/backends/IPU/compile_backend_ipu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/backends/IPU/compile_backend_ipu.py -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/backends/IPU/engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/backends/IPU/engine.py -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/backends/IPU/engine_poprt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/backends/IPU/engine_poprt.py -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/backends/IPU/passes/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/backends/IPU/passes/__init__.py -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/backends/IPU/passes/deberta_pack.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/backends/IPU/passes/deberta_pack.py -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/backends/IPU/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/backends/IPU/requirements.txt -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/backends/IPU/runtime_backend_ipu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/backends/IPU/runtime_backend_ipu.py -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/backends/SPU/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/backends/SPU/README.md -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/backends/SPU/base_compile.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/backends/SPU/base_compile.py -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/backends/SPU/compile_backend_spu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/backends/SPU/compile_backend_spu.py -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/backends/SPU/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/backends/SPU/requirements.txt -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/backends/SPU/runtime_backend_spu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/backends/SPU/runtime_backend_spu.py -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/backends/SPU/spu.json: -------------------------------------------------------------------------------- 1 | [ 2 | 3 | ] -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/backends/STC/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/backends/STC/README.md -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/backends/STC/STC.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/backends/STC/STC.jpg -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/backends/STC/STC.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/backends/STC/STC.json -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/backends/STC/compile_backend_stc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/backends/STC/compile_backend_stc.py -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/backends/STC/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/backends/STC/requirements.txt -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/backends/STC/runtime_backend_stc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/backends/STC/runtime_backend_stc.py -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/backends/compile_backend.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/backends/compile_backend.py -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/backends/runtime_backend.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/backends/runtime_backend.py -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/core/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/core/configs/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/core/configs/backend_store.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/core/configs/backend_store.py -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/core/configs/dataset_store.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/core/configs/dataset_store.py -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/core/configs/workload_store.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/core/configs/workload_store.py -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/core/perf_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/core/perf_engine.py -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/datasets/data_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/datasets/data_loader.py -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/datasets/fake_dataset/data_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/datasets/fake_dataset/data_loader.py -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/datasets/fake_dataset/test_accuracy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/datasets/fake_dataset/test_accuracy.py -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/datasets/open_cail2019/data_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/datasets/open_cail2019/data_loader.py -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/datasets/open_cail2019/test_accuracy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/datasets/open_cail2019/test_accuracy.py -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/datasets/open_cifar/data_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/datasets/open_cifar/data_loader.py -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/datasets/open_cifar/test_accuracy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/datasets/open_cifar/test_accuracy.py -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/datasets/open_imagenet/data_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/datasets/open_imagenet/data_loader.py -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/datasets/open_imagenet/test_accuracy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/datasets/open_imagenet/test_accuracy.py -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/datasets/open_squad/bert/evaluate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/datasets/open_squad/bert/evaluate.py -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/datasets/open_squad/data_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/datasets/open_squad/data_loader.py -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/datasets/open_squad/test_accuracy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/datasets/open_squad/test_accuracy.py -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/datasets/test_accuracy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/datasets/test_accuracy.py -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/launch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/launch.py -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/model_zoo/albert-torch-fp32.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/model_zoo/albert-torch-fp32.json -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/model_zoo/bert-tf-fp32.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/model_zoo/bert-tf-fp32.json -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/model_zoo/bert-torch-fp32.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/model_zoo/bert-torch-fp32.json -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/model_zoo/clip-onnx-fp32.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/model_zoo/clip-onnx-fp32.json -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/model_zoo/deberta-torch-fp32.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/model_zoo/deberta-torch-fp32.json -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/model_zoo/resnet50-tf-fp32.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/model_zoo/resnet50-tf-fp32.json -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/model_zoo/resnet50-torch-fp32.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/model_zoo/resnet50-torch-fp32.json -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/model_zoo/roberta-torch-fp32.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/model_zoo/roberta-torch-fp32.json -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/model_zoo/roformer-tf-fp32.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/model_zoo/roformer-tf-fp32.json -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/model_zoo/swin-large-torch-fp32.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/model_zoo/swin-large-torch-fp32.json -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/model_zoo/unet-onnx-fp32.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/model_zoo/unet-onnx-fp32.json -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/model_zoo/vae-decoder-onnx-fp32.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/model_zoo/vae-decoder-onnx-fp32.json -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/model_zoo/vae-encoder-onnx-fp32.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/model_zoo/vae-encoder-onnx-fp32.json -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/model_zoo/videobert-onnx-fp32.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/model_zoo/videobert-onnx-fp32.json -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/model_zoo/widedeep-tf-fp32.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/model_zoo/widedeep-tf-fp32.json -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/model_zoo/yolov5-onnx-fp32.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/model_zoo/yolov5-onnx-fp32.json -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/prepare_model_and_dataset.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/prepare_model_and_dataset.sh -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/reports/README: -------------------------------------------------------------------------------- 1 | benchmark reports 2 | -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/reports/SPU/bert-torch-fp32/result.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/reports/SPU/bert-torch-fp32/result.json -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/reports/STC/bert-tf-fp32/result.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/reports/STC/bert-tf-fp32/result.json -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/reports/STC/bert-torch-fp32/result.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/reports/STC/bert-torch-fp32/result.json -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/reports/reports_summary.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/reports/reports_summary.png -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/reports/reports_summary.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/reports/reports_summary.py -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/requirements.txt -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/tools/build_pdf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/tools/build_pdf.py -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/tools/convert.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/tools/convert.sh -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/tools/frozen_to_saved.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/tools/frozen_to_saved.py -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/tools/h5_to_frozen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/tools/h5_to_frozen.py -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/tools/model_trt_convert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/tools/model_trt_convert.py -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/tools/mxnet_to_onnx.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/tools/mxnet_to_onnx.py -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/tools/onnx_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/tools/onnx_utils.py -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/tools/requirements.txt: -------------------------------------------------------------------------------- 1 | tensorflow>=2.6.0 2 | tf2onnx 3 | numpy 4 | torch==1.9.1 -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/tools/saved_to_frozen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/tools/saved_to_frozen.py -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/tools/saved_to_onnx.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/tools/saved_to_onnx.py -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/tools/tf_fp32_to_fp16.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/tools/tf_fp32_to_fp16.py -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/tools/tf_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/tools/tf_utils.py -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/tools/torch_to_onnx.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/tools/torch_to_onnx.py -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/version.py: -------------------------------------------------------------------------------- 1 | __version__ = '1.0.0' 2 | -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/workloads/albert-torch-fp32.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/workloads/albert-torch-fp32.json -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/workloads/bert-tf-fp32.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/workloads/bert-tf-fp32.json -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/workloads/bert-torch-fp32.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/workloads/bert-torch-fp32.json -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/workloads/clip-onnx-fp32.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/workloads/clip-onnx-fp32.json -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/workloads/deberta-torch-fp32.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/workloads/deberta-torch-fp32.json -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/workloads/resnet50-tf-fp32.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/workloads/resnet50-tf-fp32.json -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/workloads/resnet50-torch-fp32.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/workloads/resnet50-torch-fp32.json -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/workloads/roberta-torch-fp32.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/workloads/roberta-torch-fp32.json -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/workloads/roformer-tf-fp32.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/workloads/roformer-tf-fp32.json -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/workloads/swin-large-torch-fp32.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/workloads/swin-large-torch-fp32.json -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/workloads/unet-onnx-fp32.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/workloads/unet-onnx-fp32.json -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/workloads/vae-decoder-onnx-fp32.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/workloads/vae-decoder-onnx-fp32.json -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/workloads/vae-encoder-onnx-fp32.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/workloads/vae-encoder-onnx-fp32.json -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/workloads/videobert-onnx-fp32.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/workloads/videobert-onnx-fp32.json -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/workloads/widedeep-tf-fp32.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/workloads/widedeep-tf-fp32.json -------------------------------------------------------------------------------- /byte_infer_perf/general_perf/workloads/yolov5-onnx-fp32.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/general_perf/workloads/yolov5-onnx-fp32.json -------------------------------------------------------------------------------- /byte_infer_perf/llm_perf/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/llm_perf/README.md -------------------------------------------------------------------------------- /byte_infer_perf/llm_perf/backends/GPU/gpu_ckpt_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/llm_perf/backends/GPU/gpu_ckpt_loader.py -------------------------------------------------------------------------------- /byte_infer_perf/llm_perf/backends/GPU/gpu_inferencer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/llm_perf/backends/GPU/gpu_inferencer.py -------------------------------------------------------------------------------- /byte_infer_perf/llm_perf/backends/GPU/gpu_mp_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/llm_perf/backends/GPU/gpu_mp_engine.py -------------------------------------------------------------------------------- /byte_infer_perf/llm_perf/backends/GPU/gpu_sampler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/llm_perf/backends/GPU/gpu_sampler.py -------------------------------------------------------------------------------- /byte_infer_perf/llm_perf/backends/GPU/gpu_scheduler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/llm_perf/backends/GPU/gpu_scheduler.py -------------------------------------------------------------------------------- /byte_infer_perf/llm_perf/backends/GPU/model_impl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/llm_perf/backends/GPU/model_impl/__init__.py -------------------------------------------------------------------------------- /byte_infer_perf/llm_perf/backends/GPU/model_impl/gpu_chatglm2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/llm_perf/backends/GPU/model_impl/gpu_chatglm2.py -------------------------------------------------------------------------------- /byte_infer_perf/llm_perf/backends/GPU/model_impl/gpu_falcon.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/llm_perf/backends/GPU/model_impl/gpu_falcon.py -------------------------------------------------------------------------------- /byte_infer_perf/llm_perf/backends/GPU/model_impl/gpu_llama3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/llm_perf/backends/GPU/model_impl/gpu_llama3.py -------------------------------------------------------------------------------- /byte_infer_perf/llm_perf/backends/GPU/model_impl/gpu_mixtral.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/llm_perf/backends/GPU/model_impl/gpu_mixtral.py -------------------------------------------------------------------------------- /byte_infer_perf/llm_perf/backends/GPU/model_impl/modeling_falcon.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/llm_perf/backends/GPU/model_impl/modeling_falcon.py -------------------------------------------------------------------------------- /byte_infer_perf/llm_perf/backends/GPU/model_impl/modeling_llama3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/llm_perf/backends/GPU/model_impl/modeling_llama3.py -------------------------------------------------------------------------------- /byte_infer_perf/llm_perf/backends/GPU/model_impl/modeling_mixtral.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/llm_perf/backends/GPU/model_impl/modeling_mixtral.py -------------------------------------------------------------------------------- /byte_infer_perf/llm_perf/backends/GPU/model_impl/split_falcon.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/llm_perf/backends/GPU/model_impl/split_falcon.py -------------------------------------------------------------------------------- /byte_infer_perf/llm_perf/backends/GPU/model_impl/split_llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/llm_perf/backends/GPU/model_impl/split_llama.py -------------------------------------------------------------------------------- /byte_infer_perf/llm_perf/backends/GPU/model_impl/split_mixtral.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/llm_perf/backends/GPU/model_impl/split_mixtral.py -------------------------------------------------------------------------------- /byte_infer_perf/llm_perf/backends/GPU/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/llm_perf/backends/GPU/setup.py -------------------------------------------------------------------------------- /byte_infer_perf/llm_perf/bench_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/llm_perf/bench_model.py -------------------------------------------------------------------------------- /byte_infer_perf/llm_perf/benchmark/bench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/llm_perf/benchmark/bench.py -------------------------------------------------------------------------------- /byte_infer_perf/llm_perf/benchmark/tensorrt-llm/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/llm_perf/benchmark/tensorrt-llm/README.md -------------------------------------------------------------------------------- /byte_infer_perf/llm_perf/benchmark/tensorrt-llm/bench_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/llm_perf/benchmark/tensorrt-llm/bench_engine.py -------------------------------------------------------------------------------- /byte_infer_perf/llm_perf/core/ckpt_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/llm_perf/core/ckpt_loader.py -------------------------------------------------------------------------------- /byte_infer_perf/llm_perf/core/generation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/llm_perf/core/generation.py -------------------------------------------------------------------------------- /byte_infer_perf/llm_perf/core/inferencer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/llm_perf/core/inferencer.py -------------------------------------------------------------------------------- /byte_infer_perf/llm_perf/core/mp_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/llm_perf/core/mp_engine.py -------------------------------------------------------------------------------- /byte_infer_perf/llm_perf/core/sampler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/llm_perf/core/sampler.py -------------------------------------------------------------------------------- /byte_infer_perf/llm_perf/core/scheduler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/llm_perf/core/scheduler.py -------------------------------------------------------------------------------- /byte_infer_perf/llm_perf/datasets/merged_52_test.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/llm_perf/datasets/merged_52_test.csv -------------------------------------------------------------------------------- /byte_infer_perf/llm_perf/datasets/test_mini.csv: -------------------------------------------------------------------------------- 1 | id,question,A,B,C,D 2 | 0,最早向中国介绍西方进化论的是____,严复,梁启超,康有为,谭嗣同 3 | -------------------------------------------------------------------------------- /byte_infer_perf/llm_perf/launch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/llm_perf/launch.py -------------------------------------------------------------------------------- /byte_infer_perf/llm_perf/model_zoo/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/llm_perf/model_zoo/README.md -------------------------------------------------------------------------------- /byte_infer_perf/llm_perf/model_zoo/chatglm2-torch-fp16-6b.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/llm_perf/model_zoo/chatglm2-torch-fp16-6b.json -------------------------------------------------------------------------------- /byte_infer_perf/llm_perf/model_zoo/falcon-torch-bf16-180b.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/llm_perf/model_zoo/falcon-torch-bf16-180b.json -------------------------------------------------------------------------------- /byte_infer_perf/llm_perf/model_zoo/llama3-torch-bf16-70b.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/llm_perf/model_zoo/llama3-torch-bf16-70b.json -------------------------------------------------------------------------------- /byte_infer_perf/llm_perf/model_zoo/mixtral-torch-bf16-8x22b.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/llm_perf/model_zoo/mixtral-torch-bf16-8x22b.json -------------------------------------------------------------------------------- /byte_infer_perf/llm_perf/prepare_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/llm_perf/prepare_model.py -------------------------------------------------------------------------------- /byte_infer_perf/llm_perf/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/llm_perf/requirements.txt -------------------------------------------------------------------------------- /byte_infer_perf/llm_perf/script/extra_datasets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/llm_perf/script/extra_datasets.py -------------------------------------------------------------------------------- /byte_infer_perf/llm_perf/script/lint.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/llm_perf/script/lint.sh -------------------------------------------------------------------------------- /byte_infer_perf/llm_perf/script/proto.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/llm_perf/script/proto.sh -------------------------------------------------------------------------------- /byte_infer_perf/llm_perf/script/single_query.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/llm_perf/script/single_query.py -------------------------------------------------------------------------------- /byte_infer_perf/llm_perf/server/endpoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/llm_perf/server/endpoint.py -------------------------------------------------------------------------------- /byte_infer_perf/llm_perf/server/launch_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/llm_perf/server/launch_server.py -------------------------------------------------------------------------------- /byte_infer_perf/llm_perf/server/pb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/llm_perf/server/pb.py -------------------------------------------------------------------------------- /byte_infer_perf/llm_perf/server/server.proto: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/llm_perf/server/server.proto -------------------------------------------------------------------------------- /byte_infer_perf/llm_perf/server/server_pb2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/llm_perf/server/server_pb2.py -------------------------------------------------------------------------------- /byte_infer_perf/llm_perf/server/server_pb2_grpc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/llm_perf/server/server_pb2_grpc.py -------------------------------------------------------------------------------- /byte_infer_perf/llm_perf/utils/dist_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/llm_perf/utils/dist_utils.py -------------------------------------------------------------------------------- /byte_infer_perf/llm_perf/utils/logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/llm_perf/utils/logger.py -------------------------------------------------------------------------------- /byte_infer_perf/llm_perf/utils/ps_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/llm_perf/utils/ps_utils.py -------------------------------------------------------------------------------- /byte_infer_perf/llm_perf/utils/reporter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/llm_perf/utils/reporter.py -------------------------------------------------------------------------------- /byte_infer_perf/llm_perf/workloads/chatglm2-torch-fp16-6b.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/llm_perf/workloads/chatglm2-torch-fp16-6b.json -------------------------------------------------------------------------------- /byte_infer_perf/llm_perf/workloads/falcon-torch-bf16-180b.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/llm_perf/workloads/falcon-torch-bf16-180b.json -------------------------------------------------------------------------------- /byte_infer_perf/llm_perf/workloads/llama3-torch-bf16-70b.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/llm_perf/workloads/llama3-torch-bf16-70b.json -------------------------------------------------------------------------------- /byte_infer_perf/llm_perf/workloads/mixtral-torch-bf16-8x22b.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_infer_perf/llm_perf/workloads/mixtral-torch-bf16-8x22b.json -------------------------------------------------------------------------------- /byte_micro_perf/.gitignore: -------------------------------------------------------------------------------- 1 | TORCH_PROFILER_RAW_DATA/ 2 | profiling/ 3 | reports/ 4 | kernel_meta/ -------------------------------------------------------------------------------- /byte_micro_perf/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/README.md -------------------------------------------------------------------------------- /byte_micro_perf/backends/GPU/backend_gpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/backends/GPU/backend_gpu.py -------------------------------------------------------------------------------- /byte_micro_perf/backends/GPU/ops/add.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/backends/GPU/ops/add.py -------------------------------------------------------------------------------- /byte_micro_perf/backends/GPU/ops/add_rms_norm_dynamic_quant.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/backends/GPU/ops/add_rms_norm_dynamic_quant.py -------------------------------------------------------------------------------- /byte_micro_perf/backends/GPU/ops/all_gather.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/backends/GPU/ops/all_gather.py -------------------------------------------------------------------------------- /byte_micro_perf/backends/GPU/ops/all_reduce.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/backends/GPU/ops/all_reduce.py -------------------------------------------------------------------------------- /byte_micro_perf/backends/GPU/ops/all_to_all.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/backends/GPU/ops/all_to_all.py -------------------------------------------------------------------------------- /byte_micro_perf/backends/GPU/ops/broadcast.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/backends/GPU/ops/broadcast.py -------------------------------------------------------------------------------- /byte_micro_perf/backends/GPU/ops/cast.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/backends/GPU/ops/cast.py -------------------------------------------------------------------------------- /byte_micro_perf/backends/GPU/ops/cos.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/backends/GPU/ops/cos.py -------------------------------------------------------------------------------- /byte_micro_perf/backends/GPU/ops/device2device.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/backends/GPU/ops/device2device.py -------------------------------------------------------------------------------- /byte_micro_perf/backends/GPU/ops/device2host.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/backends/GPU/ops/device2host.py -------------------------------------------------------------------------------- /byte_micro_perf/backends/GPU/ops/div.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/backends/GPU/ops/div.py -------------------------------------------------------------------------------- /byte_micro_perf/backends/GPU/ops/embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/backends/GPU/ops/embedding.py -------------------------------------------------------------------------------- /byte_micro_perf/backends/GPU/ops/exp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/backends/GPU/ops/exp.py -------------------------------------------------------------------------------- /byte_micro_perf/backends/GPU/ops/flash_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/backends/GPU/ops/flash_attention.py -------------------------------------------------------------------------------- /byte_micro_perf/backends/GPU/ops/flash_attention_session_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/backends/GPU/ops/flash_attention_session_cache.py -------------------------------------------------------------------------------- /byte_micro_perf/backends/GPU/ops/flash_decoding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/backends/GPU/ops/flash_decoding.py -------------------------------------------------------------------------------- /byte_micro_perf/backends/GPU/ops/gather.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/backends/GPU/ops/gather.py -------------------------------------------------------------------------------- /byte_micro_perf/backends/GPU/ops/gelu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/backends/GPU/ops/gelu.py -------------------------------------------------------------------------------- /byte_micro_perf/backends/GPU/ops/gemm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/backends/GPU/ops/gemm.py -------------------------------------------------------------------------------- /byte_micro_perf/backends/GPU/ops/head_rms_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/backends/GPU/ops/head_rms_norm.py -------------------------------------------------------------------------------- /byte_micro_perf/backends/GPU/ops/host2device.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/backends/GPU/ops/host2device.py -------------------------------------------------------------------------------- /byte_micro_perf/backends/GPU/ops/index_add.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/backends/GPU/ops/index_add.py -------------------------------------------------------------------------------- /byte_micro_perf/backends/GPU/ops/index_select.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/backends/GPU/ops/index_select.py -------------------------------------------------------------------------------- /byte_micro_perf/backends/GPU/ops/layer_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/backends/GPU/ops/layer_norm.py -------------------------------------------------------------------------------- /byte_micro_perf/backends/GPU/ops/log.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/backends/GPU/ops/log.py -------------------------------------------------------------------------------- /byte_micro_perf/backends/GPU/ops/moe_dispatch_tokens.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/backends/GPU/ops/moe_dispatch_tokens.py -------------------------------------------------------------------------------- /byte_micro_perf/backends/GPU/ops/moe_gather.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/backends/GPU/ops/moe_gather.py -------------------------------------------------------------------------------- /byte_micro_perf/backends/GPU/ops/moe_gating_gemm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/backends/GPU/ops/moe_gating_gemm.py -------------------------------------------------------------------------------- /byte_micro_perf/backends/GPU/ops/moe_quant_group_gemm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/backends/GPU/ops/moe_quant_group_gemm.py -------------------------------------------------------------------------------- /byte_micro_perf/backends/GPU/ops/moe_quant_matmul.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/backends/GPU/ops/moe_quant_matmul.py -------------------------------------------------------------------------------- /byte_micro_perf/backends/GPU/ops/moe_scatter_dynamic_quant.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/backends/GPU/ops/moe_scatter_dynamic_quant.py -------------------------------------------------------------------------------- /byte_micro_perf/backends/GPU/ops/moe_softmax_topk.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/backends/GPU/ops/moe_softmax_topk.py -------------------------------------------------------------------------------- /byte_micro_perf/backends/GPU/ops/moe_swiglu_dynamic_quant.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/backends/GPU/ops/moe_swiglu_dynamic_quant.py -------------------------------------------------------------------------------- /byte_micro_perf/backends/GPU/ops/mul.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/backends/GPU/ops/mul.py -------------------------------------------------------------------------------- /byte_micro_perf/backends/GPU/ops/p2p.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/backends/GPU/ops/p2p.py -------------------------------------------------------------------------------- /byte_micro_perf/backends/GPU/ops/reduce_max.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/backends/GPU/ops/reduce_max.py -------------------------------------------------------------------------------- /byte_micro_perf/backends/GPU/ops/reduce_min.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/backends/GPU/ops/reduce_min.py -------------------------------------------------------------------------------- /byte_micro_perf/backends/GPU/ops/reduce_scatter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/backends/GPU/ops/reduce_scatter.py -------------------------------------------------------------------------------- /byte_micro_perf/backends/GPU/ops/reduce_sum.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/backends/GPU/ops/reduce_sum.py -------------------------------------------------------------------------------- /byte_micro_perf/backends/GPU/ops/rms_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/backends/GPU/ops/rms_norm.py -------------------------------------------------------------------------------- /byte_micro_perf/backends/GPU/ops/rotary_embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/backends/GPU/ops/rotary_embedding.py -------------------------------------------------------------------------------- /byte_micro_perf/backends/GPU/ops/scale_dynamic_quant.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/backends/GPU/ops/scale_dynamic_quant.py -------------------------------------------------------------------------------- /byte_micro_perf/backends/GPU/ops/scatter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/backends/GPU/ops/scatter.py -------------------------------------------------------------------------------- /byte_micro_perf/backends/GPU/ops/silu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/backends/GPU/ops/silu.py -------------------------------------------------------------------------------- /byte_micro_perf/backends/GPU/ops/sin.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/backends/GPU/ops/sin.py -------------------------------------------------------------------------------- /byte_micro_perf/backends/GPU/ops/softmax.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/backends/GPU/ops/softmax.py -------------------------------------------------------------------------------- /byte_micro_perf/backends/GPU/ops/sqrt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/backends/GPU/ops/sqrt.py -------------------------------------------------------------------------------- /byte_micro_perf/backends/GPU/ops/store_kv_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/backends/GPU/ops/store_kv_cache.py -------------------------------------------------------------------------------- /byte_micro_perf/backends/GPU/ops/sub.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/backends/GPU/ops/sub.py -------------------------------------------------------------------------------- /byte_micro_perf/backends/GPU/ops/topk.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/backends/GPU/ops/topk.py -------------------------------------------------------------------------------- /byte_micro_perf/backends/GPU/projects/accuracy_test/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/backends/GPU/projects/accuracy_test/common.py -------------------------------------------------------------------------------- /byte_micro_perf/backends/GPU/projects/torch_profiler/perf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/backends/GPU/projects/torch_profiler/perf.py -------------------------------------------------------------------------------- /byte_micro_perf/backends/GPU/provider_gpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/backends/GPU/provider_gpu.py -------------------------------------------------------------------------------- /byte_micro_perf/core/README.md: -------------------------------------------------------------------------------- 1 | # BasicOp -------------------------------------------------------------------------------- /byte_micro_perf/core/backend.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/core/backend.py -------------------------------------------------------------------------------- /byte_micro_perf/core/creators.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/core/creators.py -------------------------------------------------------------------------------- /byte_micro_perf/core/op.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/core/op.py -------------------------------------------------------------------------------- /byte_micro_perf/core/ops/llm_ops.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/core/ops/llm_ops.py -------------------------------------------------------------------------------- /byte_micro_perf/core/ops/tensor_gemm_ops.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/core/ops/tensor_gemm_ops.py -------------------------------------------------------------------------------- /byte_micro_perf/core/ops/vector_activation_ops.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/core/ops/vector_activation_ops.py -------------------------------------------------------------------------------- /byte_micro_perf/core/ops/vector_index_ops.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/core/ops/vector_index_ops.py -------------------------------------------------------------------------------- /byte_micro_perf/core/ops/vector_linear_ops.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/core/ops/vector_linear_ops.py -------------------------------------------------------------------------------- /byte_micro_perf/core/ops/vector_norm_ops.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/core/ops/vector_norm_ops.py -------------------------------------------------------------------------------- /byte_micro_perf/core/ops/vector_reduction_ops.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/core/ops/vector_reduction_ops.py -------------------------------------------------------------------------------- /byte_micro_perf/core/ops/vector_sfu_ops.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/core/ops/vector_sfu_ops.py -------------------------------------------------------------------------------- /byte_micro_perf/core/ops/xccl_ops.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/core/ops/xccl_ops.py -------------------------------------------------------------------------------- /byte_micro_perf/core/perf_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/core/perf_engine.py -------------------------------------------------------------------------------- /byte_micro_perf/core/scheduler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/core/scheduler.py -------------------------------------------------------------------------------- /byte_micro_perf/core/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/core/utils.py -------------------------------------------------------------------------------- /byte_micro_perf/launch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/launch.py -------------------------------------------------------------------------------- /byte_micro_perf/requirements.txt: -------------------------------------------------------------------------------- 1 | prettytable 2 | jsonlines 3 | psutil -------------------------------------------------------------------------------- /byte_micro_perf/workloads/basic/tensor_gemm_ops/gemm.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/workloads/basic/tensor_gemm_ops/gemm.json -------------------------------------------------------------------------------- /byte_micro_perf/workloads/basic/vector_activation_ops/gelu.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/workloads/basic/vector_activation_ops/gelu.json -------------------------------------------------------------------------------- /byte_micro_perf/workloads/basic/vector_activation_ops/silu.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/workloads/basic/vector_activation_ops/silu.json -------------------------------------------------------------------------------- /byte_micro_perf/workloads/basic/vector_index_ops/embedding.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/workloads/basic/vector_index_ops/embedding.json -------------------------------------------------------------------------------- /byte_micro_perf/workloads/basic/vector_index_ops/gather.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/workloads/basic/vector_index_ops/gather.json -------------------------------------------------------------------------------- /byte_micro_perf/workloads/basic/vector_index_ops/index_add.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/workloads/basic/vector_index_ops/index_add.json -------------------------------------------------------------------------------- /byte_micro_perf/workloads/basic/vector_index_ops/index_select.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/workloads/basic/vector_index_ops/index_select.json -------------------------------------------------------------------------------- /byte_micro_perf/workloads/basic/vector_index_ops/scatter.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/workloads/basic/vector_index_ops/scatter.json -------------------------------------------------------------------------------- /byte_micro_perf/workloads/basic/vector_linear_ops/add.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/workloads/basic/vector_linear_ops/add.json -------------------------------------------------------------------------------- /byte_micro_perf/workloads/basic/vector_linear_ops/cast.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/workloads/basic/vector_linear_ops/cast.json -------------------------------------------------------------------------------- /byte_micro_perf/workloads/basic/vector_linear_ops/mul.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/workloads/basic/vector_linear_ops/mul.json -------------------------------------------------------------------------------- /byte_micro_perf/workloads/basic/vector_linear_ops/sub.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/workloads/basic/vector_linear_ops/sub.json -------------------------------------------------------------------------------- /byte_micro_perf/workloads/basic/vector_norm_ops/layer_norm.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/workloads/basic/vector_norm_ops/layer_norm.json -------------------------------------------------------------------------------- /byte_micro_perf/workloads/basic/vector_norm_ops/rms_norm.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/workloads/basic/vector_norm_ops/rms_norm.json -------------------------------------------------------------------------------- /byte_micro_perf/workloads/basic/vector_norm_ops/softmax.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/workloads/basic/vector_norm_ops/softmax.json -------------------------------------------------------------------------------- /byte_micro_perf/workloads/basic/vector_reduction_ops/reduce_max.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/workloads/basic/vector_reduction_ops/reduce_max.json -------------------------------------------------------------------------------- /byte_micro_perf/workloads/basic/vector_reduction_ops/reduce_min.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/workloads/basic/vector_reduction_ops/reduce_min.json -------------------------------------------------------------------------------- /byte_micro_perf/workloads/basic/vector_reduction_ops/reduce_sum.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/workloads/basic/vector_reduction_ops/reduce_sum.json -------------------------------------------------------------------------------- /byte_micro_perf/workloads/basic/vector_reduction_ops/topk.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/workloads/basic/vector_reduction_ops/topk.json -------------------------------------------------------------------------------- /byte_micro_perf/workloads/basic/vector_sfu_ops/cos.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/workloads/basic/vector_sfu_ops/cos.json -------------------------------------------------------------------------------- /byte_micro_perf/workloads/basic/vector_sfu_ops/div.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/workloads/basic/vector_sfu_ops/div.json -------------------------------------------------------------------------------- /byte_micro_perf/workloads/basic/vector_sfu_ops/exp.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/workloads/basic/vector_sfu_ops/exp.json -------------------------------------------------------------------------------- /byte_micro_perf/workloads/basic/vector_sfu_ops/log.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/workloads/basic/vector_sfu_ops/log.json -------------------------------------------------------------------------------- /byte_micro_perf/workloads/basic/vector_sfu_ops/sin.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/workloads/basic/vector_sfu_ops/sin.json -------------------------------------------------------------------------------- /byte_micro_perf/workloads/basic/vector_sfu_ops/sqrt.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/workloads/basic/vector_sfu_ops/sqrt.json -------------------------------------------------------------------------------- /byte_micro_perf/workloads/basic/xccl_ops/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/workloads/basic/xccl_ops/README.md -------------------------------------------------------------------------------- /byte_micro_perf/workloads/basic/xccl_ops/all_gather.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/workloads/basic/xccl_ops/all_gather.json -------------------------------------------------------------------------------- /byte_micro_perf/workloads/basic/xccl_ops/all_reduce.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/workloads/basic/xccl_ops/all_reduce.json -------------------------------------------------------------------------------- /byte_micro_perf/workloads/basic/xccl_ops/all_to_all.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/workloads/basic/xccl_ops/all_to_all.json -------------------------------------------------------------------------------- /byte_micro_perf/workloads/basic/xccl_ops/broadcast.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/workloads/basic/xccl_ops/broadcast.json -------------------------------------------------------------------------------- /byte_micro_perf/workloads/basic/xccl_ops/device2device.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/workloads/basic/xccl_ops/device2device.json -------------------------------------------------------------------------------- /byte_micro_perf/workloads/basic/xccl_ops/device2host.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/workloads/basic/xccl_ops/device2host.json -------------------------------------------------------------------------------- /byte_micro_perf/workloads/basic/xccl_ops/host2device.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/workloads/basic/xccl_ops/host2device.json -------------------------------------------------------------------------------- /byte_micro_perf/workloads/basic/xccl_ops/p2p.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/workloads/basic/xccl_ops/p2p.json -------------------------------------------------------------------------------- /byte_micro_perf/workloads/basic/xccl_ops/reduce_scatter.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/workloads/basic/xccl_ops/reduce_scatter.json -------------------------------------------------------------------------------- /byte_micro_perf/workloads/llm/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/workloads/llm/README.md -------------------------------------------------------------------------------- /byte_micro_perf/workloads/llm/flash_attention.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/workloads/llm/flash_attention.json -------------------------------------------------------------------------------- /byte_micro_perf/workloads/mocked_model/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/workloads/mocked_model/README.md -------------------------------------------------------------------------------- /byte_micro_perf/workloads/mocked_model/TP8_EP8/all_reduce.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/workloads/mocked_model/TP8_EP8/all_reduce.json -------------------------------------------------------------------------------- /byte_micro_perf/workloads/mocked_model/TP8_EP8/flash_attention.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/workloads/mocked_model/TP8_EP8/flash_attention.json -------------------------------------------------------------------------------- /byte_micro_perf/workloads/mocked_model/TP8_EP8/head_rms_norm.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/workloads/mocked_model/TP8_EP8/head_rms_norm.json -------------------------------------------------------------------------------- /byte_micro_perf/workloads/mocked_model/TP8_EP8/moe_gather.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/workloads/mocked_model/TP8_EP8/moe_gather.json -------------------------------------------------------------------------------- /byte_micro_perf/workloads/mocked_model/TP8_EP8/moe_gating_gemm.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/workloads/mocked_model/TP8_EP8/moe_gating_gemm.json -------------------------------------------------------------------------------- /byte_micro_perf/workloads/mocked_model/TP8_EP8/moe_quant_matmul.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/workloads/mocked_model/TP8_EP8/moe_quant_matmul.json -------------------------------------------------------------------------------- /byte_micro_perf/workloads/mocked_model/TP8_EP8/moe_softmax_topk.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/workloads/mocked_model/TP8_EP8/moe_softmax_topk.json -------------------------------------------------------------------------------- /byte_micro_perf/workloads/mocked_model/TP8_EP8/rms_norm.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/workloads/mocked_model/TP8_EP8/rms_norm.json -------------------------------------------------------------------------------- /byte_micro_perf/workloads/mocked_model/TP8_EP8/rotary_embedding.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/workloads/mocked_model/TP8_EP8/rotary_embedding.json -------------------------------------------------------------------------------- /byte_micro_perf/workloads/mocked_model/TP8_EP8/store_kv_cache.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/workloads/mocked_model/TP8_EP8/store_kv_cache.json -------------------------------------------------------------------------------- /byte_micro_perf/workloads/mocked_model/all_reduce.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/workloads/mocked_model/all_reduce.json -------------------------------------------------------------------------------- /byte_micro_perf/workloads/mocked_model/flash_attention.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/workloads/mocked_model/flash_attention.json -------------------------------------------------------------------------------- /byte_micro_perf/workloads/mocked_model/flash_decoding.json: -------------------------------------------------------------------------------- 1 | { 2 | "to_be_implemented": true 3 | } -------------------------------------------------------------------------------- /byte_micro_perf/workloads/mocked_model/head_rms_norm.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/workloads/mocked_model/head_rms_norm.json -------------------------------------------------------------------------------- /byte_micro_perf/workloads/mocked_model/moe_gather.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/workloads/mocked_model/moe_gather.json -------------------------------------------------------------------------------- /byte_micro_perf/workloads/mocked_model/moe_gating_gemm.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/workloads/mocked_model/moe_gating_gemm.json -------------------------------------------------------------------------------- /byte_micro_perf/workloads/mocked_model/moe_quant_group_gemm.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/workloads/mocked_model/moe_quant_group_gemm.json -------------------------------------------------------------------------------- /byte_micro_perf/workloads/mocked_model/moe_quant_matmul.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/workloads/mocked_model/moe_quant_matmul.json -------------------------------------------------------------------------------- /byte_micro_perf/workloads/mocked_model/moe_softmax_topk.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/workloads/mocked_model/moe_softmax_topk.json -------------------------------------------------------------------------------- /byte_micro_perf/workloads/mocked_model/moe_swiglu_dynamic_quant.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/workloads/mocked_model/moe_swiglu_dynamic_quant.json -------------------------------------------------------------------------------- /byte_micro_perf/workloads/mocked_model/rms_norm.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/workloads/mocked_model/rms_norm.json -------------------------------------------------------------------------------- /byte_micro_perf/workloads/mocked_model/rotary_embedding.json: -------------------------------------------------------------------------------- 1 | { 2 | "to_be_implemented": true 3 | } -------------------------------------------------------------------------------- /byte_micro_perf/workloads/mocked_model/scale_dynamic_quant.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_micro_perf/workloads/mocked_model/scale_dynamic_quant.json -------------------------------------------------------------------------------- /byte_micro_perf/workloads/mocked_model/store_kv_cache.json: -------------------------------------------------------------------------------- 1 | { 2 | "to_be_implemented": true 3 | } -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/.coveragerc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/.coveragerc -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/.flake8: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/.flake8 -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/.github/ISSUE_TEMPLATE/bug.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/.github/ISSUE_TEMPLATE/bug.md -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/.github/ISSUE_TEMPLATE/enhancement.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/.github/ISSUE_TEMPLATE/enhancement.md -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/.github/ISSUE_TEMPLATE/question.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/.github/ISSUE_TEMPLATE/question.md -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/.github/ISSUE_TEMPLATE/regression.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/.github/ISSUE_TEMPLATE/regression.md -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/.github/workflows/stale.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/.github/workflows/stale.yml -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/.gitignore -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/.gitlab-ci.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/.gitlab-ci.yml -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/.gitlab/labeler-config.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/.gitlab/labeler-config.yml -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/.gitlab/stages/00.pre.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/.gitlab/stages/00.pre.yml -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/.gitlab/stages/01.test.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/.gitlab/stages/01.test.yml -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/.gitlab/stages/02.functional-tests.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/.gitlab/stages/02.functional-tests.yml -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/.gitlab/stages/03.publish.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/.gitlab/stages/03.publish.yml -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/.pylintrc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/.pylintrc -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/CHANGELOG.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/CHANGELOG.md -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/CODEOWNERS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/CODEOWNERS -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/CONTRIBUTING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/CONTRIBUTING.md -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/Dockerfile.ci.dev: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/Dockerfile.ci.dev -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/Dockerfile.ci.lts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/Dockerfile.ci.lts -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/Dockerfile.linting: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/Dockerfile.linting -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/LICENSE -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/MANIFEST.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/MANIFEST.in -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/README.md -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/docs/llama_mistral.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/docs/llama_mistral.md -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/docs/source/api-guide/datasets.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/docs/source/api-guide/datasets.rst -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/docs/source/api-guide/dist_optimizer.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/docs/source/api-guide/dist_optimizer.md -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/docs/source/api-guide/distributed.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/docs/source/api-guide/distributed.rst -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/docs/source/api-guide/fusions.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/docs/source/api-guide/fusions.rst -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/docs/source/api-guide/index.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/docs/source/api-guide/index.rst -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/docs/source/api-guide/models.bert.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/docs/source/api-guide/models.bert.rst -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/docs/source/api-guide/models.gpt.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/docs/source/api-guide/models.gpt.rst -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/docs/source/api-guide/models.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/docs/source/api-guide/models.rst -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/docs/source/api-guide/models.t5.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/docs/source/api-guide/models.t5.rst -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/docs/source/api-guide/moe.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/docs/source/api-guide/moe.rst -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/docs/source/api-guide/transformer.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/docs/source/api-guide/transformer.rst -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/docs/source/images/moe/token_drop.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/docs/source/images/moe/token_drop.png -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/docs/source/index.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/docs/source/index.rst -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/docs/source/user-guide/index.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/docs/source/user-guide/index.rst -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/examples/bert/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/examples/bert/README.md -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/examples/debug_to.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/examples/debug_to.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/examples/ds_like/Llama2tokenizer.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/examples/ds_like/Llama2tokenizer.model -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/examples/ds_like/run2script.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/examples/ds_like/run2script.sh -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/examples/ds_like/run4script.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/examples/ds_like/run4script.sh -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/examples/ds_like/train_ds_like_2node.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/examples/ds_like/train_ds_like_2node.sh -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/examples/ds_like/train_ds_like_4node.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/examples/ds_like/train_ds_like_4node.sh -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/examples/export/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/examples/export/README.md -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/examples/export/trtllm_export/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/examples/export/trtllm_export/README.md -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/examples/gpt3/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/examples/gpt3/README.md -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/examples/gpt3/gpt_config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/examples/gpt3/gpt_config.yaml -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/examples/gpt3/run_345M.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/examples/gpt3/run_345M.sh -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/examples/gpt3/run_345M_memory.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/examples/gpt3/run_345M_memory.sh -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/examples/gpt3/run_857M.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/examples/gpt3/run_857M.sh -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/examples/inference/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/examples/inference/README.md -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/examples/mamba/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/examples/mamba/.gitignore -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/examples/mamba/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/examples/mamba/Dockerfile -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/examples/mamba/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/examples/mamba/README.md -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/examples/mamba/run_text_gen_server_8b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/examples/mamba/run_text_gen_server_8b.sh -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/examples/mamba/train.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/examples/mamba/train.sh -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/examples/mixtral/Llama2tokenizer.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/examples/mixtral/Llama2tokenizer.model -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/examples/mixtral/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/examples/mixtral/README.md -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/examples/mixtral/run_moe.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/examples/mixtral/run_moe.sh -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/examples/mixtral/train_moe.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/examples/mixtral/train_moe.sh -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/examples/multimodal/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/examples/multimodal/Dockerfile -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/examples/multimodal/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/examples/multimodal/README.md -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/examples/multimodal/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/examples/multimodal/config.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/examples/multimodal/dataset_helpers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/examples/multimodal/dataset_helpers.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/examples/multimodal/image_processing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/examples/multimodal/image_processing.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/examples/multimodal/layer_specs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/examples/multimodal/layer_specs.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/examples/multimodal/manual_prompts.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/examples/multimodal/manual_prompts.json -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/examples/multimodal/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/examples/multimodal/model.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/examples/multimodal/multimodal_args.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/examples/multimodal/multimodal_args.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/examples/multimodal/nvlm/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/examples/multimodal/nvlm/README.md -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/examples/multimodal/nvlm/internvit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/examples/multimodal/nvlm/internvit.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/examples/multimodal/nvlm/sft_blend.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/examples/multimodal/nvlm/sft_blend.yaml -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/examples/multimodal/sft_dataset.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/examples/multimodal/sft_dataset.yaml -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/examples/multimodal/sft_mistral_clip.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/examples/multimodal/sft_mistral_clip.sh -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/examples/multimodal/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/examples/multimodal/train.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/examples/retro/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/examples/retro/README.md -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/examples/retro/preprocess_data.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/examples/retro/preprocess_data.sh -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/examples/run_simple_mcore_train_loop.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/examples/run_simple_mcore_train_loop.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/examples/t5/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/examples/t5/README.md -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/examples/t5/t5_mcore_train_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/examples/t5/t5_mcore_train_curve.png -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/examples/t5/train_t5_220m_distributed.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/examples/t5/train_t5_220m_distributed.sh -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/gpt2-merges.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/gpt2-merges.txt -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/gpt2-vocab.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/gpt2-vocab.json -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/images/model_table.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/images/model_table.png -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/images/strong_scaling.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/images/strong_scaling.png -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/images/weak_scaling.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/images/weak_scaling.png -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/log -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/QuickStart.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/QuickStart.md -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/README.md -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/README_STRAGGLER.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/README_STRAGGLER.md -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/__init__.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/config_logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/config_logger.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/datasets/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/datasets/Makefile -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/datasets/bert_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/datasets/bert_dataset.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/datasets/gpt_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/datasets/gpt_dataset.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/datasets/helpers.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/datasets/helpers.cpp -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/datasets/helpers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/datasets/helpers.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/datasets/masked_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/datasets/masked_dataset.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/datasets/readme.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/datasets/readme.md -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/datasets/retro/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/datasets/retro/__init__.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/datasets/retro/db/build.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/datasets/retro/db/build.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/datasets/retro/db/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/datasets/retro/db/utils.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/datasets/retro/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/datasets/retro/utils.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/datasets/t5_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/datasets/t5_dataset.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/datasets/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/datasets/utils.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/datasets/utils_s3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/datasets/utils_s3.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/dist_checkpointing/core.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/dist_checkpointing/core.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/distributed/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/distributed/README.md -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/distributed/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/distributed/__init__.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/enums.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/enums.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/export/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/export/__init__.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/export/data_type.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/export/data_type.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/export/export_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/export/export_config.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/export/model_type.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/export/model_type.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/export/trtllm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/export/trtllm/__init__.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/extensions/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/fusions/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/fusions/fused_bias_gelu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/fusions/fused_bias_gelu.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/fusions/fused_softmax.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/fusions/fused_softmax.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/inference/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/inference/__init__.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/inference/scheduler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/inference/scheduler.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/inference/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/inference/utils.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/inference_params.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/inference_params.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/jit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/jit.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/model_parallel_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/model_parallel_config.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/models/T5/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/models/T5/__init__.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/models/T5/t5_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/models/T5/t5_model.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/models/T5/t5_spec.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/models/T5/t5_spec.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/models/bert/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/models/bert/bert_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/models/bert/bert_model.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/models/bert/pooler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/models/bert/pooler.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/models/common/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/models/common/language_module/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/models/common/vision_module/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/models/gpt/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/models/gpt/__init__.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/models/gpt/gpt_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/models/gpt/gpt_model.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/models/mamba/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/models/mamba/__init__.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/models/retro/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/models/retro/__init__.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/models/retro/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/models/retro/config.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/models/retro/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/models/retro/model.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/models/retro/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/models/retro/utils.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/models/vision/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/optimizer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/optimizer/__init__.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/optimizer/clip_grads.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/optimizer/clip_grads.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/optimizer/grad_scaler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/optimizer/grad_scaler.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/optimizer/optimizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/optimizer/optimizer.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/package_info.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/package_info.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/packed_seq_params.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/packed_seq_params.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/parallel_state.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/parallel_state.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/requirements.txt: -------------------------------------------------------------------------------- 1 | torch 2 | packaging 3 | -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/rerun_state_machine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/rerun_state_machine.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/ssm/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/ssm/mamba_block.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/ssm/mamba_block.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/ssm/mamba_layer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/ssm/mamba_layer.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/ssm/mamba_mixer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/ssm/mamba_mixer.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/tensor_parallel/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/tensor_parallel/data.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/tensor_parallel/layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/tensor_parallel/layers.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/tensor_parallel/random.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/tensor_parallel/random.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/tensor_parallel/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/tensor_parallel/utils.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/timers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/timers.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/transformer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/transformer/__init__.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/transformer/attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/transformer/attention.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/transformer/cuda_graphs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/transformer/cuda_graphs.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/transformer/custom_layers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/transformer/enums.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/transformer/enums.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/transformer/identity_op.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/transformer/identity_op.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/transformer/mlp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/transformer/mlp.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/transformer/module.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/transformer/module.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/transformer/moe/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/transformer/moe/README.md -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/transformer/moe/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/transformer/moe/experts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/transformer/moe/experts.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/transformer/moe/router.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/transformer/moe/router.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/transformer/spec_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/transformer/spec_utils.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/transformer/torch_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/transformer/torch_norm.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/transformer/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/transformer/utils.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/core/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/core/utils.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/inference/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/inference/__init__.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/inference/algos/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/inference/algos/__init__.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/inference/algos/distillation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/inference/algos/distillation.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/inference/arguments.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/inference/arguments.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/inference/checkpointing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/inference/checkpointing.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/inference/docs/distillation.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/inference/docs/distillation.md -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/inference/endpoints/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/inference/endpoints/common.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/inference/gpt/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/inference/gpt/__init__.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/inference/gpt/loss_func.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/inference/gpt/loss_func.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/inference/gpt/model_provider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/inference/gpt/model_provider.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/inference/static/index.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/inference/static/index.html -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/legacy/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/legacy/data/__init__.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/legacy/data/autoaugment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/legacy/data/autoaugment.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/legacy/data/data_samplers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/legacy/data/data_samplers.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/legacy/data/dataset_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/legacy/data/dataset_utils.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/legacy/data/ict_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/legacy/data/ict_dataset.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/legacy/data/image_folder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/legacy/data/image_folder.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/legacy/data/realm_index.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/legacy/data/realm_index.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/legacy/data/vit_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/legacy/data/vit_dataset.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/legacy/fused_kernels/compat.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/legacy/fused_kernels/compat.h -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/legacy/fused_kernels/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/legacy/indexer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/legacy/indexer.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/legacy/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/legacy/model/__init__.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/legacy/model/bert_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/legacy/model/bert_model.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/legacy/model/biencoder_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/legacy/model/biencoder_model.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/legacy/model/classification.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/legacy/model/classification.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/legacy/model/enums.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/legacy/model/enums.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/legacy/model/fused_bias_gelu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/legacy/model/fused_bias_gelu.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/legacy/model/fused_softmax.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/legacy/model/fused_softmax.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/legacy/model/gpt_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/legacy/model/gpt_model.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/legacy/model/language_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/legacy/model/language_model.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/legacy/model/module.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/legacy/model/module.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/legacy/model/multiple_choice.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/legacy/model/multiple_choice.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/legacy/model/realm_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/legacy/model/realm_model.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/legacy/model/rms_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/legacy/model/rms_norm.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/legacy/model/t5_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/legacy/model/t5_model.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/legacy/model/transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/legacy/model/transformer.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/legacy/model/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/legacy/model/utils.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/legacy/model/vision/dino.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/legacy/model/vision/dino.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/legacy/model/vision/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/legacy/model/vision/utils.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/legacy/mpu/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/legacy/mpu/tests/commons.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/legacy/mpu/tests/commons.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/legacy/mpu/tests/test_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/legacy/mpu/tests/test_data.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/legacy/mpu/tests/test_layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/legacy/mpu/tests/test_layers.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/legacy/mpu/tests/test_random.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/legacy/mpu/tests/test_random.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/training/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/training/__init__.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/training/activations.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/training/activations.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/training/arguments.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/training/arguments.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/training/async_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/training/async_utils.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/training/checkpointing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/training/checkpointing.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/training/dist_signal_handler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/training/dist_signal_handler.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/training/ft_integration.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/training/ft_integration.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/training/global_vars.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/training/global_vars.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/training/initialize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/training/initialize.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/training/log_handler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/training/log_handler.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/training/one_logger_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/training/one_logger_utils.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/training/tokenizer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/training/tokenizer/__init__.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/training/tokenizer/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/training/tokenizer/tokenizer.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/training/training.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/training/training.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/training/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/training/utils.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/megatron/training/yaml_arguments.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/megatron/training/yaml_arguments.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/mypy.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/mypy.ini -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/output.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/output.log -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/pretrain_bert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/pretrain_bert.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/pretrain_gpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/pretrain_gpt.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/pretrain_ict.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/pretrain_ict.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/pretrain_mamba.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/pretrain_mamba.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/pretrain_retro.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/pretrain_retro.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/pretrain_t5.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/pretrain_t5.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/pretrain_vision_classify.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/pretrain_vision_classify.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/pretrain_vision_dino.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/pretrain_vision_dino.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/pretrain_vision_inpaint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/pretrain_vision_inpaint.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/pretrain_vlm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/pretrain_vlm.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/pyproject.toml -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/pytest.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/pytest.ini -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/setup.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tasks/data_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tasks/data_utils.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tasks/ensemble_classifier.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tasks/ensemble_classifier.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tasks/eval_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tasks/eval_utils.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tasks/finetune_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tasks/finetune_utils.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tasks/glue/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tasks/glue/data.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tasks/glue/finetune.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tasks/glue/finetune.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tasks/glue/mnli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tasks/glue/mnli.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tasks/glue/qqp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tasks/glue/qqp.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tasks/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tasks/main.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tasks/msdp/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tasks/msdp/README.md -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tasks/msdp/evaluate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tasks/msdp/evaluate.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tasks/msdp/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tasks/msdp/main.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tasks/msdp/metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tasks/msdp/metrics.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tasks/msdp/preprocessing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tasks/msdp/preprocessing.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tasks/msdp/prompt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tasks/msdp/prompt.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tasks/orqa/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tasks/orqa/README.md -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tasks/orqa/evaluate_orqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tasks/orqa/evaluate_orqa.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tasks/orqa/evaluate_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tasks/orqa/evaluate_utils.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tasks/orqa/supervised/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tasks/orqa/supervised/data.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tasks/orqa/supervised/eval_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tasks/orqa/supervised/eval_utils.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tasks/orqa/supervised/finetune.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tasks/orqa/supervised/finetune.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tasks/orqa/unsupervised/nq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tasks/orqa/unsupervised/nq.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tasks/orqa/unsupervised/qa_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tasks/orqa/unsupervised/qa_utils.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tasks/orqa/unsupervised/tokenizers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tasks/orqa/unsupervised/tokenizers.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tasks/quantize/calibrate_gpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tasks/quantize/calibrate_gpt.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tasks/race/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tasks/race/data.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tasks/race/finetune.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tasks/race/finetune.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tasks/vision/finetune_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tasks/vision/finetune_utils.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tasks/vision/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tasks/vision/main.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tasks/vision/segmentation/cityscapes.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tasks/vision/segmentation/cityscapes.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tasks/vision/segmentation/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tasks/vision/segmentation/data.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tasks/vision/segmentation/metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tasks/vision/segmentation/metrics.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tasks/vision/segmentation/seg_heads.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tasks/vision/segmentation/seg_heads.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tasks/vision/segmentation/seg_models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tasks/vision/segmentation/seg_models.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tasks/vision/segmentation/transforms.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tasks/vision/segmentation/transforms.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tasks/vision/segmentation/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tasks/vision/segmentation/utils.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tasks/zeroshot_gpt/datasets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tasks/zeroshot_gpt/datasets.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tasks/zeroshot_gpt/detokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tasks/zeroshot_gpt/detokenizer.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tasks/zeroshot_gpt/evaluate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tasks/zeroshot_gpt/evaluate.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tests/functional_tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tests/functional_tests/python_test_utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_cp4_a2a_p2p_nondeterministic_dgx_a100_1N8G/golden_values_dev.json: -------------------------------------------------------------------------------- 1 | {} -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_cp4_a2a_p2p_nondeterministic_dgx_a100_1N8G/golden_values_dev.json: -------------------------------------------------------------------------------- 1 | {} -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tests/test_utils/recipes/bert.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tests/test_utils/recipes/bert.yaml -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tests/test_utils/recipes/gpt-nemo.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tests/test_utils/recipes/gpt-nemo.yaml -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tests/test_utils/recipes/gpt.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tests/test_utils/recipes/gpt.yaml -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tests/test_utils/recipes/t5.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tests/test_utils/recipes/t5.yaml -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tests/test_utils/recipes/unit-tests.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tests/test_utils/recipes/unit-tests.yaml -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tests/test_utils/shell_scripts/notify.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tests/test_utils/shell_scripts/notify.sh -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tests/unit_tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tests/unit_tests/__init__.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tests/unit_tests/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tests/unit_tests/conftest.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tests/unit_tests/data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tests/unit_tests/data/test_bin_reader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tests/unit_tests/data/test_bin_reader.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tests/unit_tests/data/test_builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tests/unit_tests/data/test_builder.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tests/unit_tests/dist_checkpointing/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tests/unit_tests/export/trtllm/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tests/unit_tests/inference/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tests/unit_tests/inference/engines/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tests/unit_tests/inference/model_inference_wrappers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tests/unit_tests/inference/text_generation_controllers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tests/unit_tests/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tests/unit_tests/models/test_t5_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tests/unit_tests/models/test_t5_model.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tests/unit_tests/pipeline_parallel/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tests/unit_tests/ssm/test_mamba_block.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tests/unit_tests/ssm/test_mamba_block.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tests/unit_tests/ssm/test_mamba_layer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tests/unit_tests/ssm/test_mamba_layer.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tests/unit_tests/ssm/test_mamba_mixer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tests/unit_tests/ssm/test_mamba_mixer.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tests/unit_tests/tensor_parallel/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tests/unit_tests/test_basic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tests/unit_tests/test_basic.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tests/unit_tests/test_imports.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tests/unit_tests/test_imports.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tests/unit_tests/test_inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tests/unit_tests/test_inference.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tests/unit_tests/test_model_configs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tests/unit_tests/test_model_configs.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tests/unit_tests/test_optimizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tests/unit_tests/test_optimizer.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tests/unit_tests/test_parallel_state.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tests/unit_tests/test_parallel_state.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tests/unit_tests/test_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tests/unit_tests/test_tokenizer.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tests/unit_tests/test_training.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tests/unit_tests/test_training.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tests/unit_tests/test_utilities.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tests/unit_tests/test_utilities.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tests/unit_tests/test_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tests/unit_tests/test_utils.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tests/unit_tests/transformer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tests/unit_tests/transformer/moe/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tests/unit_tests/transformer/test_mlp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tests/unit_tests/transformer/test_mlp.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tools/autoformat.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tools/autoformat.sh -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tools/bert_embedding/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tools/bert_embedding/__init__.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tools/bert_embedding/dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tools/bert_embedding/dataset.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tools/bert_embedding/embed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tools/bert_embedding/embed.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tools/bert_embedding/external_libs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tools/bert_embedding/external_libs.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tools/bert_embedding/huggingface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tools/bert_embedding/huggingface.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tools/checkpoint/convert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tools/checkpoint/convert.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tools/checkpoint/hybrid_conversion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tools/checkpoint/hybrid_conversion.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tools/checkpoint/loader_llama_mistral.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tools/checkpoint/loader_llama_mistral.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tools/checkpoint/loader_mcore.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tools/checkpoint/loader_mcore.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tools/checkpoint/loader_megatron.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tools/checkpoint/loader_megatron.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tools/checkpoint/loader_mixtral_hf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tools/checkpoint/loader_mixtral_hf.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tools/checkpoint/saver_mcore.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tools/checkpoint/saver_mcore.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tools/checkpoint/saver_megatron.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tools/checkpoint/saver_megatron.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tools/checkpoint/schema_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tools/checkpoint/schema_base.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tools/checkpoint/schema_mcore.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tools/checkpoint/schema_mcore.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tools/checkpoint/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tools/checkpoint/utils.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tools/copyright.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tools/copyright.sh -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tools/linter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tools/linter.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tools/merge_datasets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tools/merge_datasets.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tools/openwebtext/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tools/openwebtext/README.md -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tools/openwebtext/add_id.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tools/openwebtext/add_id.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tools/openwebtext/blacklist_urls.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tools/openwebtext/blacklist_urls.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tools/openwebtext/cleanup_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tools/openwebtext/cleanup_dataset.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tools/openwebtext/filter_ngrams.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tools/openwebtext/filter_ngrams.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tools/openwebtext/find_duplicates.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tools/openwebtext/find_duplicates.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tools/openwebtext/merge_jsons.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tools/openwebtext/merge_jsons.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tools/preprocess_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tools/preprocess_data.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tools/preprocess_data_nmt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tools/preprocess_data_nmt.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tools/preprocess_mmdata.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tools/preprocess_mmdata.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tools/report_theoretical_memory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tools/report_theoretical_memory.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tools/retro/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tools/retro/README.md -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tools/retro/build_db.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tools/retro/build_db.md -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tools/retro/cli/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tools/retro/cli/__init__.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tools/retro/cli/__main__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tools/retro/cli/__main__.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tools/retro/cli/cli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tools/retro/cli/cli.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tools/retro/config_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tools/retro/config_utils.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tools/retro/docker/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tools/retro/docker/Dockerfile -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tools/retro/preprocess_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tools/retro/preprocess_data.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tools/retro/sft/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tools/retro/sft/README.md -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tools/retro/sft/dataset_conv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tools/retro/sft/dataset_conv.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tools/retro/sft/open_inst.sh: -------------------------------------------------------------------------------- 1 | DATA_BLEND="1.0 open_inst" 2 | -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tools/retro/sft/sft_retro.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tools/retro/sft/sft_retro.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tools/retro/sft/sft_retro_lm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tools/retro/sft/sft_retro_lm.sh -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tools/retro/text_generation/metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tools/retro/text_generation/metrics.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tools/run_text_generation_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tools/run_text_generation_server.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tools/run_vlm_text_generation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tools/run_vlm_text_generation.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/tools/text_generation_cli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/Megatron-LM/tools/text_generation_cli.py -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/wandb/debug-cli.root.log: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /byte_train_perf/Megatron-LM/wandb/settings: -------------------------------------------------------------------------------- 1 | [default] 2 | 3 | -------------------------------------------------------------------------------- /byte_train_perf/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/byte_train_perf/README.md -------------------------------------------------------------------------------- /docs/images/flowchat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/docs/images/flowchat.png -------------------------------------------------------------------------------- /docs/images/icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/docs/images/icon.png -------------------------------------------------------------------------------- /pylint.conf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/pylint.conf -------------------------------------------------------------------------------- /vendor_zoo/AWS/Inferentia.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/vendor_zoo/AWS/Inferentia.json -------------------------------------------------------------------------------- /vendor_zoo/AWS/Inferentia2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/vendor_zoo/AWS/Inferentia2.json -------------------------------------------------------------------------------- /vendor_zoo/AWS/Trainium.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/vendor_zoo/AWS/Trainium.json -------------------------------------------------------------------------------- /vendor_zoo/AWS/mem_16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/vendor_zoo/AWS/mem_16.png -------------------------------------------------------------------------------- /vendor_zoo/AWS/mem_17.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/vendor_zoo/AWS/mem_17.png -------------------------------------------------------------------------------- /vendor_zoo/Graphcore/IPU C600.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/vendor_zoo/Graphcore/IPU C600.json -------------------------------------------------------------------------------- /vendor_zoo/Graphcore/image_12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/vendor_zoo/Graphcore/image_12.png -------------------------------------------------------------------------------- /vendor_zoo/Graphcore/mem_12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/vendor_zoo/Graphcore/mem_12.png -------------------------------------------------------------------------------- /vendor_zoo/Habana/Gaudi2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/vendor_zoo/Habana/Gaudi2.json -------------------------------------------------------------------------------- /vendor_zoo/Habana/image_14.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/vendor_zoo/Habana/image_14.png -------------------------------------------------------------------------------- /vendor_zoo/Habana/mem_14.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/vendor_zoo/Habana/mem_14.png -------------------------------------------------------------------------------- /vendor_zoo/Habana/pe_14.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/vendor_zoo/Habana/pe_14.png -------------------------------------------------------------------------------- /vendor_zoo/Moffett/S10.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/vendor_zoo/Moffett/S10.json -------------------------------------------------------------------------------- /vendor_zoo/Moffett/S30.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/vendor_zoo/Moffett/S30.json -------------------------------------------------------------------------------- /vendor_zoo/Moffett/S4.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/vendor_zoo/Moffett/S4.json -------------------------------------------------------------------------------- /vendor_zoo/Moffett/image_7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/vendor_zoo/Moffett/image_7.png -------------------------------------------------------------------------------- /vendor_zoo/Moffett/image_8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/vendor_zoo/Moffett/image_8.png -------------------------------------------------------------------------------- /vendor_zoo/Moffett/image_9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/vendor_zoo/Moffett/image_9.png -------------------------------------------------------------------------------- /vendor_zoo/Moffett/mem_7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/vendor_zoo/Moffett/mem_7.png -------------------------------------------------------------------------------- /vendor_zoo/Moffett/mem_8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/vendor_zoo/Moffett/mem_8.png -------------------------------------------------------------------------------- /vendor_zoo/Moffett/mem_9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/vendor_zoo/Moffett/mem_9.png -------------------------------------------------------------------------------- /vendor_zoo/Moffett/pe_7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/vendor_zoo/Moffett/pe_7.png -------------------------------------------------------------------------------- /vendor_zoo/Moffett/pe_9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/vendor_zoo/Moffett/pe_9.png -------------------------------------------------------------------------------- /vendor_zoo/NVIDIA/A10-PCIe.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/vendor_zoo/NVIDIA/A10-PCIe.json -------------------------------------------------------------------------------- /vendor_zoo/NVIDIA/A100-PCIe.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/vendor_zoo/NVIDIA/A100-PCIe.json -------------------------------------------------------------------------------- /vendor_zoo/NVIDIA/A100-SXM4.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/vendor_zoo/NVIDIA/A100-SXM4.json -------------------------------------------------------------------------------- /vendor_zoo/NVIDIA/A30-PCIe.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/vendor_zoo/NVIDIA/A30-PCIe.json -------------------------------------------------------------------------------- /vendor_zoo/NVIDIA/H100-PCIe.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/vendor_zoo/NVIDIA/H100-PCIe.json -------------------------------------------------------------------------------- /vendor_zoo/NVIDIA/H100-SXM5.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/vendor_zoo/NVIDIA/H100-SXM5.json -------------------------------------------------------------------------------- /vendor_zoo/NVIDIA/T4.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/vendor_zoo/NVIDIA/T4.json -------------------------------------------------------------------------------- /vendor_zoo/NVIDIA/image_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/vendor_zoo/NVIDIA/image_0.png -------------------------------------------------------------------------------- /vendor_zoo/NVIDIA/image_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/vendor_zoo/NVIDIA/image_1.png -------------------------------------------------------------------------------- /vendor_zoo/NVIDIA/image_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/vendor_zoo/NVIDIA/image_2.png -------------------------------------------------------------------------------- /vendor_zoo/NVIDIA/image_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/vendor_zoo/NVIDIA/image_3.png -------------------------------------------------------------------------------- /vendor_zoo/NVIDIA/image_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/vendor_zoo/NVIDIA/image_4.png -------------------------------------------------------------------------------- /vendor_zoo/NVIDIA/image_5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/vendor_zoo/NVIDIA/image_5.png -------------------------------------------------------------------------------- /vendor_zoo/NVIDIA/image_6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/vendor_zoo/NVIDIA/image_6.png -------------------------------------------------------------------------------- /vendor_zoo/QUALCOMM/AIC100.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/vendor_zoo/QUALCOMM/AIC100.json -------------------------------------------------------------------------------- /vendor_zoo/QUALCOMM/image_10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/vendor_zoo/QUALCOMM/image_10.png -------------------------------------------------------------------------------- /vendor_zoo/QUALCOMM/mem_10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/vendor_zoo/QUALCOMM/mem_10.png -------------------------------------------------------------------------------- /vendor_zoo/QUALCOMM/pe_10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/vendor_zoo/QUALCOMM/pe_10.png -------------------------------------------------------------------------------- /vendor_zoo/Stream/STC920.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/vendor_zoo/Stream/STC920.json -------------------------------------------------------------------------------- /vendor_zoo/Stream/image_13.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/ByteMLPerf/HEAD/vendor_zoo/Stream/image_13.png --------------------------------------------------------------------------------