├── .github ├── CODEOWNERS └── PULL_REQUEST_TEMPLATE.md ├── .gitignore ├── .gitmodules ├── CONTRIBUTING.md ├── MLPERF4.0 ├── Inference │ ├── functions.sh │ ├── llama │ │ ├── README.md │ │ ├── SUT.py │ │ ├── configs │ │ │ └── fp8.conf │ │ ├── evaluation.py │ │ ├── hqt │ │ │ └── llama2-70b-8x │ │ │ │ ├── config_meas_maxabs_quant_MAXABS_HW.json │ │ │ │ ├── measure_hooks_maxabs_0_8.json │ │ │ │ ├── measure_hooks_maxabs_0_8.npz │ │ │ │ ├── measure_hooks_maxabs_0_8_mod_list.json │ │ │ │ ├── measure_hooks_maxabs_1_8.json │ │ │ │ ├── measure_hooks_maxabs_1_8.npz │ │ │ │ ├── measure_hooks_maxabs_1_8_mod_list.json │ │ │ │ ├── measure_hooks_maxabs_2_8.json │ │ │ │ ├── measure_hooks_maxabs_2_8.npz │ │ │ │ ├── measure_hooks_maxabs_2_8_mod_list.json │ │ │ │ ├── measure_hooks_maxabs_3_8.json │ │ │ │ ├── measure_hooks_maxabs_3_8.npz │ │ │ │ ├── measure_hooks_maxabs_3_8_mod_list.json │ │ │ │ ├── measure_hooks_maxabs_4_8.json │ │ │ │ ├── measure_hooks_maxabs_4_8.npz │ │ │ │ ├── measure_hooks_maxabs_4_8_mod_list.json │ │ │ │ ├── measure_hooks_maxabs_5_8.json │ │ │ │ ├── measure_hooks_maxabs_5_8.npz │ │ │ │ ├── measure_hooks_maxabs_5_8_mod_list.json │ │ │ │ ├── measure_hooks_maxabs_6_8.json │ │ │ │ ├── measure_hooks_maxabs_6_8.npz │ │ │ │ ├── measure_hooks_maxabs_6_8_mod_list.json │ │ │ │ ├── measure_hooks_maxabs_7_8.json │ │ │ │ ├── measure_hooks_maxabs_7_8.npz │ │ │ │ ├── measure_hooks_maxabs_7_8_mod_list.json │ │ │ │ ├── measure_hooks_maxabs_MAXABS_HW_0_8.json │ │ │ │ ├── measure_hooks_maxabs_MAXABS_HW_0_8.npz │ │ │ │ ├── measure_hooks_maxabs_MAXABS_HW_1_8.json │ │ │ │ ├── measure_hooks_maxabs_MAXABS_HW_1_8.npz │ │ │ │ ├── measure_hooks_maxabs_MAXABS_HW_2_8.json │ │ │ │ ├── measure_hooks_maxabs_MAXABS_HW_2_8.npz │ │ │ │ ├── measure_hooks_maxabs_MAXABS_HW_3_8.json │ │ │ │ ├── measure_hooks_maxabs_MAXABS_HW_3_8.npz │ │ │ │ ├── measure_hooks_maxabs_MAXABS_HW_4_8.json │ │ │ │ ├── measure_hooks_maxabs_MAXABS_HW_4_8.npz │ │ │ │ ├── measure_hooks_maxabs_MAXABS_HW_5_8.json │ │ │ │ ├── measure_hooks_maxabs_MAXABS_HW_5_8.npz │ │ │ │ ├── measure_hooks_maxabs_MAXABS_HW_6_8.json │ │ │ │ ├── measure_hooks_maxabs_MAXABS_HW_6_8.npz │ │ │ │ ├── measure_hooks_maxabs_MAXABS_HW_7_8.json │ │ │ │ └── measure_hooks_maxabs_MAXABS_HW_7_8.npz │ │ ├── llama_greedy.py │ │ ├── main.py │ │ ├── mlperf.conf │ │ ├── processorca.py │ │ ├── quantization_config │ │ │ ├── act_maxabs_hw_weights_pcs_maxabs_pow2_quant.json │ │ │ ├── maxabs_measure.json │ │ │ ├── maxabs_pcq_measure.json │ │ │ ├── maxabs_quant.json │ │ │ ├── shape_measure.json │ │ │ ├── unit_scale_quant.json │ │ │ └── without_scale_quant.json │ │ ├── requirements.txt │ │ ├── run_tgi_server.sh │ │ ├── setup_tgi.sh │ │ └── utils.py │ ├── prepare_and_check_submission.py │ ├── run_mlperf_scenarios.py │ ├── scenarios.yaml │ └── stable-diffusion-xl │ │ ├── README.md │ │ ├── attention_processor.py │ │ ├── backend.py │ │ ├── backend_debug.py │ │ ├── backend_pytorch.py │ │ ├── coco.py │ │ ├── coco2014 │ │ ├── calibration │ │ │ └── coco_cal_captions_list.txt │ │ └── captions │ │ │ └── captions_source.tsv │ │ ├── configs │ │ └── user.conf │ │ ├── dataset.py │ │ ├── evaluation.py │ │ ├── hpu_multicard.py │ │ ├── main.py │ │ ├── mlperf.conf │ │ ├── pipeline_stable_diffusion_xl_hpu.py │ │ ├── requirements.txt │ │ ├── scheduling_euler_discrete_hpu.py │ │ ├── tools │ │ ├── accuracy_coco.py │ │ ├── check_latents.py │ │ ├── clip │ │ │ └── clip_encoder.py │ │ ├── coco.py │ │ ├── coco_calibration.py │ │ ├── coco_generate_calibration.py │ │ ├── download-coco-2014-calibration.sh │ │ ├── 
download-coco-2014.sh │ │ ├── fid │ │ │ ├── README.md │ │ │ ├── fid_score.py │ │ │ └── inception.py │ │ ├── generate_fp32_weights.py │ │ ├── latent.py │ │ ├── latents.npy │ │ ├── latents.pt │ │ ├── measure.sh │ │ ├── quantize │ │ │ ├── measure_all │ │ │ │ ├── fp8_hooks_maxabs.json │ │ │ │ ├── fp8_hooks_maxabs.npz │ │ │ │ ├── fp8_hooks_maxabs_0_8.npz │ │ │ │ ├── fp8_hooks_maxabs_1_8.npz │ │ │ │ ├── fp8_hooks_maxabs_2_8.npz │ │ │ │ ├── fp8_hooks_maxabs_3_8.npz │ │ │ │ ├── fp8_hooks_maxabs_4_8.npz │ │ │ │ ├── fp8_hooks_maxabs_5_8.npz │ │ │ │ ├── fp8_hooks_maxabs_6_8.npz │ │ │ │ └── fp8_hooks_maxabs_7_8.npz │ │ │ ├── measure_config.json │ │ │ ├── quant_config.json │ │ │ └── quant_config_bmm.json │ │ ├── sample_ids.py │ │ └── sample_ids.txt │ │ └── unet_2d_condition_hpu.py └── Training │ └── benchmarks │ ├── gpt3 │ ├── CODEOWNERS │ ├── LICENSE │ ├── MANIFEST.in │ ├── README.md │ ├── SECURITY.md │ ├── dataset │ │ ├── README.md │ │ ├── download_books.sh │ │ ├── download_ckpt.sh │ │ └── download_vocab.sh │ ├── examples │ │ ├── MoE │ │ │ ├── ds_config_gpt_TEMPLATE.json │ │ │ ├── ds_config_gpt_Zero2_TEMPLATE.json │ │ │ ├── ds_evalharness.sh │ │ │ ├── ds_pretrain_gpt_1.3B_MoE128.sh │ │ │ ├── ds_pretrain_gpt_1.3B_PR-MoE64or128.sh │ │ │ ├── ds_pretrain_gpt_1.3B_PR-MoE64or128_MoS.sh │ │ │ ├── ds_pretrain_gpt_1.3B_dense.sh │ │ │ ├── ds_pretrain_gpt_1.3B_dense_cl.sh │ │ │ ├── ds_pretrain_gpt_125M_MoE64.sh │ │ │ ├── ds_pretrain_gpt_125M_dense_cl.sh │ │ │ ├── ds_pretrain_gpt_350M_MoE128.sh │ │ │ ├── ds_pretrain_gpt_350M_PR-MoE32or64.sh │ │ │ ├── ds_pretrain_gpt_350M_PR-MoE32or64_MoS.sh │ │ │ ├── ds_pretrain_gpt_350M_dense.sh │ │ │ ├── ds_pretrain_gpt_6.7B_dense.sh │ │ │ └── readme_evalharness.md │ │ ├── README.md │ │ ├── azure │ │ │ ├── README.md │ │ │ ├── run-175b.sh │ │ │ ├── run-1t.sh │ │ │ └── run-benchmark-model.sh │ │ ├── azureml │ │ │ ├── Dockerfile.dockerfile │ │ │ ├── README.md │ │ │ ├── aml_submit.py │ │ │ └── prepare_dataset.py │ │ ├── compression │ │ │ ├── 125M-Int8-test-64gpu-distilled-group48.sh │ │ │ ├── 125M-L10-Int8-test-64gpu-distilled-group48.sh │ │ │ ├── 125M-L12-Int8-test-64gpu-distilled-group48.sh │ │ │ ├── ds_config_gpt_TEMPLATE.json │ │ │ ├── ds_config_gpt_TEMPLATE_compression.json │ │ │ ├── ds_evalharness.sh │ │ │ ├── ds_pretrain_gpt_1.3B_dense_cl_kd.sh │ │ │ ├── ds_pretrain_gpt_125M_dense_cl_kd.sh │ │ │ ├── ds_pretrain_gpt_125M_dense_kd.sh │ │ │ └── ds_pretrain_gpt_350M_dense_kd.sh │ │ ├── create_embeddings.sh │ │ ├── curriculum_learning │ │ │ ├── README.md │ │ │ ├── ds_pretrain_gpt2.sh │ │ │ ├── ds_train.sh │ │ │ ├── ds_zero_stage_1_config_baseline.json │ │ │ └── ds_zero_stage_1_config_curriculum_fixed_linear.json │ │ ├── evaluate_ict_zeroshot_nq.sh │ │ ├── evaluate_zeroshot_gpt.sh │ │ ├── finetune_mnli_distributed.sh │ │ ├── finetune_race_distributed.sh │ │ ├── generate_text.sh │ │ ├── merge_mp_bert.sh │ │ ├── pretrain_bert.sh │ │ ├── pretrain_bert_distributed.sh │ │ ├── pretrain_bert_distributed_with_mp.sh │ │ ├── pretrain_gpt.sh │ │ ├── pretrain_gpt3_175B.sh │ │ ├── pretrain_gpt_distributed.sh │ │ ├── pretrain_gpt_distributed_with_mp.sh │ │ ├── pretrain_ict.sh │ │ ├── pretrain_t5.sh │ │ ├── pretrain_t5_distributed.sh │ │ ├── pretrain_t5_distributed_with_mp.sh │ │ └── run_deepspeed_example.sh │ ├── images │ │ └── cases_april2021.png │ ├── megatron │ │ ├── __init__.py │ │ ├── arguments.py │ │ ├── checkpointing.py │ │ ├── data │ │ │ ├── Makefile │ │ │ ├── __init__.py │ │ │ ├── autoaugment.py │ │ │ ├── bert_dataset.py │ │ │ ├── biencoder_dataset_utils.py │ │ │ ├── 
blendable_dataset.py │ │ │ ├── data_samplers.py │ │ │ ├── dataset_utils.py │ │ │ ├── gpt_dataset.py │ │ │ ├── helpers.cpp │ │ │ ├── ict_dataset.py │ │ │ ├── indexed_dataset.py │ │ │ ├── orqa_wiki_dataset.py │ │ │ ├── realm_dataset_utils.py │ │ │ ├── realm_index.py │ │ │ ├── t5_dataset.py │ │ │ ├── test │ │ │ │ ├── test_indexed_dataset.py │ │ │ │ └── test_preprocess_data.sh │ │ │ └── vit_dataset.py │ │ ├── enums.py │ │ ├── fp16_deprecated │ │ │ └── loss_scaler.py │ │ ├── fused_kernels │ │ │ ├── __init__.py │ │ │ ├── compat.h │ │ │ ├── layer_norm_cuda.cpp │ │ │ ├── layer_norm_cuda_kernel.cu │ │ │ ├── scaled_masked_softmax.cpp │ │ │ ├── scaled_masked_softmax.h │ │ │ ├── scaled_masked_softmax_cuda.cu │ │ │ ├── scaled_upper_triang_masked_softmax.cpp │ │ │ ├── scaled_upper_triang_masked_softmax.h │ │ │ ├── scaled_upper_triang_masked_softmax_cuda.cu │ │ │ └── type_shim.h │ │ ├── global_vars.py │ │ ├── indexer.py │ │ ├── initialize.py │ │ ├── learning_rates.py │ │ ├── memory.py │ │ ├── microbatches.py │ │ ├── model │ │ │ ├── __init__.py │ │ │ ├── bert_model.py │ │ │ ├── biencoder_model.py │ │ │ ├── classification.py │ │ │ ├── distributed.py │ │ │ ├── enums.py │ │ │ ├── fused_bias_gelu.py │ │ │ ├── fused_layer_norm.py │ │ │ ├── fused_softmax.py │ │ │ ├── gpt_model.py │ │ │ ├── language_model.py │ │ │ ├── llama_model.py │ │ │ ├── module.py │ │ │ ├── multiple_choice.py │ │ │ ├── positional_embeddings.py │ │ │ ├── realm_model.py │ │ │ ├── rmsnorm.py │ │ │ ├── t5_model.py │ │ │ ├── transformer.py │ │ │ ├── utils.py │ │ │ └── vit_model.py │ │ ├── mpu │ │ │ ├── __init__.py │ │ │ ├── cross_entropy.py │ │ │ ├── data.py │ │ │ ├── initialize.py │ │ │ ├── layers.py │ │ │ ├── mappings.py │ │ │ ├── random.py │ │ │ ├── tests │ │ │ │ ├── __init__.py │ │ │ │ ├── commons.py │ │ │ │ ├── test_cross_entropy.py │ │ │ │ ├── test_data.py │ │ │ │ ├── test_initialize.py │ │ │ │ ├── test_layers.py │ │ │ │ └── test_random.py │ │ │ └── utils.py │ │ ├── optimizer │ │ │ ├── __init__.py │ │ │ ├── clip_grads.py │ │ │ ├── grad_scaler.py │ │ │ └── optimizer.py │ │ ├── p2p_communication.py │ │ ├── package_info.py │ │ ├── profiler.py │ │ ├── schedules.py │ │ ├── testing_utils.py │ │ ├── text_generation_utils.py │ │ ├── tokenizer │ │ │ ├── __init__.py │ │ │ ├── bert_tokenization.py │ │ │ ├── gpt2_tokenization.py │ │ │ ├── sentencepiece_tokenization.py │ │ │ └── tokenizer.py │ │ ├── training.py │ │ └── utils.py │ ├── pretrain_bert.py │ ├── pretrain_gpt.py │ ├── pretrain_ict.py │ ├── pretrain_t5.py │ ├── pretrain_vit.py │ ├── requirements.txt │ ├── run_gpt.sh │ ├── setup.py │ ├── tasks │ │ ├── data_utils.py │ │ ├── detok.py │ │ ├── ensemble_classifier.py │ │ ├── eval_harness │ │ │ ├── download.py │ │ │ ├── evaluate.py │ │ │ └── report-to-csv.py │ │ ├── eval_utils.py │ │ ├── finetune_utils.py │ │ ├── glue │ │ │ ├── data.py │ │ │ ├── finetune.py │ │ │ ├── mnli.py │ │ │ └── qqp.py │ │ ├── main.py │ │ ├── main_3d.py │ │ ├── orqa │ │ │ ├── evaluate_orqa.py │ │ │ ├── evaluate_utils.py │ │ │ └── natural_questions │ │ │ │ ├── nq.py │ │ │ │ ├── qa_utils.py │ │ │ │ └── tokenizers.py │ │ ├── race │ │ │ ├── data.py │ │ │ └── finetune.py │ │ ├── tasks_args.py │ │ ├── vision │ │ │ ├── classification.py │ │ │ ├── eval_utils.py │ │ │ ├── finetune_utils.py │ │ │ └── main.py │ │ └── zeroshot_gpt │ │ │ ├── datasets.py │ │ │ ├── detokenizer.py │ │ │ └── evaluate.py │ ├── tests │ │ ├── ds_config_bf16.json │ │ ├── test_basic.py │ │ ├── test_checkpoints.py │ │ └── test_training.py │ └── tools │ │ ├── __init__.py │ │ ├── convert_checkpoint │ │ ├── README.md │ 
│ ├── __init__.py │ │ ├── common_bf16.json │ │ ├── convert_paxml_optimizer.py │ │ ├── deepspeed_checkpoint.py │ │ ├── deepspeed_to_megatron.py │ │ ├── deepspeed_to_transformers.py │ │ ├── ds_to_universal.py │ │ ├── inspect_checkpoint.py │ │ ├── inspect_deepspeed_checkpoint.py │ │ ├── megatron_optim_merge.py │ │ ├── megatron_optim_merged_to_ds_universal_convert.py │ │ └── verify_checkpoint_non_tp_consistency.py │ │ ├── create_doc_index.py │ │ ├── create_synthetic_dataset.py │ │ ├── generate_samples_gpt.py │ │ ├── linter.py │ │ ├── merge_mp_partitions.py │ │ ├── openwebtext │ │ ├── README.md │ │ ├── add_id.py │ │ ├── blacklist_urls.py │ │ ├── cleanup_dataset.py │ │ ├── cleanup_fix_dataset.py │ │ ├── filter_ngrams.py │ │ ├── find_duplicates.py │ │ ├── group_duplicate_url.py │ │ ├── merge_jsons.py │ │ └── remove_group_duplicates.py │ │ └── preprocess_data.py │ └── llm_finetune │ ├── LICENSE.md │ ├── README.md │ ├── config.json │ ├── configs │ └── ds_zero3.json │ ├── ops_bf16.txt │ ├── requirements.txt │ ├── run_llama_70B_fp8_submission.sh │ └── scripts │ ├── create_warmup_data.py │ ├── gaudi_spawn.py │ ├── mlperf_logging_utils.py │ ├── train.py │ └── utils.py ├── PyTorch ├── __init__.py ├── audio │ └── wav2vec2 │ │ └── inference │ │ ├── LICENSE │ │ ├── README.md │ │ ├── librispeech_asr_test_clean.py │ │ ├── requirements.txt │ │ └── wav2vec.py ├── computer_vision │ ├── classification │ │ ├── ViT │ │ │ ├── LICENSE │ │ │ ├── README.md │ │ │ ├── img │ │ │ │ ├── figure1.png │ │ │ │ ├── figure2.png │ │ │ │ └── figure3.png │ │ │ ├── models │ │ │ │ ├── configs.py │ │ │ │ ├── modeling.py │ │ │ │ └── modeling_resnet.py │ │ │ ├── ops_bf16.txt │ │ │ ├── ops_fp32.txt │ │ │ ├── requirements.txt │ │ │ ├── train.py │ │ │ ├── visualize_attention_map.ipynb │ │ │ └── vit_utils │ │ │ │ ├── data_utils.py │ │ │ │ ├── dist_util.py │ │ │ │ └── scheduler.py │ │ └── torchvision │ │ │ ├── LICENSE │ │ │ ├── README.md │ │ │ ├── data_loaders.py │ │ │ ├── googlenet_utils.py │ │ │ ├── inference.py │ │ │ ├── main.py │ │ │ ├── media_pipe_settings.py │ │ │ ├── model │ │ │ ├── __init__.py │ │ │ ├── optimizer.py │ │ │ ├── resnet.py │ │ │ └── utils.py │ │ │ ├── ops_bf16_Resnet.txt │ │ │ ├── ops_fp32_Resnet.txt │ │ │ ├── requirements.txt │ │ │ ├── requirements_u24.txt │ │ │ ├── resnet_media_pipe.py │ │ │ ├── train.py │ │ │ └── utils.py │ ├── detection │ │ └── yolox │ │ │ ├── LICENSE │ │ │ ├── README.md │ │ │ ├── assets │ │ │ ├── demo.png │ │ │ ├── dog.jpg │ │ │ ├── git_fig.png │ │ │ └── logo.png │ │ │ ├── demo │ │ │ ├── MegEngine │ │ │ │ ├── cpp │ │ │ │ │ ├── README.md │ │ │ │ │ ├── build.sh │ │ │ │ │ └── yolox.cpp │ │ │ │ └── python │ │ │ │ │ ├── README.md │ │ │ │ │ ├── build.py │ │ │ │ │ ├── convert_weights.py │ │ │ │ │ ├── demo.py │ │ │ │ │ ├── dump.py │ │ │ │ │ └── models │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── darknet.py │ │ │ │ │ ├── network_blocks.py │ │ │ │ │ ├── yolo_fpn.py │ │ │ │ │ ├── yolo_head.py │ │ │ │ │ ├── yolo_pafpn.py │ │ │ │ │ └── yolox.py │ │ │ ├── ONNXRuntime │ │ │ │ ├── README.md │ │ │ │ └── onnx_inference.py │ │ │ ├── OpenVINO │ │ │ │ ├── README.md │ │ │ │ ├── cpp │ │ │ │ │ ├── CMakeLists.txt │ │ │ │ │ ├── README.md │ │ │ │ │ └── yolox_openvino.cpp │ │ │ │ └── python │ │ │ │ │ ├── README.md │ │ │ │ │ └── openvino_inference.py │ │ │ ├── TensorRT │ │ │ │ ├── cpp │ │ │ │ │ ├── CMakeLists.txt │ │ │ │ │ ├── README.md │ │ │ │ │ ├── logging.h │ │ │ │ │ └── yolox.cpp │ │ │ │ └── python │ │ │ │ │ └── README.md │ │ │ └── ncnn │ │ │ │ ├── android │ │ │ │ ├── README.md │ │ │ │ ├── app │ │ │ │ │ ├── build.gradle │ │ │ │ 
│ └── src │ │ │ │ │ │ └── main │ │ │ │ │ │ ├── AndroidManifest.xml │ │ │ │ │ │ ├── assets │ │ │ │ │ │ └── yolox.param │ │ │ │ │ │ ├── java │ │ │ │ │ │ └── com │ │ │ │ │ │ │ └── megvii │ │ │ │ │ │ │ └── yoloXncnn │ │ │ │ │ │ │ ├── MainActivity.java │ │ │ │ │ │ │ ├── YOLOXncnn.java │ │ │ │ │ │ │ └── yoloXncnn.java │ │ │ │ │ │ ├── jni │ │ │ │ │ │ ├── CMakeLists.txt │ │ │ │ │ │ └── yoloXncnn_jni.cpp │ │ │ │ │ │ └── res │ │ │ │ │ │ ├── layout │ │ │ │ │ │ └── main.xml │ │ │ │ │ │ └── values │ │ │ │ │ │ └── strings.xml │ │ │ │ ├── build.gradle │ │ │ │ ├── gradle │ │ │ │ │ └── wrapper │ │ │ │ │ │ ├── gradle-wrapper.jar │ │ │ │ │ │ └── gradle-wrapper.properties │ │ │ │ ├── gradlew │ │ │ │ ├── gradlew.bat │ │ │ │ └── settings.gradle │ │ │ │ └── cpp │ │ │ │ ├── README.md │ │ │ │ └── yolox.cpp │ │ │ ├── docs │ │ │ ├── .gitignore │ │ │ ├── Makefile │ │ │ ├── _static │ │ │ │ └── css │ │ │ │ │ └── custom.css │ │ │ ├── conf.py │ │ │ ├── demo │ │ │ │ ├── megengine_cpp_readme.md │ │ │ │ ├── megengine_py_readme.md │ │ │ │ ├── ncnn_android_readme.md │ │ │ │ ├── ncnn_cpp_readme.md │ │ │ │ ├── onnx_readme.md │ │ │ │ ├── openvino_cpp_readme.md │ │ │ │ ├── openvino_py_readme.md │ │ │ │ ├── trt_cpp_readme.md │ │ │ │ └── trt_py_readme.md │ │ │ ├── index.rst │ │ │ ├── manipulate_training_image_size.md │ │ │ ├── model_zoo.md │ │ │ ├── quick_run.md │ │ │ ├── requirements-doc.txt │ │ │ ├── train_custom_data.md │ │ │ └── updates_note.md │ │ │ ├── download_dataset.sh │ │ │ ├── exps │ │ │ ├── default │ │ │ │ ├── __init__.py │ │ │ │ ├── yolov3.py │ │ │ │ ├── yolox_l.py │ │ │ │ ├── yolox_m.py │ │ │ │ ├── yolox_nano.py │ │ │ │ ├── yolox_s.py │ │ │ │ ├── yolox_tiny.py │ │ │ │ └── yolox_x.py │ │ │ └── example │ │ │ │ ├── custom │ │ │ │ ├── nano.py │ │ │ │ └── yolox_s.py │ │ │ │ └── yolox_voc │ │ │ │ └── yolox_voc_s.py │ │ │ ├── hubconf.py │ │ │ ├── ops_bf16_yolox.txt │ │ │ ├── ops_fp32_yolox.txt │ │ │ ├── requirements.txt │ │ │ ├── setup.cfg │ │ │ ├── setup.py │ │ │ ├── tests │ │ │ ├── __init__.py │ │ │ └── utils │ │ │ │ └── test_model_utils.py │ │ │ ├── tools │ │ │ ├── __init__.py │ │ │ ├── demo.py │ │ │ ├── eval.py │ │ │ ├── export_onnx.py │ │ │ ├── export_torchscript.py │ │ │ ├── train.py │ │ │ └── trt.py │ │ │ └── yolox │ │ │ ├── __init__.py │ │ │ ├── core │ │ │ ├── __init__.py │ │ │ ├── launch.py │ │ │ └── trainer.py │ │ │ ├── data │ │ │ ├── __init__.py │ │ │ ├── data_augment.py │ │ │ ├── data_prefetcher.py │ │ │ ├── dataloading.py │ │ │ ├── datasets │ │ │ │ ├── __init__.py │ │ │ │ ├── coco.py │ │ │ │ ├── coco_classes.py │ │ │ │ ├── datasets_wrapper.py │ │ │ │ ├── mosaicdetection.py │ │ │ │ ├── voc.py │ │ │ │ └── voc_classes.py │ │ │ └── samplers.py │ │ │ ├── evaluators │ │ │ ├── __init__.py │ │ │ ├── coco_evaluator.py │ │ │ ├── voc_eval.py │ │ │ └── voc_evaluator.py │ │ │ ├── exp │ │ │ ├── __init__.py │ │ │ ├── base_exp.py │ │ │ ├── build.py │ │ │ ├── default │ │ │ │ └── __init__.py │ │ │ └── yolox_base.py │ │ │ ├── layers │ │ │ ├── __init__.py │ │ │ ├── csrc │ │ │ │ ├── cocoeval │ │ │ │ │ ├── cocoeval.cpp │ │ │ │ │ └── cocoeval.h │ │ │ │ └── vision.cpp │ │ │ └── fast_coco_eval_api.py │ │ │ ├── models │ │ │ ├── __init__.py │ │ │ ├── build.py │ │ │ ├── darknet.py │ │ │ ├── losses.py │ │ │ ├── network_blocks.py │ │ │ ├── yolo_fpn.py │ │ │ ├── yolo_head.py │ │ │ ├── yolo_head_script.py │ │ │ ├── yolo_pafpn.py │ │ │ └── yolox.py │ │ │ ├── tools │ │ │ └── __init__.py │ │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── allreduce_norm.py │ │ │ ├── boxes.py │ │ │ ├── checkpoint.py │ │ │ ├── compat.py │ │ │ ├── demo_utils.py │ │ │ 
├── dist.py │ │ │ ├── ema.py │ │ │ ├── logger.py │ │ │ ├── lr_scheduler.py │ │ │ ├── metric.py │ │ │ ├── model_utils.py │ │ │ ├── setup_env.py │ │ │ └── visualize.py │ └── segmentation │ │ └── Unet │ │ ├── Dockerfile │ │ ├── LICENSE │ │ ├── README.md │ │ ├── config │ │ ├── ops_bf16_unet.txt │ │ └── ops_fp32_unet.txt │ │ ├── data_loading │ │ ├── dali_loader.py │ │ └── data_module.py │ │ ├── data_preprocessing │ │ ├── configs.py │ │ └── preprocessor.py │ │ ├── download.py │ │ ├── evaluate.py │ │ ├── images │ │ └── unet3d.png │ │ ├── lightning_trainer │ │ ├── __init__.py │ │ └── ptl.py │ │ ├── main.py │ │ ├── models │ │ ├── dice.py │ │ ├── layers.py │ │ ├── loss.py │ │ ├── metrics.py │ │ ├── monai_sliding_window_inference.py │ │ ├── nn_unet.py │ │ ├── pl_metric.py │ │ └── unet.py │ │ ├── preprocess.py │ │ ├── pytorch │ │ ├── early_stopping_unet.py │ │ ├── misc.py │ │ ├── npt.py │ │ └── trainer.py │ │ ├── requirements.txt │ │ ├── requirements_u22.txt │ │ ├── requirements_u24.txt │ │ ├── scripts │ │ ├── benchmark.py │ │ ├── inference.py │ │ └── train.py │ │ └── utils │ │ ├── __init__.py │ │ ├── early_stopping_unet.py │ │ ├── gpu_affinity.py │ │ ├── logger.py │ │ └── utils.py ├── examples │ ├── DeepSpeed │ │ └── cifar_example │ │ │ ├── LICENSE │ │ │ ├── README.md │ │ │ ├── cifar10_deepspeed.py │ │ │ ├── ds_config.json │ │ │ ├── requirements.txt │ │ │ └── run_ds_habanax8.sh │ ├── bucketing │ │ ├── README.md │ │ ├── brute_force_min_pad_waste.py │ │ ├── bucket.py │ │ ├── bucket_analysis.svg │ │ ├── bucket_analysis_bar_gaussian.svg │ │ ├── bucket_analysis_bar_squad.svg │ │ ├── bucket_analysis_num_steps_gaussian.svg │ │ ├── datasets_library.py │ │ ├── gaussian.svg │ │ ├── lloyd_max_bucket.py │ │ ├── plotting.py │ │ ├── requirements.txt │ │ ├── run_demo_bucketing_gaussian.py │ │ ├── run_demo_bucketing_squad.py │ │ ├── run_demo_controlling_num_steps.py │ │ ├── run_demo_gaussian.py │ │ ├── run_demo_squad.py │ │ ├── squad.svg │ │ └── test.py │ ├── computer_vision │ │ └── hello_world │ │ │ ├── README.md │ │ │ ├── example.py │ │ │ ├── mnist.py │ │ │ └── utils.py │ ├── custom_op │ │ ├── custom_fusedsdpa │ │ │ ├── README.md │ │ │ └── custom_fusedsdpa_op.patch │ │ ├── legacy_custom_op_API │ │ │ ├── custom_relu │ │ │ │ ├── README.md │ │ │ │ ├── __init__.py │ │ │ │ ├── custom_relu.py │ │ │ │ ├── custom_relu_op.patch │ │ │ │ ├── hpu_custom_op_relu_test.py │ │ │ │ ├── hpu_custom_relu.cpp │ │ │ │ └── setup.py │ │ │ └── custom_topk │ │ │ │ ├── README.md │ │ │ │ ├── hpu_custom_op_topk_test.py │ │ │ │ ├── hpu_custom_topk.cpp │ │ │ │ └── setup.py │ │ └── pt2_custom_op_API │ │ │ ├── custom_relu │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── custom_relu.py │ │ │ ├── custom_relu_op.patch │ │ │ ├── hpu_custom_op_relu_test.py │ │ │ ├── hpu_custom_relu.cpp │ │ │ └── setup.py │ │ │ └── custom_topk │ │ │ ├── README.md │ │ │ ├── hpu_custom_topk.cpp │ │ │ ├── setup.py │ │ │ └── test_hpu_custom_op_topk.py │ ├── gpu_migration │ │ ├── README.md │ │ ├── computer_vision │ │ │ └── classification │ │ │ │ └── torchvision │ │ │ │ ├── LICENSE │ │ │ │ ├── README.md │ │ │ │ ├── gpu_migration_logs │ │ │ │ └── gpu_migration_958.log │ │ │ │ ├── patches │ │ │ │ ├── lr_scheduler.diff │ │ │ │ ├── minimal_changes.diff │ │ │ │ └── performance_improvements.diff │ │ │ │ ├── presets.py │ │ │ │ ├── sampler.py │ │ │ │ ├── train.py │ │ │ │ ├── train_quantization.py │ │ │ │ ├── transforms.py │ │ │ │ └── utils.py │ │ ├── generative_models │ │ │ └── stable-diffusion │ │ │ │ ├── LICENSE │ │ │ │ ├── README.md │ │ │ │ ├── configs │ │ │ │ ├── autoencoder 
│ │ │ │ │ ├── autoencoder_kl_16x16x16.yaml │ │ │ │ │ ├── autoencoder_kl_32x32x4.yaml │ │ │ │ │ ├── autoencoder_kl_64x64x3.yaml │ │ │ │ │ └── autoencoder_kl_8x8x64.yaml │ │ │ │ ├── latent-diffusion │ │ │ │ │ ├── celebahq-ldm-vq-4.yaml │ │ │ │ │ ├── cin-ldm-vq-f8.yaml │ │ │ │ │ ├── cin256-v2.yaml │ │ │ │ │ ├── ffhq-ldm-vq-4.yaml │ │ │ │ │ ├── lsun_bedrooms-ldm-vq-4.yaml │ │ │ │ │ ├── lsun_churches-ldm-kl-8.yaml │ │ │ │ │ └── txt2img-1p4B-eval.yaml │ │ │ │ └── stable-diffusion │ │ │ │ │ ├── dev.yaml │ │ │ │ │ ├── dev_mn.yaml │ │ │ │ │ ├── dev_mn_dummy.yaml │ │ │ │ │ ├── inpainting │ │ │ │ │ ├── v1-edgeinpainting.yaml │ │ │ │ │ ├── v1-finetune-for-inpainting-laion-aesthetic-larger-masks-and-ucfg.yaml │ │ │ │ │ ├── v1-finetune-for-inpainting-laion-aesthetic-larger-masks.yaml │ │ │ │ │ └── v1-finetune-for-inpainting-laion-iaesthe.yaml │ │ │ │ │ ├── txt2img-1p4B-multinode-clip-encoder-high-res-512.yaml │ │ │ │ │ ├── txt2img-1p4B-multinode-clip-encoder.yaml │ │ │ │ │ ├── txt2img-1p4B-multinode-t5-encoder.yaml │ │ │ │ │ ├── txt2img-1p4B-multinode.yaml │ │ │ │ │ ├── txt2img-clip-encoder-dev.yaml │ │ │ │ │ ├── txt2img-ldm-frozen-dev.yaml │ │ │ │ │ ├── txt2img-ldm-unfrozen-dev.yaml │ │ │ │ │ ├── txt2img-ldm-vae-f8.yaml │ │ │ │ │ ├── txt2img-multinode-clip-encoder-f16-1024-laion-hr.yaml │ │ │ │ │ ├── txt2img-multinode-clip-encoder-f16-256-pretraining.yaml │ │ │ │ │ ├── txt2img-multinode-clip-encoder-f16-768-laion-hr-inference.yaml │ │ │ │ │ ├── txt2img-multinode-clip-encoder-f16-768-laion-hr.yaml │ │ │ │ │ ├── txt2img-multinode-clip-encoder-f16-768.yaml │ │ │ │ │ ├── txt2img-t5-encoder-dev.yaml │ │ │ │ │ ├── txt2img-upscale-clip-encoder-f16-1024-dev.yaml │ │ │ │ │ ├── txt2img-upscale-clip-encoder-f16-1024.yaml │ │ │ │ │ ├── txt2img-v2-clip-encoder-improved_aesthetics-256-dev.yaml │ │ │ │ │ ├── txt2img-v2-clip-encoder-improved_aesthetics-256.yaml │ │ │ │ │ ├── txt2img-v2-clip-encoder-improved_aesthetics-512-dev.yaml │ │ │ │ │ ├── txt2img-v2-clip-encoder-improved_aesthetics-512.yaml │ │ │ │ │ ├── upscaling │ │ │ │ │ └── upscale-v1-with-f16.yaml │ │ │ │ │ ├── v1-inference.yaml │ │ │ │ │ ├── v1_improvedaesthetics.yaml │ │ │ │ │ ├── v1_laionhr.yaml │ │ │ │ │ ├── v2_laionhr1024.yaml │ │ │ │ │ ├── v2_laionhr1024_2.yaml │ │ │ │ │ ├── v2_pretraining.yaml │ │ │ │ │ └── v3_pretraining.yaml │ │ │ │ ├── environment.yaml │ │ │ │ ├── gpu_migration_logs │ │ │ │ └── gpu_migration_1762.log │ │ │ │ ├── hpu_graph_utils.py │ │ │ │ ├── ldm │ │ │ │ ├── data │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── base.py │ │ │ │ │ ├── coco.py │ │ │ │ │ ├── dummy.py │ │ │ │ │ ├── imagenet.py │ │ │ │ │ ├── inpainting │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ └── synthetic_mask.py │ │ │ │ │ ├── laion.py │ │ │ │ │ └── lsun.py │ │ │ │ ├── lr_scheduler.py │ │ │ │ ├── models │ │ │ │ │ ├── autoencoder.py │ │ │ │ │ └── diffusion │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── classifier.py │ │ │ │ │ │ ├── ddim.py │ │ │ │ │ │ ├── ddpm.py │ │ │ │ │ │ ├── plms.py │ │ │ │ │ │ └── sampling_util.py │ │ │ │ ├── modules │ │ │ │ │ ├── attention.py │ │ │ │ │ ├── diffusionmodules │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── model.py │ │ │ │ │ │ ├── openaimodel.py │ │ │ │ │ │ └── util.py │ │ │ │ │ ├── distributions │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ └── distributions.py │ │ │ │ │ ├── ema.py │ │ │ │ │ ├── encoders │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ └── modules.py │ │ │ │ │ ├── evaluate │ │ │ │ │ │ ├── adm_evaluator.py │ │ │ │ │ │ ├── evaluate_perceptualsim.py │ │ │ │ │ │ ├── frechet_video_distance.py │ │ │ │ │ │ ├── ssim.py │ │ │ │ │ │ └── 
torch_frechet_video_distance.py │ │ │ │ │ ├── image_degradation │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── bsrgan.py │ │ │ │ │ │ ├── bsrgan_light.py │ │ │ │ │ │ ├── utils │ │ │ │ │ │ │ └── test.png │ │ │ │ │ │ └── utils_image.py │ │ │ │ │ ├── losses │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── contperceptual.py │ │ │ │ │ │ └── vqperceptual.py │ │ │ │ │ └── x_transformer.py │ │ │ │ └── util.py │ │ │ │ ├── main.py │ │ │ │ ├── models │ │ │ │ ├── first_stage_models │ │ │ │ │ ├── kl-f16 │ │ │ │ │ │ └── config.yaml │ │ │ │ │ ├── kl-f32 │ │ │ │ │ │ └── config.yaml │ │ │ │ │ ├── kl-f4 │ │ │ │ │ │ └── config.yaml │ │ │ │ │ ├── kl-f8 │ │ │ │ │ │ └── config.yaml │ │ │ │ │ ├── vq-f16 │ │ │ │ │ │ └── config.yaml │ │ │ │ │ ├── vq-f4-noattn │ │ │ │ │ │ └── config.yaml │ │ │ │ │ ├── vq-f4 │ │ │ │ │ │ └── config.yaml │ │ │ │ │ ├── vq-f8-n256 │ │ │ │ │ │ └── config.yaml │ │ │ │ │ └── vq-f8 │ │ │ │ │ │ └── config.yaml │ │ │ │ └── ldm │ │ │ │ │ ├── bsr_sr │ │ │ │ │ └── config.yaml │ │ │ │ │ ├── celeba256 │ │ │ │ │ └── config.yaml │ │ │ │ │ ├── cin256 │ │ │ │ │ └── config.yaml │ │ │ │ │ ├── ffhq256 │ │ │ │ │ └── config.yaml │ │ │ │ │ ├── inpainting_big │ │ │ │ │ └── config.yaml │ │ │ │ │ ├── layout2img-openimages256 │ │ │ │ │ └── config.yaml │ │ │ │ │ ├── lsun_beds256 │ │ │ │ │ └── config.yaml │ │ │ │ │ ├── lsun_churches256 │ │ │ │ │ └── config.yaml │ │ │ │ │ ├── semantic_synthesis256 │ │ │ │ │ └── config.yaml │ │ │ │ │ ├── semantic_synthesis512 │ │ │ │ │ └── config.yaml │ │ │ │ │ └── text2img256 │ │ │ │ │ └── config.yaml │ │ │ │ ├── notebook_helpers.py │ │ │ │ ├── patches │ │ │ │ ├── hpu_graph.diff │ │ │ │ ├── minimal_changes.diff │ │ │ │ └── randn_to_cpu.diff │ │ │ │ ├── requirements.txt │ │ │ │ ├── scripts │ │ │ │ ├── autoencoder-eval.py │ │ │ │ ├── checker.py │ │ │ │ ├── cmd_on_new_ckpt.py │ │ │ │ ├── demo │ │ │ │ │ └── inpainting.py │ │ │ │ ├── download_first_stages.sh │ │ │ │ ├── download_models.sh │ │ │ │ ├── img2img.py │ │ │ │ ├── inpaint.py │ │ │ │ ├── logging_template.py │ │ │ │ ├── mnist-distributed.py │ │ │ │ ├── printckpt.py │ │ │ │ ├── prompts │ │ │ │ │ ├── aesthetic-prompts-plain.txt │ │ │ │ │ ├── aesthetic-prompts-surrealism.txt │ │ │ │ │ ├── prompts-with-wings.txt │ │ │ │ │ ├── six-prompts │ │ │ │ │ ├── weird-dalle-prompts.txt │ │ │ │ │ ├── wings1.txt │ │ │ │ │ ├── wings2.txt │ │ │ │ │ ├── wings3.txt │ │ │ │ │ └── wings4.txt │ │ │ │ ├── prune-ckpt.py │ │ │ │ ├── sample_diffusion.py │ │ │ │ ├── slurm │ │ │ │ │ ├── README.md │ │ │ │ │ ├── resume_512 │ │ │ │ │ │ ├── launcher.sh │ │ │ │ │ │ └── sbatch.sh │ │ │ │ │ ├── resume_512_improvedaesthetic │ │ │ │ │ │ ├── launcher.sh │ │ │ │ │ │ └── sbatch.sh │ │ │ │ │ ├── resume_768_hr │ │ │ │ │ │ ├── launcher.sh │ │ │ │ │ │ └── sbatch.sh │ │ │ │ │ ├── v1-upscaling-f16-pretraining-512-aesthetics │ │ │ │ │ │ ├── launcher.sh │ │ │ │ │ │ └── sbatch.sh │ │ │ │ │ ├── v1_edgeinpainting │ │ │ │ │ │ ├── launcher.sh │ │ │ │ │ │ └── sbatch.sh │ │ │ │ │ ├── v1_iahr_torch111 │ │ │ │ │ │ ├── launcher.sh │ │ │ │ │ │ └── sbatch.sh │ │ │ │ │ ├── v1_iahr_torch111_ucg │ │ │ │ │ │ ├── launcher.sh │ │ │ │ │ │ └── sbatch.sh │ │ │ │ │ ├── v1_improvedaesthetics │ │ │ │ │ │ ├── launcher.sh │ │ │ │ │ │ └── sbatch.sh │ │ │ │ │ ├── v1_improvedaesthetics_torch111 │ │ │ │ │ │ ├── launcher.sh │ │ │ │ │ │ └── sbatch.sh │ │ │ │ │ ├── v1_inpainting_aesthetics-larger-masks-ucg │ │ │ │ │ │ ├── launcher.sh │ │ │ │ │ │ └── sbatch.sh │ │ │ │ │ ├── v1_inpainting_aesthetics-larger-masks │ │ │ │ │ │ ├── launcher.sh │ │ │ │ │ │ └── sbatch.sh │ │ │ │ │ ├── v1_inpainting_improvedaesthetics_torch111 
│ │ │ │ │ │ ├── launcher.sh │ │ │ │ │ │ └── sbatch.sh │ │ │ │ │ ├── v1_laionhr_torch111 │ │ │ │ │ │ ├── launcher.sh │ │ │ │ │ │ └── sbatch.sh │ │ │ │ │ ├── v2_laionhr1024 │ │ │ │ │ │ ├── launcher.sh │ │ │ │ │ │ └── sbatch.sh │ │ │ │ │ ├── v2_laionhr1024_2 │ │ │ │ │ │ ├── launcher.sh │ │ │ │ │ │ └── sbatch.sh │ │ │ │ │ ├── v2_pretraining │ │ │ │ │ │ ├── launcher.sh │ │ │ │ │ │ └── sbatch.sh │ │ │ │ │ └── v3_pretraining │ │ │ │ │ │ ├── launcher.sh │ │ │ │ │ │ └── sbatch.sh │ │ │ │ ├── test_gpu.py │ │ │ │ ├── test_gpu.sh │ │ │ │ ├── txt2img.py │ │ │ │ └── vqgan_codebook_visualizer.py │ │ │ │ └── setup.py │ │ ├── nlp │ │ │ ├── DeepSpeedExamples │ │ │ │ └── Megatron-DeepSpeed │ │ │ │ │ ├── CODEOWNERS │ │ │ │ │ ├── LICENSE │ │ │ │ │ ├── MANIFEST.in │ │ │ │ │ ├── README.md │ │ │ │ │ ├── SECURITY.md │ │ │ │ │ ├── dataset │ │ │ │ │ ├── README.md │ │ │ │ │ ├── download_books.sh │ │ │ │ │ ├── download_ckpt.sh │ │ │ │ │ └── download_vocab.sh │ │ │ │ │ ├── examples │ │ │ │ │ ├── MoE │ │ │ │ │ │ ├── ds_config_gpt_TEMPLATE.json │ │ │ │ │ │ ├── ds_config_gpt_Zero2_TEMPLATE.json │ │ │ │ │ │ ├── ds_evalharness.sh │ │ │ │ │ │ ├── ds_pretrain_gpt_1.3B_MoE128.sh │ │ │ │ │ │ ├── ds_pretrain_gpt_1.3B_PR-MoE64or128.sh │ │ │ │ │ │ ├── ds_pretrain_gpt_1.3B_PR-MoE64or128_MoS.sh │ │ │ │ │ │ ├── ds_pretrain_gpt_1.3B_dense.sh │ │ │ │ │ │ ├── ds_pretrain_gpt_1.3B_dense_cl.sh │ │ │ │ │ │ ├── ds_pretrain_gpt_125M_MoE64.sh │ │ │ │ │ │ ├── ds_pretrain_gpt_125M_dense_cl.sh │ │ │ │ │ │ ├── ds_pretrain_gpt_350M_MoE128.sh │ │ │ │ │ │ ├── ds_pretrain_gpt_350M_PR-MoE32or64.sh │ │ │ │ │ │ ├── ds_pretrain_gpt_350M_PR-MoE32or64_MoS.sh │ │ │ │ │ │ ├── ds_pretrain_gpt_350M_dense.sh │ │ │ │ │ │ ├── ds_pretrain_gpt_6.7B_dense.sh │ │ │ │ │ │ └── readme_evalharness.md │ │ │ │ │ ├── README.md │ │ │ │ │ ├── azure │ │ │ │ │ │ ├── README.md │ │ │ │ │ │ ├── run-175b.sh │ │ │ │ │ │ ├── run-1t.sh │ │ │ │ │ │ └── run-benchmark-model.sh │ │ │ │ │ ├── azureml │ │ │ │ │ │ ├── Dockerfile.dockerfile │ │ │ │ │ │ ├── README.md │ │ │ │ │ │ ├── aml_submit.py │ │ │ │ │ │ └── prepare_dataset.py │ │ │ │ │ ├── compression │ │ │ │ │ │ ├── 125M-Int8-test-64gpu-distilled-group48.sh │ │ │ │ │ │ ├── 125M-L10-Int8-test-64gpu-distilled-group48.sh │ │ │ │ │ │ ├── 125M-L12-Int8-test-64gpu-distilled-group48.sh │ │ │ │ │ │ ├── ds_config_gpt_TEMPLATE.json │ │ │ │ │ │ ├── ds_config_gpt_TEMPLATE_compression.json │ │ │ │ │ │ ├── ds_evalharness.sh │ │ │ │ │ │ ├── ds_pretrain_gpt_1.3B_dense_cl_kd.sh │ │ │ │ │ │ ├── ds_pretrain_gpt_125M_dense_cl_kd.sh │ │ │ │ │ │ ├── ds_pretrain_gpt_125M_dense_kd.sh │ │ │ │ │ │ └── ds_pretrain_gpt_350M_dense_kd.sh │ │ │ │ │ ├── create_embeddings.sh │ │ │ │ │ ├── curriculum_learning │ │ │ │ │ │ ├── README.md │ │ │ │ │ │ ├── ds_pretrain_gpt2.sh │ │ │ │ │ │ ├── ds_train.sh │ │ │ │ │ │ ├── ds_zero_stage_1_config_baseline.json │ │ │ │ │ │ └── ds_zero_stage_1_config_curriculum_fixed_linear.json │ │ │ │ │ ├── evaluate_ict_zeroshot_nq.sh │ │ │ │ │ ├── evaluate_zeroshot_gpt.sh │ │ │ │ │ ├── finetune_mnli_distributed.sh │ │ │ │ │ ├── finetune_race_distributed.sh │ │ │ │ │ ├── generate_text.sh │ │ │ │ │ ├── merge_mp_bert.sh │ │ │ │ │ ├── pretrain_bert.sh │ │ │ │ │ ├── pretrain_bert_distributed.sh │ │ │ │ │ ├── pretrain_bert_distributed_with_mp.sh │ │ │ │ │ ├── pretrain_gpt.sh │ │ │ │ │ ├── pretrain_gpt3_175B.sh │ │ │ │ │ ├── pretrain_gpt_distributed.sh │ │ │ │ │ ├── pretrain_gpt_distributed_with_mp.sh │ │ │ │ │ ├── pretrain_ict.sh │ │ │ │ │ ├── pretrain_t5.sh │ │ │ │ │ ├── pretrain_t5_distributed.sh │ │ │ │ │ ├── pretrain_t5_distributed_with_mp.sh │ │ │ 
│ │ └── run_deepspeed_example.sh │ │ │ │ │ ├── gpu_migration_logs │ │ │ │ │ └── gpu_migration_424488.log │ │ │ │ │ ├── images │ │ │ │ │ └── cases_april2021.png │ │ │ │ │ ├── megatron │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── arguments.py │ │ │ │ │ ├── checkpointing.py │ │ │ │ │ ├── data │ │ │ │ │ │ ├── Makefile │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── autoaugment.py │ │ │ │ │ │ ├── bert_dataset.py │ │ │ │ │ │ ├── biencoder_dataset_utils.py │ │ │ │ │ │ ├── blendable_dataset.py │ │ │ │ │ │ ├── data_samplers.py │ │ │ │ │ │ ├── dataset_utils.py │ │ │ │ │ │ ├── gpt_dataset.py │ │ │ │ │ │ ├── helpers.cpp │ │ │ │ │ │ ├── ict_dataset.py │ │ │ │ │ │ ├── indexed_dataset.py │ │ │ │ │ │ ├── orqa_wiki_dataset.py │ │ │ │ │ │ ├── realm_dataset_utils.py │ │ │ │ │ │ ├── realm_index.py │ │ │ │ │ │ ├── t5_dataset.py │ │ │ │ │ │ ├── test │ │ │ │ │ │ │ ├── test_indexed_dataset.py │ │ │ │ │ │ │ └── test_preprocess_data.sh │ │ │ │ │ │ └── vit_dataset.py │ │ │ │ │ ├── enums.py │ │ │ │ │ ├── fp16_deprecated │ │ │ │ │ │ └── loss_scaler.py │ │ │ │ │ ├── fused_kernels │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── compat.h │ │ │ │ │ │ ├── layer_norm_cuda.cpp │ │ │ │ │ │ ├── layer_norm_cuda_kernel.cu │ │ │ │ │ │ ├── scaled_masked_softmax.cpp │ │ │ │ │ │ ├── scaled_masked_softmax.h │ │ │ │ │ │ ├── scaled_masked_softmax_cuda.cu │ │ │ │ │ │ ├── scaled_upper_triang_masked_softmax.cpp │ │ │ │ │ │ ├── scaled_upper_triang_masked_softmax.h │ │ │ │ │ │ ├── scaled_upper_triang_masked_softmax_cuda.cu │ │ │ │ │ │ └── type_shim.h │ │ │ │ │ ├── global_vars.py │ │ │ │ │ ├── indexer.py │ │ │ │ │ ├── initialize.py │ │ │ │ │ ├── learning_rates.py │ │ │ │ │ ├── memory.py │ │ │ │ │ ├── microbatches.py │ │ │ │ │ ├── model │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── bert_model.py │ │ │ │ │ │ ├── biencoder_model.py │ │ │ │ │ │ ├── classification.py │ │ │ │ │ │ ├── distributed.py │ │ │ │ │ │ ├── enums.py │ │ │ │ │ │ ├── fused_bias_gelu.py │ │ │ │ │ │ ├── fused_layer_norm.py │ │ │ │ │ │ ├── fused_softmax.py │ │ │ │ │ │ ├── gpt_model.py │ │ │ │ │ │ ├── language_model.py │ │ │ │ │ │ ├── module.py │ │ │ │ │ │ ├── multiple_choice.py │ │ │ │ │ │ ├── realm_model.py │ │ │ │ │ │ ├── t5_model.py │ │ │ │ │ │ ├── transformer.py │ │ │ │ │ │ ├── utils.py │ │ │ │ │ │ └── vit_model.py │ │ │ │ │ ├── mpu │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── cross_entropy.py │ │ │ │ │ │ ├── data.py │ │ │ │ │ │ ├── initialize.py │ │ │ │ │ │ ├── layers.py │ │ │ │ │ │ ├── mappings.py │ │ │ │ │ │ ├── random.py │ │ │ │ │ │ ├── tests │ │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ │ ├── commons.py │ │ │ │ │ │ │ ├── test_cross_entropy.py │ │ │ │ │ │ │ ├── test_data.py │ │ │ │ │ │ │ ├── test_initialize.py │ │ │ │ │ │ │ ├── test_layers.py │ │ │ │ │ │ │ └── test_random.py │ │ │ │ │ │ └── utils.py │ │ │ │ │ ├── optimizer │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── clip_grads.py │ │ │ │ │ │ ├── grad_scaler.py │ │ │ │ │ │ └── optimizer.py │ │ │ │ │ ├── p2p_communication.py │ │ │ │ │ ├── package_info.py │ │ │ │ │ ├── schedules.py │ │ │ │ │ ├── text_generation_utils.py │ │ │ │ │ ├── tokenizer │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── bert_tokenization.py │ │ │ │ │ │ ├── gpt2_tokenization.py │ │ │ │ │ │ └── tokenizer.py │ │ │ │ │ ├── training.py │ │ │ │ │ └── utils.py │ │ │ │ │ ├── patches │ │ │ │ │ ├── functional_changes.diff │ │ │ │ │ └── performance_patch_1.diff │ │ │ │ │ ├── pretrain_bert.py │ │ │ │ │ ├── pretrain_gpt.py │ │ │ │ │ ├── pretrain_ict.py │ │ │ │ │ ├── pretrain_t5.py │ │ │ │ │ ├── pretrain_vit.py │ │ │ │ │ ├── requirements.txt │ │ │ │ │ ├── scripts │ │ │ │ │ └── 
run_bloom13b.sh │ │ │ │ │ ├── setup.py │ │ │ │ │ ├── tasks │ │ │ │ │ ├── data_utils.py │ │ │ │ │ ├── ensemble_classifier.py │ │ │ │ │ ├── eval_harness │ │ │ │ │ │ ├── download.py │ │ │ │ │ │ ├── evaluate.py │ │ │ │ │ │ └── report-to-csv.py │ │ │ │ │ ├── eval_utils.py │ │ │ │ │ ├── finetune_utils.py │ │ │ │ │ ├── glue │ │ │ │ │ │ ├── data.py │ │ │ │ │ │ ├── finetune.py │ │ │ │ │ │ ├── mnli.py │ │ │ │ │ │ └── qqp.py │ │ │ │ │ ├── main.py │ │ │ │ │ ├── orqa │ │ │ │ │ │ ├── evaluate_orqa.py │ │ │ │ │ │ ├── evaluate_utils.py │ │ │ │ │ │ └── natural_questions │ │ │ │ │ │ │ ├── nq.py │ │ │ │ │ │ │ ├── qa_utils.py │ │ │ │ │ │ │ └── tokenizers.py │ │ │ │ │ ├── race │ │ │ │ │ │ ├── data.py │ │ │ │ │ │ └── finetune.py │ │ │ │ │ ├── vision │ │ │ │ │ │ ├── classification.py │ │ │ │ │ │ ├── eval_utils.py │ │ │ │ │ │ ├── finetune_utils.py │ │ │ │ │ │ └── main.py │ │ │ │ │ └── zeroshot_gpt │ │ │ │ │ │ ├── datasets.py │ │ │ │ │ │ ├── detokenizer.py │ │ │ │ │ │ └── evaluate.py │ │ │ │ │ ├── tests │ │ │ │ │ └── test_basic.py │ │ │ │ │ └── tools │ │ │ │ │ ├── convert_checkpoint │ │ │ │ │ ├── README.md │ │ │ │ │ ├── deepspeed_checkpoint.py │ │ │ │ │ ├── deepspeed_to_megatron.py │ │ │ │ │ ├── deepspeed_to_transformers.py │ │ │ │ │ ├── inspect_checkpoint.py │ │ │ │ │ └── inspect_deepspeed_checkpoint.py │ │ │ │ │ ├── create_doc_index.py │ │ │ │ │ ├── generate_samples_gpt.py │ │ │ │ │ ├── linter.py │ │ │ │ │ ├── merge_mp_partitions.py │ │ │ │ │ ├── openwebtext │ │ │ │ │ ├── README.md │ │ │ │ │ ├── add_id.py │ │ │ │ │ ├── blacklist_urls.py │ │ │ │ │ ├── cleanup_dataset.py │ │ │ │ │ ├── cleanup_fix_dataset.py │ │ │ │ │ ├── filter_ngrams.py │ │ │ │ │ ├── find_duplicates.py │ │ │ │ │ ├── group_duplicate_url.py │ │ │ │ │ ├── merge_jsons.py │ │ │ │ │ └── remove_group_duplicates.py │ │ │ │ │ └── preprocess_data.py │ │ │ └── bert │ │ │ │ ├── Dockerfile │ │ │ │ ├── LICENSE │ │ │ │ ├── NOTICE │ │ │ │ ├── README.md │ │ │ │ ├── bert_config.json │ │ │ │ ├── bind.sh │ │ │ │ ├── bind_pyt.py │ │ │ │ ├── checkpoints │ │ │ │ └── .keep │ │ │ │ ├── configurations.yml │ │ │ │ ├── create_pretraining_data.py │ │ │ │ ├── data │ │ │ │ ├── BooksDownloader.py │ │ │ │ ├── BookscorpusTextFormatting.py │ │ │ │ ├── Downloader.py │ │ │ │ ├── GLUEDownloader.py │ │ │ │ ├── GooglePretrainedWeightDownloader.py │ │ │ │ ├── NVIDIAPretrainedWeightDownloader.py │ │ │ │ ├── SquadDownloader.py │ │ │ │ ├── TextSharding.py │ │ │ │ ├── WikiDownloader.py │ │ │ │ ├── WikicorpusTextFormatting.py │ │ │ │ ├── __init__.py │ │ │ │ ├── bertPrep.py │ │ │ │ ├── create_datasets_from_start.sh │ │ │ │ └── squad │ │ │ │ │ └── squad_download.sh │ │ │ │ ├── extract_features.py │ │ │ │ ├── file_utils.py │ │ │ │ ├── gpu_migration_logs │ │ │ │ └── gpu_migration_5494.log │ │ │ │ ├── inference.py │ │ │ │ ├── modeling.py │ │ │ │ ├── optimization.py │ │ │ │ ├── patches │ │ │ │ ├── minimal_changes.diff │ │ │ │ ├── performance_improvements.diff │ │ │ │ └── use_packed_dataset.diff │ │ │ │ ├── processors │ │ │ │ ├── __init__.py │ │ │ │ └── glue.py │ │ │ │ ├── requirements.txt │ │ │ │ ├── results │ │ │ │ ├── .keep │ │ │ │ ├── checkpoints │ │ │ │ │ └── lddl_log │ │ │ │ │ │ ├── node-0.txt │ │ │ │ │ │ └── node-0_local-0.txt │ │ │ │ └── dllogger.json │ │ │ │ ├── run.sub │ │ │ │ ├── run_glue.py │ │ │ │ ├── run_pretraining.py │ │ │ │ ├── run_squad.py │ │ │ │ ├── run_swag.py │ │ │ │ ├── schedulers.py │ │ │ │ ├── scripts │ │ │ │ ├── configs │ │ │ │ │ ├── glue_config.sh │ │ │ │ │ ├── pretrain_config.sh │ │ │ │ │ └── squad_config.sh │ │ │ │ ├── data_download.sh │ │ │ │ ├── docker │ │ │ │ │ ├── 
build.sh │ │ │ │ │ └── launch.sh │ │ │ │ ├── run_glue.sh │ │ │ │ ├── run_pretraining.sh │ │ │ │ ├── run_squad.sh │ │ │ │ └── run_swag.sh │ │ │ │ ├── tokenization.py │ │ │ │ ├── utils.py │ │ │ │ └── vocab │ │ │ │ └── vocab │ │ └── simple_examples │ │ │ └── mnist │ │ │ ├── LICENSE │ │ │ ├── README.md │ │ │ ├── gpu_migration_logs │ │ │ └── gpu_migration_66.log │ │ │ └── main.py │ └── multi_tenants │ │ ├── README.md │ │ └── multi_tenants_resnet_pt.sh ├── generative_models │ └── stable-diffusion │ │ ├── LICENSE │ │ ├── README.md │ │ ├── configs │ │ ├── autoencoder │ │ │ ├── autoencoder_kl_16x16x16.yaml │ │ │ ├── autoencoder_kl_32x32x4.yaml │ │ │ ├── autoencoder_kl_64x64x3.yaml │ │ │ └── autoencoder_kl_8x8x64.yaml │ │ ├── latent-diffusion │ │ │ ├── celebahq-ldm-vq-4.yaml │ │ │ ├── cin-ldm-vq-f8.yaml │ │ │ ├── cin256-v2.yaml │ │ │ ├── ffhq-ldm-vq-4.yaml │ │ │ ├── lsun_bedrooms-ldm-vq-4.yaml │ │ │ ├── lsun_churches-ldm-kl-8.yaml │ │ │ └── txt2img-1p4B-eval.yaml │ │ └── stable-diffusion │ │ │ ├── dev.yaml │ │ │ ├── dev_mn.yaml │ │ │ ├── dev_mn_dummy.yaml │ │ │ ├── inpainting │ │ │ ├── v1-edgeinpainting.yaml │ │ │ ├── v1-finetune-for-inpainting-laion-aesthetic-larger-masks-and-ucfg.yaml │ │ │ ├── v1-finetune-for-inpainting-laion-aesthetic-larger-masks.yaml │ │ │ └── v1-finetune-for-inpainting-laion-iaesthe.yaml │ │ │ ├── txt2img-1p4B-multinode-clip-encoder-high-res-512.yaml │ │ │ ├── txt2img-1p4B-multinode-clip-encoder.yaml │ │ │ ├── txt2img-1p4B-multinode-t5-encoder.yaml │ │ │ ├── txt2img-1p4B-multinode.yaml │ │ │ ├── txt2img-clip-encoder-dev.yaml │ │ │ ├── txt2img-ldm-frozen-dev.yaml │ │ │ ├── txt2img-ldm-unfrozen-dev.yaml │ │ │ ├── txt2img-ldm-vae-f8.yaml │ │ │ ├── txt2img-multinode-clip-encoder-f16-1024-laion-hr.yaml │ │ │ ├── txt2img-multinode-clip-encoder-f16-256-pretraining.yaml │ │ │ ├── txt2img-multinode-clip-encoder-f16-768-laion-hr-inference.yaml │ │ │ ├── txt2img-multinode-clip-encoder-f16-768-laion-hr.yaml │ │ │ ├── txt2img-multinode-clip-encoder-f16-768.yaml │ │ │ ├── txt2img-t5-encoder-dev.yaml │ │ │ ├── txt2img-upscale-clip-encoder-f16-1024-dev.yaml │ │ │ ├── txt2img-upscale-clip-encoder-f16-1024.yaml │ │ │ ├── txt2img-v2-clip-encoder-improved_aesthetics-256-dev.yaml │ │ │ ├── txt2img-v2-clip-encoder-improved_aesthetics-256.yaml │ │ │ ├── txt2img-v2-clip-encoder-improved_aesthetics-512-dev.yaml │ │ │ ├── txt2img-v2-clip-encoder-improved_aesthetics-512.yaml │ │ │ ├── upscaling │ │ │ └── upscale-v1-with-f16.yaml │ │ │ ├── v1-inference.yaml │ │ │ ├── v1_improvedaesthetics.yaml │ │ │ ├── v1_laionhr.yaml │ │ │ ├── v2_laionhr1024.yaml │ │ │ ├── v2_laionhr1024_2.yaml │ │ │ ├── v2_pretraining.yaml │ │ │ └── v3_pretraining.yaml │ │ ├── cpu_config.yaml │ │ ├── environment.yaml │ │ ├── hpu_config_web_dataset.yaml │ │ ├── ldm │ │ ├── data │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── coco.py │ │ │ ├── dummy.py │ │ │ ├── imagenet.py │ │ │ ├── inpainting │ │ │ │ ├── __init__.py │ │ │ │ └── synthetic_mask.py │ │ │ ├── laion.py │ │ │ └── lsun.py │ │ ├── lr_scheduler.py │ │ ├── models │ │ │ ├── autoencoder.py │ │ │ └── diffusion │ │ │ │ ├── __init__.py │ │ │ │ ├── classifier.py │ │ │ │ ├── ddim.py │ │ │ │ ├── ddpm.py │ │ │ │ ├── plms.py │ │ │ │ └── sampling_util.py │ │ ├── modules │ │ │ ├── attention.py │ │ │ ├── diffusionmodules │ │ │ │ ├── __init__.py │ │ │ │ ├── model.py │ │ │ │ ├── openaimodel.py │ │ │ │ └── util.py │ │ │ ├── distributions │ │ │ │ ├── __init__.py │ │ │ │ └── distributions.py │ │ │ ├── ema.py │ │ │ ├── encoders │ │ │ │ ├── __init__.py │ │ │ │ └── modules.py │ │ │ ├── evaluate 
│ │ │ │ ├── adm_evaluator.py │ │ │ │ ├── evaluate_perceptualsim.py │ │ │ │ ├── frechet_video_distance.py │ │ │ │ ├── ssim.py │ │ │ │ └── torch_frechet_video_distance.py │ │ │ ├── image_degradation │ │ │ │ ├── __init__.py │ │ │ │ ├── bsrgan.py │ │ │ │ ├── bsrgan_light.py │ │ │ │ ├── utils │ │ │ │ │ └── test.png │ │ │ │ └── utils_image.py │ │ │ ├── losses │ │ │ │ ├── __init__.py │ │ │ │ ├── contperceptual.py │ │ │ │ └── vqperceptual.py │ │ │ └── x_transformer.py │ │ └── util.py │ │ ├── main.py │ │ ├── models │ │ ├── first_stage_models │ │ │ ├── kl-f16 │ │ │ │ └── config.yaml │ │ │ ├── kl-f32 │ │ │ │ └── config.yaml │ │ │ ├── kl-f4 │ │ │ │ └── config.yaml │ │ │ ├── kl-f8 │ │ │ │ └── config.yaml │ │ │ ├── vq-f16 │ │ │ │ └── config.yaml │ │ │ ├── vq-f4-noattn │ │ │ │ └── config.yaml │ │ │ ├── vq-f4 │ │ │ │ └── config.yaml │ │ │ ├── vq-f8-n256 │ │ │ │ └── config.yaml │ │ │ └── vq-f8 │ │ │ │ └── config.yaml │ │ └── ldm │ │ │ ├── bsr_sr │ │ │ └── config.yaml │ │ │ ├── celeba256 │ │ │ └── config.yaml │ │ │ ├── cin256 │ │ │ └── config.yaml │ │ │ ├── ffhq256 │ │ │ └── config.yaml │ │ │ ├── inpainting_big │ │ │ └── config.yaml │ │ │ ├── layout2img-openimages256 │ │ │ └── config.yaml │ │ │ ├── lsun_beds256 │ │ │ └── config.yaml │ │ │ ├── lsun_churches256 │ │ │ └── config.yaml │ │ │ ├── semantic_synthesis256 │ │ │ └── config.yaml │ │ │ ├── semantic_synthesis512 │ │ │ └── config.yaml │ │ │ └── text2img256 │ │ │ └── config.yaml │ │ ├── notebook_helpers.py │ │ ├── ops_bf16.txt │ │ ├── ops_fp32.txt │ │ ├── requirements.txt │ │ ├── scripts │ │ ├── autoencoder-eval.py │ │ ├── checker.py │ │ ├── cmd_on_new_ckpt.py │ │ ├── demo │ │ │ └── inpainting.py │ │ ├── img2img.py │ │ ├── inpaint.py │ │ ├── inpaint_sd.py │ │ ├── logging_template.py │ │ ├── mnist-distributed.py │ │ ├── printckpt.py │ │ ├── prompts │ │ │ ├── aesthetic-prompts-plain.txt │ │ │ ├── aesthetic-prompts-surrealism.txt │ │ │ ├── prompts-with-wings.txt │ │ │ ├── six-prompts │ │ │ ├── weird-dalle-prompts.txt │ │ │ ├── wings1.txt │ │ │ ├── wings2.txt │ │ │ ├── wings3.txt │ │ │ └── wings4.txt │ │ ├── prune-ckpt.py │ │ ├── sample_diffusion.py │ │ ├── slurm │ │ │ ├── README.md │ │ │ ├── eval_inpainting │ │ │ │ ├── launcher.sh │ │ │ │ └── sbatch.sh │ │ │ ├── resume_512 │ │ │ │ ├── launcher.sh │ │ │ │ └── sbatch.sh │ │ │ ├── resume_512_improvedaesthetic │ │ │ │ ├── launcher.sh │ │ │ │ └── sbatch.sh │ │ │ ├── resume_768_hr │ │ │ │ ├── launcher.sh │ │ │ │ └── sbatch.sh │ │ │ ├── v1-upscaling-f16-pretraining-512-aesthetics │ │ │ │ ├── launcher.sh │ │ │ │ └── sbatch.sh │ │ │ ├── v1_edgeinpainting │ │ │ │ ├── launcher.sh │ │ │ │ └── sbatch.sh │ │ │ ├── v1_iahr_torch111 │ │ │ │ ├── launcher.sh │ │ │ │ └── sbatch.sh │ │ │ ├── v1_iahr_torch111_ucg │ │ │ │ ├── launcher.sh │ │ │ │ └── sbatch.sh │ │ │ ├── v1_improvedaesthetics │ │ │ │ ├── launcher.sh │ │ │ │ └── sbatch.sh │ │ │ ├── v1_improvedaesthetics_torch111 │ │ │ │ ├── launcher.sh │ │ │ │ └── sbatch.sh │ │ │ ├── v1_inpainting_aesthetics-larger-masks-ucg │ │ │ │ ├── launcher.sh │ │ │ │ └── sbatch.sh │ │ │ ├── v1_inpainting_aesthetics-larger-masks │ │ │ │ ├── launcher.sh │ │ │ │ └── sbatch.sh │ │ │ ├── v1_inpainting_improvedaesthetics_torch111 │ │ │ │ ├── launcher.sh │ │ │ │ └── sbatch.sh │ │ │ ├── v1_laionhr_torch111 │ │ │ │ ├── launcher.sh │ │ │ │ └── sbatch.sh │ │ │ ├── v2_laionhr1024 │ │ │ │ ├── launcher.sh │ │ │ │ └── sbatch.sh │ │ │ ├── v2_laionhr1024_2 │ │ │ │ ├── launcher.sh │ │ │ │ └── sbatch.sh │ │ │ ├── v2_pretraining │ │ │ │ ├── launcher.sh │ │ │ │ └── sbatch.sh │ │ │ └── v3_pretraining │ │ │ │ ├── 
launcher.sh │ │ │ │ └── sbatch.sh │ │ ├── test_gpu.py │ │ ├── test_gpu.sh │ │ ├── txt2img.py │ │ └── vqgan_codebook_visualizer.py │ │ └── setup.py └── nlp │ ├── DeepSpeedExamples │ └── deepspeed-bert │ │ ├── LICENSE │ │ ├── README.md │ │ ├── create_pretraining_data.py │ │ ├── data │ │ ├── BooksDownloader.py │ │ ├── BookscorpusTextFormatting.py │ │ ├── Downloader.py │ │ ├── GooglePretrainedWeightDownloader.py │ │ ├── TextSharding.py │ │ ├── WikiDownloader.py │ │ ├── WikicorpusTextFormatting.py │ │ ├── __init__.py │ │ ├── bertPrep.py │ │ └── create_datasets_from_start.sh │ │ ├── file_utils.py │ │ ├── lamb.py │ │ ├── lamb_exp.py │ │ ├── lans.py │ │ ├── modeling.py │ │ ├── requirements.txt │ │ ├── run_pretraining.py │ │ ├── schedulers.py │ │ ├── scripts │ │ ├── bert_1.5b_config.json │ │ ├── bert_5b_config.json │ │ ├── deepspeed_config_bert_1.5b.json │ │ ├── deepspeed_config_bert_5b_lans.json │ │ ├── hostsfile │ │ ├── run_bert_1.5b_8x.sh │ │ └── run_bert_5b_32x_lans.sh │ │ ├── tokenization.py │ │ └── utils.py │ └── bert │ ├── LICENSE │ ├── README.md │ ├── bert_config.json │ ├── bert_config_1.2B.json │ ├── create_pretraining_data.py │ ├── data │ ├── BooksDownloader.py │ ├── BookscorpusTextFormatting.py │ ├── Downloader.py │ ├── GooglePretrainedWeightDownloader.py │ ├── TextSharding.py │ ├── WikiDownloader.py │ ├── WikicorpusTextFormatting.py │ ├── __init__.py │ ├── bertPrep.py │ ├── create_datasets_from_start.sh │ └── squad │ │ └── squad_download.sh │ ├── file_utils.py │ ├── lamb.py │ ├── modeling.py │ ├── optimization.py │ ├── pack_pretraining_data_pytorch.py │ ├── pytorch_packed_data_checker.py │ ├── requirements.txt │ ├── run_pretraining.py │ ├── run_squad.py │ ├── schedulers.py │ ├── scripts │ └── run_pretraining.sh │ ├── tokenization.py │ └── utils.py └── README.md /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | # Description 2 | 3 | > :memo: Please include a summary of the changes. 4 | > 5 | > * List any dependencies that are required for the changes. 6 | 7 | ## Type of changes 8 | 9 | Please specify the type of changes, and delete the options that are not relevant. 10 | 11 | - [ ] Documentation update 12 | - [ ] Bug fix (changes which fix an issue) 13 | - [ ] Others (please specify) 14 | 15 | ## Tests 16 | 17 | > :memo: Please describe the tests that you ran to verify your changes. 18 | > 19 | > * Provide the instructions so that we can reproduce. 20 | > * Please also list any relevant details for your test configuration. 21 | 22 | ## Checklist 23 | 24 | - [ ] I agree with the [Developer Certificate of Origin](https://developercertificate.org/). 25 | - [ ] My code conforms to the following coding guidelines: 26 | - [ ] Use Python 3 27 | - [ ] Python code follows [PEP 8 Coding Styles](https://www.python.org/dev/peps/pep-0008/) 28 | - [ ] I have performed a self code review. 29 | - [ ] I have made corresponding changes to the documentation. 30 | - [ ] I have added tests that prove my fix is effective or that my feature works. 
31 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.h5 2 | *__pycache__* 3 | *.used 4 | *.log 5 | *.pbtxt 6 | *.pyc 7 | *.raw 8 | .falsh* 9 | graphs/* 10 | *.vscode 11 | build/ 12 | *.graph_dumps 13 | .idea/ 14 | events.out.tfevents* 15 | *.whl 16 | .gitreview 17 | !PyTorch/examples/gpu_migration/** -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "MLPERF4.0/Inference/llama/tgi-gaudi"] 2 | path = MLPERF4.0/Inference/llama/tgi-gaudi 3 | url = https://github.com/huggingface/tgi-gaudi.git 4 | -------------------------------------------------------------------------------- /MLPERF4.0/Inference/llama/configs/fp8.conf: -------------------------------------------------------------------------------- 1 | *.Offline.min_query_count = 98304 2 | 3 | *.Server.target_qps = 21.1 4 | *.Server.min_query_count = 24576 5 | -------------------------------------------------------------------------------- /MLPERF4.0/Inference/llama/hqt/llama2-70b-8x/config_meas_maxabs_quant_MAXABS_HW.json: -------------------------------------------------------------------------------- 1 | { 2 | "method": "HOOKS", 3 | "mode": "QUANTIZE", 4 | "observer": "maxabs", 5 | "scale_method": "maxabs_hw", 6 | "allowlist": {"types": [], "names": []}, 7 | "blocklist": {"types": [], "names": []}, 8 | "dump_stats_path": "./hqt/llama2-70b-8x/measure", 9 | "dump_stats_xlsx_path": "./hqt/fp8stats.xlsx" 10 | } 11 | -------------------------------------------------------------------------------- /MLPERF4.0/Inference/llama/hqt/llama2-70b-8x/measure_hooks_maxabs_0_8.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HabanaAI/Model-References/da5f246c18f41063a89a04f19ce306d1e61f6345/MLPERF4.0/Inference/llama/hqt/llama2-70b-8x/measure_hooks_maxabs_0_8.npz -------------------------------------------------------------------------------- /MLPERF4.0/Inference/llama/hqt/llama2-70b-8x/measure_hooks_maxabs_1_8.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HabanaAI/Model-References/da5f246c18f41063a89a04f19ce306d1e61f6345/MLPERF4.0/Inference/llama/hqt/llama2-70b-8x/measure_hooks_maxabs_1_8.npz -------------------------------------------------------------------------------- /MLPERF4.0/Inference/llama/hqt/llama2-70b-8x/measure_hooks_maxabs_2_8.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HabanaAI/Model-References/da5f246c18f41063a89a04f19ce306d1e61f6345/MLPERF4.0/Inference/llama/hqt/llama2-70b-8x/measure_hooks_maxabs_2_8.npz -------------------------------------------------------------------------------- /MLPERF4.0/Inference/llama/hqt/llama2-70b-8x/measure_hooks_maxabs_3_8.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HabanaAI/Model-References/da5f246c18f41063a89a04f19ce306d1e61f6345/MLPERF4.0/Inference/llama/hqt/llama2-70b-8x/measure_hooks_maxabs_3_8.npz -------------------------------------------------------------------------------- /MLPERF4.0/Inference/llama/hqt/llama2-70b-8x/measure_hooks_maxabs_4_8.npz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/HabanaAI/Model-References/da5f246c18f41063a89a04f19ce306d1e61f6345/MLPERF4.0/Inference/llama/hqt/llama2-70b-8x/measure_hooks_maxabs_4_8.npz -------------------------------------------------------------------------------- /MLPERF4.0/Inference/llama/hqt/llama2-70b-8x/measure_hooks_maxabs_5_8.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HabanaAI/Model-References/da5f246c18f41063a89a04f19ce306d1e61f6345/MLPERF4.0/Inference/llama/hqt/llama2-70b-8x/measure_hooks_maxabs_5_8.npz -------------------------------------------------------------------------------- /MLPERF4.0/Inference/llama/hqt/llama2-70b-8x/measure_hooks_maxabs_6_8.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HabanaAI/Model-References/da5f246c18f41063a89a04f19ce306d1e61f6345/MLPERF4.0/Inference/llama/hqt/llama2-70b-8x/measure_hooks_maxabs_6_8.npz -------------------------------------------------------------------------------- /MLPERF4.0/Inference/llama/hqt/llama2-70b-8x/measure_hooks_maxabs_7_8.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HabanaAI/Model-References/da5f246c18f41063a89a04f19ce306d1e61f6345/MLPERF4.0/Inference/llama/hqt/llama2-70b-8x/measure_hooks_maxabs_7_8.npz -------------------------------------------------------------------------------- /MLPERF4.0/Inference/llama/hqt/llama2-70b-8x/measure_hooks_maxabs_MAXABS_HW_0_8.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HabanaAI/Model-References/da5f246c18f41063a89a04f19ce306d1e61f6345/MLPERF4.0/Inference/llama/hqt/llama2-70b-8x/measure_hooks_maxabs_MAXABS_HW_0_8.npz -------------------------------------------------------------------------------- /MLPERF4.0/Inference/llama/hqt/llama2-70b-8x/measure_hooks_maxabs_MAXABS_HW_1_8.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HabanaAI/Model-References/da5f246c18f41063a89a04f19ce306d1e61f6345/MLPERF4.0/Inference/llama/hqt/llama2-70b-8x/measure_hooks_maxabs_MAXABS_HW_1_8.npz -------------------------------------------------------------------------------- /MLPERF4.0/Inference/llama/hqt/llama2-70b-8x/measure_hooks_maxabs_MAXABS_HW_2_8.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HabanaAI/Model-References/da5f246c18f41063a89a04f19ce306d1e61f6345/MLPERF4.0/Inference/llama/hqt/llama2-70b-8x/measure_hooks_maxabs_MAXABS_HW_2_8.npz -------------------------------------------------------------------------------- /MLPERF4.0/Inference/llama/hqt/llama2-70b-8x/measure_hooks_maxabs_MAXABS_HW_3_8.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HabanaAI/Model-References/da5f246c18f41063a89a04f19ce306d1e61f6345/MLPERF4.0/Inference/llama/hqt/llama2-70b-8x/measure_hooks_maxabs_MAXABS_HW_3_8.npz -------------------------------------------------------------------------------- /MLPERF4.0/Inference/llama/hqt/llama2-70b-8x/measure_hooks_maxabs_MAXABS_HW_4_8.npz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/HabanaAI/Model-References/da5f246c18f41063a89a04f19ce306d1e61f6345/MLPERF4.0/Inference/llama/hqt/llama2-70b-8x/measure_hooks_maxabs_MAXABS_HW_4_8.npz -------------------------------------------------------------------------------- /MLPERF4.0/Inference/llama/hqt/llama2-70b-8x/measure_hooks_maxabs_MAXABS_HW_5_8.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HabanaAI/Model-References/da5f246c18f41063a89a04f19ce306d1e61f6345/MLPERF4.0/Inference/llama/hqt/llama2-70b-8x/measure_hooks_maxabs_MAXABS_HW_5_8.npz -------------------------------------------------------------------------------- /MLPERF4.0/Inference/llama/hqt/llama2-70b-8x/measure_hooks_maxabs_MAXABS_HW_6_8.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HabanaAI/Model-References/da5f246c18f41063a89a04f19ce306d1e61f6345/MLPERF4.0/Inference/llama/hqt/llama2-70b-8x/measure_hooks_maxabs_MAXABS_HW_6_8.npz -------------------------------------------------------------------------------- /MLPERF4.0/Inference/llama/hqt/llama2-70b-8x/measure_hooks_maxabs_MAXABS_HW_7_8.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HabanaAI/Model-References/da5f246c18f41063a89a04f19ce306d1e61f6345/MLPERF4.0/Inference/llama/hqt/llama2-70b-8x/measure_hooks_maxabs_MAXABS_HW_7_8.npz -------------------------------------------------------------------------------- /MLPERF4.0/Inference/llama/quantization_config/act_maxabs_hw_weights_pcs_maxabs_pow2_quant.json: -------------------------------------------------------------------------------- 1 | { 2 | "method": "HOOKS", 3 | "mode": "QUANTIZE", 4 | "observer": "maxabs_per_channel", 5 | "scale_method": "ACT_MAXABS_HW_WEIGHTS_PCS_MAXABS_POW2", 6 | "whitelist": {"types": [], "names": []}, 7 | "blacklist": {"types": [], "names": ["lm_head"]}, 8 | "dump_stats_path": "./llama_output", 9 | "dump_stats_xlsx_path": "./run_outputs/fp8stats.xlsx" 10 | } 11 | -------------------------------------------------------------------------------- /MLPERF4.0/Inference/llama/quantization_config/maxabs_measure.json: -------------------------------------------------------------------------------- 1 | { 2 | "method": "HOOKS", 3 | "mode": "MEASURE", 4 | "observer": "maxabs", 5 | "whitelist": {"types": [], "names": []}, 6 | "blacklist": {"types": [], "names": []}, 7 | "dump_stats_path": "./llama_output/7b_measure", 8 | "dump_stats_xlsx_path": "./llama_output/7b_measure/7b_fp8stats.xlsx" 9 | } -------------------------------------------------------------------------------- /MLPERF4.0/Inference/llama/quantization_config/maxabs_pcq_measure.json: -------------------------------------------------------------------------------- 1 | { 2 | "method": "HOOKS", 3 | "mode": "MEASURE", 4 | "observer": "maxabs_per_channel", 5 | "blacklist": {"types": [], "names": ["lm_head"]}, 6 | "dump_stats_path": "./llama_output", 7 | "dump_stats_xlsx_path": "./run_outputs/fp8stats.xlsx" 8 | } 9 | -------------------------------------------------------------------------------- /MLPERF4.0/Inference/llama/quantization_config/maxabs_quant.json: -------------------------------------------------------------------------------- 1 | { 2 | "method": "HOOKS", 3 | "mode": "QUANTIZE", 4 | "observer": "maxabs", 5 | "scale_method": "maxabs_hw", 6 | "whitelist": {"types": [], "names": []}, 7 | "blacklist": {"types": [], "names": ["lm_head"]}, 8 
| "dump_stats_path": "./llama_output/7b_measure", 9 | "dump_stats_xlsx_path": "./llama_output/7b_measure/7b_fp8stats.xlsx" 10 | } -------------------------------------------------------------------------------- /MLPERF4.0/Inference/llama/quantization_config/shape_measure.json: -------------------------------------------------------------------------------- 1 | { 2 | "method": "HOOKS", 3 | "mode": "MEASURE", 4 | "observer": "shape", 5 | "blacklist": {"types": [], "names": ["lm_head"]}, 6 | "dump_stats_path": "./llama_output", 7 | "dump_stats_xlsx_path": "./run_outputs/fp8stats.xlsx" 8 | } 9 | -------------------------------------------------------------------------------- /MLPERF4.0/Inference/llama/quantization_config/unit_scale_quant.json: -------------------------------------------------------------------------------- 1 | { 2 | "method": "HOOKS", 3 | "mode": "QUANTIZE", 4 | "observer": "maxabs", 5 | "scale_method": "unit_scale", 6 | "whitelist": {"types": [], "names": []}, 7 | "blacklist": {"types": [], "names": []}, 8 | "dump_stats_path": "./llama_output", 9 | "dump_stats_xlsx_path": "./run_outputs/fp8stats.xlsx" 10 | } 11 | -------------------------------------------------------------------------------- /MLPERF4.0/Inference/llama/quantization_config/without_scale_quant.json: -------------------------------------------------------------------------------- 1 | { 2 | "method": "HOOKS", 3 | "mode": "QUANTIZE", 4 | "observer": "maxabs", 5 | "scale_method": "without_scale", 6 | "whitelist": {"types": [], "names": []}, 7 | "blacklist": {"types": [], "names": []}, 8 | "dump_stats_path": "./llama_output", 9 | "dump_stats_xlsx_path": "./run_outputs/fp8stats.xlsx" 10 | } 11 | -------------------------------------------------------------------------------- /MLPERF4.0/Inference/llama/requirements.txt: -------------------------------------------------------------------------------- 1 | evaluate 2 | rouge_score 3 | accelerate 4 | pandas 5 | git+https://github.com/HabanaAI/DeepSpeed.git@1.19.0 6 | -------------------------------------------------------------------------------- /MLPERF4.0/Inference/llama/setup_tgi.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | apt update -y && apt-get install -y psmisc 4 | script_dir=$(dirname "$(realpath "${BASH_SOURCE[0]}")") 5 | pushd $HOME 6 | curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y 7 | source "$HOME/.cargo/env" 8 | # install protobuf 9 | PROTOC_ZIP=protoc-21.12-linux-x86_64.zip 10 | curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP 11 | unzip -o $PROTOC_ZIP -d /usr/local bin/protoc 12 | unzip -o $PROTOC_ZIP -d /usr/local 'include/*' 13 | rm -f $PROTOC_ZIP 14 | # prepare TGI with Gaudi support 15 | cd "$script_dir/tgi-gaudi/" 16 | pushd $HOME 17 | mkdir repos 18 | cp -r "$script_dir/tgi-gaudi/" repos/ 19 | # build server 20 | cd repos/tgi-gaudi/server 21 | make gen-server 22 | pip install pip --upgrade 23 | # don't try to overwrite torch 24 | grep -v "torch==" requirements.txt | pip install --no-deps -r /dev/stdin 25 | pip install -e . 26 | # this stoped to be installed by TGI but is still required: 27 | pip install outlines==0.0.36 28 | cd .. 29 | # build router 30 | cd router 31 | cargo install --locked --path . 32 | cd .. 33 | # build launcher 34 | cd launcher 35 | cargo install --locked --path . 36 | cd .. 
37 | popd 38 | pip list 39 | -------------------------------------------------------------------------------- /MLPERF4.0/Inference/stable-diffusion-xl/backend.py: -------------------------------------------------------------------------------- 1 | """ 2 | abstract backend class 3 | """ 4 | 5 | 6 | class Backend: 7 | def __init__(self): 8 | self.inputs = [] 9 | self.outputs = [] 10 | 11 | def version(self): 12 | raise NotImplementedError("Backend:version") 13 | 14 | def name(self): 15 | raise NotImplementedError("Backend:name") 16 | 17 | def load(self, model_path, inputs=None, outputs=None): 18 | raise NotImplementedError("Backend:load") 19 | 20 | def predict(self, feed): 21 | raise NotImplementedError("Backend:predict") 22 | -------------------------------------------------------------------------------- /MLPERF4.0/Inference/stable-diffusion-xl/backend_debug.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import backend 3 | 4 | 5 | class BackendDebug(backend.Backend): 6 | def __init__(self, image_size=[3, 1024, 1024], **kwargs): 7 | super(BackendDebug, self).__init__() 8 | self.image_size = image_size 9 | 10 | def version(self): 11 | return torch.__version__ 12 | 13 | def name(self): 14 | return "debug-SUT" 15 | 16 | def image_format(self): 17 | return "NCHW" 18 | 19 | def load(self): 20 | return self 21 | 22 | def predict(self, prompts): 23 | images = [] 24 | with torch.no_grad(): 25 | for prompt in prompts: 26 | image = torch.randn(self.image_size) 27 | images.append(image) 28 | return images 29 | -------------------------------------------------------------------------------- /MLPERF4.0/Inference/stable-diffusion-xl/configs/user.conf: -------------------------------------------------------------------------------- 1 | # The format of this config file is 'key = value'. 2 | # The key has the format 'model.scenario.key'. Value is mostly int64_t. 3 | # Model maybe '*' as wildcard. In that case the value applies to all models. 
4 | # All times are in milli seconds 5 | stable-diffusion-xl.Offline.min_query_count = 5000 6 | stable-diffusion-xl.Server.min_query_count = 5000 7 | stable-diffusion-xl.Server.target_qps = 6.38 8 | -------------------------------------------------------------------------------- /MLPERF4.0/Inference/stable-diffusion-xl/evaluation.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import json 4 | import argparse 5 | 6 | def get_args(): 7 | """Parse commandline.""" 8 | parser = argparse.ArgumentParser() 9 | parser.add_argument("--mlperf-accuracy-file", default="build/logs/results.json", help="path to results.json") 10 | args = parser.parse_args() 11 | return args 12 | 13 | def main(): 14 | args = get_args() 15 | with open(args.mlperf_accuracy_file, "r") as file: 16 | data = json.load(file) 17 | 18 | acc_results = data.get("accuracy_results", {"CLIP_SCORE": 0.0, "FID_SCORE": 0.0}) 19 | args = data.get("args", {}) 20 | 21 | acc_results["gen_num"] = args["count"] 22 | print("\nResults\n") 23 | print(acc_results) 24 | if __name__ == "__main__": 25 | main() 26 | -------------------------------------------------------------------------------- /MLPERF4.0/Inference/stable-diffusion-xl/requirements.txt: -------------------------------------------------------------------------------- 1 | diffusers==0.21.2 2 | transformers==4.42.3 3 | accelerate==0.23.0 4 | open-clip-torch==2.7.0 5 | opencv-python==4.8.1.78 6 | pycocotools==2.0.7 7 | torchmetrics[image]==1.2.0 8 | scipy==1.14.0 9 | flask==3.0.1 10 | numpy==1.26.3 11 | huggingface-hub==0.25.2 12 | -------------------------------------------------------------------------------- /MLPERF4.0/Inference/stable-diffusion-xl/tools/check_latents.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import argparse 4 | 5 | 6 | def get_args(): 7 | parser = argparse.ArgumentParser() 8 | parser.add_argument("--torch-input", type=str, default="latents.pt") 9 | parser.add_argument("--numpy-input", type=str, default="latents.npy") 10 | args = parser.parse_args() 11 | return args 12 | 13 | 14 | if __name__ == "__main__": 15 | args = get_args() 16 | torch_latents = torch.load(args.torch_input) 17 | numpy_latents = torch.Tensor(np.load(args.numpy_input)) 18 | print(f"Torch Latents: {torch_latents}\nShape: {torch_latents.shape}") 19 | print(f"Numpy Latents: {numpy_latents}\nShape: {numpy_latents.shape}") 20 | assert torch_latents.shape == numpy_latents.shape 21 | assert (numpy_latents == torch_latents).all().item() 22 | print("All tests passed!") 23 | -------------------------------------------------------------------------------- /MLPERF4.0/Inference/stable-diffusion-xl/tools/download-coco-2014.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | : "${DOWNLOAD_PATH:=../coco2014}" 4 | : "${MAX_IMAGES:=5000}" 5 | : "${NUM_WORKERS:=1}" 6 | 7 | while [ "$1" != "" ]; do 8 | case $1 in 9 | -d | --download-path ) shift 10 | DOWNLOAD_PATH=$1 11 | ;; 12 | esac 13 | case $1 in 14 | -m | --max-images ) shift 15 | MAX_IMAGES=$1 16 | ;; 17 | esac 18 | case $1 in 19 | -n | --num-workers ) shift 20 | NUM_WORKERS=$1 21 | ;; 22 | esac 23 | shift 24 | done 25 | 26 | if [ -z ${MAX_IMAGES} ]; 27 | then 28 | python3 coco.py \ 29 | --dataset-dir ${DOWNLOAD_PATH} \ 30 | --num-workers ${NUM_WORKERS} 31 | else 32 | python3 coco.py \ 33 | --dataset-dir ${DOWNLOAD_PATH} \ 34 | --max-images ${MAX_IMAGES} 
\ 35 | --num-workers ${NUM_WORKERS} 36 | fi -------------------------------------------------------------------------------- /MLPERF4.0/Inference/stable-diffusion-xl/tools/fid/README.md: -------------------------------------------------------------------------------- 1 | This is a copy from https://github.com/mseitzer/pytorch-fid/ with the modifications made here https://github.com/ahmadki/mlperf_sd_inference and some additional modifications for taking as dataset of tensors as input -------------------------------------------------------------------------------- /MLPERF4.0/Inference/stable-diffusion-xl/tools/latents.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HabanaAI/Model-References/da5f246c18f41063a89a04f19ce306d1e61f6345/MLPERF4.0/Inference/stable-diffusion-xl/tools/latents.npy -------------------------------------------------------------------------------- /MLPERF4.0/Inference/stable-diffusion-xl/tools/latents.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HabanaAI/Model-References/da5f246c18f41063a89a04f19ce306d1e61f6345/MLPERF4.0/Inference/stable-diffusion-xl/tools/latents.pt -------------------------------------------------------------------------------- /MLPERF4.0/Inference/stable-diffusion-xl/tools/quantize/measure_all/fp8_hooks_maxabs.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HabanaAI/Model-References/da5f246c18f41063a89a04f19ce306d1e61f6345/MLPERF4.0/Inference/stable-diffusion-xl/tools/quantize/measure_all/fp8_hooks_maxabs.npz -------------------------------------------------------------------------------- /MLPERF4.0/Inference/stable-diffusion-xl/tools/quantize/measure_all/fp8_hooks_maxabs_0_8.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HabanaAI/Model-References/da5f246c18f41063a89a04f19ce306d1e61f6345/MLPERF4.0/Inference/stable-diffusion-xl/tools/quantize/measure_all/fp8_hooks_maxabs_0_8.npz -------------------------------------------------------------------------------- /MLPERF4.0/Inference/stable-diffusion-xl/tools/quantize/measure_all/fp8_hooks_maxabs_1_8.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HabanaAI/Model-References/da5f246c18f41063a89a04f19ce306d1e61f6345/MLPERF4.0/Inference/stable-diffusion-xl/tools/quantize/measure_all/fp8_hooks_maxabs_1_8.npz -------------------------------------------------------------------------------- /MLPERF4.0/Inference/stable-diffusion-xl/tools/quantize/measure_all/fp8_hooks_maxabs_2_8.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HabanaAI/Model-References/da5f246c18f41063a89a04f19ce306d1e61f6345/MLPERF4.0/Inference/stable-diffusion-xl/tools/quantize/measure_all/fp8_hooks_maxabs_2_8.npz -------------------------------------------------------------------------------- /MLPERF4.0/Inference/stable-diffusion-xl/tools/quantize/measure_all/fp8_hooks_maxabs_3_8.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HabanaAI/Model-References/da5f246c18f41063a89a04f19ce306d1e61f6345/MLPERF4.0/Inference/stable-diffusion-xl/tools/quantize/measure_all/fp8_hooks_maxabs_3_8.npz 
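The latents.pt / latents.npy files listed above appear to hold a fixed initial noise tensor so that SDXL runs start denoising from identical latents, and tools/check_latents.py simply verifies that the two serialized copies agree. Below is a minimal sketch of how such a pair could be produced and then validated the same way the checker does; the tensor shape and seed are illustrative assumptions, not values taken from the repository's latent.py.

```python
# Illustrative only: produce a fixed latent tensor and save it in both formats
# compared by tools/check_latents.py. Shape and seed here are assumptions.
import numpy as np
import torch

torch.manual_seed(0)                      # any fixed seed gives reproducible noise
latents = torch.randn(1, 4, 128, 128)     # assumed SDXL latent shape for 1024x1024 output

torch.save(latents, "latents.pt")         # read back with torch.load
np.save("latents.npy", latents.numpy())   # read back with np.load

# The same consistency check performed by check_latents.py:
restored = torch.Tensor(np.load("latents.npy"))
assert restored.shape == latents.shape
assert (restored == torch.load("latents.pt")).all().item()
print("latents.pt and latents.npy are identical")
```

Keeping both serializations lets NumPy-only tooling and the PyTorch pipeline consume the same noise without converting at load time.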
-------------------------------------------------------------------------------- /MLPERF4.0/Inference/stable-diffusion-xl/tools/quantize/measure_all/fp8_hooks_maxabs_4_8.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HabanaAI/Model-References/da5f246c18f41063a89a04f19ce306d1e61f6345/MLPERF4.0/Inference/stable-diffusion-xl/tools/quantize/measure_all/fp8_hooks_maxabs_4_8.npz -------------------------------------------------------------------------------- /MLPERF4.0/Inference/stable-diffusion-xl/tools/quantize/measure_all/fp8_hooks_maxabs_5_8.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HabanaAI/Model-References/da5f246c18f41063a89a04f19ce306d1e61f6345/MLPERF4.0/Inference/stable-diffusion-xl/tools/quantize/measure_all/fp8_hooks_maxabs_5_8.npz -------------------------------------------------------------------------------- /MLPERF4.0/Inference/stable-diffusion-xl/tools/quantize/measure_all/fp8_hooks_maxabs_6_8.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HabanaAI/Model-References/da5f246c18f41063a89a04f19ce306d1e61f6345/MLPERF4.0/Inference/stable-diffusion-xl/tools/quantize/measure_all/fp8_hooks_maxabs_6_8.npz -------------------------------------------------------------------------------- /MLPERF4.0/Inference/stable-diffusion-xl/tools/quantize/measure_all/fp8_hooks_maxabs_7_8.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HabanaAI/Model-References/da5f246c18f41063a89a04f19ce306d1e61f6345/MLPERF4.0/Inference/stable-diffusion-xl/tools/quantize/measure_all/fp8_hooks_maxabs_7_8.npz -------------------------------------------------------------------------------- /MLPERF4.0/Inference/stable-diffusion-xl/tools/quantize/measure_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "method": "HOOKS", 3 | "mode": "MEASURE", 4 | "dump_stats_path": "../tools/quantize/measure_all/fp8" 5 | } 6 | -------------------------------------------------------------------------------- /MLPERF4.0/Inference/stable-diffusion-xl/tools/quantize/quant_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "method": "HOOKS", 3 | "mode": "QUANTIZE", 4 | "scale_method": "maxabs_hw_opt_weight", 5 | "dump_stats_path": "tools/quantize/measure_all/fp8" 6 | } -------------------------------------------------------------------------------- /MLPERF4.0/Inference/stable-diffusion-xl/tools/quantize/quant_config_bmm.json: -------------------------------------------------------------------------------- 1 | { 2 | "method": "HOOKS", 3 | "mode": "QUANTIZE", 4 | "scale_method": "maxabs_hw_opt_weight", 5 | "dump_stats_path": "tools/quantize/measure_all/fp8", 6 | "blocklist": {"types": ["Linear", "Conv2d", "LoRACompatibleLinear", "LoRACompatibleConv"]} 7 | } 8 | -------------------------------------------------------------------------------- /MLPERF4.0/Inference/stable-diffusion-xl/tools/sample_ids.py: -------------------------------------------------------------------------------- 1 | 2 | import argparse 3 | import numpy as np 4 | import pandas as pd 5 | 6 | def get_args(): 7 | """Parse commandline.""" 8 | parser = argparse.ArgumentParser() 9 | parser.add_argument( 10 | "--tsv-path", default="../coco2014/captions/captions_source.tsv", help="Dataset 
download location" 11 | ) 12 | parser.add_argument( 13 | "--output-path", default="sample_ids.txt", help="Dataset download location" 14 | ) 15 | parser.add_argument( 16 | "--n", type=int, default=10, help="Dataset download location" 17 | ) 18 | parser.add_argument( 19 | "--seed", "-s", type=int, default=926019364, help="Dataset download location" 20 | ) 21 | args = parser.parse_args() 22 | return args 23 | 24 | 25 | if __name__ == "__main__": 26 | args = get_args() 27 | np.random.seed(args.seed) 28 | df_annotations = pd.read_csv(f"{args.tsv_path}", sep="\t") 29 | sample_ids = list(np.random.choice(df_annotations.shape[0], args.n)) 30 | with open(args.output_path, "w+") as f: 31 | for i, sample in enumerate(sample_ids): 32 | if i != (len(sample_ids)-1): 33 | f.write(str(sample) + "\n") 34 | else: 35 | f.write(str(sample)) 36 | -------------------------------------------------------------------------------- /MLPERF4.0/Inference/stable-diffusion-xl/tools/sample_ids.txt: -------------------------------------------------------------------------------- 1 | 4459 2 | 4015 3 | 2705 4 | 1682 5 | 4048 6 | 4683 7 | 3757 8 | 1578 9 | 3319 10 | 95 -------------------------------------------------------------------------------- /MLPERF4.0/Training/benchmarks/gpt3/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @jeffra @samyam @tjruwase @ShadenSmith @conglongli @awan-10 @cli99 @eltonzheng @minjiaz @RezaYazdaniAminabadi @duli2012 @mrwyattii @yaozhewei @arashb @xiaoxiawu-microsoft @guanhuawang 2 | -------------------------------------------------------------------------------- /MLPERF4.0/Training/benchmarks/gpt3/MANIFEST.in: -------------------------------------------------------------------------------- 1 | include megatron/data/Makefile 2 | include megatron/data/helpers.cpp 3 | -------------------------------------------------------------------------------- /MLPERF4.0/Training/benchmarks/gpt3/dataset/README.md: -------------------------------------------------------------------------------- 1 | # Run the scripts below to setup dataset 2 | 3 | bash download_books.sh 4 | 5 | bash download_vocab.sh 6 | -------------------------------------------------------------------------------- /MLPERF4.0/Training/benchmarks/gpt3/dataset/download_books.sh: -------------------------------------------------------------------------------- 1 | wget https://the-eye.eu/public/AI/pile_neox/data/BookCorpusDataset_text_document.bin 2 | wget https://the-eye.eu/public/AI/pile_neox/data/BookCorpusDataset_text_document.idx -------------------------------------------------------------------------------- /MLPERF4.0/Training/benchmarks/gpt3/dataset/download_ckpt.sh: -------------------------------------------------------------------------------- 1 | mkdir -p checkpoints/gpt2_345m 2 | 3 | cd checkpoints/gpt2_345m 4 | wget --content-disposition https://api.ngc.nvidia.com/v2/models/nvidia/megatron_lm_345m/versions/v0.0/zip -O megatron_lm_345m_v0.0.zip 5 | unzip megatron_lm_345m_v0.0.zip 6 | rm megatron_lm_345m_v0.0.zip 7 | cd ../.. 
8 | 9 | -------------------------------------------------------------------------------- /MLPERF4.0/Training/benchmarks/gpt3/dataset/download_vocab.sh: -------------------------------------------------------------------------------- 1 | wget https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-vocab.json 2 | wget https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-merges.txt -------------------------------------------------------------------------------- /MLPERF4.0/Training/benchmarks/gpt3/examples/MoE/ds_config_gpt_TEMPLATE.json: -------------------------------------------------------------------------------- 1 | { 2 | "train_batch_size" : CONFIG_BATCH_SIZE, 3 | "train_micro_batch_size_per_gpu": CONFIG_MBSIZE, 4 | "steps_per_print": LOG_INTERVAL, 5 | 6 | "zero_optimization": { 7 | "stage": ZERO_STAGE, 8 | "elastic_checkpoint": true 9 | }, 10 | 11 | "gradient_clipping": 1.0, 12 | "prescale_gradients": PRESCALE_GRAD, 13 | 14 | "fp16": { 15 | "enabled": CONFIG_FP16_ENABLED, 16 | "loss_scale": 0, 17 | "loss_scale_window": 500, 18 | "hysteresis": 2, 19 | "min_loss_scale": 1, 20 | "initial_scale_power": 11 21 | }, 22 | 23 | "bf16": { 24 | "enabled": CONFIG_BF16_ENABLED 25 | }, 26 | "curriculum_learning": { 27 | "enabled": CONFIG_CL_ENABLED, 28 | "curriculum_type": "seqlen", 29 | "min_difficulty": CONFIG_CL_MIN, 30 | "max_difficulty": CONFIG_CL_MAX, 31 | "schedule_type": "fixed_linear", 32 | "schedule_config": { 33 | "total_curriculum_step": CONFIG_CL_DURATION, 34 | "difficulty_step": 8 35 | } 36 | }, 37 | 38 | "wall_clock_breakdown" : false 39 | } 40 | -------------------------------------------------------------------------------- /MLPERF4.0/Training/benchmarks/gpt3/examples/MoE/ds_config_gpt_Zero2_TEMPLATE.json: -------------------------------------------------------------------------------- 1 | { 2 | "train_batch_size" : CONFIG_BATCH_SIZE, 3 | "train_micro_batch_size_per_gpu": CONFIG_MBSIZE, 4 | "steps_per_print": LOG_INTERVAL, 5 | 6 | "zero_optimization": { 7 | "stage": 2 8 | }, 9 | 10 | "gradient_clipping": 1.0, 11 | "prescale_gradients": false, 12 | 13 | "fp16": { 14 | "enabled": CONFIG_FP16_ENABLED, 15 | "loss_scale": 0, 16 | "loss_scale_window": 500, 17 | "hysteresis": 2, 18 | "min_loss_scale": 1, 19 | "initial_scale_power": 11 20 | }, 21 | 22 | "bf16": { 23 | "enabled": CONFIG_BF16_ENABLED 24 | }, 25 | "curriculum_learning": { 26 | "enabled": CONFIG_CL_ENABLED, 27 | "curriculum_type": "seqlen", 28 | "min_difficulty": CONFIG_CL_MIN, 29 | "max_difficulty": CONFIG_CL_MAX, 30 | "schedule_type": "fixed_linear", 31 | "schedule_config": { 32 | "total_curriculum_step": CONFIG_CL_DURATION, 33 | "difficulty_step": 8 34 | } 35 | }, 36 | 37 | "wall_clock_breakdown" : false 38 | } 39 | -------------------------------------------------------------------------------- /MLPERF4.0/Training/benchmarks/gpt3/examples/README.md: -------------------------------------------------------------------------------- 1 | ## Recipes and Scripts 2 | 3 | ### Azure 4 | 5 | We strongly recommend to start with AzureML recipe in the ```azureml``` folder. 6 | 7 | If you have a custom infrastructure (e.g. HPC clusters) or Azure VM and VMSS based environments, please refer to the bash scripts in the ```azure``` folder. 8 | 9 | ### MoE 10 | 11 | Please see the ```MoE``` folder for different training recipes and scripts for Mixture-of-expert based models. 12 | 13 | ### Curriculum Learning 14 | 15 | Curriculum learning recipes are in the ```curriculum_learning``` folder. 
Please refer to the detailed tutorials linked inside. 16 | -------------------------------------------------------------------------------- /MLPERF4.0/Training/benchmarks/gpt3/examples/azureml/Dockerfile.dockerfile: -------------------------------------------------------------------------------- 1 | FROM mcr.microsoft.com/azureml/aifx/stable-ubuntu2004-cu115-py38-torch1110 2 | 3 | USER root:root 4 | 5 | RUN pip install pybind11 6 | 7 | RUN pip install git+https://github.com/microsoft/DeepSpeed.git 8 | 9 | # add a100-topo.xml 10 | RUN mkdir -p /opt/microsoft/ 11 | RUN wget -O /opt/microsoft/a100-topo.xml https://hpcbenchmarks.blob.core.windows.net/bookcorpus/data/a100-topo.xml 12 | 13 | # to use on A100, enable env var below in your job 14 | ENV NCCL_TOPO_FILE="/opt/microsoft/a100-topo.xml" 15 | -------------------------------------------------------------------------------- /MLPERF4.0/Training/benchmarks/gpt3/examples/azureml/README.md: -------------------------------------------------------------------------------- 1 | ## Megatron-DeepSpeed on AzureML 2 | Example script for running Megatron-DeepSpeed using Azure Machine Learning. 3 | 4 | ------ 5 | 6 | # Workspace Setup 7 | Setup an AML workspace. Refer to: [set-up doc](https://github.com/Azure/azureml-examples/tree/main/python-sdk#set-up). 8 | 9 | # Dataset Preparation 10 | Create AML Dataset. To run remote AML job, you need to provide AML FileDataset. 11 | Refer to [prepare_dataset script](prepare_dataset.py) to upload .bin and .idx files to blob store and on how to create FileDataset. 12 | 13 | # Training 14 | Run Megatron-DeepSpeed on Azure ML. Refer to [aml_submit script](aml_submit.py). 15 | -------------------------------------------------------------------------------- /MLPERF4.0/Training/benchmarks/gpt3/examples/compression/ds_config_gpt_TEMPLATE.json: -------------------------------------------------------------------------------- 1 | { 2 | "train_batch_size" : CONFIG_BATCH_SIZE, 3 | "train_micro_batch_size_per_gpu": CONFIG_MBSIZE, 4 | "steps_per_print": LOG_INTERVAL, 5 | 6 | "zero_optimization": { 7 | "stage": ZERO_STAGE, 8 | "elastic_checkpoint": true 9 | }, 10 | 11 | "gradient_clipping": 1.0, 12 | "prescale_gradients": PRESCALE_GRAD, 13 | 14 | "fp16": { 15 | "enabled": CONFIG_FP16_ENABLED, 16 | "loss_scale": 0, 17 | "loss_scale_window": 500, 18 | "hysteresis": 2, 19 | "min_loss_scale": 1, 20 | "initial_scale_power": 11 21 | }, 22 | 23 | "bf16": { 24 | "enabled": CONFIG_BF16_ENABLED 25 | }, 26 | "curriculum_learning": { 27 | "enabled": CONFIG_CL_ENABLED, 28 | "curriculum_type": "seqlen", 29 | "min_difficulty": CONFIG_CL_MIN, 30 | "max_difficulty": CONFIG_CL_MAX, 31 | "schedule_type": "fixed_linear", 32 | "schedule_config": { 33 | "total_curriculum_step": CONFIG_CL_DURATION, 34 | "difficulty_step": 8 35 | } 36 | }, 37 | 38 | "wall_clock_breakdown" : false 39 | } 40 | -------------------------------------------------------------------------------- /MLPERF4.0/Training/benchmarks/gpt3/examples/create_embeddings.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Compute embeddings for each entry of a given dataset (e.g. 
Wikipedia) 4 | 5 | RANK=0 6 | WORLD_SIZE=1 7 | 8 | # Wikipedia data can be downloaded from the following link: 9 | # https://github.com/facebookresearch/DPR/blob/master/data/download_data.py 10 | EVIDENCE_DATA_DIR= 11 | EMBEDDING_PATH= 12 | CHECKPOINT_PATH= 13 | 14 | python tools/create_doc_index.py \ 15 | --num-layers 12 \ 16 | --hidden-size 768 \ 17 | --num-attention-heads 12 \ 18 | --tensor-model-parallel-size 1 \ 19 | --micro-batch-size 128 \ 20 | --checkpoint-activations \ 21 | --seq-length 512 \ 22 | --retriever-seq-length 256 \ 23 | --max-position-embeddings 512 \ 24 | --load ${CHECKPOINT_PATH} \ 25 | --evidence-data-path ${EVIDENCE_DATA_DIR} \ 26 | --embedding-path ${EMBEDDING_PATH} \ 27 | --indexer-log-interval 1000 \ 28 | --indexer-batch-size 128 \ 29 | --vocab-file bert-vocab.txt \ 30 | --num-workers 2 \ 31 | --fp16 32 | 33 | -------------------------------------------------------------------------------- /MLPERF4.0/Training/benchmarks/gpt3/examples/curriculum_learning/README.md: -------------------------------------------------------------------------------- 1 | This is an example of how to use DeepSpeed's curriculum learning (CL) feature which provides faster and more stable language model pre-training. Currently it is only integrated for GPT pre-training. Note that there are two curriculum learning examples in two different repos for Megatron-LM GPT-2 pre-training. Both of them have some unique features and limitations. See details in our [tutorial](https://www.deepspeed.ai/tutorials/curriculum-learning/). For technical details please refer to our [paper](https://arxiv.org/abs/2108.06084). -------------------------------------------------------------------------------- /MLPERF4.0/Training/benchmarks/gpt3/examples/curriculum_learning/ds_train.sh: -------------------------------------------------------------------------------- 1 | # # baseline 2 | # CONFIG=baseline 3 | # TAG=baseline 4 | # MODEL_SIZE=1558 5 | # LR=1.5e-4 6 | # BSZ=512 7 | # SEQ_LEN=1024 8 | # MP_SIZE=1 9 | # SEED=1234 10 | # SAVE_INTERVAL=5000 11 | # NUM_ITER=600000 12 | # NUM_TOKEN=157286400000 13 | # LR_DECAY_TOKEN=157286400000 14 | # LR_WARMUP_ITER=3000 15 | # CONFIG_TEMPLATE=false 16 | # CURRICULUM_STEP=0 17 | # CURRICULUM_MIN=0 18 | 19 | # curriculum learning 20 | CONFIG=curriculum_fixed_linear 21 | MODEL_SIZE=1558 22 | LR=6e-4 23 | BSZ=4096 24 | SEQ_LEN=1024 25 | MP_SIZE=1 26 | SEED=1234 27 | SAVE_INTERVAL=1000 28 | NUM_ITER=75000 29 | NUM_TOKEN=157286400000 30 | LR_DECAY_TOKEN=157286400000 31 | LR_WARMUP_ITER=3000 32 | CONFIG_TEMPLATE=true 33 | CURRICULUM_STEP=45000 34 | CURRICULUM_MIN=64 35 | TAG="${CONFIG}_s${CURRICULUM_MIN}to${SEQ_LEN}_step${CURRICULUM_STEP}" 36 | 37 | bash ds_pretrain_gpt2.sh $CONFIG $TAG $MODEL_SIZE $LR $BSZ $SEQ_LEN $MP_SIZE $SEED $SAVE_INTERVAL $NUM_ITER $NUM_TOKEN $LR_DECAY_TOKEN $LR_WARMUP_ITER $CONFIG_TEMPLATE $CURRICULUM_STEP $CURRICULUM_MIN 38 | -------------------------------------------------------------------------------- /MLPERF4.0/Training/benchmarks/gpt3/examples/curriculum_learning/ds_zero_stage_1_config_baseline.json: -------------------------------------------------------------------------------- 1 | { 2 | "train_batch_size": 512, 3 | "gradient_accumulation_steps": 1, 4 | "steps_per_print": 1, 5 | "zero_optimization": { 6 | "stage": 1 7 | }, 8 | "optimizer": { 9 | "type": "Adam", 10 | "params": { 11 | "lr": 0.00015, 12 | "max_grad_norm": 1.0, 13 | "betas": [0.9, 0.95] 14 | } 15 | }, 16 | "gradient_clipping": 1.0, 17 | "fp16": { 18 | "enabled": true, 19 | 
"loss_scale": 0, 20 | "loss_scale_window": 1000, 21 | "hysteresis": 2, 22 | "min_loss_scale": 1 23 | }, 24 | "wall_clock_breakdown": false, 25 | "zero_allow_untested_optimizer": false 26 | } 27 | -------------------------------------------------------------------------------- /MLPERF4.0/Training/benchmarks/gpt3/examples/curriculum_learning/ds_zero_stage_1_config_curriculum_fixed_linear.json: -------------------------------------------------------------------------------- 1 | { 2 | "train_batch_size": 512, 3 | "gradient_accumulation_steps": 1, 4 | "steps_per_print": 1, 5 | "zero_optimization": { 6 | "stage": 1 7 | }, 8 | "optimizer": { 9 | "type": "Adam", 10 | "params": { 11 | "lr": 0.00015, 12 | "max_grad_norm": 1.0, 13 | "betas": [0.9, 0.95] 14 | } 15 | }, 16 | "gradient_clipping": 1.0, 17 | "fp16": { 18 | "enabled": true, 19 | "loss_scale": 0, 20 | "loss_scale_window": 1000, 21 | "hysteresis": 2, 22 | "min_loss_scale": 1 23 | }, 24 | "wall_clock_breakdown": false, 25 | "zero_allow_untested_optimizer": false, 26 | "curriculum_learning": { 27 | "enabled": true, 28 | "curriculum_type": "seqlen", 29 | "min_difficulty": CONFIG_CL_MIN, 30 | "max_difficulty": CONFIG_CL_MAX, 31 | "schedule_type": "fixed_linear", 32 | "schedule_config": { 33 | "total_curriculum_step": CONFIG_CL_DURATION, 34 | "difficulty_step": 8 35 | } 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /MLPERF4.0/Training/benchmarks/gpt3/examples/evaluate_ict_zeroshot_nq.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Evaluate natural question test data given Wikipedia embeddings and pretrained 4 | # ICT model 5 | 6 | # Datasets can be downloaded from the following link: 7 | # https://github.com/facebookresearch/DPR/blob/master/data/download_data.py 8 | 9 | EVIDENCE_DATA_DIR= 10 | EMBEDDING_PATH= 11 | CHECKPOINT_PATH= 12 | 13 | QA_FILE= 14 | 15 | python tasks/main.py \ 16 | --task ICT-ZEROSHOT-NQ \ 17 | --tokenizer-type BertWordPieceLowerCase \ 18 | --num-layers 12 \ 19 | --hidden-size 768 \ 20 | --num-attention-heads 12 \ 21 | --tensor-model-parallel-size 1 \ 22 | --micro-batch-size 128 \ 23 | --checkpoint-activations \ 24 | --seq-length 512 \ 25 | --max-position-embeddings 512 \ 26 | --load ${CHECKPOINT_PATH} \ 27 | --evidence-data-path ${EVIDENCE_DATA_DIR} \ 28 | --embedding-path ${EMBEDDING_PATH} \ 29 | --retriever-seq-length 256 \ 30 | --vocab-file bert-vocab.txt\ 31 | --qa-data-test ${QA_FILE} \ 32 | --num-workers 2 \ 33 | --faiss-use-gpu \ 34 | --retriever-report-topk-accuracies 1 5 20 100 \ 35 | --fp16 36 | 37 | -------------------------------------------------------------------------------- /MLPERF4.0/Training/benchmarks/gpt3/examples/merge_mp_bert.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | TENSOR_MODEL_PARALLEL_SIZE=2 4 | 5 | VOCAB_FILE=bert-vocab.txt 6 | CHECKPOINT_PATH=checkpoints/bert_345m 7 | 8 | WORLD_SIZE=$TENSOR_MODEL_PARALLEL_SIZE python tools/merge_mp_partitions.py \ 9 | --model-type BERT \ 10 | --tensor-model-parallel-size $TENSOR_MODEL_PARALLEL_SIZE \ 11 | --tokenizer-type BertWordPieceLowerCase \ 12 | --vocab-file $VOCAB_FILE \ 13 | --num-layers 24 \ 14 | --hidden-size 1024 \ 15 | --num-attention-heads 16 \ 16 | --seq-length 512 \ 17 | --max-position-embeddings 512 \ 18 | --load $CHECKPOINT_PATH 19 | -------------------------------------------------------------------------------- 
/MLPERF4.0/Training/benchmarks/gpt3/examples/pretrain_bert.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | RANK=0 4 | WORLD_SIZE=1 5 | DATA_PATH=_text_sentence 6 | CHECKPOINT_PATH= 7 | 8 | python pretrain_bert.py \ 9 | --num-layers 24 \ 10 | --hidden-size 1024 \ 11 | --num-attention-heads 16 \ 12 | --micro-batch-size 4 \ 13 | --global-batch-size 8 \ 14 | --seq-length 512 \ 15 | --max-position-embeddings 512 \ 16 | --train-iters 2000000 \ 17 | --lr-decay-iters 990000 \ 18 | --save $CHECKPOINT_PATH \ 19 | --load $CHECKPOINT_PATH \ 20 | --data-path $DATA_PATH \ 21 | --vocab-file bert-vocab.txt \ 22 | --data-impl mmap \ 23 | --split 949,50,1 \ 24 | --lr 0.0001 \ 25 | --min-lr 0.00001 \ 26 | --lr-decay-style linear \ 27 | --lr-warmup-fraction .01 \ 28 | --weight-decay 1e-2 \ 29 | --clip-grad 1.0 \ 30 | --log-interval 100 \ 31 | --save-interval 10000 \ 32 | --eval-interval 1000 \ 33 | --eval-iters 10 \ 34 | --fp16 35 | -------------------------------------------------------------------------------- /MLPERF4.0/Training/benchmarks/gpt3/examples/pretrain_gpt.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | # Runs the "345M" parameter model 4 | 5 | RANK=0 6 | WORLD_SIZE=1 7 | 8 | DATA_PATH=_text_document 9 | CHECKPOINT_PATH= 10 | 11 | 12 | python pretrain_gpt.py \ 13 | --num-layers 24 \ 14 | --hidden-size 1024 \ 15 | --num-attention-heads 16 \ 16 | --micro-batch-size 4 \ 17 | --global-batch-size 8 \ 18 | --seq-length 1024 \ 19 | --max-position-embeddings 1024 \ 20 | --train-iters 500000 \ 21 | --lr-decay-iters 320000 \ 22 | --save $CHECKPOINT_PATH \ 23 | --load $CHECKPOINT_PATH \ 24 | --data-path $DATA_PATH \ 25 | --vocab-file gpt2-vocab.json \ 26 | --merge-file gpt2-merges.txt \ 27 | --data-impl mmap \ 28 | --split 949,50,1 \ 29 | --distributed-backend nccl \ 30 | --lr 0.00015 \ 31 | --min-lr 1.0e-5 \ 32 | --lr-decay-style cosine \ 33 | --weight-decay 1e-2 \ 34 | --clip-grad 1.0 \ 35 | --lr-warmup-fraction .01 \ 36 | --checkpoint-activations \ 37 | --log-interval 100 \ 38 | --save-interval 10000 \ 39 | --eval-interval 1000 \ 40 | --eval-iters 10 \ 41 | --fp16 42 | -------------------------------------------------------------------------------- /MLPERF4.0/Training/benchmarks/gpt3/examples/pretrain_t5.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | RANK=0 4 | WORLD_SIZE=1 5 | DATA_PATH= 6 | VOCAB_FILE= 7 | CHECKPOINT_PATH= 8 | 9 | python pretrain_t5.py \ 10 | --num-layers 12 \ 11 | --hidden-size 768 \ 12 | --num-attention-heads 12 \ 13 | --kv-channels 64 \ 14 | --ffn-hidden-size 3072 \ 15 | --encoder-seq-length 512 \ 16 | --decoder-seq-length 128 \ 17 | --micro-batch-size 16 \ 18 | --global-batch-size 2048 \ 19 | --max-position-embeddings 512 \ 20 | --train-iters 1000000 \ 21 | --lr-decay-iters 1000000 \ 22 | --save $CHECKPOINT_PATH \ 23 | --load $CHECKPOINT_PATH \ 24 | --data-path $DATA_PATH \ 25 | --vocab-file $VOCAB_FILE \ 26 | --data-impl mmap \ 27 | --split 949,50,1 \ 28 | --lr 0.0001 \ 29 | --min-lr 0.00001 \ 30 | --lr-decay-style linear \ 31 | --lr-warmup-fraction .01 \ 32 | --weight-decay 1e-2 \ 33 | --clip-grad 1.0 \ 34 | --log-interval 100 \ 35 | --save-interval 10000 \ 36 | --eval-interval 1000 \ 37 | --eval-iters 10 \ 38 | --fp16 39 | -------------------------------------------------------------------------------- /MLPERF4.0/Training/benchmarks/gpt3/images/cases_april2021.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/HabanaAI/Model-References/da5f246c18f41063a89a04f19ce306d1e61f6345/MLPERF4.0/Training/benchmarks/gpt3/images/cases_april2021.png -------------------------------------------------------------------------------- /MLPERF4.0/Training/benchmarks/gpt3/megatron/data/Makefile: -------------------------------------------------------------------------------- 1 | CXXFLAGS += -O3 -Wall -shared -std=c++11 -fPIC -fdiagnostics-color 2 | CPPFLAGS += $(shell python3 -m pybind11 --includes) 3 | LIBNAME = helpers 4 | LIBEXT = $(shell python3-config --extension-suffix) 5 | 6 | default: $(LIBNAME)$(LIBEXT) 7 | 8 | %$(LIBEXT): %.cpp 9 | $(CXX) $(CXXFLAGS) $(CPPFLAGS) $< -o $@ 10 | -------------------------------------------------------------------------------- /MLPERF4.0/Training/benchmarks/gpt3/megatron/data/__init__.py: -------------------------------------------------------------------------------- 1 | from . import indexed_dataset 2 | -------------------------------------------------------------------------------- /MLPERF4.0/Training/benchmarks/gpt3/megatron/data/test/test_preprocess_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | IMPL=cached 4 | python ../preprocess_data.py \ 5 | --input test_samples.json \ 6 | --vocab vocab.txt \ 7 | --dataset-impl ${IMPL} \ 8 | --output-prefix test_samples_${IMPL} \ 9 | --workers 1 \ 10 | --log-interval 2 11 | -------------------------------------------------------------------------------- /MLPERF4.0/Training/benchmarks/gpt3/megatron/enums.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright (c) 2023 Habana Labs, Ltd. an Intel Company. 3 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | import enum 18 | 19 | class LayerType(enum.Enum): 20 | encoder = 1 21 | decoder = 2 22 | 23 | class AttnType(enum.Enum): 24 | self_attn = 1 25 | cross_attn = 2 26 | 27 | class AttnMaskType(enum.Enum): 28 | padding = 1 29 | causal = 2 30 | prefix = 3 31 | 32 | class PositionEmbeddingType(enum.Enum): 33 | rotary = 1 34 | absolute = 2 35 | alibi = 3 36 | learnable = 4 -------------------------------------------------------------------------------- /MLPERF4.0/Training/benchmarks/gpt3/megatron/fused_kernels/compat.h: -------------------------------------------------------------------------------- 1 | /* coding=utf-8 2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | /*This code is copied fron NVIDIA apex: 18 | * https://github.com/NVIDIA/apex 19 | * with minor changes. */ 20 | 21 | 22 | 23 | #ifndef TORCH_CHECK 24 | #define TORCH_CHECK AT_CHECK 25 | #endif 26 | 27 | #ifdef VERSION_GE_1_3 28 | #define DATA_PTR data_ptr 29 | #else 30 | #define DATA_PTR data 31 | #endif 32 | -------------------------------------------------------------------------------- /MLPERF4.0/Training/benchmarks/gpt3/megatron/model/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright (c) 2023 Habana Labs, Ltd. an Intel Company. 3 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | import torch 18 | from .fused_layer_norm import MixedFusedLayerNorm as LayerNorm 19 | from .rmsnorm import RMSNorm 20 | from .utils import CrossEntropy 21 | 22 | from .distributed import DistributedDataParallel 23 | from .bert_model import BertModel 24 | from .gpt_model import GPTModel, GPTModelPipe 25 | from .llama_model import LLaMAModel, LLaMAModelPipe 26 | from .t5_model import T5Model 27 | from .language_model import get_language_model 28 | from .module import Float16Module 29 | -------------------------------------------------------------------------------- /MLPERF4.0/Training/benchmarks/gpt3/megatron/model/enums.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | import enum 17 | 18 | class LayerType(enum.Enum): 19 | encoder = 1 20 | decoder = 2 21 | 22 | class AttnType(enum.Enum): 23 | self_attn = 1 24 | cross_attn = 2 25 | 26 | class AttnMaskType(enum.Enum): 27 | padding = 1 28 | causal = 2 29 | -------------------------------------------------------------------------------- /MLPERF4.0/Training/benchmarks/gpt3/megatron/model/rmsnorm.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright (c) 2023, Habana Labs, Ltd. an Intel Company 3 | 4 | import torch 5 | from torch.nn import init 6 | from torch.nn.parameter import Parameter 7 | 8 | class RMSNorm(torch.nn.Module): 9 | def __init__(self, dim, eps=1e-6, sequence_parallel=False): 10 | super().__init__() 11 | self.epsilon = eps 12 | self.weight = Parameter(torch.Tensor(dim)) 13 | init.ones_(self.weight) 14 | 15 | if sequence_parallel: 16 | # set sequence parallelism flag on weight parameter 17 | setattr(self.weight, 'sequence_parallel', True) 18 | 19 | def forward(self, x): 20 | dtype = x.dtype 21 | x = x.float() 22 | norm = torch.mean(x**2, -1, keepdim=True) 23 | norm = x.mul(norm.add_(self.epsilon).rsqrt_()) 24 | return self.weight * norm.to(dtype) 25 | -------------------------------------------------------------------------------- /MLPERF4.0/Training/benchmarks/gpt3/megatron/mpu/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HabanaAI/Model-References/da5f246c18f41063a89a04f19ce306d1e61f6345/MLPERF4.0/Training/benchmarks/gpt3/megatron/mpu/tests/__init__.py -------------------------------------------------------------------------------- /MLPERF4.0/Training/benchmarks/gpt3/megatron/tokenizer/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | 17 | from .tokenizer import build_tokenizer 18 | -------------------------------------------------------------------------------- /MLPERF4.0/Training/benchmarks/gpt3/requirements.txt: -------------------------------------------------------------------------------- 1 | git+https://github.com/mlcommons/logging.git@2.1.0-rc1 2 | git+https://github.com/NVIDIA/mlperf-common.git 3 | pybind11 4 | torch 5 | six 6 | regex 7 | numpy 8 | sentencepiece 9 | torchmetrics>=0.8.0 10 | einops -------------------------------------------------------------------------------- /MLPERF4.0/Training/benchmarks/gpt3/tasks/eval_harness/download.py: -------------------------------------------------------------------------------- 1 | # This code is originally from https://github.com/bigscience-workshop/Megatron-DeepSpeed 2 | # under the license https://huggingface.co/spaces/bigscience/license 3 | 4 | # Downloads the specified taks in the evaluation harness 5 | # This is particularly useful when running in environments where the GPU nodes 6 | # do not have internet access. This way we can pre-download them and use the cached data-set during evaluation. 7 | 8 | from lm_eval import tasks 9 | from lm_eval.tasks import ALL_TASKS 10 | import argparse 11 | import os 12 | 13 | 14 | parser = argparse.ArgumentParser(description='Download evaluation harness', allow_abbrev=False) 15 | parser.add_argument('--task_list', type=str, default = "all", help='Either "all" or comma separated list of tasks to download.') 16 | args = parser.parse_args() 17 | 18 | def main(): 19 | task_list = ALL_TASKS if args.task_list == 'all' else args.task_list.split(',') 20 | tasks.get_task_dict(task_list) 21 | 22 | if __name__ == '__main__': 23 | main() 24 | 25 | 26 | -------------------------------------------------------------------------------- /MLPERF4.0/Training/benchmarks/gpt3/tests/ds_config_bf16.json: -------------------------------------------------------------------------------- 1 | { 2 | "train_micro_batch_size_per_gpu": 1, 3 | "train_batch_size": 16, 4 | "gradient_clipping": 1.0, 5 | "zero_optimization": { 6 | "stage": 0 7 | }, 8 | "bf16": { 9 | "enabled": true 10 | }, 11 | "zero_allow_untested_optimizer": true, 12 | "steps_per_print": 2000, 13 | "wall_clock_breakdown": false 14 | } 15 | -------------------------------------------------------------------------------- /MLPERF4.0/Training/benchmarks/gpt3/tests/test_basic.py: -------------------------------------------------------------------------------- 1 | def test_import(): 2 | import megatron 3 | 4 | -------------------------------------------------------------------------------- /MLPERF4.0/Training/benchmarks/gpt3/tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HabanaAI/Model-References/da5f246c18f41063a89a04f19ce306d1e61f6345/MLPERF4.0/Training/benchmarks/gpt3/tools/__init__.py -------------------------------------------------------------------------------- /MLPERF4.0/Training/benchmarks/gpt3/tools/convert_checkpoint/__init__.py: -------------------------------------------------------------------------------- 1 | from .verify_checkpoint_non_tp_consistency import verify_checkpoint -------------------------------------------------------------------------------- /MLPERF4.0/Training/benchmarks/gpt3/tools/convert_checkpoint/common_bf16.json: -------------------------------------------------------------------------------- 1 | { 2 | "optimizer": { 3 | "optimizer": { 4 | "param_groups": [ 5 | { 6 | 
"wd_mult": 1.0, 7 | "lr_mult": 1.0, 8 | "lr": 5.984178321979609e-05, 9 | "bias_correction": true, 10 | "betas": [ 11 | 0.9, 12 | 0.95 13 | ], 14 | "eps": 1e-08, 15 | "weight_decay": 0.1, 16 | "step": 4000 17 | } 18 | ] 19 | } 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /MLPERF4.0/Training/benchmarks/gpt3/tools/convert_checkpoint/inspect_checkpoint.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import sys 3 | import os 4 | from collections import OrderedDict 5 | 6 | 7 | def dump_data(datum, name_list=[]): 8 | if type(datum) in (dict, OrderedDict): 9 | for k, v in datum.items(): 10 | dump_data(v, name_list+[str(k)]) 11 | elif type(datum) in (list, tuple): 12 | for v in datum: 13 | dump_data(v, name_list) 14 | elif torch.is_tensor(datum): 15 | prefix = '.'.join(name_list) 16 | print(f'[tensor] {prefix} = {datum.shape}') 17 | else: 18 | #pass 19 | prefix = '.'.join(name_list) 20 | print(f'[other] {prefix} = {datum}') 21 | 22 | def main(): 23 | if len(sys.argv) < 2: 24 | print(f'Usage: {sys.argv[0]} ') 25 | exit(1) 26 | 27 | ckpt_file = sys.argv[1] 28 | if not os.path.isfile(ckpt_file): 29 | print(f'{ckpt_file} is not a valid file') 30 | exit(1) 31 | 32 | print(f'loading checkpoint file: {ckpt_file}') 33 | sd = torch.load(ckpt_file, map_location=torch.device('cpu')) 34 | dump_data(sd) 35 | 36 | quit() 37 | 38 | 39 | if __name__ == "__main__": 40 | main() 41 | -------------------------------------------------------------------------------- /MLPERF4.0/Training/benchmarks/gpt3/tools/create_doc_index.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), 4 | os.path.pardir))) 5 | 6 | from megatron import print_rank_0 7 | from megatron.indexer import IndexBuilder 8 | from megatron.initialize import initialize_megatron 9 | 10 | 11 | def main(): 12 | """Create a BlockData data structure by running an IndexBuilder over an ICT Dataset 13 | - Include all args needed for initial model specification 14 | 15 | Other key args: 16 | --block-data-path: path to write to 17 | --ict-load or --realm-load: path to checkpoint with which to embed 18 | --data-path and --titles-data-path: paths for dataset 19 | --indexer-log-interval: reporting interval 20 | --indexer-batch-size: size specific for indexer jobs 21 | 22 | Check README.md for example script 23 | """ 24 | 25 | initialize_megatron(extra_args_provider=None, 26 | args_defaults={'tokenizer_type': 'BertWordPieceLowerCase'}) 27 | index_builder = IndexBuilder() 28 | index_builder.build_and_save_index() 29 | print_rank_0("Build and save indices: done!") 30 | 31 | if __name__ == "__main__": 32 | main() 33 | 34 | -------------------------------------------------------------------------------- /MLPERF4.0/Training/benchmarks/llm_finetune/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_name_or_path": "regisss/llama2-70b-fused-qkv-mlperf", 3 | "architectures": [ 4 | "LlamaForCausalLM" 5 | ], 6 | "fused_qkv": true, 7 | "attention_bias": false, 8 | "attention_dropout": 0.0, 9 | "bos_token_id": 1, 10 | "eos_token_id": 2, 11 | "hidden_act": "silu", 12 | "hidden_size": 8192, 13 | "initializer_range": 0.02, 14 | "intermediate_size": 28672, 15 | "max_position_embeddings": 8192, 16 | "model_type": "llama", 17 | "num_attention_heads": 64, 18 | "num_hidden_layers": 80, 19 | 
"num_key_value_heads": 8, 20 | "pretraining_tp": 1, 21 | "rms_norm_eps": 1e-05, 22 | "rope_scaling": null, 23 | "rope_theta": 10000.0, 24 | "tie_word_embeddings": false, 25 | "torch_dtype": "float32", 26 | "transformers_version": "4.37.2", 27 | "use_cache": false, 28 | "vocab_size": 32000 29 | } 30 | -------------------------------------------------------------------------------- /MLPERF4.0/Training/benchmarks/llm_finetune/configs/ds_zero3.json: -------------------------------------------------------------------------------- 1 | { 2 | "steps_per_print": 1, 3 | "train_batch_size": "auto", 4 | "train_micro_batch_size_per_gpu": "auto", 5 | "gradient_accumulation_steps": "auto", 6 | "bf16": { 7 | "enabled": true 8 | }, 9 | "gradient_clipping": "auto", 10 | "zero_optimization": { 11 | "stage": 3, 12 | "overlap_comm": false, 13 | "contiguous_gradients": false, 14 | "stage3_gather_16bit_weights_on_model_save": true 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /MLPERF4.0/Training/benchmarks/llm_finetune/ops_bf16.txt: -------------------------------------------------------------------------------- 1 | addmm 2 | addbmm 3 | batch_norm 4 | baddbmm 5 | bmm 6 | conv1d 7 | conv2d 8 | conv3d 9 | conv_transpose1d 10 | conv_transpose2d 11 | conv_transpose3d 12 | dot 13 | dropout 14 | feature_dropout 15 | group_norm 16 | instance_norm 17 | layer_norm 18 | leaky_relu 19 | linear 20 | matmul 21 | mean 22 | mm 23 | mul 24 | mv 25 | softmax 26 | log_softmax 27 | sin 28 | cos 29 | add 30 | div 31 | gather 32 | embedding 33 | -------------------------------------------------------------------------------- /MLPERF4.0/Training/benchmarks/llm_finetune/requirements.txt: -------------------------------------------------------------------------------- 1 | git+https://github.com/mlcommons/logging.git 2 | datasets==2.17.1 3 | torch >= 1.3 4 | datasets >= 2.4.0 5 | sentencepiece != 0.1.92 6 | protobuf 7 | evaluate 8 | scikit-learn 9 | peft >= 0.10.0 10 | -------------------------------------------------------------------------------- /MLPERF4.0/Training/benchmarks/llm_finetune/scripts/create_warmup_data.py: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # Copyright (C) 2024 Habana Labs, Ltd. 
an Intel Company 3 | ############################################################################### 4 | 5 | import json 6 | 7 | # Opening JSON file 8 | import numpy as np 9 | # returns JSON object as 10 | # a dictionary 11 | 12 | for i in range(160): 13 | train_dict = {} 14 | train_dict["input_ids"]=np.random.randint(8192, size=(8192)).tolist() 15 | train_dict["labels"]=np.random.randint(8192, size=(8192)).tolist() 16 | tweets = [] 17 | with open("./train_warmup.json", "a") as outfile: 18 | json.dump(train_dict, outfile) 19 | outfile.write('\n') 20 | 21 | for i in range(173): 22 | train_dict = {} 23 | train_dict["input_ids"]=np.random.randint(8192, size=(8192)).tolist() 24 | train_dict["labels"]=[-100] * 8192 25 | tweets = [] 26 | with open("./eval_warmup.json", "a") as outfile: 27 | json.dump(train_dict, outfile) 28 | outfile.write('\n') 29 | 30 | # Closing file 31 | -------------------------------------------------------------------------------- /PyTorch/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HabanaAI/Model-References/da5f246c18f41063a89a04f19ce306d1e61f6345/PyTorch/__init__.py -------------------------------------------------------------------------------- /PyTorch/audio/wav2vec2/inference/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers==4.41.1 2 | datasets>=2.8.0,<=2.10.0 3 | soundfile==0.11.0 4 | librosa==0.9.2 5 | jiwer 6 | fsspec==2023.9.2 7 | -------------------------------------------------------------------------------- /PyTorch/computer_vision/classification/ViT/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Habana Labs, Ltd. an Intel Company 4 | Copyright (c) 2020 jeonsworld 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
23 | -------------------------------------------------------------------------------- /PyTorch/computer_vision/classification/ViT/img/figure1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HabanaAI/Model-References/da5f246c18f41063a89a04f19ce306d1e61f6345/PyTorch/computer_vision/classification/ViT/img/figure1.png -------------------------------------------------------------------------------- /PyTorch/computer_vision/classification/ViT/img/figure2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HabanaAI/Model-References/da5f246c18f41063a89a04f19ce306d1e61f6345/PyTorch/computer_vision/classification/ViT/img/figure2.png -------------------------------------------------------------------------------- /PyTorch/computer_vision/classification/ViT/img/figure3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HabanaAI/Model-References/da5f246c18f41063a89a04f19ce306d1e61f6345/PyTorch/computer_vision/classification/ViT/img/figure3.png -------------------------------------------------------------------------------- /PyTorch/computer_vision/classification/ViT/ops_bf16.txt: -------------------------------------------------------------------------------- 1 | add 2 | addmm 3 | bmm 4 | dot 5 | iadd 6 | layer_norm 7 | matmul 8 | mm 9 | rsub 10 | softmax 11 | mul 12 | mean 13 | dropout 14 | linear 15 | -------------------------------------------------------------------------------- /PyTorch/computer_vision/classification/ViT/ops_fp32.txt: -------------------------------------------------------------------------------- 1 | cross_entropy 2 | log_softmax 3 | embedding 4 | binary_cross_entropy 5 | -------------------------------------------------------------------------------- /PyTorch/computer_vision/classification/ViT/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | tqdm 3 | scipy 4 | ml-collections 5 | -------------------------------------------------------------------------------- /PyTorch/computer_vision/classification/ViT/vit_utils/dist_util.py: -------------------------------------------------------------------------------- 1 | import torch.distributed as dist 2 | 3 | def get_rank(): 4 | if not dist.is_available(): 5 | return 0 6 | if not dist.is_initialized(): 7 | return 0 8 | return dist.get_rank() 9 | 10 | def get_world_size(): 11 | if not dist.is_available(): 12 | return 1 13 | if not dist.is_initialized(): 14 | return 1 15 | return dist.get_world_size() 16 | 17 | def is_main_process(): 18 | return get_rank() == 0 19 | 20 | def format_step(step): 21 | if isinstance(step, str): 22 | return step 23 | s = "" 24 | if len(step) > 0: 25 | s += "Training Epoch: {} ".format(step[0]) 26 | if len(step) > 1: 27 | s += "Training Iteration: {} ".format(step[1]) 28 | if len(step) > 2: 29 | s += "Validation Iteration: {} ".format(step[2]) 30 | return s 31 | -------------------------------------------------------------------------------- /PyTorch/computer_vision/classification/torchvision/media_pipe_settings.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2022 Habana Labs, Ltd. 
an Intel Company 2 | 3 | TRAIN_RESIZE_DIM = 224 4 | EVAL_RESIZE_DIM = 256 5 | CROP_DIM = 224 6 | 7 | DECODER_SCALE_MIN = 0.08 8 | DECODER_SCALE_MAX = 1.0 9 | DECODER_RATIO_MIN = 0.75 10 | DECODER_RATIO_MAX = 1.3333333333333333 11 | 12 | USE_HORIZONTAL_FLIP = 1 13 | FLIP_PROBABILITY = 0.5 14 | 15 | RGB_MEAN_VALUES = [0.485, 0.456, 0.406] 16 | RGB_STD_VALUES = [0.229, 0.224, 0.225] 17 | RGB_MULTIPLIER = 255 18 | 19 | EVAL_CROP_X = 0.5 20 | EVAL_CROP_Y = 0.5 21 | -------------------------------------------------------------------------------- /PyTorch/computer_vision/classification/torchvision/model/__init__.py: -------------------------------------------------------------------------------- 1 | from .resnet import * 2 | -------------------------------------------------------------------------------- /PyTorch/computer_vision/classification/torchvision/model/utils.py: -------------------------------------------------------------------------------- 1 | try: 2 | from torch.hub import load_state_dict_from_url 3 | except ImportError: 4 | from torch.utils.model_zoo import load_url as load_state_dict_from_url 5 | -------------------------------------------------------------------------------- /PyTorch/computer_vision/classification/torchvision/ops_bf16_Resnet.txt: -------------------------------------------------------------------------------- 1 | addmm 2 | avg_pool2d 3 | bmm 4 | conv2d 5 | dot 6 | max_pool2d 7 | mm 8 | mv 9 | relu 10 | t 11 | linear -------------------------------------------------------------------------------- /PyTorch/computer_vision/classification/torchvision/ops_fp32_Resnet.txt: -------------------------------------------------------------------------------- 1 | cross_entropy 2 | log_softmax 3 | nll_loss 4 | softmax 5 | topk 6 | -------------------------------------------------------------------------------- /PyTorch/computer_vision/classification/torchvision/requirements.txt: -------------------------------------------------------------------------------- 1 | mpi4py>=3.0.3 2 | scipy>=1.7.1 3 | colorlog==6.6.0 4 | numpy==1.23.5 5 | -------------------------------------------------------------------------------- /PyTorch/computer_vision/classification/torchvision/requirements_u24.txt: -------------------------------------------------------------------------------- 1 | mpi4py>=3.0.3 2 | scipy>=1.7.1 3 | colorlog==6.6.0 4 | numpy==1.26.4 5 | -------------------------------------------------------------------------------- /PyTorch/computer_vision/detection/yolox/assets/demo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HabanaAI/Model-References/da5f246c18f41063a89a04f19ce306d1e61f6345/PyTorch/computer_vision/detection/yolox/assets/demo.png -------------------------------------------------------------------------------- /PyTorch/computer_vision/detection/yolox/assets/dog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HabanaAI/Model-References/da5f246c18f41063a89a04f19ce306d1e61f6345/PyTorch/computer_vision/detection/yolox/assets/dog.jpg -------------------------------------------------------------------------------- /PyTorch/computer_vision/detection/yolox/assets/git_fig.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HabanaAI/Model-References/da5f246c18f41063a89a04f19ce306d1e61f6345/PyTorch/computer_vision/detection/yolox/assets/git_fig.png 
-------------------------------------------------------------------------------- /PyTorch/computer_vision/detection/yolox/assets/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HabanaAI/Model-References/da5f246c18f41063a89a04f19ce306d1e61f6345/PyTorch/computer_vision/detection/yolox/assets/logo.png -------------------------------------------------------------------------------- /PyTorch/computer_vision/detection/yolox/demo/MegEngine/python/README.md: -------------------------------------------------------------------------------- 1 | # YOLOX-Python-MegEngine 2 | 3 | Python version of YOLOX object detection based on [MegEngine](https://github.com/MegEngine/MegEngine). 4 | 5 | ## Tutorial 6 | 7 | ### Step1: install requirements 8 | 9 | ``` 10 | python3 -m pip install megengine -f https://megengine.org.cn/whl/mge.html 11 | ``` 12 | 13 | ### Step2: convert checkpoint weights from torch's .pth file 14 | 15 | ``` 16 | python3 convert_weights.py -w yolox_s.pth -o yolox_s_mge.pkl 17 | ``` 18 | 19 | ### Step3: run demo 20 | 21 | This part is the same as torch's python demo, but there is no need to specify a device. 22 | 23 | ``` 24 | python3 demo.py image -n yolox-s -c yolox_s_mge.pkl --path ../../../assets/dog.jpg --conf 0.25 --nms 0.45 --tsize 640 --save_result 25 | ``` 26 | 27 | ### [Optional]Step4: dump model for cpp inference 28 | 29 | > **Note**: the resulting model is dumped with `optimize_for_inference` and `enable_fuse_conv_bias_nonlinearity`. 30 | 31 | ``` 32 | python3 dump.py -n yolox-s -c yolox_s_mge.pkl --dump_path yolox_s.mge 33 | ``` 34 | -------------------------------------------------------------------------------- /PyTorch/computer_vision/detection/yolox/demo/MegEngine/python/models/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 4 | 5 | from .darknet import CSPDarknet, Darknet 6 | from .yolo_fpn import YOLOFPN 7 | from .yolo_head import YOLOXHead 8 | from .yolo_pafpn import YOLOPAFPN 9 | from .yolox import YOLOX 10 | -------------------------------------------------------------------------------- /PyTorch/computer_vision/detection/yolox/demo/MegEngine/python/models/yolox.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- encoding: utf-8 -*- 3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 4 | 5 | import megengine.module as M 6 | 7 | from .yolo_head import YOLOXHead 8 | from .yolo_pafpn import YOLOPAFPN 9 | 10 | 11 | class YOLOX(M.Module): 12 | """ 13 | YOLOX model module. The module list is defined by create_yolov3_modules function. 14 | The network returns loss values from three YOLO layers during training 15 | and detection results during test.
16 | """ 17 | 18 | def __init__(self, backbone=None, head=None): 19 | super().__init__() 20 | if backbone is None: 21 | backbone = YOLOPAFPN() 22 | if head is None: 23 | head = YOLOXHead(80) 24 | 25 | self.backbone = backbone 26 | self.head = head 27 | 28 | def forward(self, x): 29 | # fpn output content features of [dark3, dark4, dark5] 30 | fpn_outs = self.backbone(x) 31 | assert not self.training 32 | outputs = self.head(fpn_outs) 33 | 34 | return outputs 35 | -------------------------------------------------------------------------------- /PyTorch/computer_vision/detection/yolox/demo/OpenVINO/README.md: -------------------------------------------------------------------------------- 1 | ## YOLOX for OpenVINO 2 | 3 | * [C++ Demo](./cpp) 4 | * [Python Demo](./python) -------------------------------------------------------------------------------- /PyTorch/computer_vision/detection/yolox/demo/OpenVINO/cpp/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.4.1) 2 | set(CMAKE_CXX_STANDARD 14) 3 | 4 | project(yolox_openvino_demo) 5 | 6 | find_package(OpenCV REQUIRED) 7 | find_package(InferenceEngine REQUIRED) 8 | find_package(ngraph REQUIRED) 9 | 10 | include_directories( 11 | ${OpenCV_INCLUDE_DIRS} 12 | ${CMAKE_CURRENT_SOURCE_DIR} 13 | ${CMAKE_CURRENT_BINARY_DIR} 14 | ) 15 | 16 | add_executable(yolox_openvino yolox_openvino.cpp) 17 | 18 | target_link_libraries( 19 | yolox_openvino 20 | ${InferenceEngine_LIBRARIES} 21 | ${NGRAPH_LIBRARIES} 22 | ${OpenCV_LIBS} 23 | ) -------------------------------------------------------------------------------- /PyTorch/computer_vision/detection/yolox/demo/ncnn/android/README.md: -------------------------------------------------------------------------------- 1 | # YOLOX-Android-ncnn 2 | 3 | Andoird app of YOLOX object detection base on [ncnn](https://github.com/Tencent/ncnn) 4 | 5 | 6 | ## Tutorial 7 | 8 | ### Step1 9 | 10 | Download ncnn-android-vulkan.zip from [releases of ncnn](https://github.com/Tencent/ncnn/releases). This repo uses 11 | [20210525 release](https://github.com/Tencent/ncnn/releases/download/20210525/ncnn-20210525-android-vulkan.zip) for building. 12 | 13 | ### Step2 14 | 15 | After downloading, please extract your zip file. Then, there are two ways to finish this step: 16 | * put your extracted directory into **app/src/main/jni** 17 | * change the **ncnn_DIR** path in **app/src/main/jni/CMakeLists.txt** to your extracted directory 18 | 19 | ### Step3 20 | Download example param and bin file from [onedrive](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/ESXBH_GSSmFMszWJ6YG2VkQB5cWDfqVWXgk0D996jH0rpQ?e=qzEqUh) or [github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_s_ncnn.tar.gz). Unzip the file to **app/src/main/assets**. 21 | 22 | ### Step4 23 | Open this project with Android Studio, build it and enjoy! 
24 | 25 | ## Reference 26 | 27 | * [ncnn-android-yolov5](https://github.com/nihui/ncnn-android-yolov5) 28 | -------------------------------------------------------------------------------- /PyTorch/computer_vision/detection/yolox/demo/ncnn/android/app/build.gradle: -------------------------------------------------------------------------------- 1 | apply plugin: 'com.android.application' 2 | 3 | android { 4 | compileSdkVersion 24 5 | buildToolsVersion "29.0.2" 6 | 7 | defaultConfig { 8 | applicationId "com.megvii.yoloXncnn" 9 | archivesBaseName = "$applicationId" 10 | 11 | ndk { 12 | moduleName "ncnn" 13 | abiFilters "armeabi-v7a", "arm64-v8a" 14 | } 15 | minSdkVersion 24 16 | } 17 | 18 | externalNativeBuild { 19 | cmake { 20 | version "3.10.2" 21 | path file('src/main/jni/CMakeLists.txt') 22 | } 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /PyTorch/computer_vision/detection/yolox/demo/ncnn/android/app/src/main/AndroidManifest.xml: -------------------------------------------------------------------------------- 1 | 2 | 6 | 7 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /PyTorch/computer_vision/detection/yolox/demo/ncnn/android/app/src/main/java/com/megvii/yoloXncnn/YOLOXncnn.java: -------------------------------------------------------------------------------- 1 | // Copyright (C) Megvii, Inc. and its affiliates. All rights reserved. 2 | 3 | package com.megvii.yoloXncnn; 4 | 5 | import android.content.res.AssetManager; 6 | import android.graphics.Bitmap; 7 | 8 | public class YOLOXncnn 9 | { 10 | public native boolean Init(AssetManager mgr); 11 | 12 | public class Obj 13 | { 14 | public float x; 15 | public float y; 16 | public float w; 17 | public float h; 18 | public String label; 19 | public float prob; 20 | } 21 | 22 | public native Obj[] Detect(Bitmap bitmap, boolean use_gpu); 23 | 24 | static { 25 | System.loadLibrary("yoloXncnn"); 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /PyTorch/computer_vision/detection/yolox/demo/ncnn/android/app/src/main/java/com/megvii/yoloXncnn/yoloXncnn.java: -------------------------------------------------------------------------------- 1 | // Copyright (C) Megvii, Inc. and its affiliates. All rights reserved. 
2 | 3 | package com.megvii.yoloXncnn; 4 | 5 | import android.content.res.AssetManager; 6 | import android.graphics.Bitmap; 7 | 8 | public class YOLOXncnn 9 | { 10 | public native boolean Init(AssetManager mgr); 11 | 12 | public class Obj 13 | { 14 | public float x; 15 | public float y; 16 | public float w; 17 | public float h; 18 | public String label; 19 | public float prob; 20 | } 21 | 22 | public native Obj[] Detect(Bitmap bitmap, boolean use_gpu); 23 | 24 | static { 25 | System.loadLibrary("yoloXncnn"); 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /PyTorch/computer_vision/detection/yolox/demo/ncnn/android/app/src/main/jni/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | project(yoloXncnn) 2 | 3 | cmake_minimum_required(VERSION 3.4.1) 4 | 5 | set(ncnn_DIR ${CMAKE_SOURCE_DIR}/ncnn-20210525-android-vulkan/${ANDROID_ABI}/lib/cmake/ncnn) 6 | find_package(ncnn REQUIRED) 7 | 8 | add_library(yoloXncnn SHARED yoloXncnn_jni.cpp) 9 | 10 | target_link_libraries(yoloXncnn 11 | ncnn 12 | 13 | jnigraphics 14 | ) 15 | -------------------------------------------------------------------------------- /PyTorch/computer_vision/detection/yolox/demo/ncnn/android/app/src/main/res/values/strings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | yoloXncnn 4 | 5 | -------------------------------------------------------------------------------- /PyTorch/computer_vision/detection/yolox/demo/ncnn/android/build.gradle: -------------------------------------------------------------------------------- 1 | // Top-level build file where you can add configuration options common to all sub-projects/modules. 2 | buildscript { 3 | repositories { 4 | jcenter() 5 | google() 6 | } 7 | dependencies { 8 | classpath 'com.android.tools.build:gradle:3.5.0' 9 | } 10 | } 11 | 12 | allprojects { 13 | repositories { 14 | jcenter() 15 | google() 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /PyTorch/computer_vision/detection/yolox/demo/ncnn/android/gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HabanaAI/Model-References/da5f246c18f41063a89a04f19ce306d1e61f6345/PyTorch/computer_vision/detection/yolox/demo/ncnn/android/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /PyTorch/computer_vision/detection/yolox/demo/ncnn/android/gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | #Sun Aug 25 10:34:48 CST 2019 2 | distributionBase=GRADLE_USER_HOME 3 | distributionPath=wrapper/dists 4 | zipStoreBase=GRADLE_USER_HOME 5 | zipStorePath=wrapper/dists 6 | distributionUrl=https\://services.gradle.org/distributions/gradle-5.4.1-all.zip 7 | -------------------------------------------------------------------------------- /PyTorch/computer_vision/detection/yolox/demo/ncnn/android/settings.gradle: -------------------------------------------------------------------------------- 1 | include ':app' 2 | -------------------------------------------------------------------------------- /PyTorch/computer_vision/detection/yolox/docs/.gitignore: -------------------------------------------------------------------------------- 1 | _build -------------------------------------------------------------------------------- 
/PyTorch/computer_vision/detection/yolox/docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SOURCEDIR = . 8 | BUILDDIR = _build 9 | 10 | # Put it first so that "make" without argument is like "make help". 11 | help: 12 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 13 | 14 | .PHONY: help Makefile 15 | 16 | # Catch-all target: route all unknown targets to Sphinx using the new 17 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 18 | %: Makefile 19 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /PyTorch/computer_vision/detection/yolox/docs/_static/css/custom.css: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Facebook, Inc. and its affiliates. 3 | * some extra css to make markdown look similar between github/sphinx 4 | */ 5 | 6 | /* 7 | * Below is for install.md: 8 | */ 9 | .rst-content code { 10 | white-space: pre; 11 | border: 0px; 12 | } 13 | 14 | .rst-content th { 15 | border: 1px solid #e1e4e5; 16 | } 17 | 18 | .rst-content th p { 19 | /* otherwise will be default 24px for regular paragraph */ 20 | margin-bottom: 0px; 21 | } 22 | 23 | .rst-content .line-block { 24 | /* otherwise will be 24px */ 25 | margin-bottom: 0px; 26 | } 27 | 28 | div.section > details { 29 | padding-bottom: 1em; 30 | } 31 | -------------------------------------------------------------------------------- /PyTorch/computer_vision/detection/yolox/docs/demo/megengine_cpp_readme.md: -------------------------------------------------------------------------------- 1 | ../../demo/MegEngine/cpp/README.md -------------------------------------------------------------------------------- /PyTorch/computer_vision/detection/yolox/docs/demo/megengine_py_readme.md: -------------------------------------------------------------------------------- 1 | ../../demo/MegEngine/python/README.md -------------------------------------------------------------------------------- /PyTorch/computer_vision/detection/yolox/docs/demo/ncnn_android_readme.md: -------------------------------------------------------------------------------- 1 | ../../demo/ncnn/android/README.md -------------------------------------------------------------------------------- /PyTorch/computer_vision/detection/yolox/docs/demo/ncnn_cpp_readme.md: -------------------------------------------------------------------------------- 1 | ../../demo/ncnn/cpp/README.md -------------------------------------------------------------------------------- /PyTorch/computer_vision/detection/yolox/docs/demo/onnx_readme.md: -------------------------------------------------------------------------------- 1 | ../../demo/ONNXRuntime/README.md -------------------------------------------------------------------------------- /PyTorch/computer_vision/detection/yolox/docs/demo/openvino_cpp_readme.md: -------------------------------------------------------------------------------- 1 | ../../demo/OpenVINO/cpp/README.md -------------------------------------------------------------------------------- /PyTorch/computer_vision/detection/yolox/docs/demo/openvino_py_readme.md: 
-------------------------------------------------------------------------------- 1 | ../../demo/OpenVINO/python/README.md -------------------------------------------------------------------------------- /PyTorch/computer_vision/detection/yolox/docs/demo/trt_cpp_readme.md: -------------------------------------------------------------------------------- 1 | ../../demo/TensorRT/cpp/README.md -------------------------------------------------------------------------------- /PyTorch/computer_vision/detection/yolox/docs/demo/trt_py_readme.md: -------------------------------------------------------------------------------- 1 | ../../demo/TensorRT/python/README.md -------------------------------------------------------------------------------- /PyTorch/computer_vision/detection/yolox/docs/index.rst: -------------------------------------------------------------------------------- 1 | 2 | Welcome to YOLOX's documentation! 3 | ====================================== 4 | 5 | .. image:: ../assets/logo.png 6 | 7 | .. toctree:: 8 | :maxdepth: 2 9 | :caption: Quick Run 10 | 11 | quick_run 12 | model_zoo 13 | 14 | .. toctree:: 15 | :maxdepth: 2 16 | :caption: Tutorials 17 | 18 | train_custom_data 19 | 20 | .. toctree:: 21 | :maxdepth: 2 22 | :caption: Demployment 23 | 24 | demo/trt_py_readme 25 | demo/trt_cpp_readme 26 | demo/megengine_cpp_readme 27 | demo/megengine_py_readme 28 | demo/ncnn_android_readme 29 | demo/ncnn_cpp_readme 30 | demo/onnx_readme 31 | demo/openvino_py_readme 32 | demo/openvino_cpp_readme -------------------------------------------------------------------------------- /PyTorch/computer_vision/detection/yolox/docs/requirements-doc.txt: -------------------------------------------------------------------------------- 1 | docutils==0.16 2 | # https://github.com/sphinx-doc/sphinx/commit/7acd3ada3f38076af7b2b5c9f3b60bb9c2587a3d 3 | sphinx==3.2.0 4 | recommonmark==0.6.0 5 | sphinx_rtd_theme 6 | omegaconf>=2.1.0.dev24 7 | hydra-core>=1.1.0.dev5 8 | sphinx-markdown-tables==0.0.15 9 | -------------------------------------------------------------------------------- /PyTorch/computer_vision/detection/yolox/download_dataset.sh: -------------------------------------------------------------------------------- 1 | # Get COCO 2017 data sets 2 | dir=$(pwd) 3 | mkdir -p /data/COCO; cd /data/COCO 4 | curl -O http://images.cocodataset.org/zips/train2017.zip; unzip train2017.zip 5 | curl -O http://images.cocodataset.org/zips/val2017.zip; unzip val2017.zip 6 | curl -O http://images.cocodataset.org/annotations/annotations_trainval2017.zip; unzip annotations_trainval2017.zip 7 | cd $dir 8 | -------------------------------------------------------------------------------- /PyTorch/computer_vision/detection/yolox/exps/default/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 4 | -------------------------------------------------------------------------------- /PyTorch/computer_vision/detection/yolox/exps/default/yolov3.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 
4 | 5 | import os 6 | 7 | import torch.nn as nn 8 | 9 | from yolox.exp import Exp as MyExp 10 | 11 | 12 | class Exp(MyExp): 13 | def __init__(self): 14 | super(Exp, self).__init__() 15 | self.depth = 1.0 16 | self.width = 1.0 17 | self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] 18 | 19 | def get_model(self, sublinear=False): 20 | def init_yolo(M): 21 | for m in M.modules(): 22 | if isinstance(m, nn.BatchNorm2d): 23 | m.eps = 1e-3 24 | m.momentum = 0.03 25 | if "model" not in self.__dict__: 26 | from yolox.models import YOLOX, YOLOFPN, YOLOXHead 27 | backbone = YOLOFPN() 28 | head = YOLOXHead(self.num_classes, self.width, in_channels=[128, 256, 512], act="lrelu") 29 | self.model = YOLOX(backbone, head) 30 | self.model.apply(init_yolo) 31 | self.model.head.initialize_biases(1e-2) 32 | 33 | return self.model 34 | -------------------------------------------------------------------------------- /PyTorch/computer_vision/detection/yolox/exps/default/yolox_l.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 4 | 5 | import os 6 | 7 | from yolox.exp import Exp as MyExp 8 | 9 | 10 | class Exp(MyExp): 11 | def __init__(self): 12 | super(Exp, self).__init__() 13 | self.depth = 1.0 14 | self.width = 1.0 15 | self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] 16 | -------------------------------------------------------------------------------- /PyTorch/computer_vision/detection/yolox/exps/default/yolox_m.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 4 | 5 | import os 6 | 7 | from yolox.exp import Exp as MyExp 8 | 9 | 10 | class Exp(MyExp): 11 | def __init__(self): 12 | super(Exp, self).__init__() 13 | self.depth = 0.67 14 | self.width = 0.75 15 | self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] 16 | -------------------------------------------------------------------------------- /PyTorch/computer_vision/detection/yolox/exps/default/yolox_s.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 4 | 5 | import os 6 | 7 | from yolox.exp import Exp as MyExp 8 | 9 | 10 | class Exp(MyExp): 11 | def __init__(self): 12 | super(Exp, self).__init__() 13 | self.depth = 0.33 14 | self.width = 0.50 15 | self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] 16 | -------------------------------------------------------------------------------- /PyTorch/computer_vision/detection/yolox/exps/default/yolox_tiny.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 
4 | 5 | import os 6 | 7 | from yolox.exp import Exp as MyExp 8 | 9 | 10 | class Exp(MyExp): 11 | def __init__(self): 12 | super(Exp, self).__init__() 13 | self.depth = 0.33 14 | self.width = 0.375 15 | self.input_size = (416, 416) 16 | self.mosaic_scale = (0.5, 1.5) 17 | self.random_size = (10, 20) 18 | self.test_size = (416, 416) 19 | self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] 20 | self.enable_mixup = False 21 | -------------------------------------------------------------------------------- /PyTorch/computer_vision/detection/yolox/exps/default/yolox_x.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 4 | 5 | import os 6 | 7 | from yolox.exp import Exp as MyExp 8 | 9 | 10 | class Exp(MyExp): 11 | def __init__(self): 12 | super(Exp, self).__init__() 13 | self.depth = 1.33 14 | self.width = 1.25 15 | self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] 16 | -------------------------------------------------------------------------------- /PyTorch/computer_vision/detection/yolox/exps/example/custom/yolox_s.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 4 | import os 5 | 6 | from yolox.exp import Exp as MyExp 7 | 8 | 9 | class Exp(MyExp): 10 | def __init__(self): 11 | super(Exp, self).__init__() 12 | self.depth = 0.33 13 | self.width = 0.50 14 | self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] 15 | 16 | # Define yourself dataset path 17 | self.data_dir = "datasets/coco128" 18 | self.train_ann = "instances_train2017.json" 19 | self.val_ann = "instances_val2017.json" 20 | 21 | self.num_classes = 71 22 | 23 | self.max_epoch = 300 24 | self.data_num_workers = 4 25 | self.eval_interval = 1 26 | -------------------------------------------------------------------------------- /PyTorch/computer_vision/detection/yolox/hubconf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | 4 | """ 5 | Usage example: 6 | import torch 7 | model = torch.hub.load("Megvii-BaseDetection/YOLOX", "yolox_s") 8 | """ 9 | dependencies = ["torch"] 10 | 11 | from yolox.models import ( # isort:skip # noqa: F401, E402 12 | yolox_tiny, 13 | yolox_nano, 14 | yolox_s, 15 | yolox_m, 16 | yolox_l, 17 | yolox_x, 18 | yolov3, 19 | ) 20 | -------------------------------------------------------------------------------- /PyTorch/computer_vision/detection/yolox/ops_bf16_yolox.txt: -------------------------------------------------------------------------------- 1 | max_pool2d_with_indices 2 | max_pool2d 3 | conv2d 4 | bmm 5 | mul 6 | mm 7 | mv 8 | div 9 | batch_norm 10 | sigmoid 11 | -------------------------------------------------------------------------------- /PyTorch/computer_vision/detection/yolox/ops_fp32_yolox.txt: -------------------------------------------------------------------------------- 1 | cross_entropy 2 | log_softmax 3 | nll_loss 4 | softmax 5 | binary_cross_entropy 6 | l1_loss 7 | view 8 | -------------------------------------------------------------------------------- /PyTorch/computer_vision/detection/yolox/requirements.txt: -------------------------------------------------------------------------------- 1 | # TODO: Update with exact module version 2 | opencv_python 3 | 
loguru 4 | scikit-image 5 | pillow==10.3.0 6 | thop 7 | ninja 8 | tabulate 9 | cython 10 | 11 | # verified versions 12 | # pycocotools corresponds to https://github.com/ppwwyyxx/cocoapi 13 | pycocotools>=2.0.2 14 | 15 | # no need onnox for hpu enablement 16 | # latest version of thop require onnx 17 | #onnx==1.8.1 18 | onnxruntime==1.18.1 19 | #onnx-simplifier==0.3.5 20 | 21 | 22 | -------------------------------------------------------------------------------- /PyTorch/computer_vision/detection/yolox/setup.cfg: -------------------------------------------------------------------------------- 1 | [isort] 2 | line_length = 100 3 | multi_line_output = 3 4 | balanced_wrapping = True 5 | known_standard_library = setuptools 6 | known_third_party = tqdm,loguru 7 | known_data_processing = cv2,numpy,scipy,PIL,matplotlib,scikit_image 8 | known_datasets = pycocotools 9 | known_deeplearning = torch,torchvision,caffe2,onnx,apex,timm,thop,torch2trt,tensorrt,openvino,onnxruntime 10 | known_myself = yolox 11 | sections = FUTURE,STDLIB,THIRDPARTY,data_processing,datasets,deeplearning,myself,FIRSTPARTY,LOCALFOLDER 12 | no_lines_before=STDLIB,THIRDPARTY,datasets 13 | default_section = FIRSTPARTY 14 | 15 | [flake8] 16 | max-line-length = 100 17 | max-complexity = 18 18 | exclude = __init__.py 19 | -------------------------------------------------------------------------------- /PyTorch/computer_vision/detection/yolox/tests/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | -------------------------------------------------------------------------------- /PyTorch/computer_vision/detection/yolox/tools/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 4 | -------------------------------------------------------------------------------- /PyTorch/computer_vision/detection/yolox/yolox/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | 4 | from .utils import configure_module 5 | 6 | configure_module() 7 | 8 | __version__ = "0.2.0" 9 | -------------------------------------------------------------------------------- /PyTorch/computer_vision/detection/yolox/yolox/core/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 4 | 5 | from .launch import launch 6 | from .trainer import Trainer 7 | -------------------------------------------------------------------------------- /PyTorch/computer_vision/detection/yolox/yolox/data/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 
4 | 5 | from .data_augment import TrainTransform, ValTransform 6 | from .data_prefetcher import DataPrefetcher 7 | from .dataloading import DataLoader, get_yolox_datadir, worker_init_reset_seed 8 | from .datasets import * 9 | from .samplers import InfiniteSampler, YoloBatchSampler 10 | -------------------------------------------------------------------------------- /PyTorch/computer_vision/detection/yolox/yolox/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 4 | 5 | from .coco import COCODataset 6 | from .coco_classes import COCO_CLASSES 7 | from .datasets_wrapper import ConcatDataset, Dataset, MixConcatDataset 8 | from .mosaicdetection import MosaicDetection 9 | from .voc import VOCDetection 10 | -------------------------------------------------------------------------------- /PyTorch/computer_vision/detection/yolox/yolox/data/datasets/voc_classes.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 4 | 5 | # VOC_CLASSES = ( '__background__', # always index 0 6 | VOC_CLASSES = ( 7 | "aeroplane", 8 | "bicycle", 9 | "bird", 10 | "boat", 11 | "bottle", 12 | "bus", 13 | "car", 14 | "cat", 15 | "chair", 16 | "cow", 17 | "diningtable", 18 | "dog", 19 | "horse", 20 | "motorbike", 21 | "person", 22 | "pottedplant", 23 | "sheep", 24 | "sofa", 25 | "train", 26 | "tvmonitor", 27 | ) 28 | -------------------------------------------------------------------------------- /PyTorch/computer_vision/detection/yolox/yolox/evaluators/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 4 | 5 | from .coco_evaluator import COCOEvaluator 6 | from .voc_evaluator import VOCEvaluator 7 | -------------------------------------------------------------------------------- /PyTorch/computer_vision/detection/yolox/yolox/exp/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii Inc. All rights reserved. 4 | 5 | from .base_exp import BaseExp 6 | from .build import get_exp 7 | from .yolox_base import Exp 8 | -------------------------------------------------------------------------------- /PyTorch/computer_vision/detection/yolox/yolox/exp/default/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii Inc. All rights reserved. 
4 | 5 | # This file is used for package installation and find default exp file 6 | 7 | import importlib 8 | import sys 9 | from pathlib import Path 10 | 11 | _EXP_PATH = Path(__file__).resolve().parent.parent.parent.parent / "exps" / "default" 12 | 13 | if _EXP_PATH.is_dir(): 14 | # This is true only for in-place installation (pip install -e, setup.py develop), 15 | # where setup(package_dir=) does not work: https://github.com/pypa/setuptools/issues/230 16 | 17 | class _ExpFinder(importlib.abc.MetaPathFinder): 18 | 19 | def find_spec(self, name, path, target=None): 20 | if not name.startswith("yolox.exp.default"): 21 | return 22 | project_name = name.split(".")[-1] + ".py" 23 | target_file = _EXP_PATH / project_name 24 | if not target_file.is_file(): 25 | return 26 | return importlib.util.spec_from_file_location(name, target_file) 27 | 28 | sys.meta_path.append(_ExpFinder()) 29 | -------------------------------------------------------------------------------- /PyTorch/computer_vision/detection/yolox/yolox/layers/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 4 | 5 | from .fast_coco_eval_api import COCOeval_opt 6 | -------------------------------------------------------------------------------- /PyTorch/computer_vision/detection/yolox/yolox/layers/csrc/vision.cpp: -------------------------------------------------------------------------------- 1 | #include "cocoeval/cocoeval.h" 2 | 3 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 4 | m.def("COCOevalAccumulate", &COCOeval::Accumulate, "COCOeval::Accumulate"); 5 | m.def( 6 | "COCOevalEvaluateImages", 7 | &COCOeval::EvaluateImages, 8 | "COCOeval::EvaluateImages"); 9 | pybind11::class_(m, "InstanceAnnotation") 10 | .def(pybind11::init()); 11 | pybind11::class_(m, "ImageEvaluation") 12 | .def(pybind11::init<>()); 13 | } 14 | -------------------------------------------------------------------------------- /PyTorch/computer_vision/detection/yolox/yolox/models/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii Inc. All rights reserved. 4 | 5 | from .build import * 6 | from .darknet import CSPDarknet, Darknet 7 | from .losses import IOUloss 8 | from .yolo_fpn import YOLOFPN 9 | from .yolo_head import YOLOXHead 10 | from .yolo_pafpn import YOLOPAFPN 11 | from .yolox import YOLOX 12 | try: 13 | from .yolo_head_script import YOLOXHeadScript 14 | except RuntimeError: 15 | pass -------------------------------------------------------------------------------- /PyTorch/computer_vision/detection/yolox/yolox/tools/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 4 | 5 | # This file is used for package installation. Script of train/eval/export will be available. 
6 | 7 | import importlib 8 | import sys 9 | from pathlib import Path 10 | 11 | _TOOLS_PATH = Path(__file__).resolve().parent.parent.parent / "tools" 12 | 13 | if _TOOLS_PATH.is_dir(): 14 | # This is true only for in-place installation (pip install -e, setup.py develop), 15 | # where setup(package_dir=) does not work: https://github.com/pypa/setuptools/issues/230 16 | 17 | class _PathFinder(importlib.abc.MetaPathFinder): 18 | 19 | def find_spec(self, name, path, target=None): 20 | if not name.startswith("yolox.tools."): 21 | return 22 | project_name = name.split(".")[-1] + ".py" 23 | target_file = _TOOLS_PATH / project_name 24 | if not target_file.is_file(): 25 | return 26 | return importlib.util.spec_from_file_location(name, target_file) 27 | 28 | sys.meta_path.append(_PathFinder()) 29 | -------------------------------------------------------------------------------- /PyTorch/computer_vision/detection/yolox/yolox/utils/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii Inc. All rights reserved. 4 | 5 | from .allreduce_norm import * 6 | from .boxes import * 7 | from .checkpoint import load_ckpt, save_checkpoint 8 | from .compat import meshgrid 9 | from .demo_utils import * 10 | from .dist import * 11 | from .ema import * 12 | from .logger import WandbLogger, setup_logger 13 | from .lr_scheduler import LRScheduler 14 | from .metric import * 15 | from .model_utils import * 16 | from .setup_env import * 17 | from .visualize import * 18 | -------------------------------------------------------------------------------- /PyTorch/computer_vision/detection/yolox/yolox/utils/compat.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | 4 | import torch 5 | 6 | _TORCH_VER = [int(x) for x in torch.__version__.split(".")[:2]] 7 | 8 | __all__ = ["meshgrid"] 9 | 10 | 11 | def meshgrid(*tensors): 12 | if _TORCH_VER >= [1, 10]: 13 | return torch.meshgrid(*tensors, indexing="ij") 14 | else: 15 | return torch.meshgrid(*tensors) 16 | -------------------------------------------------------------------------------- /PyTorch/computer_vision/segmentation/Unet/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG FROM_IMAGE_NAME=nvcr.io/nvidia/pytorch:21.02-py3 2 | FROM ${FROM_IMAGE_NAME} 3 | 4 | ADD . 
/workspace/nnunet_pyt 5 | WORKDIR /workspace/nnunet_pyt 6 | 7 | RUN pip install --upgrade pip 8 | RUN pip install --disable-pip-version-check -r requirements.txt 9 | RUN pip install pytorch-lightning==1.0.0 --no-dependencies 10 | RUN pip install monai==0.4.0 --no-dependencies 11 | RUN pip install --extra-index-url https://developer.download.nvidia.com/compute/redist/ nvidia-dali-cuda110==0.30.0 12 | RUN pip install torch_optimizer==0.0.1a15 --no-dependencies 13 | RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" 14 | RUN unzip -qq awscliv2.zip 15 | RUN ./aws/install 16 | RUN rm -rf awscliv2.zip aws 17 | -------------------------------------------------------------------------------- /PyTorch/computer_vision/segmentation/Unet/config/ops_bf16_unet.txt: -------------------------------------------------------------------------------- 1 | addmm 2 | avg_pool2d 3 | bmm 4 | conv_transpose1d 5 | conv_transpose2d 6 | conv_transpose3d 7 | conv1d 8 | conv2d 9 | conv3d 10 | dot 11 | leaky_relu 12 | linear 13 | matmul 14 | max_pool2d 15 | mm 16 | mv 17 | relu 18 | t -------------------------------------------------------------------------------- /PyTorch/computer_vision/segmentation/Unet/config/ops_fp32_unet.txt: -------------------------------------------------------------------------------- 1 | cross_entropy 2 | log_softmax 3 | nll_loss 4 | softmax 5 | -------------------------------------------------------------------------------- /PyTorch/computer_vision/segmentation/Unet/images/unet3d.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HabanaAI/Model-References/da5f246c18f41063a89a04f19ce306d1e61f6345/PyTorch/computer_vision/segmentation/Unet/images/unet3d.png -------------------------------------------------------------------------------- /PyTorch/computer_vision/segmentation/Unet/lightning_trainer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HabanaAI/Model-References/da5f246c18f41063a89a04f19ce306d1e61f6345/PyTorch/computer_vision/segmentation/Unet/lightning_trainer/__init__.py -------------------------------------------------------------------------------- /PyTorch/computer_vision/segmentation/Unet/requirements.txt: -------------------------------------------------------------------------------- 1 | # The extra-index-url is for downloading the nvidia package \ 2 | # It is used only as a fallback mechanism if download fails from \ 3 | # primary source 4 | --extra-index-url https://developer.download.nvidia.com/compute/redist 5 | git+https://github.com/NVIDIA/dllogger 6 | nibabel==3.2.1 7 | scikit-learn==1.2.1 ; python_version <= '3.8' 8 | scikit-learn==1.5.0 ; python_version > '3.8' 9 | pynvml==12.0.0 10 | pytorch_ranger==0.1.1 11 | dropblock==0.3.0 12 | monai==1.4.0 13 | nvidia-dali-cuda110==1.32.0 14 | torch-optimizer==0.0.1a15 15 | scikit-image==0.19.3 16 | awscli 17 | lightning==2.5.1 18 | lightning-habana==1.6.0 19 | numpy==1.24.0 20 | -------------------------------------------------------------------------------- /PyTorch/computer_vision/segmentation/Unet/requirements_u22.txt: -------------------------------------------------------------------------------- 1 | git+https://github.com/NVIDIA/dllogger 2 | nibabel==3.2.1 3 | scikit-learn==1.5.0 4 | pynvml==12.0.0 5 | pytorch_ranger==0.1.1 6 | dropblock==0.3.0 7 | monai==1.4.0 8 | nvidia-dali-cuda110==1.32.0 9 | torch-optimizer==0.0.1a15 10 | 
scikit-image==0.19.3 11 | awscli 12 | lightning==2.5.1 13 | lightning-habana==1.6.0 14 | numpy==1.24.0 15 | -------------------------------------------------------------------------------- /PyTorch/computer_vision/segmentation/Unet/requirements_u24.txt: -------------------------------------------------------------------------------- 1 | git+https://github.com/NVIDIA/dllogger 2 | nibabel==3.2.1 3 | scikit-learn==1.5.0 4 | pynvml==12.0.0 5 | pytorch_ranger==0.1.1 6 | dropblock==0.3.0 7 | monai==1.4.0 8 | nvidia-dali-cuda110==1.34.0 9 | torch-optimizer==0.0.1a15 10 | scikit-image==0.24.0 11 | awscli 12 | lightning==2.5.1 13 | lightning-habana==1.6.0 14 | numpy==1.26.4 15 | -------------------------------------------------------------------------------- /PyTorch/computer_vision/segmentation/Unet/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HabanaAI/Model-References/da5f246c18f41063a89a04f19ce306d1e61f6345/PyTorch/computer_vision/segmentation/Unet/utils/__init__.py -------------------------------------------------------------------------------- /PyTorch/examples/DeepSpeed/cifar_example/ds_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "train_batch_size": 16, 3 | "steps_per_print": 2000, 4 | "optimizer": { 5 | "type": "Adam", 6 | "params": { 7 | "lr": 0.001, 8 | "betas": [ 9 | 0.8, 10 | 0.999 11 | ], 12 | "eps": 1e-8, 13 | "weight_decay": 3e-7 14 | } 15 | }, 16 | "scheduler": { 17 | "type": "WarmupLR", 18 | "params": { 19 | "warmup_min_lr": 0, 20 | "warmup_max_lr": 0.001, 21 | "warmup_num_steps": 1000 22 | } 23 | }, 24 | "gradient_clipping": 1.0, 25 | "prescale_gradients": false, 26 | "bf16": {"enabled": true}, 27 | "fp16": {"enabled": false}, 28 | "wall_clock_breakdown": false, 29 | "zero_optimization": { 30 | "stage": 1, 31 | "allgather_partitions": true, 32 | "reduce_scatter": true, 33 | "allgather_bucket_size": 50000000, 34 | "reduce_bucket_size": 50000000, 35 | "overlap_comm": true, 36 | "contiguous_gradients": true, 37 | "cpu_offload": false 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /PyTorch/examples/DeepSpeed/cifar_example/requirements.txt: -------------------------------------------------------------------------------- 1 | pillow>=7.1.0 2 | matplotlib 3 | torchmetrics>=0.8.0 4 | -------------------------------------------------------------------------------- /PyTorch/examples/DeepSpeed/cifar_example/run_ds_habanax8.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | PT_HPU_LAZY_MODE=0 deepspeed --num_nodes=1 --num_gpus=8 cifar10_deepspeed.py --deepspeed --deepspeed_config ds_config.json $@ 4 | -------------------------------------------------------------------------------- /PyTorch/examples/bucketing/requirements.txt: -------------------------------------------------------------------------------- 1 | matplotlib 2 | tqdm 3 | datasets 4 | transformers 5 | pulp 6 | scipy 7 | pytest -------------------------------------------------------------------------------- /PyTorch/examples/bucketing/run_demo_bucketing_gaussian.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Habana Labs, Ltd. 
an Intel Company 2 | 3 | import itertools 4 | from plotting import plot_bucket_analysis_results 5 | from bucket import bucket_analysis, lp_bucket, const_bucket, uniform_bucket, percentile_bucket, lloyd_max_bucketing, brute_force_min_pad_waste 6 | from datasets_library import generate_random_gaussian 7 | 8 | shapes = list(itertools.islice(generate_random_gaussian(), 1000)) 9 | results = bucket_analysis(shapes, [("lp_bucket", lp_bucket), ("const_bucket", const_bucket), ("uniform_bucket", uniform_bucket), \ 10 | ("percentile_bucket", percentile_bucket), ("lloyd_max_bucketing", lloyd_max_bucketing), \ 11 | ("brute_force_min_pad_waste", brute_force_min_pad_waste)], [2,3,4,5,6,10,20]) 12 | plot_bucket_analysis_results(results, 'bucket_analysis_bar_gaussian.svg') 13 | 14 | 15 | -------------------------------------------------------------------------------- /PyTorch/examples/bucketing/run_demo_bucketing_squad.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Habana Labs, Ltd. an Intel Company 2 | 3 | import itertools 4 | from plotting import plot_bucket_analysis_results 5 | from bucket import bucket_analysis, lp_bucket, const_bucket, uniform_bucket, percentile_bucket, lloyd_max_bucketing, brute_force_min_pad_waste 6 | from datasets_library import squad 7 | 8 | shapes = squad(4) 9 | results = bucket_analysis(shapes, [("const_bucket", const_bucket), ("uniform_bucket", uniform_bucket), \ 10 | ("percentile_bucket", percentile_bucket), ("lloyd_max_bucketing", lloyd_max_bucketing), \ 11 | ("brute_force_min_pad_waste", brute_force_min_pad_waste)], [2,3,4,5,6,10,20]) 12 | 13 | plot_bucket_analysis_results(results, "bucket_analysis_bar_squad.svg") 14 | 15 | 16 | -------------------------------------------------------------------------------- /PyTorch/examples/bucketing/run_demo_controlling_num_steps.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Habana Labs, Ltd. an Intel Company 2 | 3 | import itertools 4 | from plotting import plot_bucket_analysis_results 5 | from bucket import bucket_analysis, lloyd_max_bucketing, brute_force_min_pad_waste 6 | from datasets_library import generate_random_gaussian 7 | 8 | shapes = list(itertools.islice(generate_random_gaussian(), 10000)) 9 | 10 | lloyd_max_set_step = lambda step : (lambda shp, num_buckets : lloyd_max_bucketing(shp, num_buckets, step)) 11 | bruteforce_set_threshold = lambda th : (lambda shp, num_buckets : brute_force_min_pad_waste(shp, num_buckets, th)) 12 | 13 | 14 | results = bucket_analysis(shapes, [("lloyd_max_02", lloyd_max_set_step(2)), ("lloyd_max_10", lloyd_max_set_step(10)), ("lloyd_max_20", lloyd_max_set_step(20)), \ 15 | ("lloyd_max_30", lloyd_max_set_step(30)), ("bruteforce_100k", bruteforce_set_threshold(100000)), ("bruteforce_1M", bruteforce_set_threshold(1000000))], [6, 8, 10]) 16 | plot_bucket_analysis_results(results, 'bucket_analysis_num_steps_gaussian.svg') 17 | -------------------------------------------------------------------------------- /PyTorch/examples/bucketing/run_demo_gaussian.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Habana Labs, Ltd. 
an Intel Company 2 | 3 | from datasets_library import gaussian, batched_gaussian, batch_by_formula, sample_from_pdf 4 | from plotting import plotter 5 | 6 | 7 | if __name__ == '__main__': 8 | print("Plotting gaussian") 9 | num_samples = 100000 10 | bs = 4 11 | gs = gaussian(num_samples) 12 | orig = batched_gaussian(gs, 1, max) 13 | max_batch4 = batched_gaussian(gs, bs, max) 14 | min_batch4 = batched_gaussian(gs, bs, min) 15 | max_formula_batch4 = sample_from_pdf(batch_by_formula(gs, bs, 'max'), num_samples) 16 | min_formula_batch4 = sample_from_pdf(batch_by_formula(gs, bs, 'min'), num_samples) 17 | max_batch32 = batched_gaussian(gs, 8*bs, max) 18 | min_batch32 = batched_gaussian(gs, 8*bs, min) 19 | max_formula_batch32 = sample_from_pdf(batch_by_formula(gs, 8*bs, 'max'), num_samples) 20 | min_formula_batch32 = sample_from_pdf(batch_by_formula(gs, 8*bs, 'min'), num_samples) 21 | plotter([orig, max_batch4, max_formula_batch4, min_batch4, min_formula_batch4, max_batch32, max_formula_batch32, min_batch32, min_formula_batch32], 'gaussian.svg', ['original', 'bs4_max', 'bs4_max_formula', 'bs4_min', 'bs4_min_formula', 'bs32_max', 'bs32_max_formula', 'bs32_min', 'bs32_min_formula']) -------------------------------------------------------------------------------- /PyTorch/examples/bucketing/run_demo_squad.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Habana Labs, Ltd. an Intel Company 2 | 3 | from datasets_library import squad 4 | from plotting import plotter 5 | 6 | 7 | if __name__ == '__main__': 8 | print("Plotting squad, takes 2-3 mins to run") 9 | plotter([squad(1), squad(4), squad(16), squad(64), squad(256), squad(512)], 'squad.svg', ['bs='+str(bs) for bs in [1,4,16,64,256,512]]) -------------------------------------------------------------------------------- /PyTorch/examples/custom_op/custom_fusedsdpa/README.md: -------------------------------------------------------------------------------- 1 | ## Table of Contents 2 | 3 | * [Model-References](../../../../README.md) 4 | * [Applying SDPA CustomOp to Bert NV](#applying-customops-to-a-real-training-model-example) 5 | 6 | A brief description of Scaled Dot Product Attention (SDPA) kernel is provided in 7 | [FusedSDPA section](https://docs.habana.ai/en/latest/PyTorch/Python_Packages.html#hpex-kernels-fusedsdpa). 8 | 9 | The usage of the SDPA is demonstrated through the BERT Fine tuning training model. 10 | The changes required to invoke SDPA are available in `custom_fusedsdpa_op.patch`. 11 | The BERT FT model can be patched with `custom_fusedsdpa_op.patch` and trained using SDPA. 12 | 13 | Below are the steps to patch and run the BERT FT training script. The commands to run the 14 | training remain unmodified. 15 | 16 | ## Applying SDPA CustomOp to BERT Fine-Tuning 17 | 18 | 1. Apply the patch `custom_fusedsdpa_op.patch` to PyTorch/nlp/bert/modeling.py: 19 | - Go to the main directory in the repository. 20 | - Run `git apply --verbose PyTorch/examples/custom_op/custom_fusedsdpa/custom_fusedsdpa_op.patch` 21 | 2. Run the model. 22 | -------------------------------------------------------------------------------- /PyTorch/examples/custom_op/legacy_custom_op_API/custom_relu/__init__.py: -------------------------------------------------------------------------------- 1 | # ****************************************************************************** 2 | # Copyright (C) 2020-2021 Habana Labs, Ltd. an Intel Company 3 | # All Rights Reserved. 
4 | # 5 | # Unauthorized copying of this file or any element(s) within it, via any medium 6 | # is strictly prohibited. 7 | # This file contains Habana Labs, Ltd. proprietary and confidential information 8 | # and is subject to the confidentiality and license agreements under which it 9 | # was provided. 10 | # 11 | # ****************************************************************************** 12 | 13 | from .custom_relu import CustomReLU 14 | 15 | __all__ = [CustomReLU] 16 | 17 | -------------------------------------------------------------------------------- /PyTorch/examples/custom_op/pt2_custom_op_API/custom_relu/__init__.py: -------------------------------------------------------------------------------- 1 | # ****************************************************************************** 2 | # Copyright (C) 2024 Habana Labs, Ltd. an Intel Company 3 | # All Rights Reserved. 4 | # 5 | # Unauthorized copying of this file or any element(s) within it, via any medium 6 | # is strictly prohibited. 7 | # This file contains Habana Labs, Ltd. proprietary and confidential information 8 | # and is subject to the confidentiality and license agreements under which it 9 | # was provided. 10 | # 11 | # ****************************************************************************** 12 | 13 | from .custom_relu import CustomReLU 14 | 15 | __all__ = [CustomReLU] 16 | 17 | -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/generative_models/stable-diffusion/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (C) 2021 Habana Labs Ltd. an Intel Company 4 | Copyright (c) 2022 Machine Vision and Learning Group, LMU Munich 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
23 | 24 | -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/generative_models/stable-diffusion/environment.yaml: -------------------------------------------------------------------------------- 1 | name: ldm 2 | channels: 3 | - pytorch 4 | - defaults 5 | dependencies: 6 | - python=3.8.5 7 | - pip=20.3 8 | - cudatoolkit=11.0 9 | - pytorch=1.7.0 10 | - torchvision=0.8.1 11 | - numpy=1.19.2 12 | - pip: 13 | - albumentations==0.4.3 14 | - opencv-python==4.1.2.30 15 | - pudb==2019.2 16 | - imageio==2.9.0 17 | - imageio-ffmpeg==0.4.2 18 | - pytorch-lightning==1.4.2 19 | - omegaconf==2.1.1 20 | - test-tube>=0.7.5 21 | - streamlit>=0.73.1 22 | - einops==0.3.0 23 | - torch-fidelity==0.3.0 24 | - transformers==4.3.1 25 | - webdataset==0.2.5 26 | - kornia==0.6 27 | - -e git+https://github.com/CompVis/taming-transformers.git@master#egg=taming-transformers 28 | - -e git+https://github.com/openai/CLIP.git@main#egg=clip 29 | - -e . 30 | -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/generative_models/stable-diffusion/ldm/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HabanaAI/Model-References/da5f246c18f41063a89a04f19ce306d1e61f6345/PyTorch/examples/gpu_migration/generative_models/stable-diffusion/ldm/data/__init__.py -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/generative_models/stable-diffusion/ldm/data/dummy.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | import string 4 | from torch.utils.data import Dataset, Subset 5 | 6 | class DummyData(Dataset): 7 | def __init__(self, length, size): 8 | self.length = length 9 | self.size = size 10 | 11 | def __len__(self): 12 | return self.length 13 | 14 | def __getitem__(self, i): 15 | x = np.random.randn(*self.size) 16 | letters = string.ascii_lowercase 17 | y = ''.join(random.choice(string.ascii_lowercase) for i in range(10)) 18 | return {"jpg": x, "txt": y} 19 | 20 | 21 | class DummyDataWithEmbeddings(Dataset): 22 | def __init__(self, length, size, emb_size): 23 | self.length = length 24 | self.size = size 25 | self.emb_size = emb_size 26 | 27 | def __len__(self): 28 | return self.length 29 | 30 | def __getitem__(self, i): 31 | x = np.random.randn(*self.size) 32 | y = np.random.randn(*self.emb_size).astype(np.float32) 33 | return {"jpg": x, "txt": y} 34 | 35 | -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/generative_models/stable-diffusion/ldm/data/inpainting/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HabanaAI/Model-References/da5f246c18f41063a89a04f19ce306d1e61f6345/PyTorch/examples/gpu_migration/generative_models/stable-diffusion/ldm/data/inpainting/__init__.py -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/generative_models/stable-diffusion/ldm/models/diffusion/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HabanaAI/Model-References/da5f246c18f41063a89a04f19ce306d1e61f6345/PyTorch/examples/gpu_migration/generative_models/stable-diffusion/ldm/models/diffusion/__init__.py 
-------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/generative_models/stable-diffusion/ldm/modules/diffusionmodules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HabanaAI/Model-References/da5f246c18f41063a89a04f19ce306d1e61f6345/PyTorch/examples/gpu_migration/generative_models/stable-diffusion/ldm/modules/diffusionmodules/__init__.py -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/generative_models/stable-diffusion/ldm/modules/distributions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HabanaAI/Model-References/da5f246c18f41063a89a04f19ce306d1e61f6345/PyTorch/examples/gpu_migration/generative_models/stable-diffusion/ldm/modules/distributions/__init__.py -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/generative_models/stable-diffusion/ldm/modules/encoders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HabanaAI/Model-References/da5f246c18f41063a89a04f19ce306d1e61f6345/PyTorch/examples/gpu_migration/generative_models/stable-diffusion/ldm/modules/encoders/__init__.py -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/generative_models/stable-diffusion/ldm/modules/image_degradation/__init__.py: -------------------------------------------------------------------------------- 1 | from ldm.modules.image_degradation.bsrgan import degradation_bsrgan_variant as degradation_fn_bsr 2 | from ldm.modules.image_degradation.bsrgan_light import degradation_bsrgan_variant as degradation_fn_bsr_light 3 | -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/generative_models/stable-diffusion/ldm/modules/image_degradation/utils/test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HabanaAI/Model-References/da5f246c18f41063a89a04f19ce306d1e61f6345/PyTorch/examples/gpu_migration/generative_models/stable-diffusion/ldm/modules/image_degradation/utils/test.png -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/generative_models/stable-diffusion/ldm/modules/losses/__init__.py: -------------------------------------------------------------------------------- 1 | from ldm.modules.losses.contperceptual import LPIPSWithDiscriminator -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/generative_models/stable-diffusion/models/first_stage_models/kl-f16/config.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 4.5e-06 3 | target: ldm.models.autoencoder.AutoencoderKL 4 | params: 5 | monitor: val/rec_loss 6 | embed_dim: 16 7 | lossconfig: 8 | target: ldm.modules.losses.LPIPSWithDiscriminator 9 | params: 10 | disc_start: 50001 11 | kl_weight: 1.0e-06 12 | disc_weight: 0.5 13 | ddconfig: 14 | double_z: true 15 | z_channels: 16 16 | resolution: 256 17 | in_channels: 3 18 | out_ch: 3 19 | ch: 128 20 | ch_mult: 21 | - 1 22 | - 1 23 | - 2 24 | - 2 25 | - 4 26 | num_res_blocks: 2 27 | attn_resolutions: 28 | - 16 29 | dropout: 
0.0 30 | data: 31 | target: main.DataModuleFromConfig 32 | params: 33 | batch_size: 6 34 | wrap: true 35 | train: 36 | target: ldm.data.openimages.FullOpenImagesTrain 37 | params: 38 | size: 384 39 | crop_size: 256 40 | validation: 41 | target: ldm.data.openimages.FullOpenImagesValidation 42 | params: 43 | size: 384 44 | crop_size: 256 45 | -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/generative_models/stable-diffusion/models/first_stage_models/kl-f32/config.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 4.5e-06 3 | target: ldm.models.autoencoder.AutoencoderKL 4 | params: 5 | monitor: val/rec_loss 6 | embed_dim: 64 7 | lossconfig: 8 | target: ldm.modules.losses.LPIPSWithDiscriminator 9 | params: 10 | disc_start: 50001 11 | kl_weight: 1.0e-06 12 | disc_weight: 0.5 13 | ddconfig: 14 | double_z: true 15 | z_channels: 64 16 | resolution: 256 17 | in_channels: 3 18 | out_ch: 3 19 | ch: 128 20 | ch_mult: 21 | - 1 22 | - 1 23 | - 2 24 | - 2 25 | - 4 26 | - 4 27 | num_res_blocks: 2 28 | attn_resolutions: 29 | - 16 30 | - 8 31 | dropout: 0.0 32 | data: 33 | target: main.DataModuleFromConfig 34 | params: 35 | batch_size: 6 36 | wrap: true 37 | train: 38 | target: ldm.data.openimages.FullOpenImagesTrain 39 | params: 40 | size: 384 41 | crop_size: 256 42 | validation: 43 | target: ldm.data.openimages.FullOpenImagesValidation 44 | params: 45 | size: 384 46 | crop_size: 256 47 | -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/generative_models/stable-diffusion/models/first_stage_models/kl-f4/config.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 4.5e-06 3 | target: ldm.models.autoencoder.AutoencoderKL 4 | params: 5 | monitor: val/rec_loss 6 | embed_dim: 3 7 | lossconfig: 8 | target: ldm.modules.losses.LPIPSWithDiscriminator 9 | params: 10 | disc_start: 50001 11 | kl_weight: 1.0e-06 12 | disc_weight: 0.5 13 | ddconfig: 14 | double_z: true 15 | z_channels: 3 16 | resolution: 256 17 | in_channels: 3 18 | out_ch: 3 19 | ch: 128 20 | ch_mult: 21 | - 1 22 | - 2 23 | - 4 24 | num_res_blocks: 2 25 | attn_resolutions: [] 26 | dropout: 0.0 27 | data: 28 | target: main.DataModuleFromConfig 29 | params: 30 | batch_size: 10 31 | wrap: true 32 | train: 33 | target: ldm.data.openimages.FullOpenImagesTrain 34 | params: 35 | size: 384 36 | crop_size: 256 37 | validation: 38 | target: ldm.data.openimages.FullOpenImagesValidation 39 | params: 40 | size: 384 41 | crop_size: 256 42 | -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/generative_models/stable-diffusion/models/first_stage_models/kl-f8/config.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 4.5e-06 3 | target: ldm.models.autoencoder.AutoencoderKL 4 | params: 5 | monitor: val/rec_loss 6 | embed_dim: 4 7 | lossconfig: 8 | target: ldm.modules.losses.LPIPSWithDiscriminator 9 | params: 10 | disc_start: 50001 11 | kl_weight: 1.0e-06 12 | disc_weight: 0.5 13 | ddconfig: 14 | double_z: true 15 | z_channels: 4 16 | resolution: 256 17 | in_channels: 3 18 | out_ch: 3 19 | ch: 128 20 | ch_mult: 21 | - 1 22 | - 2 23 | - 4 24 | - 4 25 | num_res_blocks: 2 26 | attn_resolutions: [] 27 | dropout: 0.0 28 | data: 29 | target: main.DataModuleFromConfig 30 | params: 31 | 
batch_size: 4 32 | wrap: true 33 | train: 34 | target: ldm.data.openimages.FullOpenImagesTrain 35 | params: 36 | size: 384 37 | crop_size: 256 38 | validation: 39 | target: ldm.data.openimages.FullOpenImagesValidation 40 | params: 41 | size: 384 42 | crop_size: 256 43 | -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/generative_models/stable-diffusion/models/first_stage_models/vq-f4-noattn/config.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 4.5e-06 3 | target: ldm.models.autoencoder.VQModel 4 | params: 5 | embed_dim: 3 6 | n_embed: 8192 7 | monitor: val/rec_loss 8 | 9 | ddconfig: 10 | attn_type: none 11 | double_z: false 12 | z_channels: 3 13 | resolution: 256 14 | in_channels: 3 15 | out_ch: 3 16 | ch: 128 17 | ch_mult: 18 | - 1 19 | - 2 20 | - 4 21 | num_res_blocks: 2 22 | attn_resolutions: [] 23 | dropout: 0.0 24 | lossconfig: 25 | target: taming.modules.losses.vqperceptual.VQLPIPSWithDiscriminator 26 | params: 27 | disc_conditional: false 28 | disc_in_channels: 3 29 | disc_start: 11 30 | disc_weight: 0.75 31 | codebook_weight: 1.0 32 | 33 | data: 34 | target: main.DataModuleFromConfig 35 | params: 36 | batch_size: 8 37 | num_workers: 12 38 | wrap: true 39 | train: 40 | target: ldm.data.openimages.FullOpenImagesTrain 41 | params: 42 | crop_size: 256 43 | validation: 44 | target: ldm.data.openimages.FullOpenImagesValidation 45 | params: 46 | crop_size: 256 47 | -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/generative_models/stable-diffusion/models/first_stage_models/vq-f4/config.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 4.5e-06 3 | target: ldm.models.autoencoder.VQModel 4 | params: 5 | embed_dim: 3 6 | n_embed: 8192 7 | monitor: val/rec_loss 8 | 9 | ddconfig: 10 | double_z: false 11 | z_channels: 3 12 | resolution: 256 13 | in_channels: 3 14 | out_ch: 3 15 | ch: 128 16 | ch_mult: 17 | - 1 18 | - 2 19 | - 4 20 | num_res_blocks: 2 21 | attn_resolutions: [] 22 | dropout: 0.0 23 | lossconfig: 24 | target: taming.modules.losses.vqperceptual.VQLPIPSWithDiscriminator 25 | params: 26 | disc_conditional: false 27 | disc_in_channels: 3 28 | disc_start: 0 29 | disc_weight: 0.75 30 | codebook_weight: 1.0 31 | 32 | data: 33 | target: main.DataModuleFromConfig 34 | params: 35 | batch_size: 8 36 | num_workers: 16 37 | wrap: true 38 | train: 39 | target: ldm.data.openimages.FullOpenImagesTrain 40 | params: 41 | crop_size: 256 42 | validation: 43 | target: ldm.data.openimages.FullOpenImagesValidation 44 | params: 45 | crop_size: 256 46 | -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/generative_models/stable-diffusion/patches/minimal_changes.diff: -------------------------------------------------------------------------------- 1 | diff --git a/scripts/txt2img.py b/scripts/txt2img.py 2 | index ef52ee0..d7791b6 100644 3 | --- a/scripts/txt2img.py 4 | +++ b/scripts/txt2img.py 5 | @@ -263,6 +263,8 @@ 6 | base_count += 1 7 | all_samples.append(x_samples_ddim) 8 | 9 | + import habana_frameworks.torch.core as htcore 10 | + htcore.mark_step() 11 | if not opt.skip_grid: 12 | # additionally, save as grid 13 | grid = torch.stack(all_samples, 0) 14 | -------------------------------------------------------------------------------- 
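The `minimal_changes.diff` patch above inserts an `htcore.mark_step()` call after each sampling iteration of `scripts/txt2img.py`. As a rough, self-contained sketch of the same pattern outside this repository — the toy model, data, and hyperparameters below are placeholders, and an HPU device with the Habana PyTorch bridge installed is assumed — a lazy-mode loop typically calls `mark_step()` once per step so the accumulated graph is flushed and executed:

```python
import torch
import habana_frameworks.torch.core as htcore  # assumes the Habana PyTorch bridge is installed

# Hypothetical toy model and random data, used only to illustrate where mark_step() goes.
device = torch.device("hpu")
model = torch.nn.Linear(16, 4).to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

for _ in range(3):
    x = torch.randn(8, 16, device=device)
    target = torch.randint(0, 4, (8,), device=device)
    loss = torch.nn.functional.cross_entropy(model(x), target)
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()
    # In lazy mode, mark_step() cuts the graph accumulated so far and launches execution;
    # the patch above adds the same call once per image batch in the sampling loop.
    htcore.mark_step()
```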
/PyTorch/examples/gpu_migration/generative_models/stable-diffusion/patches/randn_to_cpu.diff: -------------------------------------------------------------------------------- 1 | diff --git a/ldm/models/diffusion/ddim.py b/ldm/models/diffusion/ddim.py 2 | index aa3fbec..894f258 100644 3 | --- a/ldm/models/diffusion/ddim.py 4 | +++ b/ldm/models/diffusion/ddim.py 5 | @@ -125,7 +125,8 @@ class DDIMSampler(object): 6 | device = self.model.betas.device 7 | b = shape[0] 8 | if x_T is None: 9 | - img = torch.randn(shape, device=device) 10 | + img = torch.randn(shape, device=torch.device("cpu")) 11 | + img = torch.tensor(img, device=device).clone().detach() 12 | else: 13 | img = x_T 14 | 15 | -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/generative_models/stable-diffusion/requirements.txt: -------------------------------------------------------------------------------- 1 | albumentations==0.4.3 2 | opencv-python 3 | pudb==2019.2 4 | imageio==2.9.0 5 | imageio-ffmpeg==0.4.2 6 | omegaconf==2.1.1 7 | test-tube>=0.7.5 8 | streamlit>=0.73.1 9 | einops==0.3.0 10 | torch-fidelity==0.3.0 11 | transformers==4.38.0 12 | webdataset==0.2.5 13 | -e git+https://github.com/CompVis/taming-transformers.git@master#egg=taming-transformers 14 | -e git+https://github.com/openai/CLIP.git@main#egg=clip 15 | -e . 16 | -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/generative_models/stable-diffusion/scripts/cmd_on_new_ckpt.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import time 4 | import fire 5 | 6 | 7 | class Checker(object): 8 | def __init__(self, filename, interval=60): 9 | self._cached_stamp = 0 10 | self.filename = filename 11 | self.interval = interval 12 | 13 | def check(self, cmd): 14 | while True: 15 | stamp = os.stat(self.filename).st_mtime 16 | if stamp != self._cached_stamp: 17 | self._cached_stamp = stamp 18 | print(f"{self.__class__.__name__}: Detected a new file at {self.filename}, running evaluation commands on it.") 19 | subprocess.run(cmd, shell=True) 20 | else: 21 | time.sleep(self.interval) 22 | 23 | 24 | def run(filename, cmd): 25 | checker = Checker(filename, interval=60) 26 | checker.check(cmd) 27 | 28 | 29 | if __name__ == "__main__": 30 | fire.Fire(run) 31 | 32 | 33 | -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/generative_models/stable-diffusion/scripts/printckpt.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import fire 4 | 5 | 6 | def printit(p): 7 | print(f"printin' in path: {p}") 8 | size_initial = os.path.getsize(p) 9 | nsd = dict() 10 | sd = torch.load(p, map_location="cpu") 11 | if "global_step" in sd: 12 | print(f"This is global step {sd['global_step']}.") 13 | if "model_ema.num_updates" in sd["state_dict"]: 14 | print(f"And we got {sd['state_dict']['model_ema.num_updates']} EMA updates.") 15 | 16 | 17 | if __name__ == "__main__": 18 | fire.Fire(printit) 19 | -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/generative_models/stable-diffusion/scripts/prompts/six-prompts: -------------------------------------------------------------------------------- 1 | the Tower of Babel by J.M.W. 
Turner 2 | advertisement for a psychedelic virtual reality headset, 16 bit sprite pixel art 3 | the gateway between dreams, trending on ArtStation 4 | Humanity is killed by AI, by James Gurney 5 | A fantasy painting of a city in a deep valley by Ivan Aivazovsky 6 | Darth Vader at Woodstock (1969) 7 | -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/generative_models/stable-diffusion/scripts/prompts/weird-dalle-prompts.txt: -------------------------------------------------------------------------------- 1 | # TODO, check out Twitter. 2 | Darth Vader at Woodstock (1969) 3 | Bunny Vikings 4 | The Demogorgon from Stranger Thinhs holding a basketball 5 | Hamster in my microwave 6 | a courtroom sketch of a Ford Transit van 7 | PS1 Hagrid at MCDonalds 8 | Karl Marx in KFC Logo 9 | Moai Statue giving a TED talk 10 | wahing machine trail cam 11 | minions at cross burning 12 | Hindenburg disaster in Fortnite -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/generative_models/stable-diffusion/scripts/prompts/wings1.txt: -------------------------------------------------------------------------------- 1 | A portrait of Abraham Lincoln 2 | A portrait of Barack Obama 3 | A portrait of a nekomimi girl smiling 4 | a portrait of isaac newton the alchemist 5 | A portrait of Friedrich Nietzsche wearing an open double breasted suit with a bowtie 6 | Portrait of a cyberpunk cyborg man wearing alternate reality goggles 7 | Portrait of a woman screaming 8 | A portrait of a man in a flight jacket leaning against a biplane 9 | a cold landscape by Albert Bierstadt 10 | the monument of the ancients by van gogh 11 | the universal library 12 | a vision of paradise. 
unreal engine 13 | matte painting of cozy underground bunker wholefoods aisle, trending on artstation 14 | illustration of wooly mammoths reclaiming the arctic, trending on artstation 15 | a mountain range in the desert, Provia, Velvia 16 | the gateway between dreams, trending on ArtStation 17 | -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/generative_models/stable-diffusion/scripts/prompts/wings2.txt: -------------------------------------------------------------------------------- 1 | a cityscape at night 2 | starry night by cyberpunk 3 | A fantasy painting of a city in a deep valley by Ivan Aivazovsky 4 | An oil painting of The New York City Skyline by Natalia Goncharova 5 | a rainy city street in the style of cyberpunk noir, trending on ArtStation 6 | an astral city in the style of cyberpunk noir art deco 7 | The Golden Gate Bridge in the style of art deco 8 | a city on a 70s science fiction novel cover 9 | An oil painting of A Vase Of Flowers 10 | still life oil painting of a smooth silver steel tungsten square cube box by Albrecht Dürer 11 | An oil painting of a bookshelf crammed with books, trending on artstation 12 | An N95 respirator mask in the style of art deco 13 | a surreal and organic stone monument to a plutonium atom 14 | oil painting of a candy dish of glass candies, mints, and other assorted sweets 15 | illustration of a ford model-t in pristine condition, trending on artstation 16 | illustration of DEC minicomputer console monitor retrocomputing teletype interdata PDP-11 univac, trending on artstation 17 | -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/generative_models/stable-diffusion/scripts/prompts/wings3.txt: -------------------------------------------------------------------------------- 1 | The Rise Of Consciousness 2 | The Human Utility Function 3 | Revolution of the Souls 4 | a good amphetamine spirit 5 | Control The Soul 6 | The Lunatic, The Lover, and The Poet 7 | A Planet Ruled By Angels 8 | the Tower of Babel by J.M.W. Turner 9 | sketch of a 3D printer by Leonardo da Vinci 10 | In The Style Of M.C. 
Escher 11 | A cup of coffee by Picasso 12 | The US Capitol Building in the style of Kandinsky 13 | A Mysterious Orb by Andy Warhol 14 | The everlasting zero, a glimpse of a million, by Salvador Dali 15 | a painting of a haunted house with Halloween decorations by Giovanni Paolo Panini 16 | a painting of drops of Venus by Vincent van Gogh 17 | -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/generative_models/stable-diffusion/scripts/prompts/wings4.txt: -------------------------------------------------------------------------------- 1 | ascii art of a man riding a bicycle 2 | cyberpunk noir art deco detective in space 3 | a cyborg angel in the style of ukiyo-e 4 | Hell in the style of pointillism 5 | Moloch in the style of socialist realism 6 | Metaphysics in the style of WPAP 7 | advertisement for a psychedelic virtual reality headset, 16 bit sprite pixel art 8 | a watercolor painting of a Christmas tree 9 | control room monitors televisions screens computers hacker lab, concept art, matte painting, trending on artstation 10 | a group of surgeons wait to cryonically suspend a patient 11 | technological singularity cult by James Gurney 12 | an autogyro flying car, trending on artstation 13 | illustration of airship zepplins in the skies, trending on artstation 14 | watercolor illustration of a martian colony geodesic dome aquaponics farming on the surface, trending on artstation 15 | humanity is killed by AI, by James Gurney 16 | the Vitruvian Man as a propaganda poster for transhumanism -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/generative_models/stable-diffusion/scripts/slurm/README.md: -------------------------------------------------------------------------------- 1 | # Example 2 | 3 | Resume f8 @ 512 on Laion-HR 4 | 5 | ``` 6 | sbatch scripts/slurm/resume_512/sbatch.sh 7 | ``` 8 | 9 | # Reuse 10 | 11 | To reuse this as a template, copy `sbatch.sh` and `launcher.sh` somewhere. In 12 | `sbatch.sh`, adjust the lines 13 | 14 | ``` 15 | #SBATCH --job-name=stable-diffusion-512cont 16 | #SBATCH --nodes=24 17 | ``` 18 | 19 | and the path to your `launcher.sh` in the last line, 20 | 21 | ``` 22 | srun bash /fsx/stable-diffusion/stable-diffusion/scripts/slurm/resume_512/launcher.sh 23 | ``` 24 | 25 | In `launcher.sh`, adjust `CONFIG` and `EXTRA`. Maybe give it a test run with 26 | debug flags uncommented and a reduced number of nodes. 
27 | -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/generative_models/stable-diffusion/scripts/slurm/resume_512_improvedaesthetic/launcher.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export NODE_RANK=${SLURM_NODEID} 3 | echo "##########################################" 4 | echo MASTER_ADDR=${MASTER_ADDR} 5 | echo MASTER_PORT=${MASTER_PORT} 6 | echo NODE_RANK=${NODE_RANK} 7 | echo WORLD_SIZE=${WORLD_SIZE} 8 | echo "##########################################" 9 | # debug environment worked great so we stick with it 10 | # no magic there, just a miniconda python=3.9, pytorch=1.12, cudatoolkit=11.3 11 | # env with pip dependencies from stable diffusion's requirements.txt 12 | eval "$(/fsx/stable-diffusion/debug/miniconda3/bin/conda shell.bash hook)" 13 | conda activate stable 14 | cd /fsx/stable-diffusion/stable-diffusion 15 | 16 | CONFIG=configs/stable-diffusion/txt2img-1p4B-multinode-clip-encoder-high-res-512-improvedaesthetic.yaml 17 | EXTRA="model.params.ckpt_path=/fsx/stable-diffusion/stable-diffusion/logs/2022-07-09T11-06-38_txt2img-1p4B-multinode-clip-encoder-high-res-512_improvedaesthetic/checkpoints/last.ckpt" 18 | DEBUG="-d True lightning.callbacks.image_logger.params.batch_frequency=5" 19 | 20 | python main.py --base $CONFIG --gpus 0,1,2,3,4,5,6,7 -t --num_nodes ${WORLD_SIZE} --scale_lr False $EXTRA #$DEBUG 21 | -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/generative_models/stable-diffusion/scripts/test_gpu.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | eval "$(/fsx/stable-diffusion/debug/miniconda3/bin/conda shell.bash hook)" 3 | conda activate stable 4 | cd /fsx/stable-diffusion/stable-diffusion 5 | python scripts/test_gpu.py 6 | -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/generative_models/stable-diffusion/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup( 4 | name='latent-diffusion', 5 | version='0.0.1', 6 | description='', 7 | packages=find_packages(), 8 | install_requires=[ 9 | 'torch', 10 | 'numpy', 11 | 'tqdm', 12 | ], 13 | ) -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/nlp/DeepSpeedExamples/Megatron-DeepSpeed/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @jeffra @samyam @tjruwase @ShadenSmith @conglongli @awan-10 @cli99 @eltonzheng @minjiaz @RezaYazdaniAminabadi @duli2012 @mrwyattii @yaozhewei @arashb @xiaoxiawu-microsoft @guanhuawang 2 | -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/nlp/DeepSpeedExamples/Megatron-DeepSpeed/MANIFEST.in: -------------------------------------------------------------------------------- 1 | include megatron/data/Makefile 2 | include megatron/data/helpers.cpp 3 | -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/nlp/DeepSpeedExamples/Megatron-DeepSpeed/dataset/README.md: -------------------------------------------------------------------------------- 1 | # Run the scripts below to setup dataset 2 | 3 | bash download_books.sh 4 | 5 | bash download_vocab.sh 6 | 
-------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/nlp/DeepSpeedExamples/Megatron-DeepSpeed/dataset/download_books.sh: -------------------------------------------------------------------------------- 1 | wget https://the-eye.eu/public/AI/pile_neox/data/BookCorpusDataset_text_document.bin 2 | wget https://the-eye.eu/public/AI/pile_neox/data/BookCorpusDataset_text_document.idx -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/nlp/DeepSpeedExamples/Megatron-DeepSpeed/dataset/download_ckpt.sh: -------------------------------------------------------------------------------- 1 | mkdir -p checkpoints/gpt2_345m 2 | 3 | cd checkpoints/gpt2_345m 4 | wget --content-disposition https://api.ngc.nvidia.com/v2/models/nvidia/megatron_lm_345m/versions/v0.0/zip -O megatron_lm_345m_v0.0.zip 5 | unzip megatron_lm_345m_v0.0.zip 6 | rm megatron_lm_345m_v0.0.zip 7 | cd ../.. 8 | 9 | -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/nlp/DeepSpeedExamples/Megatron-DeepSpeed/dataset/download_vocab.sh: -------------------------------------------------------------------------------- 1 | wget https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-vocab.json 2 | wget https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-merges.txt -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/nlp/DeepSpeedExamples/Megatron-DeepSpeed/examples/MoE/ds_config_gpt_TEMPLATE.json: -------------------------------------------------------------------------------- 1 | { 2 | "train_batch_size" : CONFIG_BATCH_SIZE, 3 | "train_micro_batch_size_per_gpu": CONFIG_MBSIZE, 4 | "steps_per_print": LOG_INTERVAL, 5 | 6 | "zero_optimization": { 7 | "stage": ZERO_STAGE, 8 | "elastic_checkpoint": true 9 | }, 10 | 11 | "gradient_clipping": 1.0, 12 | "prescale_gradients": PRESCALE_GRAD, 13 | 14 | "fp16": { 15 | "enabled": CONFIG_FP16_ENABLED, 16 | "loss_scale": 0, 17 | "loss_scale_window": 500, 18 | "hysteresis": 2, 19 | "min_loss_scale": 1, 20 | "initial_scale_power": 11 21 | }, 22 | 23 | "bf16": { 24 | "enabled": CONFIG_BF16_ENABLED 25 | }, 26 | "curriculum_learning": { 27 | "enabled": CONFIG_CL_ENABLED, 28 | "curriculum_type": "seqlen", 29 | "min_difficulty": CONFIG_CL_MIN, 30 | "max_difficulty": CONFIG_CL_MAX, 31 | "schedule_type": "fixed_linear", 32 | "schedule_config": { 33 | "total_curriculum_step": CONFIG_CL_DURATION, 34 | "difficulty_step": 8 35 | } 36 | }, 37 | 38 | "wall_clock_breakdown" : false 39 | } 40 | -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/nlp/DeepSpeedExamples/Megatron-DeepSpeed/examples/MoE/ds_config_gpt_Zero2_TEMPLATE.json: -------------------------------------------------------------------------------- 1 | { 2 | "train_batch_size" : CONFIG_BATCH_SIZE, 3 | "train_micro_batch_size_per_gpu": CONFIG_MBSIZE, 4 | "steps_per_print": LOG_INTERVAL, 5 | 6 | "zero_optimization": { 7 | "stage": 2 8 | }, 9 | 10 | "gradient_clipping": 1.0, 11 | "prescale_gradients": false, 12 | 13 | "fp16": { 14 | "enabled": CONFIG_FP16_ENABLED, 15 | "loss_scale": 0, 16 | "loss_scale_window": 500, 17 | "hysteresis": 2, 18 | "min_loss_scale": 1, 19 | "initial_scale_power": 11 20 | }, 21 | 22 | "bf16": { 23 | "enabled": CONFIG_BF16_ENABLED 24 | }, 25 | "curriculum_learning": { 26 | "enabled": CONFIG_CL_ENABLED, 27 | "curriculum_type": "seqlen", 28 
| "min_difficulty": CONFIG_CL_MIN, 29 | "max_difficulty": CONFIG_CL_MAX, 30 | "schedule_type": "fixed_linear", 31 | "schedule_config": { 32 | "total_curriculum_step": CONFIG_CL_DURATION, 33 | "difficulty_step": 8 34 | } 35 | }, 36 | 37 | "wall_clock_breakdown" : false 38 | } 39 | -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/nlp/DeepSpeedExamples/Megatron-DeepSpeed/examples/README.md: -------------------------------------------------------------------------------- 1 | ## Recipes and Scripts 2 | 3 | ### Azure 4 | 5 | We strongly recommend to start with AzureML recipe in the ```azureml``` folder. 6 | 7 | If you have a custom infrastructure (e.g. HPC clusters) or Azure VM and VMSS based environments, please refer to the bash scripts in the ```azure``` folder. 8 | 9 | ### MoE 10 | 11 | Please see the ```MoE``` folder for different training recipes and scripts for Mixture-of-expert based models. 12 | 13 | ### Curriculum Learning 14 | 15 | Curriculum learning recipes are in the ```curriculum_learning``` folder. Please refer to the detailed tutorials linked inside. 16 | -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/nlp/DeepSpeedExamples/Megatron-DeepSpeed/examples/azureml/Dockerfile.dockerfile: -------------------------------------------------------------------------------- 1 | FROM mcr.microsoft.com/azureml/aifx/stable-ubuntu2004-cu115-py38-torch1110 2 | 3 | USER root:root 4 | 5 | RUN pip install pybind11 6 | 7 | RUN pip install git+https://github.com/microsoft/DeepSpeed.git 8 | 9 | # add a100-topo.xml 10 | RUN mkdir -p /opt/microsoft/ 11 | RUN wget -O /opt/microsoft/a100-topo.xml https://hpcbenchmarks.blob.core.windows.net/bookcorpus/data/a100-topo.xml 12 | 13 | # to use on A100, enable env var below in your job 14 | ENV NCCL_TOPO_FILE="/opt/microsoft/a100-topo.xml" 15 | -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/nlp/DeepSpeedExamples/Megatron-DeepSpeed/examples/azureml/README.md: -------------------------------------------------------------------------------- 1 | ## Megatron-DeepSpeed on AzureML 2 | Example script for running Megatron-DeepSpeed using Azure Machine Learning. 3 | 4 | ------ 5 | 6 | # Workspace Setup 7 | Setup an AML workspace. Refer to: [set-up doc](https://github.com/Azure/azureml-examples/tree/main/python-sdk#set-up). 8 | 9 | # Dataset Preparation 10 | Create AML Dataset. To run remote AML job, you need to provide AML FileDataset. 11 | Refer to [prepare_dataset script](prepare_dataset.py) to upload .bin and .idx files to blob store and on how to create FileDataset. 12 | 13 | # Training 14 | Run Megatron-DeepSpeed on Azure ML. Refer to [aml_submit script](aml_submit.py). 
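For orientation, the sketch below shows the general shape of the dataset-preparation step using the Azure ML v1 Python SDK: upload the preprocessed `.bin`/`.idx` files to the workspace datastore and register them as a FileDataset that remote jobs can consume. It is an illustrative outline only — the folder, datastore, and dataset names are placeholders, and `prepare_dataset.py` in this directory remains the authoritative script:

```python
from azureml.core import Workspace, Dataset

# Illustrative outline; see prepare_dataset.py in this folder for the actual workflow.
ws = Workspace.from_config()            # assumes an AML config.json in the working directory
datastore = ws.get_default_datastore()

# Hypothetical local folder holding e.g. BookCorpusDataset_text_document.bin / .idx
datastore.upload(src_dir="./dataset", target_path="megatron_data", overwrite=False)

# Register a FileDataset pointing at the uploaded files so a remote run can mount or download it.
file_ds = Dataset.File.from_files(path=(datastore, "megatron_data/**"))
file_ds.register(workspace=ws, name="megatron_bookcorpus", create_new_version=True)
```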
15 | -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/nlp/DeepSpeedExamples/Megatron-DeepSpeed/examples/compression/ds_config_gpt_TEMPLATE.json: -------------------------------------------------------------------------------- 1 | { 2 | "train_batch_size" : CONFIG_BATCH_SIZE, 3 | "train_micro_batch_size_per_gpu": CONFIG_MBSIZE, 4 | "steps_per_print": LOG_INTERVAL, 5 | 6 | "zero_optimization": { 7 | "stage": ZERO_STAGE, 8 | "elastic_checkpoint": true 9 | }, 10 | 11 | "gradient_clipping": 1.0, 12 | "prescale_gradients": PRESCALE_GRAD, 13 | 14 | "fp16": { 15 | "enabled": CONFIG_FP16_ENABLED, 16 | "loss_scale": 0, 17 | "loss_scale_window": 500, 18 | "hysteresis": 2, 19 | "min_loss_scale": 1, 20 | "initial_scale_power": 11 21 | }, 22 | 23 | "bf16": { 24 | "enabled": CONFIG_BF16_ENABLED 25 | }, 26 | "curriculum_learning": { 27 | "enabled": CONFIG_CL_ENABLED, 28 | "curriculum_type": "seqlen", 29 | "min_difficulty": CONFIG_CL_MIN, 30 | "max_difficulty": CONFIG_CL_MAX, 31 | "schedule_type": "fixed_linear", 32 | "schedule_config": { 33 | "total_curriculum_step": CONFIG_CL_DURATION, 34 | "difficulty_step": 8 35 | } 36 | }, 37 | 38 | "wall_clock_breakdown" : false 39 | } 40 | -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/nlp/DeepSpeedExamples/Megatron-DeepSpeed/examples/create_embeddings.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Compute embeddings for each entry of a given dataset (e.g. Wikipedia) 4 | 5 | RANK=0 6 | WORLD_SIZE=1 7 | 8 | # Wikipedia data can be downloaded from the following link: 9 | # https://github.com/facebookresearch/DPR/blob/master/data/download_data.py 10 | EVIDENCE_DATA_DIR= 11 | EMBEDDING_PATH= 12 | CHECKPOINT_PATH= 13 | 14 | python tools/create_doc_index.py \ 15 | --num-layers 12 \ 16 | --hidden-size 768 \ 17 | --num-attention-heads 12 \ 18 | --tensor-model-parallel-size 1 \ 19 | --micro-batch-size 128 \ 20 | --checkpoint-activations \ 21 | --seq-length 512 \ 22 | --retriever-seq-length 256 \ 23 | --max-position-embeddings 512 \ 24 | --load ${CHECKPOINT_PATH} \ 25 | --evidence-data-path ${EVIDENCE_DATA_DIR} \ 26 | --embedding-path ${EMBEDDING_PATH} \ 27 | --indexer-log-interval 1000 \ 28 | --indexer-batch-size 128 \ 29 | --vocab-file bert-vocab.txt \ 30 | --num-workers 2 \ 31 | --fp16 32 | 33 | -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/nlp/DeepSpeedExamples/Megatron-DeepSpeed/examples/curriculum_learning/README.md: -------------------------------------------------------------------------------- 1 | This is an example of how to use DeepSpeed's curriculum learning (CL) feature which provides faster and more stable language model pre-training. Currently it is only integrated for GPT pre-training. Note that there are two curriculum learning examples in two different repos for Megatron-LM GPT-2 pre-training. Both of them have some unique features and limitations. See details in our [tutorial](https://www.deepspeed.ai/tutorials/curriculum-learning/). For technical details please refer to our [paper](https://arxiv.org/abs/2108.06084). 
-------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/nlp/DeepSpeedExamples/Megatron-DeepSpeed/examples/curriculum_learning/ds_train.sh: -------------------------------------------------------------------------------- 1 | # # baseline 2 | # CONFIG=baseline 3 | # TAG=baseline 4 | # MODEL_SIZE=1558 5 | # LR=1.5e-4 6 | # BSZ=512 7 | # SEQ_LEN=1024 8 | # MP_SIZE=1 9 | # SEED=1234 10 | # SAVE_INTERVAL=5000 11 | # NUM_ITER=600000 12 | # NUM_TOKEN=157286400000 13 | # LR_DECAY_TOKEN=157286400000 14 | # LR_WARMUP_ITER=3000 15 | # CONFIG_TEMPLATE=false 16 | # CURRICULUM_STEP=0 17 | # CURRICULUM_MIN=0 18 | 19 | # curriculum learning 20 | CONFIG=curriculum_fixed_linear 21 | MODEL_SIZE=1558 22 | LR=6e-4 23 | BSZ=4096 24 | SEQ_LEN=1024 25 | MP_SIZE=1 26 | SEED=1234 27 | SAVE_INTERVAL=1000 28 | NUM_ITER=75000 29 | NUM_TOKEN=157286400000 30 | LR_DECAY_TOKEN=157286400000 31 | LR_WARMUP_ITER=3000 32 | CONFIG_TEMPLATE=true 33 | CURRICULUM_STEP=45000 34 | CURRICULUM_MIN=64 35 | TAG="${CONFIG}_s${CURRICULUM_MIN}to${SEQ_LEN}_step${CURRICULUM_STEP}" 36 | 37 | bash ds_pretrain_gpt2.sh $CONFIG $TAG $MODEL_SIZE $LR $BSZ $SEQ_LEN $MP_SIZE $SEED $SAVE_INTERVAL $NUM_ITER $NUM_TOKEN $LR_DECAY_TOKEN $LR_WARMUP_ITER $CONFIG_TEMPLATE $CURRICULUM_STEP $CURRICULUM_MIN 38 | -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/nlp/DeepSpeedExamples/Megatron-DeepSpeed/examples/curriculum_learning/ds_zero_stage_1_config_baseline.json: -------------------------------------------------------------------------------- 1 | { 2 | "train_batch_size": 512, 3 | "gradient_accumulation_steps": 1, 4 | "steps_per_print": 1, 5 | "zero_optimization": { 6 | "stage": 1 7 | }, 8 | "optimizer": { 9 | "type": "Adam", 10 | "params": { 11 | "lr": 0.00015, 12 | "max_grad_norm": 1.0, 13 | "betas": [0.9, 0.95] 14 | } 15 | }, 16 | "gradient_clipping": 1.0, 17 | "fp16": { 18 | "enabled": true, 19 | "loss_scale": 0, 20 | "loss_scale_window": 1000, 21 | "hysteresis": 2, 22 | "min_loss_scale": 1 23 | }, 24 | "wall_clock_breakdown": false, 25 | "zero_allow_untested_optimizer": false 26 | } 27 | -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/nlp/DeepSpeedExamples/Megatron-DeepSpeed/examples/curriculum_learning/ds_zero_stage_1_config_curriculum_fixed_linear.json: -------------------------------------------------------------------------------- 1 | { 2 | "train_batch_size": 512, 3 | "gradient_accumulation_steps": 1, 4 | "steps_per_print": 1, 5 | "zero_optimization": { 6 | "stage": 1 7 | }, 8 | "optimizer": { 9 | "type": "Adam", 10 | "params": { 11 | "lr": 0.00015, 12 | "max_grad_norm": 1.0, 13 | "betas": [0.9, 0.95] 14 | } 15 | }, 16 | "gradient_clipping": 1.0, 17 | "fp16": { 18 | "enabled": true, 19 | "loss_scale": 0, 20 | "loss_scale_window": 1000, 21 | "hysteresis": 2, 22 | "min_loss_scale": 1 23 | }, 24 | "wall_clock_breakdown": false, 25 | "zero_allow_untested_optimizer": false, 26 | "curriculum_learning": { 27 | "enabled": true, 28 | "curriculum_type": "seqlen", 29 | "min_difficulty": CONFIG_CL_MIN, 30 | "max_difficulty": CONFIG_CL_MAX, 31 | "schedule_type": "fixed_linear", 32 | "schedule_config": { 33 | "total_curriculum_step": CONFIG_CL_DURATION, 34 | "difficulty_step": 8 35 | } 36 | } 37 | } 38 | -------------------------------------------------------------------------------- 
/PyTorch/examples/gpu_migration/nlp/DeepSpeedExamples/Megatron-DeepSpeed/examples/merge_mp_bert.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | TENSOR_MODEL_PARALLEL_SIZE=2 4 | 5 | VOCAB_FILE=bert-vocab.txt 6 | CHECKPOINT_PATH=checkpoints/bert_345m 7 | 8 | WORLD_SIZE=$TENSOR_MODEL_PARALLEL_SIZE python tools/merge_mp_partitions.py \ 9 | --model-type BERT \ 10 | --tensor-model-parallel-size $TENSOR_MODEL_PARALLEL_SIZE \ 11 | --tokenizer-type BertWordPieceLowerCase \ 12 | --vocab-file $VOCAB_FILE \ 13 | --num-layers 24 \ 14 | --hidden-size 1024 \ 15 | --num-attention-heads 16 \ 16 | --seq-length 512 \ 17 | --max-position-embeddings 512 \ 18 | --load $CHECKPOINT_PATH 19 | -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/nlp/DeepSpeedExamples/Megatron-DeepSpeed/examples/pretrain_bert.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | RANK=0 4 | WORLD_SIZE=1 5 | DATA_PATH=_text_sentence 6 | CHECKPOINT_PATH= 7 | 8 | python pretrain_bert.py \ 9 | --num-layers 24 \ 10 | --hidden-size 1024 \ 11 | --num-attention-heads 16 \ 12 | --micro-batch-size 4 \ 13 | --global-batch-size 8 \ 14 | --seq-length 512 \ 15 | --max-position-embeddings 512 \ 16 | --train-iters 2000000 \ 17 | --lr-decay-iters 990000 \ 18 | --save $CHECKPOINT_PATH \ 19 | --load $CHECKPOINT_PATH \ 20 | --data-path $DATA_PATH \ 21 | --vocab-file bert-vocab.txt \ 22 | --data-impl mmap \ 23 | --split 949,50,1 \ 24 | --lr 0.0001 \ 25 | --min-lr 0.00001 \ 26 | --lr-decay-style linear \ 27 | --lr-warmup-fraction .01 \ 28 | --weight-decay 1e-2 \ 29 | --clip-grad 1.0 \ 30 | --log-interval 100 \ 31 | --save-interval 10000 \ 32 | --eval-interval 1000 \ 33 | --eval-iters 10 \ 34 | --fp16 35 | -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/nlp/DeepSpeedExamples/Megatron-DeepSpeed/examples/pretrain_gpt.sh: -------------------------------------------------------------------------------- 1 | #! 
/bin/bash 2 | 3 | # Runs the "345M" parameter model 4 | 5 | RANK=0 6 | WORLD_SIZE=1 7 | 8 | DATA_PATH=_text_document 9 | CHECKPOINT_PATH= 10 | 11 | 12 | python pretrain_gpt.py \ 13 | --num-layers 24 \ 14 | --hidden-size 1024 \ 15 | --num-attention-heads 16 \ 16 | --micro-batch-size 4 \ 17 | --global-batch-size 8 \ 18 | --seq-length 1024 \ 19 | --max-position-embeddings 1024 \ 20 | --train-iters 500000 \ 21 | --lr-decay-iters 320000 \ 22 | --save $CHECKPOINT_PATH \ 23 | --load $CHECKPOINT_PATH \ 24 | --data-path $DATA_PATH \ 25 | --vocab-file gpt2-vocab.json \ 26 | --merge-file gpt2-merges.txt \ 27 | --data-impl mmap \ 28 | --split 949,50,1 \ 29 | --distributed-backend nccl \ 30 | --lr 0.00015 \ 31 | --min-lr 1.0e-5 \ 32 | --lr-decay-style cosine \ 33 | --weight-decay 1e-2 \ 34 | --clip-grad 1.0 \ 35 | --lr-warmup-fraction .01 \ 36 | --checkpoint-activations \ 37 | --log-interval 100 \ 38 | --save-interval 10000 \ 39 | --eval-interval 1000 \ 40 | --eval-iters 10 \ 41 | --fp16 42 | -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/nlp/DeepSpeedExamples/Megatron-DeepSpeed/examples/pretrain_t5.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | RANK=0 4 | WORLD_SIZE=1 5 | DATA_PATH= 6 | VOCAB_FILE= 7 | CHECKPOINT_PATH= 8 | 9 | python pretrain_t5.py \ 10 | --num-layers 12 \ 11 | --hidden-size 768 \ 12 | --num-attention-heads 12 \ 13 | --kv-channels 64 \ 14 | --ffn-hidden-size 3072 \ 15 | --encoder-seq-length 512 \ 16 | --decoder-seq-length 128 \ 17 | --micro-batch-size 16 \ 18 | --global-batch-size 2048 \ 19 | --max-position-embeddings 512 \ 20 | --train-iters 1000000 \ 21 | --lr-decay-iters 1000000 \ 22 | --save $CHECKPOINT_PATH \ 23 | --load $CHECKPOINT_PATH \ 24 | --data-path $DATA_PATH \ 25 | --vocab-file $VOCAB_FILE \ 26 | --data-impl mmap \ 27 | --split 949,50,1 \ 28 | --lr 0.0001 \ 29 | --min-lr 0.00001 \ 30 | --lr-decay-style linear \ 31 | --lr-warmup-fraction .01 \ 32 | --weight-decay 1e-2 \ 33 | --clip-grad 1.0 \ 34 | --log-interval 100 \ 35 | --save-interval 10000 \ 36 | --eval-interval 1000 \ 37 | --eval-iters 10 \ 38 | --fp16 39 | -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/nlp/DeepSpeedExamples/Megatron-DeepSpeed/images/cases_april2021.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HabanaAI/Model-References/da5f246c18f41063a89a04f19ce306d1e61f6345/PyTorch/examples/gpu_migration/nlp/DeepSpeedExamples/Megatron-DeepSpeed/images/cases_april2021.png -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/nlp/DeepSpeedExamples/Megatron-DeepSpeed/megatron/data/Makefile: -------------------------------------------------------------------------------- 1 | CXXFLAGS += -O3 -Wall -shared -std=c++11 -fPIC -fdiagnostics-color 2 | CPPFLAGS += $(shell python3 -m pybind11 --includes) 3 | LIBNAME = helpers 4 | LIBEXT = $(shell python3-config --extension-suffix) 5 | 6 | default: $(LIBNAME)$(LIBEXT) 7 | 8 | %$(LIBEXT): %.cpp 9 | $(CXX) $(CXXFLAGS) $(CPPFLAGS) $< -o $@ 10 | -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/nlp/DeepSpeedExamples/Megatron-DeepSpeed/megatron/data/__init__.py: -------------------------------------------------------------------------------- 1 | from . 
import indexed_dataset 2 | -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/nlp/DeepSpeedExamples/Megatron-DeepSpeed/megatron/data/test/test_preprocess_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | IMPL=cached 4 | python ../preprocess_data.py \ 5 | --input test_samples.json \ 6 | --vocab vocab.txt \ 7 | --dataset-impl ${IMPL} \ 8 | --output-prefix test_samples_${IMPL} \ 9 | --workers 1 \ 10 | --log-interval 2 11 | -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/nlp/DeepSpeedExamples/Megatron-DeepSpeed/megatron/enums.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | import enum 17 | 18 | class LayerType(enum.Enum): 19 | encoder = 1 20 | decoder = 2 21 | 22 | class AttnType(enum.Enum): 23 | self_attn = 1 24 | cross_attn = 2 25 | 26 | class AttnMaskType(enum.Enum): 27 | padding = 1 28 | causal = 2 29 | prefix = 3 30 | 31 | class PositionEmbeddingType(enum.Enum): 32 | rotary = 1 33 | absolute = 2 34 | alibi = 3 35 | -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/nlp/DeepSpeedExamples/Megatron-DeepSpeed/megatron/fused_kernels/compat.h: -------------------------------------------------------------------------------- 1 | /* coding=utf-8 2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | /*This code is copied fron NVIDIA apex: 18 | * https://github.com/NVIDIA/apex 19 | * with minor changes. */ 20 | 21 | 22 | 23 | #ifndef TORCH_CHECK 24 | #define TORCH_CHECK AT_CHECK 25 | #endif 26 | 27 | #ifdef VERSION_GE_1_3 28 | #define DATA_PTR data_ptr 29 | #else 30 | #define DATA_PTR data 31 | #endif 32 | -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/nlp/DeepSpeedExamples/Megatron-DeepSpeed/megatron/model/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 
3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from apex.normalization import MixedFusedLayerNorm as LayerNorm 17 | 18 | from .distributed import DistributedDataParallel 19 | from .bert_model import BertModel 20 | from .gpt_model import GPTModel, GPTModelPipe 21 | from .t5_model import T5Model 22 | from .language_model import get_language_model 23 | from .module import Float16Module 24 | -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/nlp/DeepSpeedExamples/Megatron-DeepSpeed/megatron/model/enums.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | import enum 17 | 18 | class LayerType(enum.Enum): 19 | encoder = 1 20 | decoder = 2 21 | 22 | class AttnType(enum.Enum): 23 | self_attn = 1 24 | cross_attn = 2 25 | 26 | class AttnMaskType(enum.Enum): 27 | padding = 1 28 | causal = 2 29 | -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/nlp/DeepSpeedExamples/Megatron-DeepSpeed/megatron/mpu/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HabanaAI/Model-References/da5f246c18f41063a89a04f19ce306d1e61f6345/PyTorch/examples/gpu_migration/nlp/DeepSpeedExamples/Megatron-DeepSpeed/megatron/mpu/tests/__init__.py -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/nlp/DeepSpeedExamples/Megatron-DeepSpeed/megatron/tokenizer/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | from .tokenizer import build_tokenizer 18 | -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/nlp/DeepSpeedExamples/Megatron-DeepSpeed/requirements.txt: -------------------------------------------------------------------------------- 1 | pybind11 2 | torch 3 | six 4 | regex 5 | numpy 6 | -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/nlp/DeepSpeedExamples/Megatron-DeepSpeed/tasks/eval_harness/download.py: -------------------------------------------------------------------------------- 1 | # This code is originally from https://github.com/bigscience-workshop/Megatron-DeepSpeed 2 | # under the license https://huggingface.co/spaces/bigscience/license 3 | 4 | # Downloads the specified taks in the evaluation harness 5 | # This is particularly useful when running in environments where the GPU nodes 6 | # do not have internet access. This way we can pre-download them and use the cached data-set during evaluation. 7 | 8 | from lm_eval import tasks 9 | from lm_eval.tasks import ALL_TASKS 10 | import argparse 11 | import os 12 | 13 | 14 | parser = argparse.ArgumentParser(description='Download evaluation harness', allow_abbrev=False) 15 | parser.add_argument('--task_list', type=str, default = "all", help='Either "all" or comma separated list of tasks to download.') 16 | args = parser.parse_args() 17 | 18 | def main(): 19 | task_list = ALL_TASKS if args.task_list == 'all' else args.task_list.split(',') 20 | tasks.get_task_dict(task_list) 21 | 22 | if __name__ == '__main__': 23 | main() 24 | 25 | 26 | -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/nlp/DeepSpeedExamples/Megatron-DeepSpeed/tests/test_basic.py: -------------------------------------------------------------------------------- 1 | def test_import(): 2 | import megatron 3 | 4 | -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/nlp/DeepSpeedExamples/Megatron-DeepSpeed/tools/convert_checkpoint/inspect_checkpoint.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import sys 3 | import os 4 | from collections import OrderedDict 5 | 6 | 7 | def dump_data(datum, name_list=[]): 8 | if type(datum) in (dict, OrderedDict): 9 | for k, v in datum.items(): 10 | dump_data(v, name_list+[str(k)]) 11 | elif type(datum) in (list, tuple): 12 | for v in datum: 13 | dump_data(v, name_list) 14 | elif torch.is_tensor(datum): 15 | prefix = '.'.join(name_list) 16 | print(f'[tensor] {prefix} = {datum.shape}') 17 | else: 18 | #pass 19 | prefix = '.'.join(name_list) 20 | print(f'[other] {prefix} = {datum}') 21 | 22 | def main(): 23 | if len(sys.argv) < 2: 24 | print(f'Usage: {sys.argv[0]} ') 25 | exit(1) 26 | 27 | ckpt_file = sys.argv[1] 28 | if not os.path.isfile(ckpt_file): 29 | print(f'{ckpt_file} is not a valid file') 30 | exit(1) 31 | 32 | print(f'loading checkpoint file: {ckpt_file}') 33 | sd = torch.load(ckpt_file) 34 | dump_data(sd) 35 | 36 | quit() 37 | 38 | 39 | if __name__ == "__main__": 40 | main() 41 | -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/nlp/bert/NOTICE: -------------------------------------------------------------------------------- 1 | BERT 
PyTorch 2 | 3 | This repository includes software from https://github.com/huggingface/pytorch-pretrained-BERT 4 | licensed under the Apache License 2.0. 5 | -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/nlp/bert/bert_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "attention_probs_dropout_prob": 0.1, 3 | "hidden_act": "gelu", 4 | "hidden_dropout_prob": 0.1, 5 | "hidden_size": 1024, 6 | "initializer_range": 0.02, 7 | "intermediate_size": 4096, 8 | "max_position_embeddings": 512, 9 | "num_attention_heads": 16, 10 | "num_hidden_layers": 24, 11 | "type_vocab_size": 2, 12 | "vocab_size": 30522 13 | } 14 | -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/nlp/bert/checkpoints/.keep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HabanaAI/Model-References/da5f246c18f41063a89a04f19ce306d1e61f6345/PyTorch/examples/gpu_migration/nlp/bert/checkpoints/.keep -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/nlp/bert/data/BooksDownloader.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 NVIDIA CORPORATION. All rights reserved. 2 | # Licensed under the Apache License, Version 2.0 (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # 6 | # http://www.apache.org/licenses/LICENSE-2.0 7 | # 8 | # Unless required by applicable law or agreed to in writing, software 9 | # distributed under the License is distributed on an "AS IS" BASIS, 10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | # See the License for the specific language governing permissions and 12 | # limitations under the License. 13 | 14 | import subprocess 15 | 16 | class BooksDownloader: 17 | def __init__(self, save_path): 18 | self.save_path = save_path 19 | pass 20 | 21 | 22 | def download(self): 23 | bookscorpus_download_command = 'python3 /workspace/bookcorpus/download_files.py --list /workspace/bookcorpus/url_list.jsonl --out' 24 | bookscorpus_download_command += ' ' + self.save_path + '/bookscorpus' 25 | bookscorpus_download_command += ' --trash-bad-count' 26 | bookscorpus_download_process = subprocess.run(bookscorpus_download_command, shell=True, check=True) 27 | -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/nlp/bert/data/NVIDIAPretrainedWeightDownloader.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 NVIDIA CORPORATION. All rights reserved. 2 | # Licensed under the Apache License, Version 2.0 (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # 6 | # http://www.apache.org/licenses/LICENSE-2.0 7 | # 8 | # Unless required by applicable law or agreed to in writing, software 9 | # distributed under the License is distributed on an "AS IS" BASIS, 10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | # See the License for the specific language governing permissions and 12 | # limitations under the License. 
13 | 14 | import os 15 | 16 | class NVIDIAPretrainedWeightDownloader: 17 | def __init__(self, save_path): 18 | self.save_path = save_path + '/nvidia_pretrained_weights' 19 | 20 | if not os.path.exists(self.save_path): 21 | os.makedirs(self.save_path) 22 | 23 | pass 24 | 25 | 26 | def download(self): 27 | assert False, 'NVIDIAPretrainedWeightDownloader not implemented yet.' -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/nlp/bert/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 NVIDIA CORPORATION. All rights reserved. 2 | # Licensed under the Apache License, Version 2.0 (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # 6 | # http://www.apache.org/licenses/LICENSE-2.0 7 | # 8 | # Unless required by applicable law or agreed to in writing, software 9 | # distributed under the License is distributed on an "AS IS" BASIS, 10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | # See the License for the specific language governing permissions and 12 | # limitations under the License. 13 | -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/nlp/bert/processors/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HabanaAI/Model-References/da5f246c18f41063a89a04f19ce306d1e61f6345/PyTorch/examples/gpu_migration/nlp/bert/processors/__init__.py -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/nlp/bert/requirements.txt: -------------------------------------------------------------------------------- 1 | # progress bars in model download and training scripts 2 | tqdm 3 | # Accessing files from S3 directly. 
4 | boto3 5 | # Used for downloading models over HTTP 6 | requests 7 | six 8 | ipdb 9 | #Data processing 10 | h5py 11 | html2text 12 | nltk 13 | progressbar 14 | #Others 15 | git+https://github.com/NVIDIA/dllogger 16 | -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/nlp/bert/results/.keep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HabanaAI/Model-References/da5f246c18f41063a89a04f19ce306d1e61f6345/PyTorch/examples/gpu_migration/nlp/bert/results/.keep -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/nlp/bert/results/checkpoints/lddl_log/node-0.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HabanaAI/Model-References/da5f246c18f41063a89a04f19ce306d1e61f6345/PyTorch/examples/gpu_migration/nlp/bert/results/checkpoints/lddl_log/node-0.txt -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/nlp/bert/results/checkpoints/lddl_log/node-0_local-0.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HabanaAI/Model-References/da5f246c18f41063a89a04f19ce306d1e61f6345/PyTorch/examples/gpu_migration/nlp/bert/results/checkpoints/lddl_log/node-0_local-0.txt -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/nlp/bert/results/dllogger.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HabanaAI/Model-References/da5f246c18f41063a89a04f19ce306d1e61f6345/PyTorch/examples/gpu_migration/nlp/bert/results/dllogger.json -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/nlp/bert/scripts/docker/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | docker build --network=host . --rm --pull --no-cache -t bert 3 | -------------------------------------------------------------------------------- /PyTorch/examples/gpu_migration/nlp/bert/scripts/docker/launch.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | CMD=${1:-/bin/bash} 4 | NV_VISIBLE_DEVICES=${2:-"all"} 5 | DOCKER_BRIDGE=${3:-"host"} 6 | 7 | docker run -it --rm \ 8 | --gpus device=$NV_VISIBLE_DEVICES \ 9 | --net=$DOCKER_BRIDGE \ 10 | --shm-size=1g \ 11 | --ulimit memlock=-1 \ 12 | --ulimit stack=67108864 \ 13 | -e LD_LIBRARY_PATH='/workspace/install/lib/' \ 14 | -v $PWD:/workspace/bert \ 15 | -v $PWD/results:/results \ 16 | bert $CMD 17 | -------------------------------------------------------------------------------- /PyTorch/generative_models/stable-diffusion/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | Copyright (C) 2023 Habana Labs, Ltd. 
an Intel Company 3 | Copyright (c) 2022 Machine Vision and Learning Group, LMU Munich 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /PyTorch/generative_models/stable-diffusion/environment.yaml: -------------------------------------------------------------------------------- 1 | name: ldm 2 | channels: 3 | - pytorch 4 | - defaults 5 | dependencies: 6 | - python=3.8.5 7 | - pip=20.3 8 | - cudatoolkit=11.3 9 | - pytorch=1.11.0 10 | - torchvision=0.12.0 11 | - numpy=1.19.2 12 | - pip: 13 | - albumentations==0.4.3 14 | - diffusers 15 | - opencv-python==4.1.2.30 16 | - pudb==2019.2 17 | - invisible-watermark 18 | - imageio==2.9.0 19 | - imageio-ffmpeg==0.4.2 20 | - pytorch-lightning==1.4.2 21 | - omegaconf==2.1.1 22 | - test-tube>=0.7.5 23 | - streamlit>=0.73.1 24 | - einops==0.3.0 25 | - torch-fidelity==0.3.0 26 | - transformers==4.19.2 27 | - torchmetrics==0.6.0 28 | - kornia==0.6 29 | - -e git+https://github.com/CompVis/taming-transformers.git@master#egg=taming-transformers 30 | - -e git+https://github.com/openai/CLIP.git@main#egg=clip 31 | - -e . 
32 | -------------------------------------------------------------------------------- /PyTorch/generative_models/stable-diffusion/ldm/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HabanaAI/Model-References/da5f246c18f41063a89a04f19ce306d1e61f6345/PyTorch/generative_models/stable-diffusion/ldm/data/__init__.py -------------------------------------------------------------------------------- /PyTorch/generative_models/stable-diffusion/ldm/data/dummy.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | import string 4 | from torch.utils.data import Dataset, Subset 5 | 6 | class DummyData(Dataset): 7 | def __init__(self, length, size): 8 | self.length = length 9 | self.size = size 10 | 11 | def __len__(self): 12 | return self.length 13 | 14 | def __getitem__(self, i): 15 | x = np.random.randn(*self.size) 16 | letters = string.ascii_lowercase 17 | y = ''.join(random.choice(string.ascii_lowercase) for i in range(10)) 18 | return {"jpg": x, "txt": y} 19 | 20 | 21 | class DummyDataWithEmbeddings(Dataset): 22 | def __init__(self, length, size, emb_size): 23 | self.length = length 24 | self.size = size 25 | self.emb_size = emb_size 26 | 27 | def __len__(self): 28 | return self.length 29 | 30 | def __getitem__(self, i): 31 | x = np.random.randn(*self.size) 32 | y = np.random.randn(*self.emb_size).astype(np.float32) 33 | return {"jpg": x, "txt": y} 34 | 35 | -------------------------------------------------------------------------------- /PyTorch/generative_models/stable-diffusion/ldm/data/inpainting/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HabanaAI/Model-References/da5f246c18f41063a89a04f19ce306d1e61f6345/PyTorch/generative_models/stable-diffusion/ldm/data/inpainting/__init__.py -------------------------------------------------------------------------------- /PyTorch/generative_models/stable-diffusion/ldm/models/diffusion/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HabanaAI/Model-References/da5f246c18f41063a89a04f19ce306d1e61f6345/PyTorch/generative_models/stable-diffusion/ldm/models/diffusion/__init__.py -------------------------------------------------------------------------------- /PyTorch/generative_models/stable-diffusion/ldm/modules/diffusionmodules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HabanaAI/Model-References/da5f246c18f41063a89a04f19ce306d1e61f6345/PyTorch/generative_models/stable-diffusion/ldm/modules/diffusionmodules/__init__.py -------------------------------------------------------------------------------- /PyTorch/generative_models/stable-diffusion/ldm/modules/distributions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HabanaAI/Model-References/da5f246c18f41063a89a04f19ce306d1e61f6345/PyTorch/generative_models/stable-diffusion/ldm/modules/distributions/__init__.py -------------------------------------------------------------------------------- /PyTorch/generative_models/stable-diffusion/ldm/modules/encoders/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/HabanaAI/Model-References/da5f246c18f41063a89a04f19ce306d1e61f6345/PyTorch/generative_models/stable-diffusion/ldm/modules/encoders/__init__.py -------------------------------------------------------------------------------- /PyTorch/generative_models/stable-diffusion/ldm/modules/image_degradation/__init__.py: -------------------------------------------------------------------------------- 1 | from ldm.modules.image_degradation.bsrgan import degradation_bsrgan_variant as degradation_fn_bsr 2 | from ldm.modules.image_degradation.bsrgan_light import degradation_bsrgan_variant as degradation_fn_bsr_light 3 | -------------------------------------------------------------------------------- /PyTorch/generative_models/stable-diffusion/ldm/modules/image_degradation/utils/test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HabanaAI/Model-References/da5f246c18f41063a89a04f19ce306d1e61f6345/PyTorch/generative_models/stable-diffusion/ldm/modules/image_degradation/utils/test.png -------------------------------------------------------------------------------- /PyTorch/generative_models/stable-diffusion/ldm/modules/losses/__init__.py: -------------------------------------------------------------------------------- 1 | from ldm.modules.losses.contperceptual import LPIPSWithDiscriminator -------------------------------------------------------------------------------- /PyTorch/generative_models/stable-diffusion/models/first_stage_models/kl-f16/config.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 4.5e-06 3 | target: ldm.models.autoencoder.AutoencoderKL 4 | params: 5 | monitor: val/rec_loss 6 | embed_dim: 16 7 | lossconfig: 8 | target: ldm.modules.losses.LPIPSWithDiscriminator 9 | params: 10 | disc_start: 50001 11 | kl_weight: 1.0e-06 12 | disc_weight: 0.5 13 | ddconfig: 14 | double_z: true 15 | z_channels: 16 16 | resolution: 256 17 | in_channels: 3 18 | out_ch: 3 19 | ch: 128 20 | ch_mult: 21 | - 1 22 | - 1 23 | - 2 24 | - 2 25 | - 4 26 | num_res_blocks: 2 27 | attn_resolutions: 28 | - 16 29 | dropout: 0.0 30 | data: 31 | target: main.DataModuleFromConfig 32 | params: 33 | batch_size: 6 34 | wrap: true 35 | train: 36 | target: ldm.data.openimages.FullOpenImagesTrain 37 | params: 38 | size: 384 39 | crop_size: 256 40 | validation: 41 | target: ldm.data.openimages.FullOpenImagesValidation 42 | params: 43 | size: 384 44 | crop_size: 256 45 | -------------------------------------------------------------------------------- /PyTorch/generative_models/stable-diffusion/models/first_stage_models/kl-f32/config.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 4.5e-06 3 | target: ldm.models.autoencoder.AutoencoderKL 4 | params: 5 | monitor: val/rec_loss 6 | embed_dim: 64 7 | lossconfig: 8 | target: ldm.modules.losses.LPIPSWithDiscriminator 9 | params: 10 | disc_start: 50001 11 | kl_weight: 1.0e-06 12 | disc_weight: 0.5 13 | ddconfig: 14 | double_z: true 15 | z_channels: 64 16 | resolution: 256 17 | in_channels: 3 18 | out_ch: 3 19 | ch: 128 20 | ch_mult: 21 | - 1 22 | - 1 23 | - 2 24 | - 2 25 | - 4 26 | - 4 27 | num_res_blocks: 2 28 | attn_resolutions: 29 | - 16 30 | - 8 31 | dropout: 0.0 32 | data: 33 | target: main.DataModuleFromConfig 34 | params: 35 | batch_size: 6 36 | wrap: true 37 | train: 38 | target: ldm.data.openimages.FullOpenImagesTrain 39 | params: 40 | 
size: 384 41 | crop_size: 256 42 | validation: 43 | target: ldm.data.openimages.FullOpenImagesValidation 44 | params: 45 | size: 384 46 | crop_size: 256 47 | -------------------------------------------------------------------------------- /PyTorch/generative_models/stable-diffusion/models/first_stage_models/kl-f4/config.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 4.5e-06 3 | target: ldm.models.autoencoder.AutoencoderKL 4 | params: 5 | monitor: val/rec_loss 6 | embed_dim: 3 7 | lossconfig: 8 | target: ldm.modules.losses.LPIPSWithDiscriminator 9 | params: 10 | disc_start: 50001 11 | kl_weight: 1.0e-06 12 | disc_weight: 0.5 13 | ddconfig: 14 | double_z: true 15 | z_channels: 3 16 | resolution: 256 17 | in_channels: 3 18 | out_ch: 3 19 | ch: 128 20 | ch_mult: 21 | - 1 22 | - 2 23 | - 4 24 | num_res_blocks: 2 25 | attn_resolutions: [] 26 | dropout: 0.0 27 | data: 28 | target: main.DataModuleFromConfig 29 | params: 30 | batch_size: 10 31 | wrap: true 32 | train: 33 | target: ldm.data.openimages.FullOpenImagesTrain 34 | params: 35 | size: 384 36 | crop_size: 256 37 | validation: 38 | target: ldm.data.openimages.FullOpenImagesValidation 39 | params: 40 | size: 384 41 | crop_size: 256 42 | -------------------------------------------------------------------------------- /PyTorch/generative_models/stable-diffusion/models/first_stage_models/kl-f8/config.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 4.5e-06 3 | target: ldm.models.autoencoder.AutoencoderKL 4 | params: 5 | monitor: val/rec_loss 6 | embed_dim: 4 7 | lossconfig: 8 | target: ldm.modules.losses.LPIPSWithDiscriminator 9 | params: 10 | disc_start: 50001 11 | kl_weight: 1.0e-06 12 | disc_weight: 0.5 13 | ddconfig: 14 | double_z: true 15 | z_channels: 4 16 | resolution: 256 17 | in_channels: 3 18 | out_ch: 3 19 | ch: 128 20 | ch_mult: 21 | - 1 22 | - 2 23 | - 4 24 | - 4 25 | num_res_blocks: 2 26 | attn_resolutions: [] 27 | dropout: 0.0 28 | data: 29 | target: main.DataModuleFromConfig 30 | params: 31 | batch_size: 4 32 | wrap: true 33 | train: 34 | target: ldm.data.openimages.FullOpenImagesTrain 35 | params: 36 | size: 384 37 | crop_size: 256 38 | validation: 39 | target: ldm.data.openimages.FullOpenImagesValidation 40 | params: 41 | size: 384 42 | crop_size: 256 43 | -------------------------------------------------------------------------------- /PyTorch/generative_models/stable-diffusion/models/first_stage_models/vq-f4-noattn/config.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 4.5e-06 3 | target: ldm.models.autoencoder.VQModel 4 | params: 5 | embed_dim: 3 6 | n_embed: 8192 7 | monitor: val/rec_loss 8 | 9 | ddconfig: 10 | attn_type: none 11 | double_z: false 12 | z_channels: 3 13 | resolution: 256 14 | in_channels: 3 15 | out_ch: 3 16 | ch: 128 17 | ch_mult: 18 | - 1 19 | - 2 20 | - 4 21 | num_res_blocks: 2 22 | attn_resolutions: [] 23 | dropout: 0.0 24 | lossconfig: 25 | target: taming.modules.losses.vqperceptual.VQLPIPSWithDiscriminator 26 | params: 27 | disc_conditional: false 28 | disc_in_channels: 3 29 | disc_start: 11 30 | disc_weight: 0.75 31 | codebook_weight: 1.0 32 | 33 | data: 34 | target: main.DataModuleFromConfig 35 | params: 36 | batch_size: 8 37 | num_workers: 12 38 | wrap: true 39 | train: 40 | target: ldm.data.openimages.FullOpenImagesTrain 41 | params: 42 | crop_size: 256 43 | validation: 44 | 
target: ldm.data.openimages.FullOpenImagesValidation 45 | params: 46 | crop_size: 256 47 | -------------------------------------------------------------------------------- /PyTorch/generative_models/stable-diffusion/models/first_stage_models/vq-f4/config.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 4.5e-06 3 | target: ldm.models.autoencoder.VQModel 4 | params: 5 | embed_dim: 3 6 | n_embed: 8192 7 | monitor: val/rec_loss 8 | 9 | ddconfig: 10 | double_z: false 11 | z_channels: 3 12 | resolution: 256 13 | in_channels: 3 14 | out_ch: 3 15 | ch: 128 16 | ch_mult: 17 | - 1 18 | - 2 19 | - 4 20 | num_res_blocks: 2 21 | attn_resolutions: [] 22 | dropout: 0.0 23 | lossconfig: 24 | target: taming.modules.losses.vqperceptual.VQLPIPSWithDiscriminator 25 | params: 26 | disc_conditional: false 27 | disc_in_channels: 3 28 | disc_start: 0 29 | disc_weight: 0.75 30 | codebook_weight: 1.0 31 | 32 | data: 33 | target: main.DataModuleFromConfig 34 | params: 35 | batch_size: 8 36 | num_workers: 16 37 | wrap: true 38 | train: 39 | target: ldm.data.openimages.FullOpenImagesTrain 40 | params: 41 | crop_size: 256 42 | validation: 43 | target: ldm.data.openimages.FullOpenImagesValidation 44 | params: 45 | crop_size: 256 46 | -------------------------------------------------------------------------------- /PyTorch/generative_models/stable-diffusion/models/first_stage_models/vq-f8-n256/config.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 4.5e-06 3 | target: ldm.models.autoencoder.VQModel 4 | params: 5 | embed_dim: 4 6 | n_embed: 256 7 | monitor: val/rec_loss 8 | ddconfig: 9 | double_z: false 10 | z_channels: 4 11 | resolution: 256 12 | in_channels: 3 13 | out_ch: 3 14 | ch: 128 15 | ch_mult: 16 | - 1 17 | - 2 18 | - 2 19 | - 4 20 | num_res_blocks: 2 21 | attn_resolutions: 22 | - 32 23 | dropout: 0.0 24 | lossconfig: 25 | target: taming.modules.losses.vqperceptual.VQLPIPSWithDiscriminator 26 | params: 27 | disc_conditional: false 28 | disc_in_channels: 3 29 | disc_start: 250001 30 | disc_weight: 0.75 31 | codebook_weight: 1.0 32 | 33 | data: 34 | target: main.DataModuleFromConfig 35 | params: 36 | batch_size: 10 37 | num_workers: 20 38 | wrap: true 39 | train: 40 | target: ldm.data.openimages.FullOpenImagesTrain 41 | params: 42 | size: 384 43 | crop_size: 256 44 | validation: 45 | target: ldm.data.openimages.FullOpenImagesValidation 46 | params: 47 | size: 384 48 | crop_size: 256 49 | -------------------------------------------------------------------------------- /PyTorch/generative_models/stable-diffusion/ops_bf16.txt: -------------------------------------------------------------------------------- 1 | addmm 2 | addcmul 3 | bmm 4 | conv2d 5 | layer_norm 6 | batch_norm 7 | linear 8 | dot 9 | mm 10 | matmul 11 | mv 12 | conv_transpose2d 13 | dropout 14 | gelu 15 | t 16 | div 17 | truediv 18 | softmax 19 | einsum 20 | group_norm 21 | -------------------------------------------------------------------------------- /PyTorch/generative_models/stable-diffusion/ops_fp32.txt: -------------------------------------------------------------------------------- 1 | cross_entropy 2 | log_softmax 3 | nll_loss 4 | topk 5 | -------------------------------------------------------------------------------- /PyTorch/generative_models/stable-diffusion/requirements.txt: -------------------------------------------------------------------------------- 1 | albumentations==0.4.3 2 | 
opencv-python 3 | pudb==2019.2 4 | imageio==2.9.0 5 | imageio-ffmpeg==0.4.2 6 | lightning==2.5.1 7 | lightning-habana==1.6.0 8 | torchmetrics==0.10.3 9 | omegaconf==2.1.1 10 | test-tube>=0.7.5 11 | streamlit>=0.73.1 12 | einops==0.3.0 13 | torch-fidelity==0.3.0 14 | transformers==4.48.0 15 | kornia==0.6 16 | webdataset==0.2.5 17 | -e git+https://github.com/CompVis/taming-transformers.git@master#egg=taming-transformers 18 | -e git+https://github.com/openai/CLIP.git@main#egg=clip 19 | -e . 20 | -------------------------------------------------------------------------------- /PyTorch/generative_models/stable-diffusion/scripts/cmd_on_new_ckpt.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import time 4 | import fire 5 | 6 | 7 | class Checker(object): 8 | def __init__(self, filename, interval=60): 9 | self._cached_stamp = 0 10 | self.filename = filename 11 | self.interval = interval 12 | 13 | def check(self, cmd): 14 | while True: 15 | stamp = os.stat(self.filename).st_mtime 16 | if stamp != self._cached_stamp: 17 | self._cached_stamp = stamp 18 | print(f"{self.__class__.__name__}: Detected a new file at {self.filename}, running evaluation commands on it.") 19 | subprocess.run(cmd, shell=True) 20 | else: 21 | time.sleep(self.interval) 22 | 23 | 24 | def run(filename, cmd): 25 | checker = Checker(filename, interval=60) 26 | checker.check(cmd) 27 | 28 | 29 | if __name__ == "__main__": 30 | fire.Fire(run) 31 | 32 | 33 | -------------------------------------------------------------------------------- /PyTorch/generative_models/stable-diffusion/scripts/printckpt.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import fire 4 | 5 | 6 | def printit(p): 7 | print(f"printin' in path: {p}") 8 | size_initial = os.path.getsize(p) 9 | nsd = dict() 10 | sd = torch.load(p, map_location="cpu") 11 | if "global_step" in sd: 12 | print(f"This is global step {sd['global_step']}.") 13 | if "model_ema.num_updates" in sd["state_dict"]: 14 | print(f"And we got {sd['state_dict']['model_ema.num_updates']} EMA updates.") 15 | 16 | 17 | if __name__ == "__main__": 18 | fire.Fire(printit) 19 | -------------------------------------------------------------------------------- /PyTorch/generative_models/stable-diffusion/scripts/prompts/six-prompts: -------------------------------------------------------------------------------- 1 | the Tower of Babel by J.M.W. Turner 2 | advertisement for a psychedelic virtual reality headset, 16 bit sprite pixel art 3 | the gateway between dreams, trending on ArtStation 4 | Humanity is killed by AI, by James Gurney 5 | A fantasy painting of a city in a deep valley by Ivan Aivazovsky 6 | Darth Vader at Woodstock (1969) 7 | -------------------------------------------------------------------------------- /PyTorch/generative_models/stable-diffusion/scripts/prompts/weird-dalle-prompts.txt: -------------------------------------------------------------------------------- 1 | # TODO, check out Twitter. 
2 | Darth Vader at Woodstock (1969) 3 | Bunny Vikings 4 | The Demogorgon from Stranger Thinhs holding a basketball 5 | Hamster in my microwave 6 | a courtroom sketch of a Ford Transit van 7 | PS1 Hagrid at MCDonalds 8 | Karl Marx in KFC Logo 9 | Moai Statue giving a TED talk 10 | wahing machine trail cam 11 | minions at cross burning 12 | Hindenburg disaster in Fortnite -------------------------------------------------------------------------------- /PyTorch/generative_models/stable-diffusion/scripts/prompts/wings1.txt: -------------------------------------------------------------------------------- 1 | A portrait of Abraham Lincoln 2 | A portrait of Barack Obama 3 | A portrait of a nekomimi girl smiling 4 | a portrait of isaac newton the alchemist 5 | A portrait of Friedrich Nietzsche wearing an open double breasted suit with a bowtie 6 | Portrait of a cyberpunk cyborg man wearing alternate reality goggles 7 | Portrait of a woman screaming 8 | A portrait of a man in a flight jacket leaning against a biplane 9 | a cold landscape by Albert Bierstadt 10 | the monument of the ancients by van gogh 11 | the universal library 12 | a vision of paradise. unreal engine 13 | matte painting of cozy underground bunker wholefoods aisle, trending on artstation 14 | illustration of wooly mammoths reclaiming the arctic, trending on artstation 15 | a mountain range in the desert, Provia, Velvia 16 | the gateway between dreams, trending on ArtStation 17 | -------------------------------------------------------------------------------- /PyTorch/generative_models/stable-diffusion/scripts/prompts/wings2.txt: -------------------------------------------------------------------------------- 1 | a cityscape at night 2 | starry night by cyberpunk 3 | A fantasy painting of a city in a deep valley by Ivan Aivazovsky 4 | An oil painting of The New York City Skyline by Natalia Goncharova 5 | a rainy city street in the style of cyberpunk noir, trending on ArtStation 6 | an astral city in the style of cyberpunk noir art deco 7 | The Golden Gate Bridge in the style of art deco 8 | a city on a 70s science fiction novel cover 9 | An oil painting of A Vase Of Flowers 10 | still life oil painting of a smooth silver steel tungsten square cube box by Albrecht Dürer 11 | An oil painting of a bookshelf crammed with books, trending on artstation 12 | An N95 respirator mask in the style of art deco 13 | a surreal and organic stone monument to a plutonium atom 14 | oil painting of a candy dish of glass candies, mints, and other assorted sweets 15 | illustration of a ford model-t in pristine condition, trending on artstation 16 | illustration of DEC minicomputer console monitor retrocomputing teletype interdata PDP-11 univac, trending on artstation 17 | -------------------------------------------------------------------------------- /PyTorch/generative_models/stable-diffusion/scripts/prompts/wings3.txt: -------------------------------------------------------------------------------- 1 | The Rise Of Consciousness 2 | The Human Utility Function 3 | Revolution of the Souls 4 | a good amphetamine spirit 5 | Control The Soul 6 | The Lunatic, The Lover, and The Poet 7 | A Planet Ruled By Angels 8 | the Tower of Babel by J.M.W. Turner 9 | sketch of a 3D printer by Leonardo da Vinci 10 | In The Style Of M.C. 
Escher 11 | A cup of coffee by Picasso 12 | The US Capitol Building in the style of Kandinsky 13 | A Mysterious Orb by Andy Warhol 14 | The everlasting zero, a glimpse of a million, by Salvador Dali 15 | a painting of a haunted house with Halloween decorations by Giovanni Paolo Panini 16 | a painting of drops of Venus by Vincent van Gogh 17 | -------------------------------------------------------------------------------- /PyTorch/generative_models/stable-diffusion/scripts/prompts/wings4.txt: -------------------------------------------------------------------------------- 1 | ascii art of a man riding a bicycle 2 | cyberpunk noir art deco detective in space 3 | a cyborg angel in the style of ukiyo-e 4 | Hell in the style of pointillism 5 | Moloch in the style of socialist realism 6 | Metaphysics in the style of WPAP 7 | advertisement for a psychedelic virtual reality headset, 16 bit sprite pixel art 8 | a watercolor painting of a Christmas tree 9 | control room monitors televisions screens computers hacker lab, concept art, matte painting, trending on artstation 10 | a group of surgeons wait to cryonically suspend a patient 11 | technological singularity cult by James Gurney 12 | an autogyro flying car, trending on artstation 13 | illustration of airship zepplins in the skies, trending on artstation 14 | watercolor illustration of a martian colony geodesic dome aquaponics farming on the surface, trending on artstation 15 | humanity is killed by AI, by James Gurney 16 | the Vitruvian Man as a propaganda poster for transhumanism -------------------------------------------------------------------------------- /PyTorch/generative_models/stable-diffusion/scripts/slurm/README.md: -------------------------------------------------------------------------------- 1 | # Example 2 | 3 | Resume f8 @ 512 on Laion-HR 4 | 5 | ``` 6 | sbatch scripts/slurm/resume_512/sbatch.sh 7 | ``` 8 | 9 | # Reuse 10 | 11 | To reuse this as a template, copy `sbatch.sh` and `launcher.sh` somewhere. In 12 | `sbatch.sh`, adjust the lines 13 | 14 | ``` 15 | #SBATCH --job-name=stable-diffusion-512cont 16 | #SBATCH --nodes=24 17 | ``` 18 | 19 | and the path to your `launcher.sh` in the last line, 20 | 21 | ``` 22 | srun bash /fsx/stable-diffusion/stable-diffusion/scripts/slurm/resume_512/launcher.sh 23 | ``` 24 | 25 | In `launcher.sh`, adjust `CONFIG` and `EXTRA`. Maybe give it a test run with 26 | debug flags uncommented and a reduced number of nodes. 
27 | -------------------------------------------------------------------------------- /PyTorch/generative_models/stable-diffusion/scripts/slurm/resume_512_improvedaesthetic/launcher.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export NODE_RANK=${SLURM_NODEID} 3 | echo "##########################################" 4 | echo MASTER_ADDR=${MASTER_ADDR} 5 | echo MASTER_PORT=${MASTER_PORT} 6 | echo NODE_RANK=${NODE_RANK} 7 | echo WORLD_SIZE=${WORLD_SIZE} 8 | echo "##########################################" 9 | # debug environment worked great so we stick with it 10 | # no magic there, just a miniconda python=3.9, pytorch=1.12, cudatoolkit=11.3 11 | # env with pip dependencies from stable diffusion's requirements.txt 12 | eval "$(/fsx/stable-diffusion/debug/miniconda3/bin/conda shell.bash hook)" 13 | conda activate stable 14 | cd /fsx/stable-diffusion/stable-diffusion 15 | 16 | CONFIG=configs/stable-diffusion/txt2img-1p4B-multinode-clip-encoder-high-res-512-improvedaesthetic.yaml 17 | EXTRA="model.params.ckpt_path=/fsx/stable-diffusion/stable-diffusion/logs/2022-07-09T11-06-38_txt2img-1p4B-multinode-clip-encoder-high-res-512_improvedaesthetic/checkpoints/last.ckpt" 18 | DEBUG="-d True lightning.callbacks.image_logger.params.batch_frequency=5" 19 | 20 | python main.py --base $CONFIG --gpus 0,1,2,3,4,5,6,7 -t --num_nodes ${WORLD_SIZE} --scale_lr False $EXTRA #$DEBUG 21 | -------------------------------------------------------------------------------- /PyTorch/generative_models/stable-diffusion/scripts/slurm/resume_768_hr/launcher.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export NODE_RANK=${SLURM_NODEID} 3 | echo "##########################################" 4 | echo MASTER_ADDR=${MASTER_ADDR} 5 | echo MASTER_PORT=${MASTER_PORT} 6 | echo NODE_RANK=${NODE_RANK} 7 | echo WORLD_SIZE=${WORLD_SIZE} 8 | echo "##########################################" 9 | # debug environment worked great so we stick with it 10 | # no magic there, just a miniconda python=3.9, pytorch=1.12, cudatoolkit=11.3 11 | # env with pip dependencies from stable diffusion's requirements.txt 12 | eval "$(/fsx/stable-diffusion/debug/miniconda3/bin/conda shell.bash hook)" 13 | conda activate stable 14 | cd /fsx/stable-diffusion/stable-diffusion 15 | 16 | CONFIG=configs/stable-diffusion/txt2img-multinode-clip-encoder-f16-768-laion-hr.yaml 17 | # EXTRA="model.params.ckpt_path=/fsx/stable-diffusion/stable-diffusion/checkpoints/f16-33k+12k-hr_pruned.ckpt" 18 | EXTRA="model.params.ckpt_path=/fsx/stable-diffusion/stable-diffusion/logs/2022-07-09T20-06-38_txt2img-multinode-clip-encoder-f16-768-laion-hr/checkpoints/last.ckpt" 19 | DEBUG="-d True lightning.callbacks.image_logger.params.batch_frequency=5" 20 | 21 | python main.py --base $CONFIG --gpus 0,1,2,3,4,5,6,7 -t --num_nodes ${WORLD_SIZE} --scale_lr False $EXTRA #$DEBUG 22 | -------------------------------------------------------------------------------- /PyTorch/generative_models/stable-diffusion/scripts/test_gpu.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | eval "$(/fsx/stable-diffusion/debug/miniconda3/bin/conda shell.bash hook)" 3 | conda activate stable 4 | cd /fsx/stable-diffusion/stable-diffusion 5 | python scripts/test_gpu.py 6 | -------------------------------------------------------------------------------- /PyTorch/generative_models/stable-diffusion/setup.py: 
-------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup( 4 | name='latent-diffusion', 5 | version='0.0.1', 6 | description='', 7 | packages=find_packages(), 8 | install_requires=[ 9 | 'torch', 10 | 'numpy', 11 | 'tqdm', 12 | ], 13 | ) -------------------------------------------------------------------------------- /PyTorch/nlp/DeepSpeedExamples/deepspeed-bert/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 NVIDIA CORPORATION. All rights reserved. 2 | # Licensed under the Apache License, Version 2.0 (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # 6 | # http://www.apache.org/licenses/LICENSE-2.0 7 | # 8 | # Unless required by applicable law or agreed to in writing, software 9 | # distributed under the License is distributed on an "AS IS" BASIS, 10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | # See the License for the specific language governing permissions and 12 | # limitations under the License. 13 | -------------------------------------------------------------------------------- /PyTorch/nlp/DeepSpeedExamples/deepspeed-bert/requirements.txt: -------------------------------------------------------------------------------- 1 | # Used for downloading models over HTTP 2 | ipdb==0.13.9 3 | #Data processing 4 | h5py==3.9.0 ; python_version < '3.12' 5 | h5py==3.13.0 ; python_version >= '3.12' 6 | html2text==2020.1.16 7 | nltk>=3.6.7 8 | progressbar==2.5 9 | #Others 10 | git+https://github.com/NVIDIA/dllogger.git@26a0f8f1958de2c0c460925ff6102a4d2486d6cc 11 | torchmetrics>=0.8.0 12 | -------------------------------------------------------------------------------- /PyTorch/nlp/DeepSpeedExamples/deepspeed-bert/scripts/bert_1.5b_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "hidden_size": 1600, 3 | "hidden_act": "gelu", 4 | "initializer_range": 0.02, 5 | "vocab_size": 30522, 6 | "hidden_dropout_prob": 0.1, 7 | "num_attention_heads": 25, 8 | "type_vocab_size": 2, 9 | "max_position_embeddings": 512, 10 | "num_hidden_layers": 48, 11 | "intermediate_size": 6400, 12 | "attention_probs_dropout_prob": 0.1, 13 | "layer_norm_large_model": true 14 | } 15 | -------------------------------------------------------------------------------- /PyTorch/nlp/DeepSpeedExamples/deepspeed-bert/scripts/bert_5b_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "hidden_size": 2560, 3 | "hidden_act": "gelu", 4 | "initializer_range": 0.02, 5 | "vocab_size": 512035, 6 | "hidden_dropout_prob": 0.1, 7 | "num_attention_heads": 40, 8 | "type_vocab_size": 2, 9 | "max_position_embeddings": 512, 10 | "num_hidden_layers": 63, 11 | "intermediate_size": 10240, 12 | "attention_probs_dropout_prob": 0.1, 13 | "layer_norm_large_model": true 14 | } 15 | -------------------------------------------------------------------------------- /PyTorch/nlp/DeepSpeedExamples/deepspeed-bert/scripts/deepspeed_config_bert_1.5b.json: -------------------------------------------------------------------------------- 1 | { 2 | "steps_per_print": 48, 3 | "train_batch_size": 12288, 4 | "train_micro_batch_size_per_gpu": 32, 5 | "tensorboard": { 6 | "enabled": true, 7 | "output_path": "./results/bert_1.5b/tensorboard", 8 | "job_name": "bert_1.5b_lans_zero1_bf16" 9 | }, 
10 | "bf16": { "enabled": true }, 11 | "gradient_clipping": 1.0, 12 | "zero_optimization": { 13 | "stage": 1, 14 | "contiguous_gradients": false 15 | }, 16 | "zero_allow_untested_optimizer": true, 17 | "timers": { 18 | "throughput": { 19 | "enabled": true, 20 | "synchronized": false 21 | } 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /PyTorch/nlp/DeepSpeedExamples/deepspeed-bert/scripts/deepspeed_config_bert_5b_lans.json: -------------------------------------------------------------------------------- 1 | { 2 | "steps_per_print": 4, 3 | "train_batch_size": 12288, 4 | "train_micro_batch_size_per_gpu": 32, 5 | "tensorboard": { 6 | "enabled": false, 7 | "output_path": "./results/bert_5b_lans/tensorboard", 8 | "job_name": "bert_5b_lans_zero2_bf16" 9 | }, 10 | "bf16": { "enabled": true }, 11 | "gradient_clipping": 1.0, 12 | 13 | "zero_optimization": { "stage": 2, 14 | "overlap_comm": false, 15 | "reduce_scatter" : false, 16 | "contiguous_gradients" : false, 17 | "reduce_bucket_size": 150000000 18 | }, 19 | "zero_allow_untested_optimizer": true, 20 | "timers": { 21 | "throughput": { 22 | "enabled": true, 23 | "synchronized": false 24 | } 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /PyTorch/nlp/DeepSpeedExamples/deepspeed-bert/scripts/hostsfile: -------------------------------------------------------------------------------- 1 | 10.10.100.101 slots=8 2 | 10.10.100.102 slots=8 3 | 10.10.100.103 slots=8 4 | 10.10.100.104 slots=8 -------------------------------------------------------------------------------- /PyTorch/nlp/bert/bert_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "attention_probs_dropout_prob": 0.1, 3 | "hidden_act": "gelu", 4 | "hidden_dropout_prob": 0.1, 5 | "hidden_size": 1024, 6 | "initializer_range": 0.02, 7 | "intermediate_size": 4096, 8 | "max_position_embeddings": 512, 9 | "num_attention_heads": 16, 10 | "num_hidden_layers": 24, 11 | "type_vocab_size": 2, 12 | "vocab_size": 30522 13 | } 14 | -------------------------------------------------------------------------------- /PyTorch/nlp/bert/bert_config_1.2B.json: -------------------------------------------------------------------------------- 1 | { 2 | "attention_probs_dropout_prob": 0.1, 3 | "hidden_act": "gelu", 4 | "hidden_dropout_prob": 0.1, 5 | "hidden_size": 1536, 6 | "initializer_range": 0.02, 7 | "intermediate_size": 6144, 8 | "max_position_embeddings": 512, 9 | "num_attention_heads": 16, 10 | "num_hidden_layers": 40, 11 | "type_vocab_size": 2, 12 | "vocab_size": 30522 13 | } 14 | -------------------------------------------------------------------------------- /PyTorch/nlp/bert/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 NVIDIA CORPORATION. All rights reserved. 2 | # Licensed under the Apache License, Version 2.0 (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # 6 | # http://www.apache.org/licenses/LICENSE-2.0 7 | # 8 | # Unless required by applicable law or agreed to in writing, software 9 | # distributed under the License is distributed on an "AS IS" BASIS, 10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | # See the License for the specific language governing permissions and 12 | # limitations under the License. 
13 | -------------------------------------------------------------------------------- /PyTorch/nlp/bert/requirements.txt: -------------------------------------------------------------------------------- 1 | # Accessing files from S3 directly. 2 | boto3==1.26.75 3 | # Used for downloading models over HTTP 4 | ipdb==0.13.9 5 | #Data processing 6 | h5py==3.9.0 7 | html2text==2020.1.16 8 | nltk==3.8.1 9 | progressbar==2.5 10 | #Others 11 | git+https://github.com/NVIDIA/dllogger.git@26a0f8f1958de2c0c460925ff6102a4d2486d6cc 12 | --------------------------------------------------------------------------------