├── .github
    ├── CODEOWNERS
    └── workflows
    │   └── cla.yml
├── .gitignore
├── .gitmodules
├── CONTRIBUTING.md
├── LICENSE.md
├── README.md
├── benchmark_readme_template.md
├── graph_neural_network
    ├── Dockerfile
    ├── Dockerfile.h100
    ├── README.md
    ├── build_partition_feature.py
    ├── compress_graph.py
    ├── dataset.py
    ├── dist_train_rgnn.py
    ├── download.py
    ├── download_igbh_full.sh
    ├── mlperf_logging_utils.py
    ├── partition.py
    ├── rgnn.py
    ├── split_seeds.py
    ├── train_rgnn_multi_gpu.py
    └── utilities.py
├── install_cuda_docker.sh
├── language_model
    └── tensorflow
    │   └── bert
    │       ├── README.md
    │       ├── __init__.py
    │       ├── checkpoint_add_gradacc.py
    │       ├── cleanup_scripts
    │           ├── clean.sh
    │           ├── cleanup_file.py
    │           ├── create_pretraining_data.py
    │           ├── do_gather.py
    │           ├── do_sentence_segmentation.py
    │           ├── download_and_uncompress.sh
    │           ├── eval.md5
    │           ├── pick_eval_samples.py
    │           ├── process_wiki.sh
    │           ├── sample_data
    │           │   ├── wiki_00
    │           │   ├── wiki_01
    │           │   ├── wiki_02
    │           │   └── wiki_03
    │           ├── seperate_test_set.py
    │           └── tokenization.py
    │       ├── dataset.md
    │       ├── deferred_grad_optimizer.py
    │       ├── distribution_utils.py
    │       ├── extract_features.py
    │       ├── lamb_optimizer_v1.py
    │       ├── mlp_logging.py
    │       ├── modeling.py
    │       ├── optimization.py
    │       ├── run_pretraining.py
    │       └── tpu_lib.py
├── large_language_model_pretraining
    └── nemo
    │   ├── Dockerfile
    │   ├── README.md
    │   ├── callbacks.py
    │   ├── config.sh
    │   ├── mcore.patch
    │   ├── pretrain_llama31.py
    │   ├── run_llama31.sh
    │   └── utils
    │       ├── consolidate_data.sh
    │       ├── launch_nemo_convert.sh
    │       ├── nemo_convert.py
    │       └── preprocess.sh
├── llama2_70b_lora
    ├── Dockerfile
    ├── README.md
    ├── configs
    │   └── default_config.yaml
    ├── convergence_example.txt
    ├── requirements.txt
    ├── run_docker.sh
    ├── run_llama_70B_scrolls_r16.sh
    └── scripts
    │   ├── mlperf_logging_utils.py
    │   ├── train.py
    │   └── utils.py
├── recommendation_v2
    └── torchrec_dlrm
    │   ├── Dockerfile
    │   ├── README.MD
    │   ├── __init__.py
    │   ├── aws_component.py
    │   ├── data
    │       ├── __init__.py
    │       ├── dlrm_dataloader.py
    │       └── multi_hot_criteo.py
    │   ├── dlrm_main.py
    │   ├── lr_scheduler.py
    │   ├── md5sums_MLPerf_v2_synthetic_multi_hot_sparse_dataset.txt
    │   ├── md5sums_preprocessed_criteo_click_logs_dataset.txt
    │   ├── mlperf_logging_utils.py
    │   ├── multi_hot.py
    │   ├── requirements.txt
    │   ├── scripts
    │       ├── materialize_synthetic_multihot_dataset.py
    │       └── process_Criteo_1TB_Click_Logs_dataset.sh
    │   └── tests
    │       └── test_dlrm_main.py
├── reference_results.md
├── retired_benchmarks
    ├── dlrm
    │   ├── download_dataset.sh
    │   └── verify_dataset.sh
    ├── gnmt
    │   ├── .dockerignore
    │   ├── .gitignore
    │   ├── README.md
    │   ├── download_dataset.sh
    │   ├── pytorch
    │   │   ├── Dockerfile
    │   │   ├── LICENSE
    │   │   ├── README.md
    │   │   ├── requirements.txt
    │   │   ├── run.sh
    │   │   ├── run_and_time.sh
    │   │   ├── scripts
    │   │   │   ├── docker
    │   │   │   │   ├── build.sh
    │   │   │   │   └── interactive.sh
    │   │   │   └── filter_dataset.py
    │   │   ├── seq2seq
    │   │   │   ├── data
    │   │   │   │   ├── config.py
    │   │   │   │   ├── dataset.py
    │   │   │   │   ├── sampler.py
    │   │   │   │   └── tokenizer.py
    │   │   │   ├── inference
    │   │   │   │   ├── beam_search.py
    │   │   │   │   └── inference.py
    │   │   │   ├── models
    │   │   │   │   ├── attention.py
    │   │   │   │   ├── decoder.py
    │   │   │   │   ├── encoder.py
    │   │   │   │   ├── gnmt.py
    │   │   │   │   └── seq2seq_base.py
    │   │   │   ├── train
    │   │   │   │   ├── fp_optimizers.py
    │   │   │   │   ├── lr_scheduler.py
    │   │   │   │   ├── smoothing.py
    │   │   │   │   └── trainer.py
    │   │   │   └── utils.py
    │   │   ├── train.py
    │   │   └── translate.py
    │   └── verify_dataset.sh
    ├── gpt3
    │   ├── megatron-lm
    │   │   ├── .gitignore
    │   │   ├── Dockerfile
    │   │   ├── LICENSE
    │   │   ├── README.md
    │   │   ├── checksums
    │   │   │   ├── additional_checkpoint_files
    │   │   │   │   ├── common.pt
    │   │   │   │   └── metadata.json
    │   │   │   ├── dataset_checksum.log
    │   │   │   └── fp32_checkpoint_checksum.log
    │   │   ├── megatron
    │   │   │   ├── __init__.py
    │   │   │   ├── arguments.py
    │   │   │   ├── checkpointing.py
    │   │   │   ├── core
    │   │   │   │   └── dist_checkpointing
    │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   ├── core.py
    │   │   │   │   │   ├── dict_utils.py
    │   │   │   │   │   ├── mapping.py
    │   │   │   │   │   ├── optimizer.py
    │   │   │   │   │   ├── serialization.py
    │   │   │   │   │   ├── strategies
    │   │   │   │   │       ├── __init__.py
    │   │   │   │   │       ├── base.py
    │   │   │   │   │       └── zarr.py
    │   │   │   │   │   ├── tests
    │   │   │   │   │       ├── __init__.py
    │   │   │   │   │       ├── common.py
    │   │   │   │   │       ├── test_correctness.py
    │   │   │   │   │       └── test_load_check.py
    │   │   │   │   │   └── utils.py
    │   │   │   ├── data
    │   │   │   │   ├── Makefile
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── autoaugment.py
    │   │   │   │   ├── biencoder_dataset_utils.py
    │   │   │   │   ├── blendable_dataset.py
    │   │   │   │   ├── data_samplers.py
    │   │   │   │   ├── dataset_utils.py
    │   │   │   │   ├── gpt_dataset.py
    │   │   │   │   ├── helpers.cpp
    │   │   │   │   ├── ict_dataset.py
    │   │   │   │   ├── indexed_dataset.py
    │   │   │   │   ├── orqa_wiki_dataset.py
    │   │   │   │   ├── realm_dataset_utils.py
    │   │   │   │   ├── realm_index.py
    │   │   │   │   └── test
    │   │   │   │   │   ├── test_indexed_dataset.py
    │   │   │   │   │   └── test_preprocess_data.sh
    │   │   │   ├── dist_signal_handler.py
    │   │   │   ├── fp16_deprecated
    │   │   │   │   └── loss_scaler.py
    │   │   │   ├── fused_kernels
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── compat.h
    │   │   │   │   ├── fused_weight_gradient_dense.cpp
    │   │   │   │   ├── fused_weight_gradient_dense.cu
    │   │   │   │   ├── layer_norm_cuda.cpp
    │   │   │   │   ├── layer_norm_cuda_kernel.cu
    │   │   │   │   ├── scaled_masked_softmax.cpp
    │   │   │   │   ├── scaled_masked_softmax.h
    │   │   │   │   ├── scaled_masked_softmax_cuda.cu
    │   │   │   │   ├── scaled_softmax.cpp
    │   │   │   │   ├── scaled_softmax_cuda.cu
    │   │   │   │   ├── scaled_upper_triang_masked_softmax.cpp
    │   │   │   │   ├── scaled_upper_triang_masked_softmax.h
    │   │   │   │   ├── scaled_upper_triang_masked_softmax_cuda.cu
    │   │   │   │   ├── tests
    │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   └── test_fused_kernels.py
    │   │   │   │   └── type_shim.h
    │   │   │   ├── global_vars.py
    │   │   │   ├── indexer.py
    │   │   │   ├── initialize.py
    │   │   │   ├── learning_rates.py
    │   │   │   ├── memory.py
    │   │   │   ├── microbatches.py
    │   │   │   ├── model
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── biencoder_model.py
    │   │   │   │   ├── classification.py
    │   │   │   │   ├── distributed.py
    │   │   │   │   ├── enums.py
    │   │   │   │   ├── fused_bias_gelu.py
    │   │   │   │   ├── fused_layer_norm.py
    │   │   │   │   ├── fused_softmax.py
    │   │   │   │   ├── gpt_model.py
    │   │   │   │   ├── language_model.py
    │   │   │   │   ├── module.py
    │   │   │   │   ├── multiple_choice.py
    │   │   │   │   ├── realm_model.py
    │   │   │   │   ├── transformer.py
    │   │   │   │   └── utils.py
    │   │   │   ├── mpu
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── cross_entropy.py
    │   │   │   │   ├── data.py
    │   │   │   │   ├── initialize.py
    │   │   │   │   ├── layers.py
    │   │   │   │   ├── mappings.py
    │   │   │   │   ├── random.py
    │   │   │   │   ├── tests
    │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   ├── commons.py
    │   │   │   │   │   ├── test_cross_entropy.py
    │   │   │   │   │   ├── test_data.py
    │   │   │   │   │   ├── test_initialize.py
    │   │   │   │   │   ├── test_layers.py
    │   │   │   │   │   └── test_random.py
    │   │   │   │   └── utils.py
    │   │   │   ├── optimizer
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── clip_grads.py
    │   │   │   │   ├── distrib_optimizer.py
    │   │   │   │   ├── grad_scaler.py
    │   │   │   │   └── optimizer.py
    │   │   │   ├── optimizer_param_scheduler.py
    │   │   │   ├── p2p_communication.py
    │   │   │   ├── schedules.py
    │   │   │   ├── static
    │   │   │   │   └── index.html
    │   │   │   ├── text_generation
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── api.py
    │   │   │   │   ├── beam_utils.py
    │   │   │   │   ├── communication.py
    │   │   │   │   ├── forward_step.py
    │   │   │   │   ├── generation.py
    │   │   │   │   ├── sampling.py
    │   │   │   │   └── tokenization.py
    │   │   │   ├── text_generation_server.py
    │   │   │   ├── timers.py
    │   │   │   ├── tokenizer
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── bert_tokenization.py
    │   │   │   │   ├── gpt2_tokenization.py
    │   │   │   │   └── tokenizer.py
    │   │   │   ├── training.py
    │   │   │   └── utils.py
    │   │   ├── pretrain_gpt.py
    │   │   ├── requirements.txt
    │   │   ├── run_gpt3.sh
    │   │   ├── scripts
    │   │   │   ├── common_bf16.json
    │   │   │   ├── common_fp32.json
    │   │   │   ├── convert_paxml_to_megatron_distributed.py
    │   │   │   ├── json_to_torch.py
    │   │   │   ├── load_checkpoint.md
    │   │   │   ├── load_checkpoint.py
    │   │   │   ├── preprocess.sh
    │   │   │   └── preprocess_val.sh
    │   │   ├── tasks
    │   │   │   ├── data_utils.py
    │   │   │   ├── ensemble_classifier.py
    │   │   │   ├── eval_utils.py
    │   │   │   ├── finetune_utils.py
    │   │   │   ├── glue
    │   │   │   │   ├── data.py
    │   │   │   │   ├── finetune.py
    │   │   │   │   ├── mnli.py
    │   │   │   │   └── qqp.py
    │   │   │   ├── main.py
    │   │   │   ├── orqa
    │   │   │   │   ├── README.md
    │   │   │   │   ├── evaluate_orqa.py
    │   │   │   │   ├── evaluate_utils.py
    │   │   │   │   ├── supervised
    │   │   │   │   │   ├── data.py
    │   │   │   │   │   ├── eval_utils.py
    │   │   │   │   │   └── finetune.py
    │   │   │   │   └── unsupervised
    │   │   │   │   │   ├── nq.py
    │   │   │   │   │   ├── qa_utils.py
    │   │   │   │   │   └── tokenizers.py
    │   │   │   ├── race
    │   │   │   │   ├── data.py
    │   │   │   │   └── finetune.py
    │   │   │   ├── vision
    │   │   │   │   ├── classification.py
    │   │   │   │   ├── eval_utils.py
    │   │   │   │   ├── finetune_utils.py
    │   │   │   │   └── main.py
    │   │   │   └── zeroshot_gpt
    │   │   │   │   ├── datasets.py
    │   │   │   │   ├── detokenizer.py
    │   │   │   │   └── evaluate.py
    │   │   └── tools
    │   │   │   └── preprocess_data.py
    │   └── paxml
    │   │   ├── README.md
    │   │   ├── c4.py
    │   │   ├── c4_mllog.py
    │   │   ├── lm_cloud.py
    │   │   ├── model_params.py
    │   │   └── utils
    │   │       ├── generate_spm.md
    │   │       ├── load_ts_ckpt.md
    │   │       ├── load_ts_ckpt.py
    │   │       ├── select_example.md
    │   │       ├── select_example.py
    │   │       └── select_text.py
    ├── maskrcnn
    │   ├── Dockerfile
    │   ├── README.md
    │   ├── download_dataset.sh
    │   ├── pytorch
    │   │   ├── .flake8
    │   │   ├── .github
    │   │   │   └── ISSUE_TEMPLATE
    │   │   │   │   ├── bug-report.md
    │   │   │   │   ├── feature-request.md
    │   │   │   │   └── questions-help-support.md
    │   │   ├── .gitignore
    │   │   ├── ABSTRACTIONS.md
    │   │   ├── CODE_OF_CONDUCT.md
    │   │   ├── CONTRIBUTING.md
    │   │   ├── INSTALL.md
    │   │   ├── LICENSE
    │   │   ├── MODEL_ZOO.md
    │   │   ├── README.md
    │   │   ├── TROUBLESHOOTING.md
    │   │   ├── configs
    │   │   │   └── e2e_mask_rcnn_R_50_FPN_1x.yaml
    │   │   ├── demo
    │   │   │   ├── Mask_R-CNN_demo.ipynb
    │   │   │   ├── README.md
    │   │   │   ├── demo_e2e_mask_rcnn_R_50_FPN_1x.png
    │   │   │   ├── demo_e2e_mask_rcnn_X_101_32x8d_FPN_1x.png
    │   │   │   ├── predictor.py
    │   │   │   └── webcam.py
    │   │   ├── docker
    │   │   │   ├── Dockerfile
    │   │   │   └── docker-jupyter
    │   │   │   │   ├── Dockerfile
    │   │   │   │   └── jupyter_notebook_config.py
    │   │   ├── maskrcnn_benchmark
    │   │   │   ├── __init__.py
    │   │   │   ├── config
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── defaults.py
    │   │   │   │   └── paths_catalog.py
    │   │   │   ├── csrc
    │   │   │   │   ├── ROIAlign.h
    │   │   │   │   ├── ROIPool.h
    │   │   │   │   ├── SigmoidFocalLoss.h
    │   │   │   │   ├── cpu
    │   │   │   │   │   ├── ROIAlign_cpu.cpp
    │   │   │   │   │   ├── nms_cpu.cpp
    │   │   │   │   │   └── vision.h
    │   │   │   │   ├── cuda
    │   │   │   │   │   ├── ROIAlign_cuda.cu
    │   │   │   │   │   ├── ROIPool_cuda.cu
    │   │   │   │   │   ├── SigmoidFocalLoss_cuda.cu
    │   │   │   │   │   ├── nms.cu
    │   │   │   │   │   └── vision.h
    │   │   │   │   ├── nms.h
    │   │   │   │   └── vision.cpp
    │   │   │   ├── data
    │   │   │   │   ├── README.md
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── build.py
    │   │   │   │   ├── collate_batch.py
    │   │   │   │   ├── datasets
    │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   ├── coco.py
    │   │   │   │   │   ├── concat_dataset.py
    │   │   │   │   │   ├── evaluation
    │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   ├── coco
    │   │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   │   └── coco_eval.py
    │   │   │   │   │   │   └── voc
    │   │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   │   └── voc_eval.py
    │   │   │   │   │   ├── list_dataset.py
    │   │   │   │   │   └── voc.py
    │   │   │   │   ├── samplers
    │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   ├── distributed.py
    │   │   │   │   │   ├── grouped_batch_sampler.py
    │   │   │   │   │   └── iteration_based_batch_sampler.py
    │   │   │   │   └── transforms
    │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   ├── build.py
    │   │   │   │   │   └── transforms.py
    │   │   │   ├── engine
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── inference.py
    │   │   │   │   ├── tester.py
    │   │   │   │   └── trainer.py
    │   │   │   ├── layers
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── _utils.py
    │   │   │   │   ├── batch_norm.py
    │   │   │   │   ├── misc.py
    │   │   │   │   ├── nms.py
    │   │   │   │   ├── roi_align.py
    │   │   │   │   ├── roi_pool.py
    │   │   │   │   ├── sigmoid_focal_loss.py
    │   │   │   │   └── smooth_l1_loss.py
    │   │   │   ├── modeling
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── backbone
    │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   ├── backbone.py
    │   │   │   │   │   ├── fpn.py
    │   │   │   │   │   └── resnet.py
    │   │   │   │   ├── balanced_positive_negative_sampler.py
    │   │   │   │   ├── box_coder.py
    │   │   │   │   ├── detector
    │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   ├── detectors.py
    │   │   │   │   │   └── generalized_rcnn.py
    │   │   │   │   ├── make_layers.py
    │   │   │   │   ├── matcher.py
    │   │   │   │   ├── poolers.py
    │   │   │   │   ├── registry.py
    │   │   │   │   ├── roi_heads
    │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   ├── box_head
    │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   ├── box_head.py
    │   │   │   │   │   │   ├── inference.py
    │   │   │   │   │   │   ├── loss.py
    │   │   │   │   │   │   ├── roi_box_feature_extractors.py
    │   │   │   │   │   │   └── roi_box_predictors.py
    │   │   │   │   │   ├── keypoint_head
    │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   ├── inference.py
    │   │   │   │   │   │   ├── keypoint_head.py
    │   │   │   │   │   │   ├── loss.py
    │   │   │   │   │   │   ├── roi_keypoint_feature_extractors.py
    │   │   │   │   │   │   └── roi_keypoint_predictors.py
    │   │   │   │   │   ├── mask_head
    │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   ├── inference.py
    │   │   │   │   │   │   ├── loss.py
    │   │   │   │   │   │   ├── mask_head.py
    │   │   │   │   │   │   ├── roi_mask_feature_extractors.py
    │   │   │   │   │   │   └── roi_mask_predictors.py
    │   │   │   │   │   └── roi_heads.py
    │   │   │   │   ├── rpn
    │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   ├── anchor_generator.py
    │   │   │   │   │   ├── inference.py
    │   │   │   │   │   ├── loss.py
    │   │   │   │   │   ├── retinanet
    │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   ├── inference.py
    │   │   │   │   │   │   ├── loss.py
    │   │   │   │   │   │   └── retinanet.py
    │   │   │   │   │   ├── rpn.py
    │   │   │   │   │   └── utils.py
    │   │   │   │   └── utils.py
    │   │   │   ├── solver
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── build.py
    │   │   │   │   └── lr_scheduler.py
    │   │   │   ├── structures
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── bounding_box.py
    │   │   │   │   ├── boxlist_ops.py
    │   │   │   │   ├── image_list.py
    │   │   │   │   ├── keypoint.py
    │   │   │   │   └── segmentation_mask.py
    │   │   │   └── utils
    │   │   │   │   ├── README.md
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── c2_model_loading.py
    │   │   │   │   ├── checkpoint.py
    │   │   │   │   ├── collect_env.py
    │   │   │   │   ├── comm.py
    │   │   │   │   ├── cv2_util.py
    │   │   │   │   ├── env.py
    │   │   │   │   ├── imports.py
    │   │   │   │   ├── logger.py
    │   │   │   │   ├── metric_logger.py
    │   │   │   │   ├── miscellaneous.py
    │   │   │   │   ├── mlperf_logger.py
    │   │   │   │   ├── model_serialization.py
    │   │   │   │   ├── model_zoo.py
    │   │   │   │   └── registry.py
    │   │   ├── setup.py
    │   │   ├── tests
    │   │   │   ├── checkpoint.py
    │   │   │   ├── test_data_samplers.py
    │   │   │   └── test_metric_logger.py
    │   │   └── tools
    │   │   │   ├── cityscapes
    │   │   │       ├── convert_cityscapes_to_coco.py
    │   │   │       └── instances2dict_with_polygons.py
    │   │   │   ├── test_net.py
    │   │   │   ├── train_mlperf.py
    │   │   │   └── train_net.py
    │   └── run_and_time.sh
    ├── minigo
    │   ├── README.md
    │   └── tensorflow
    │   │   ├── Dockerfile
    │   │   ├── minigo
    │   │       ├── .bazelrc
    │   │       ├── .gitignore
    │   │       ├── .pylintrc
    │   │       ├── LICENSE
    │   │       ├── README.md
    │   │       ├── RESULTS.md
    │   │       ├── WORKSPACE
    │   │       ├── __init__.py
    │   │       ├── batch_exporter.py
    │   │       ├── bigtable_input.py
    │   │       ├── bigtable_output.py
    │   │       ├── bootstrap.py
    │   │       ├── cc
    │   │       │   ├── .clang-format
    │   │       │   ├── BUILD
    │   │       │   ├── CPPLINT.cfg
    │   │       │   ├── README.md
    │   │       │   ├── algorithm.cc
    │   │       │   ├── algorithm.h
    │   │       │   ├── algorithm_test.cc
    │   │       │   ├── async
    │   │       │   │   ├── BUILD
    │   │       │   │   ├── poll_thread.cc
    │   │       │   │   ├── poll_thread.h
    │   │       │   │   ├── semaphore.h
    │   │       │   │   ├── sharded_executor.cc
    │   │       │   │   ├── sharded_executor.h
    │   │       │   │   ├── thread.cc
    │   │       │   │   ├── thread.h
    │   │       │   │   ├── thread_safe_queue.h
    │   │       │   │   └── thread_safe_queue_test.cc
    │   │       │   ├── benchmark.BUILD
    │   │       │   ├── color.cc
    │   │       │   ├── color.h
    │   │       │   ├── concurrent_selfplay.cc
    │   │       │   ├── config
    │   │       │   │   ├── BUILD
    │   │       │   │   └── minigo.bzl
    │   │       │   ├── configure_tensorflow.sh
    │   │       │   ├── constants.h
    │   │       │   ├── coord.cc
    │   │       │   ├── coord.h
    │   │       │   ├── coord_test.cc
    │   │       │   ├── cuda_configure.bzl
    │   │       │   ├── dual_net
    │   │       │   │   ├── BUILD
    │   │       │   │   ├── batching_dual_net.cc
    │   │       │   │   ├── batching_dual_net.h
    │   │       │   │   ├── batching_dual_net_test.cc
    │   │       │   │   ├── dual_net.cc
    │   │       │   │   ├── dual_net.h
    │   │       │   │   ├── dual_net_test.cc
    │   │       │   │   ├── factory.cc
    │   │       │   │   ├── factory.h
    │   │       │   │   ├── fake_dual_net.cc
    │   │       │   │   ├── fake_dual_net.h
    │   │       │   │   ├── inference_cache.cc
    │   │       │   │   ├── inference_cache.h
    │   │       │   │   ├── inference_cache_test.cc
    │   │       │   │   ├── lite_dual_net.cc
    │   │       │   │   ├── lite_dual_net.h
    │   │       │   │   ├── random_dual_net.cc
    │   │       │   │   ├── random_dual_net.h
    │   │       │   │   ├── reloading_dual_net.cc
    │   │       │   │   ├── reloading_dual_net.h
    │   │       │   │   ├── reloading_dual_net_test.cc
    │   │       │   │   ├── test_lite.minigo
    │   │       │   │   ├── test_model.pb
    │   │       │   │   ├── test_model.tflite
    │   │       │   │   ├── test_model.uff
    │   │       │   │   ├── test_tf.minigo
    │   │       │   │   ├── tf_dual_net.cc
    │   │       │   │   ├── tf_dual_net.h
    │   │       │   │   ├── tpu_dual_net.cc
    │   │       │   │   ├── tpu_dual_net.h
    │   │       │   │   ├── trt_dual_net.cc
    │   │       │   │   └── trt_dual_net.h
    │   │       │   ├── eval.cc
    │   │       │   ├── file
    │   │       │   │   ├── BUILD
    │   │       │   │   ├── directory_watcher.cc
    │   │       │   │   ├── directory_watcher.h
    │   │       │   │   ├── path.cc
    │   │       │   │   ├── path.h
    │   │       │   │   ├── path_test.cc
    │   │       │   │   ├── utils.h
    │   │       │   │   ├── utils_posix.cc
    │   │       │   │   ├── utils_test.cc
    │   │       │   │   ├── utils_tf.cc
    │   │       │   │   └── utils_windows.cc
    │   │       │   ├── game.cc
    │   │       │   ├── game.h
    │   │       │   ├── game_utils.cc
    │   │       │   ├── game_utils.h
    │   │       │   ├── group.cc
    │   │       │   ├── group.h
    │   │       │   ├── gtp.cc
    │   │       │   ├── gtp_client.cc
    │   │       │   ├── gtp_client.h
    │   │       │   ├── gtp_player.cc
    │   │       │   ├── gtp_player.h
    │   │       │   ├── init.cc
    │   │       │   ├── init.h
    │   │       │   ├── inline_vector.h
    │   │       │   ├── json.BUILD
    │   │       │   ├── logging.cc
    │   │       │   ├── logging.h
    │   │       │   ├── mcts_node.cc
    │   │       │   ├── mcts_node.h
    │   │       │   ├── mcts_node_test.cc
    │   │       │   ├── mcts_player.cc
    │   │       │   ├── mcts_player.h
    │   │       │   ├── mcts_player_test.cc
    │   │       │   ├── mcts_tree.cc
    │   │       │   ├── mcts_tree.h
    │   │       │   ├── mcts_tree_test.cc
    │   │       │   ├── minigui_gtp_client.cc
    │   │       │   ├── minigui_gtp_client.h
    │   │       │   ├── minigui_player.cc
    │   │       │   ├── minigui_player.h
    │   │       │   ├── model
    │   │       │   │   ├── BUILD
    │   │       │   │   ├── batching_model.cc
    │   │       │   │   ├── batching_model.h
    │   │       │   │   ├── batching_model_test.cc
    │   │       │   │   ├── buffered_model.cc
    │   │       │   │   ├── buffered_model.h
    │   │       │   │   ├── factory.cc
    │   │       │   │   ├── factory.h
    │   │       │   │   ├── features.cc
    │   │       │   │   ├── features.h
    │   │       │   │   ├── features_benchmark.cc
    │   │       │   │   ├── features_internal.h
    │   │       │   │   ├── features_test.cc
    │   │       │   │   ├── inference_cache.cc
    │   │       │   │   ├── inference_cache.h
    │   │       │   │   ├── inference_cache_test.cc
    │   │       │   │   ├── loader.cc
    │   │       │   │   ├── loader.h
    │   │       │   │   ├── model.cc
    │   │       │   │   ├── model.h
    │   │       │   │   ├── types.cc
    │   │       │   │   ├── types.h
    │   │       │   │   └── types_test.cc
    │   │       │   ├── move.cc
    │   │       │   ├── move.h
    │   │       │   ├── padded_array.h
    │   │       │   ├── pass_alive_test.cc
    │   │       │   ├── platform
    │   │       │   │   ├── BUILD
    │   │       │   │   ├── utils.h
    │   │       │   │   ├── utils_linux.cc
    │   │       │   │   ├── utils_osx.cc
    │   │       │   │   └── utils_windows.cc
    │   │       │   ├── position.cc
    │   │       │   ├── position.h
    │   │       │   ├── position_benchmark.cc
    │   │       │   ├── position_test.cc
    │   │       │   ├── puzzle.cc
    │   │       │   ├── random.cc
    │   │       │   ├── random.h
    │   │       │   ├── random_test.cc
    │   │       │   ├── replay_games.cc
    │   │       │   ├── sample_records.cc
    │   │       │   ├── selfplay.cc
    │   │       │   ├── sgf.cc
    │   │       │   ├── sgf.h
    │   │       │   ├── sgf_test.cc
    │   │       │   ├── simple_example.cc
    │   │       │   ├── stone.h
    │   │       │   ├── symmetries.cc
    │   │       │   ├── symmetries.h
    │   │       │   ├── symmetries_test.cc
    │   │       │   ├── tensorflow
    │   │       │   │   ├── BUILD
    │   │       │   │   └── copy_outputs.sh
    │   │       │   ├── tensorrt_configure.bzl
    │   │       │   ├── test.sh
    │   │       │   ├── test_utils.cc
    │   │       │   ├── test_utils.h
    │   │       │   ├── tf_bt_utils.cc
    │   │       │   ├── tf_bt_utils_dummy.cc
    │   │       │   ├── tf_utils.cc
    │   │       │   ├── tf_utils.h
    │   │       │   ├── tf_utils_dummy.cc
    │   │       │   ├── tfrzz_to_cbt.cc
    │   │       │   ├── thread_safe_queue.h
    │   │       │   ├── thread_safe_queue_test.cc
    │   │       │   ├── tiny_set.h
    │   │       │   ├── tpu_test.cc
    │   │       │   ├── wtf.BUILD
    │   │       │   ├── wtf_saver.cc
    │   │       │   ├── wtf_saver.h
    │   │       │   ├── zobrist.cc
    │   │       │   └── zobrist.h
    │   │       ├── cloud_logging.py
    │   │       ├── cluster
    │   │       │   ├── .gitignore
    │   │       │   ├── README.md
    │   │       │   ├── base
    │   │       │   │   ├── Dockerfile
    │   │       │   │   └── Makefile
    │   │       │   ├── calibrator
    │   │       │   │   ├── Dockerfile
    │   │       │   │   ├── Makefile
    │   │       │   │   └── calibrator-deployment.yaml
    │   │       │   ├── cgos
    │   │       │   │   ├── Dockerfile
    │   │       │   │   ├── Makefile
    │   │       │   │   └── cgos-player.yaml
    │   │       │   ├── cluster-down.sh
    │   │       │   ├── cluster-up-cpu.sh
    │   │       │   ├── cluster-up-gpu-large.sh
    │   │       │   ├── cluster-up-gpu.sh
    │   │       │   ├── cluster-up-simple.sh
    │   │       │   ├── cluster-up-tpu.sh
    │   │       │   ├── common.sh
    │   │       │   ├── create_table.sh
    │   │       │   ├── destroy.sh
    │   │       │   ├── eval_server
    │   │       │   │   ├── README.md
    │   │       │   │   ├── add_model.py
    │   │       │   │   └── launch_eval.py
    │   │       │   ├── evaluator
    │   │       │   │   ├── Dockerfile-cc
    │   │       │   │   ├── Dockerfile-py
    │   │       │   │   ├── Dockerfile-ringmaster
    │   │       │   │   ├── Makefile
    │   │       │   │   ├── cc-evaluator.yaml
    │   │       │   │   ├── deploy-cc-evaluator.sh
    │   │       │   │   ├── evaluator_cc_wrapper.sh
    │   │       │   │   ├── evaluator_py_wrapper.sh
    │   │       │   │   ├── evaluator_ringmaster_wrapper.py
    │   │       │   │   ├── evaluator_ringmaster_wrapper.sh
    │   │       │   │   ├── gpu-evaluator.yaml
    │   │       │   │   └── launch_eval.py
    │   │       │   ├── init-credentials.sh
    │   │       │   ├── make-all.sh
    │   │       │   ├── minigui
    │   │       │   │   ├── Dockerfile
    │   │       │   │   ├── Makefile
    │   │       │   │   ├── minigui-pod.yaml
    │   │       │   │   ├── run-local.sh
    │   │       │   │   └── simple-service.yaml
    │   │       │   ├── ringmaster
    │   │       │   │   ├── Makefile
    │   │       │   │   ├── example.ctl
    │   │       │   │   ├── lz-Dockerfile
    │   │       │   │   ├── mggtp-Dockerfile
    │   │       │   │   ├── p100-lz-tuning
    │   │       │   │   ├── ringmaster.yaml
    │   │       │   │   ├── ringmaster_wrapper.sh
    │   │       │   │   └── setup_ringmaster.py
    │   │       │   ├── selfplay
    │   │       │   │   ├── Dockerfile-cc
    │   │       │   │   ├── Dockerfile-py
    │   │       │   │   ├── Dockerfile-tpu
    │   │       │   │   ├── Makefile
    │   │       │   │   ├── README.md
    │   │       │   │   ├── cc-player.yaml
    │   │       │   │   ├── deploy-cc-player.sh
    │   │       │   │   ├── deploy-cpu-player.sh
    │   │       │   │   ├── deploy-gpu-player.sh
    │   │       │   │   ├── gpu-player.yaml
    │   │       │   │   ├── gpu-provision-daemonset.yaml
    │   │       │   │   ├── launch-tpu-deployment.sh
    │   │       │   │   ├── tpu-player-deployment-nr.yaml
    │   │       │   │   └── tpu-player-deployment.yaml
    │   │       │   ├── trainer
    │   │       │   │   ├── Dockerfile
    │   │       │   │   ├── Makefile
    │   │       │   │   ├── deploy-trainer.sh
    │   │       │   │   └── tpu-trainer-deployment.yaml
    │   │       │   ├── unset-common.sh
    │   │       │   ├── utils.sh
    │   │       │   └── var-status.sh
    │   │       ├── coords.py
    │   │       ├── dual_net.py
    │   │       ├── dual_net_edge_tpu.py
    │   │       ├── evaluate.py
    │   │       ├── features.py
    │   │       ├── freeze_graph.py
    │   │       ├── go.py
    │   │       ├── gtp.py
    │   │       ├── gtp_cmd_handlers.py
    │   │       ├── gtp_engine.py
    │   │       ├── mask_flags.py
    │   │       ├── mcts.py
    │   │       ├── minigo_model.py
    │   │       ├── minigui
    │   │       │   ├── README.md
    │   │       │   ├── app.ts
    │   │       │   ├── base.ts
    │   │       │   ├── board.ts
    │   │       │   ├── control
    │   │       │   │   ├── leelaz.ctl
    │   │       │   │   ├── minigo_edgetpu.ctl
    │   │       │   │   ├── minigo_py.ctl
    │   │       │   │   ├── minigo_tf.ctl
    │   │       │   │   └── vs.ctl
    │   │       │   ├── demo.ts
    │   │       │   ├── edgetpu
    │   │       │   │   ├── install_requirements.sh
    │   │       │   │   └── start_chromium.sh
    │   │       │   ├── fetch-and-run.sh
    │   │       │   ├── graph.ts
    │   │       │   ├── gtp_socket.ts
    │   │       │   ├── kiosk.ts
    │   │       │   ├── layer.ts
    │   │       │   ├── log.ts
    │   │       │   ├── minigui-common.sh
    │   │       │   ├── position.ts
    │   │       │   ├── requirements.txt
    │   │       │   ├── serve.py
    │   │       │   ├── static
    │   │       │   │   ├── app.js
    │   │       │   │   ├── base.js
    │   │       │   │   ├── board.js
    │   │       │   │   ├── demo.html
    │   │       │   │   ├── demo.js
    │   │       │   │   ├── graph.js
    │   │       │   │   ├── gtp_socket.js
    │   │       │   │   ├── heat_map.js
    │   │       │   │   ├── index.html
    │   │       │   │   ├── kiosk.html
    │   │       │   │   ├── kiosk.js
    │   │       │   │   ├── layer.js
    │   │       │   │   ├── log.js
    │   │       │   │   ├── lw_demo.html
    │   │       │   │   ├── position.js
    │   │       │   │   ├── require
    │   │       │   │   │   ├── LICENSE
    │   │       │   │   │   └── require.js
    │   │       │   │   ├── socketio
    │   │       │   │   │   ├── LICENSE
    │   │       │   │   │   └── socket.io.min.js
    │   │       │   │   ├── study.html
    │   │       │   │   ├── study.js
    │   │       │   │   ├── style.css
    │   │       │   │   ├── util.js
    │   │       │   │   ├── variation_tree.js
    │   │       │   │   ├── view.js
    │   │       │   │   ├── vs.html
    │   │       │   │   ├── vs.js
    │   │       │   │   └── winrate_graph.js
    │   │       │   ├── study.ts
    │   │       │   ├── unset-minigui-common.sh
    │   │       │   ├── util.ts
    │   │       │   ├── variation_tree.ts
    │   │       │   ├── view.ts
    │   │       │   ├── vs.ts
    │   │       │   └── winrate_graph.ts
    │   │       ├── ml_perf
    │   │       │   ├── .gitignore
    │   │       │   ├── README.md
    │   │       │   ├── eval_models.py
    │   │       │   ├── flags
    │   │       │   │   ├── 9
    │   │       │   │   │   ├── architecture.flags
    │   │       │   │   │   ├── bootstrap.flags
    │   │       │   │   │   ├── eval.flags
    │   │       │   │   │   ├── rl_loop.flags
    │   │       │   │   │   ├── selfplay.flags
    │   │       │   │   │   ├── train.flags
    │   │       │   │   │   ├── train_loop.flags
    │   │       │   │   │   └── validate.flags
    │   │       │   │   └── 19
    │   │       │   │   │   ├── architecture.flags
    │   │       │   │   │   ├── bootstrap.flags
    │   │       │   │   │   ├── eval.flags
    │   │       │   │   │   ├── rl_loop.flags
    │   │       │   │   │   ├── selfplay.flags
    │   │       │   │   │   ├── train.flags
    │   │       │   │   │   ├── train_loop.flags
    │   │       │   │   │   └── validate.flags
    │   │       │   ├── get_data.py
    │   │       │   ├── init_from_checkpoint.py
    │   │       │   ├── make_checkpoint.py
    │   │       │   ├── reference_implementation.py
    │   │       │   ├── repeat_run.sh
    │   │       │   ├── scripts
    │   │       │   │   ├── bootstrap.sh
    │   │       │   │   ├── common.sh
    │   │       │   │   ├── init_from_checkpoint.sh
    │   │       │   │   ├── make_checkpoint.sh
    │   │       │   │   ├── start_selfplay.sh
    │   │       │   │   ├── stop_selfplay.sh
    │   │       │   │   └── train.sh
    │   │       │   ├── train_loop.py
    │   │       │   └── utils.py
    │   │       ├── notes.txt
    │   │       ├── oneoffs
    │   │       │   ├── __init__.py
    │   │       │   ├── bigquery_games_schema.json
    │   │       │   ├── bigquery_moves_schema.json
    │   │       │   ├── cbt_eval_sgfs.py
    │   │       │   ├── cbt_models.py
    │   │       │   ├── compare_examples.py
    │   │       │   ├── distillation.py
    │   │       │   ├── dump_game.py
    │   │       │   ├── embeddings.py
    │   │       │   ├── embeddings_graphs.py
    │   │       │   ├── eval_sgf_to_cbt.py
    │   │       │   ├── generate_tpu_graph_def.py
    │   │       │   ├── get_tpu_address.py
    │   │       │   ├── heatmap.py
    │   │       │   ├── inspect_examples.py
    │   │       │   ├── l2_cost_by_var.py
    │   │       │   ├── ladder_detector.py
    │   │       │   ├── launch_tensorboard.py
    │   │       │   ├── modelstats.sh
    │   │       │   ├── oneoff_utils.py
    │   │       │   ├── position_pv.py
    │   │       │   ├── prepare_bigquery.py
    │   │       │   ├── resign_analysis.py
    │   │       │   ├── retrain.sh
    │   │       │   ├── rotate_examples.py
    │   │       │   ├── sharp_positions.py
    │   │       │   ├── swa.py
    │   │       │   ├── symmetry_analysis.py
    │   │       │   ├── training_curve.py
    │   │       │   ├── unwrap_model.py
    │   │       │   ├── validate_misc.py
    │   │       │   └── wrap_model.py
    │   │       ├── player_interface.py
    │   │       ├── preprocessing.py
    │   │       ├── ratings
    │   │       │   ├── cbt_ratings.py
    │   │       │   ├── math_ratings.py
    │   │       │   ├── rate_subdir.py
    │   │       │   ├── ratings.py
    │   │       │   ├── schema.sql
    │   │       │   └── sqlite_ratings.py
    │   │       ├── requirements-analysis.txt
    │   │       ├── requirements-colab.txt
    │   │       ├── requirements.txt
    │   │       ├── rl_loop
    │   │       │   ├── bootstrap.py
    │   │       │   ├── distributed_flags
    │   │       │   ├── distributed_flags_nr
    │   │       │   ├── example_buffer.py
    │   │       │   ├── fsdb.py
    │   │       │   ├── local_flags
    │   │       │   ├── local_integration_test.py
    │   │       │   ├── selfplay.py
    │   │       │   ├── shipname.py
    │   │       │   ├── train_and_validate.py
    │   │       │   └── update_resign_threshold.py
    │   │       ├── selfplay.py
    │   │       ├── sgf_wrapper.py
    │   │       ├── strategies.py
    │   │       ├── symmetries.py
    │   │       ├── test.sh
    │   │       ├── testing
    │   │       │   ├── Dockerfile.v2
    │   │       │   ├── Makefile
    │   │       │   ├── README.md
    │   │       │   ├── bootstrap_v2.sh
    │   │       │   └── setup.sh
    │   │       ├── tests
    │   │       │   ├── __init__.py
    │   │       │   ├── example_game.sgf
    │   │       │   ├── run_tests.py
    │   │       │   ├── test_coords.py
    │   │       │   ├── test_dual_net.py
    │   │       │   ├── test_features.py
    │   │       │   ├── test_flags
    │   │       │   ├── test_go.py
    │   │       │   ├── test_mask_flags.py
    │   │       │   ├── test_mcts.py
    │   │       │   ├── test_preprocessing.py
    │   │       │   ├── test_sgf_wrapper.py
    │   │       │   ├── test_shipname.py
    │   │       │   ├── test_strategies.py
    │   │       │   ├── test_symmetries.py
    │   │       │   └── test_utils.py
    │   │       ├── train.py
    │   │       ├── tsconfig.json
    │   │       ├── utils.py
    │   │       └── validate.py
    │   │   ├── run.sh
    │   │   └── run_and_time.sh
    ├── mixtral8x22b
    │   ├── README.md
    │   ├── clm_datasets.py
    │   ├── config
    │   │   ├── config.yaml
    │   │   ├── dataset
    │   │   │   ├── c4_mlperf.yaml
    │   │   │   └── wikitext.yaml
    │   │   ├── experiment
    │   │   │   ├── convergence_template.yaml
    │   │   │   └── gbs256_tpu.yaml
    │   │   ├── model
    │   │   │   └── blank_model.yaml
    │   │   └── sched
    │   │   │   ├── CosineAnnealing.yaml
    │   │   │   └── WarmupHoldPolicy.yaml
    │   ├── docker
    │   │   ├── gpu
    │   │   │   ├── Dockerfile
    │   │   │   ├── Dockerfile.GCP
    │   │   │   ├── build_and_push_image.sh
    │   │   │   └── megatron_core.patch
    │   │   └── tpu
    │   │   │   ├── Dockerfile
    │   │   │   └── build_and_push_image.sh
    │   ├── download_dataset.py
    │   ├── file_utils.py
    │   ├── helm_context
    │   │   ├── Chart.yaml
    │   │   ├── selected-configuration.yaml
    │   │   ├── templates
    │   │   │   └── nemo-example.yaml
    │   │   └── values.yaml
    │   ├── mixtral80.json
    │   ├── mixtral822-instruct.json
    │   ├── mixtral822.json
    │   ├── mixtral87.json
    │   ├── mlperf_logging_utils.py
    │   ├── model_utils_gpu.py
    │   ├── model_utils_tpu.py
    │   ├── run_clm.py
    │   ├── scripts
    │   │   ├── gpu
    │   │   │   ├── checkpoint_download.py
    │   │   │   ├── dataset_preprocessing.py
    │   │   │   └── run.sub
    │   │   └── tpu
    │   │   │   └── distributed_checkpoint_saving.py
    │   └── trainer_utils_tpu.py
    ├── ncf
    │   ├── .gitignore
    │   ├── Dockerfile
    │   ├── README.md
    │   ├── alias_generator.py
    │   ├── convert.py
    │   ├── ncf.py
    │   ├── negative_sampling_cpp
    │   │   ├── negative_sampling.cpp
    │   │   ├── setup.py
    │   │   └── test.py
    │   ├── neumf.py
    │   ├── requirements.txt
    │   ├── run_and_time.sh
    │   └── utils.py
    ├── never-adopted
    │   ├── sentiment_analysis
    │   │   ├── README.md
    │   │   ├── download.py
    │   │   ├── download_dataset.sh
    │   │   ├── paddle
    │   │   │   ├── run_and_time.sh
    │   │   │   └── train.py
    │   │   ├── verify.py
    │   │   └── verify_dataset.sh
    │   └── speech_recognition
    │   │   ├── README.md
    │   │   ├── __init__.py
    │   │   ├── data
    │   │       ├── .gitignore
    │   │       ├── __init__.py
    │   │       ├── bucketing_sampler.py
    │   │       ├── data-LibriSpeech-ref-cksum.out
    │   │       ├── data_loader.py
    │   │       ├── librispeech.py
    │   │       ├── merge_manifests.py
    │   │       └── utils.py
    │   │   ├── download_dataset.sh
    │   │   ├── labels.json
    │   │   ├── pytorch
    │   │       ├── .gitignore
    │   │       ├── decoder.py
    │   │       ├── docker
    │   │       │   ├── Dockerfile.gpu
    │   │       │   ├── base.gpu
    │   │       │   ├── build-docker.sh
    │   │       │   └── run-dev.sh
    │   │       ├── eval_model.py
    │   │       ├── model.py
    │   │       ├── params.py
    │   │       ├── run_and_time.sh
    │   │       └── train.py
    │   │   └── verify_dataset.sh
    ├── resnet-tf1
    │   ├── Dockerfile
    │   ├── README.md
    │   ├── log_stitch.py
    │   ├── official
    │   │   ├── .gitignore
    │   │   ├── Dockerfile.cpu
    │   │   ├── Dockerfile.gpu
    │   │   ├── README.md
    │   │   ├── __init__.py
    │   │   ├── benchmark
    │   │   │   └── datastore
    │   │   │   │   └── schema
    │   │   │   │       ├── benchmark_metric.json
    │   │   │   │       └── benchmark_run.json
    │   │   ├── requirements.txt
    │   │   ├── resnet
    │   │   │   ├── README.md
    │   │   │   ├── __init__.py
    │   │   │   ├── imagenet_main.py
    │   │   │   ├── imagenet_preprocessing.py
    │   │   │   ├── imagenet_test.py
    │   │   │   ├── layer_test.py
    │   │   │   ├── resnet_model.py
    │   │   │   └── resnet_run_loop.py
    │   │   └── utils
    │   │   │   ├── __init__.py
    │   │   │   ├── arg_parsers
    │   │   │       ├── __init__.py
    │   │   │       ├── parsers.py
    │   │   │       └── parsers_test.py
    │   │   │   ├── export
    │   │   │       ├── __init__.py
    │   │   │       ├── export.py
    │   │   │       └── export_test.py
    │   │   │   ├── logs
    │   │   │       ├── __init__.py
    │   │   │       ├── benchmark_uploader.py
    │   │   │       ├── hooks.py
    │   │   │       ├── hooks_helper.py
    │   │   │       ├── hooks_helper_test.py
    │   │   │       ├── hooks_test.py
    │   │   │       ├── logger.py
    │   │   │       ├── logger_test.py
    │   │   │       ├── metric_hook.py
    │   │   │       └── metric_hook_test.py
    │   │   │   ├── misc
    │   │   │       ├── __init__.py
    │   │   │       ├── model_helpers.py
    │   │   │       └── model_helpers_test.py
    │   │   │   └── testing
    │   │   │       ├── __init__.py
    │   │   │       ├── integration.py
    │   │   │       ├── pylint.rcfile
    │   │   │       ├── reference_data.py
    │   │   │       ├── reference_data
    │   │   │           ├── reference_data_test
    │   │   │           │   ├── dense
    │   │   │           │   │   ├── expected_graph
    │   │   │           │   │   ├── model.ckpt.data-00000-of-00001
    │   │   │           │   │   ├── model.ckpt.index
    │   │   │           │   │   ├── results.json
    │   │   │           │   │   └── tf_version.json
    │   │   │           │   └── uniform_random
    │   │   │           │   │   ├── expected_graph
    │   │   │           │   │   ├── model.ckpt.data-00000-of-00001
    │   │   │           │   │   ├── model.ckpt.index
    │   │   │           │   │   ├── results.json
    │   │   │           │   │   └── tf_version.json
    │   │   │           └── resnet
    │   │   │           │   ├── batch-size-32_bottleneck_projection_version-1_width-8_channels-4
    │   │   │           │       ├── expected_graph
    │   │   │           │       ├── model.ckpt.data-00000-of-00001
    │   │   │           │       ├── model.ckpt.index
    │   │   │           │       ├── results.json
    │   │   │           │       └── tf_version.json
    │   │   │           │   ├── batch-size-32_bottleneck_projection_version-2_width-8_channels-4
    │   │   │           │       ├── expected_graph
    │   │   │           │       ├── model.ckpt.data-00000-of-00001
    │   │   │           │       ├── model.ckpt.index
    │   │   │           │       ├── results.json
    │   │   │           │       └── tf_version.json
    │   │   │           │   ├── batch-size-32_bottleneck_version-1_width-8_channels-4
    │   │   │           │       ├── expected_graph
    │   │   │           │       ├── model.ckpt.data-00000-of-00001
    │   │   │           │       ├── model.ckpt.index
    │   │   │           │       ├── results.json
    │   │   │           │       └── tf_version.json
    │   │   │           │   ├── batch-size-32_bottleneck_version-2_width-8_channels-4
    │   │   │           │       ├── expected_graph
    │   │   │           │       ├── model.ckpt.data-00000-of-00001
    │   │   │           │       ├── model.ckpt.index
    │   │   │           │       ├── results.json
    │   │   │           │       └── tf_version.json
    │   │   │           │   ├── batch-size-32_building_projection_version-1_width-8_channels-4
    │   │   │           │       ├── expected_graph
    │   │   │           │       ├── model.ckpt.data-00000-of-00001
    │   │   │           │       ├── model.ckpt.index
    │   │   │           │       ├── results.json
    │   │   │           │       └── tf_version.json
    │   │   │           │   ├── batch-size-32_building_projection_version-2_width-8_channels-4
    │   │   │           │       ├── expected_graph
    │   │   │           │       ├── model.ckpt.data-00000-of-00001
    │   │   │           │       ├── model.ckpt.index
    │   │   │           │       ├── results.json
    │   │   │           │       └── tf_version.json
    │   │   │           │   ├── batch-size-32_building_version-1_width-8_channels-4
    │   │   │           │       ├── expected_graph
    │   │   │           │       ├── model.ckpt.data-00000-of-00001
    │   │   │           │       ├── model.ckpt.index
    │   │   │           │       ├── results.json
    │   │   │           │       └── tf_version.json
    │   │   │           │   ├── batch-size-32_building_version-2_width-8_channels-4
    │   │   │           │       ├── expected_graph
    │   │   │           │       ├── model.ckpt.data-00000-of-00001
    │   │   │           │       ├── model.ckpt.index
    │   │   │           │       ├── results.json
    │   │   │           │       └── tf_version.json
    │   │   │           │   └── batch_norm
    │   │   │           │       ├── expected_graph
    │   │   │           │       ├── model.ckpt.data-00000-of-00001
    │   │   │           │       ├── model.ckpt.index
    │   │   │           │       ├── results.json
    │   │   │           │       └── tf_version.json
    │   │   │       ├── reference_data_test.py
    │   │   │       └── scripts
    │   │   │           └── presubmit.sh
    │   ├── official_diff.txt
    │   ├── preprocess.sh
    │   ├── requirements.txt
    │   ├── run.sh
    │   └── run_and_time.sh
    ├── resnet-tf2
    │   ├── README.md
    │   ├── README_old.md
    │   ├── download_dataset.sh
    │   ├── tensorflow2
    │   │   ├── common.py
    │   │   ├── imagenet_preprocessing.py
    │   │   ├── lars_optimizer.py
    │   │   ├── lars_util.py
    │   │   ├── resnet_ctl_imagenet_main.py
    │   │   ├── resnet_model.py
    │   │   ├── resnet_runnable.py
    │   │   └── tf2_common
    │   │   │   ├── modeling
    │   │   │       └── performance.py
    │   │   │   ├── training
    │   │   │       ├── controller.py
    │   │   │       ├── optimizer_v2modified.py
    │   │   │       ├── runnable.py
    │   │   │       ├── standard_runnable.py
    │   │   │       └── utils.py
    │   │   │   └── utils
    │   │   │       ├── flags
    │   │   │           ├── __init__.py
    │   │   │           ├── _base.py
    │   │   │           ├── _benchmark.py
    │   │   │           ├── _conventions.py
    │   │   │           ├── _device.py
    │   │   │           ├── _distribution.py
    │   │   │           ├── _misc.py
    │   │   │           ├── _performance.py
    │   │   │           └── core.py
    │   │   │       ├── logs
    │   │   │           ├── __init__.py
    │   │   │           ├── cloud_lib.py
    │   │   │           ├── hooks.py
    │   │   │           ├── hooks_helper.py
    │   │   │           ├── logger.py
    │   │   │           └── metric_hook.py
    │   │   │       ├── misc
    │   │   │           ├── distribution_utils.py
    │   │   │           ├── keras_utils.py
    │   │   │           ├── model_helpers.py
    │   │   │           └── tpu_lib.py
    │   │   │       └── mlp_log
    │   │   │           ├── __init__.py
    │   │   │           └── mlp_log.py
    │   └── verify_dataset.sh
    ├── rnnt
    │   └── pytorch
    │   │   ├── .dockerignore
    │   │   ├── Dockerfile
    │   │   ├── LICENSE
    │   │   ├── NOTICE
    │   │   ├── README.md
    │   │   ├── common
    │   │       ├── __init__.py
    │   │       ├── audio.py
    │   │       ├── data
    │   │       │   ├── __init__.py
    │   │       │   ├── dali
    │   │       │   │   ├── __init__.py
    │   │       │   │   ├── data_loader.py
    │   │       │   │   ├── iterator.py
    │   │       │   │   ├── pipeline.py
    │   │       │   │   └── sampler.py
    │   │       │   ├── dataset.py
    │   │       │   ├── features.py
    │   │       │   ├── helpers.py
    │   │       │   └── text.py
    │   │       ├── helpers.py
    │   │       ├── metrics.py
    │   │       ├── optimizers.py
    │   │       ├── rnn.py
    │   │       ├── sampler.py
    │   │       ├── tb_dllogger.py
    │   │       └── text
    │   │       │   ├── LICENSE
    │   │       │   ├── __init__.py
    │   │       │   ├── cleaners.py
    │   │       │   ├── numbers.py
    │   │       │   └── symbols.py
    │   │   ├── configs
    │   │       └── baseline_v3-1023sp.yaml
    │   │   ├── docker-compose.yaml
    │   │   ├── eval_model.py
    │   │   ├── inference.py
    │   │   ├── mlperf
    │   │       ├── __init__.py
    │   │       └── logging.py
    │   │   ├── requirements.txt
    │   │   ├── rnnt
    │   │       ├── config.py
    │   │       ├── decoder.py
    │   │       ├── loss.py
    │   │       └── model.py
    │   │   ├── rnnt_layers.svg
    │   │   ├── run_and_time.sh
    │   │   ├── scripts
    │   │       ├── create_sentencepieces.sh
    │   │       ├── docker
    │   │       │   ├── build.sh
    │   │       │   └── launch.sh
    │   │       ├── download_librispeech.sh
    │   │       ├── inference.sh
    │   │       ├── inference_benchmark.sh
    │   │       ├── preprocess_librispeech.sh
    │   │       ├── train.sh
    │   │       ├── train_bench.sh
    │   │       ├── train_debug.sh
    │   │       └── train_refactor.sh
    │   │   ├── tests
    │   │       ├── Dockerfile
    │   │       ├── requirements.txt
    │   │       └── rnnt
    │   │       │   └── dataset
    │   │       │       └── test_rnnt_wordpiece_tokenizer.py
    │   │   ├── train.py
    │   │   └── utils
    │   │       ├── __init__.py
    │   │       ├── convert_librispeech.py
    │   │       ├── download_librispeech.py
    │   │       ├── download_utils.py
    │   │       ├── inference_librispeech.csv
    │   │       ├── librispeech.csv
    │   │       └── preprocessing_utils.py
    ├── ssd-v1
    │   ├── Dockerfile
    │   ├── download_dataset.sh
    │   ├── download_resnet34_backbone.sh
    │   ├── pth_to_pickle.py
    │   ├── requirements.txt
    │   └── ssd
    │   │   ├── README.md
    │   │   ├── base_model.py
    │   │   ├── bind_launch.py
    │   │   ├── coco.py
    │   │   ├── config_DGX1_32.sh
    │   │   ├── config_DGX1_multinode.sh
    │   │   ├── config_DGX1_singlenode.sh
    │   │   ├── distributed.py
    │   │   ├── eval.py
    │   │   ├── mlperf_logger.py
    │   │   ├── run.sub
    │   │   ├── run_and_time.sh
    │   │   ├── ssd300.py
    │   │   ├── train.py
    │   │   └── utils.py
    ├── transformer
    │   ├── README.md
    │   ├── data_download.py
    │   ├── download_data.sh
    │   ├── tensorflow
    │   │   ├── Dockerfile
    │   │   ├── bert
    │   │   │   ├── README.md
    │   │   │   ├── __init__.py
    │   │   │   ├── create_pretraining_data.py
    │   │   │   ├── extract_features.py
    │   │   │   ├── lamb_optimizer_v1.py
    │   │   │   ├── modeling.py
    │   │   │   ├── modeling_test.py
    │   │   │   ├── optimization.py
    │   │   │   ├── optimization_test.py
    │   │   │   ├── run_classifier.py
    │   │   │   ├── run_pretraining.py
    │   │   │   ├── run_squad.py
    │   │   │   ├── tokenization.py
    │   │   │   └── tokenization_test.py
    │   │   ├── process_data.py
    │   │   ├── requirements.txt
    │   │   ├── run_and_time.sh
    │   │   ├── run_preprocessing.sh
    │   │   ├── run_training.sh
    │   │   ├── transformer
    │   │   │   ├── README.md
    │   │   │   ├── __init__.py
    │   │   │   ├── compute_bleu.py
    │   │   │   ├── compute_bleu_test.py
    │   │   │   ├── data_download.py
    │   │   │   ├── model
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── attention_layer.py
    │   │   │   │   ├── beam_search.py
    │   │   │   │   ├── beam_search_test.py
    │   │   │   │   ├── embedding_layer.py
    │   │   │   │   ├── ffn_layer.py
    │   │   │   │   ├── model_params.py
    │   │   │   │   ├── model_utils.py
    │   │   │   │   ├── model_utils_test.py
    │   │   │   │   └── transformer.py
    │   │   │   ├── transformer_main.py
    │   │   │   ├── translate.py
    │   │   │   ├── utils
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── dataset.py
    │   │   │   │   ├── metrics.py
    │   │   │   │   ├── tokenizer.py
    │   │   │   │   └── tokenizer_test.py
    │   │   │   └── vocab
    │   │   │   │   └── vocab.translate_ende_wmt32k.32768.subwords
    │   │   └── transformer_diff.txt
    │   └── verify_dataset.sh
    └── unet3d
    │   └── pytorch
    │       ├── Dockerfile
    │       ├── LICENCE
    │       ├── README.md
    │       ├── checksum.json
    │       ├── data_loading
    │           ├── data_loader.py
    │           └── pytorch_loader.py
    │       ├── evaluation_cases.txt
    │       ├── main.py
    │       ├── model
    │           ├── layers.py
    │           ├── losses.py
    │           └── unet3d.py
    │       ├── oldREADME.md
    │       ├── preprocess_dataset.py
    │       ├── requirements.txt
    │       ├── run_and_time.sh
    │       └── runtime
    │           ├── arguments.py
    │           ├── callbacks.py
    │           ├── distributed_utils.py
    │           ├── inference.py
    │           ├── logging.py
    │           └── training.py
├── single_stage_detector
    ├── .dockerignore
    ├── Dockerfile
    ├── README.md
    ├── mlcube
    │   ├── README.md
    │   └── mlcube.yaml
    ├── requirements.txt
    ├── scripts
    │   ├── backbone_to_onnx.py
    │   ├── download_backbone.sh
    │   ├── download_coco2017.sh
    │   ├── download_openimages_demo.sh
    │   ├── download_openimages_full.sh
    │   ├── download_openimages_mlperf.sh
    │   ├── fiftyone_openimages.py
    │   ├── pth_to_onnx.py
    │   └── pth_to_pickle.py
    └── ssd
    │   ├── .gitignore
    │   ├── LICENSE
    │   ├── bind.sh
    │   ├── check_logs.sh
    │   ├── coco_eval.py
    │   ├── coco_utils.py
    │   ├── config_DGXA100_001x08x032.sh
    │   ├── config_DGXA100_002x08x016.sh
    │   ├── config_DGXA100_008x08x004_inference_benchmark.sh
    │   ├── config_DGXA100_008x08x008.sh
    │   ├── config_DGXA100_032x08x032.sh
    │   ├── engine.py
    │   ├── model
    │       ├── __init__.py
    │       ├── anchor_utils.py
    │       ├── backbone_utils.py
    │       ├── boxes.py
    │       ├── feature_pyramid_network.py
    │       ├── focal_loss.py
    │       ├── image_list.py
    │       ├── resnet.py
    │       ├── retinanet.py
    │       ├── roi_heads.py
    │       ├── transform.py
    │       └── utils.py
    │   ├── presets.py
    │   ├── run.sub
    │   ├── run_and_time.sh
    │   ├── run_demo.sh
    │   ├── ssd_logger.py
    │   ├── train.py
    │   ├── transforms.py
    │   └── utils.py
└── stable_diffusion
    ├── .dockerignore
    ├── .gitignore
    ├── Dockerfile
    ├── LICENSE
    ├── README.md
    ├── configs
        ├── train_01x08x08.yaml
        ├── train_32x08x02.yaml
        ├── train_32x08x02_raw_images.yaml
        ├── train_32x08x04.yaml
        └── train_32x08x08.yaml
    ├── imgs
        └── overview.png
    ├── ldm
        ├── data
        │   ├── __init__.py
        │   ├── base.py
        │   ├── composable_data_module.py
        │   ├── tsv.py
        │   ├── utils.py
        │   └── webdatasets.py
        ├── lr_scheduler.py
        ├── models
        │   ├── autoencoder.py
        │   ├── clip_encoder.py
        │   └── diffusion
        │   │   ├── __init__.py
        │   │   ├── ddim.py
        │   │   ├── ddpm.py
        │   │   ├── dpm_solver
        │   │       ├── __init__.py
        │   │       ├── dpm_solver.py
        │   │       └── sampler.py
        │   │   ├── plms.py
        │   │   └── sampling_util.py
        ├── modules
        │   ├── attention.py
        │   ├── diffusionmodules
        │   │   ├── __init__.py
        │   │   ├── model.py
        │   │   ├── openaimodel.py
        │   │   ├── upscaling.py
        │   │   └── util.py
        │   ├── distributions
        │   │   ├── __init__.py
        │   │   └── distributions.py
        │   ├── ema.py
        │   ├── encoders
        │   │   ├── __init__.py
        │   │   └── modules.py
        │   └── fid
        │   │   ├── README.md
        │   │   ├── fid_score.py
        │   │   └── inception.py
        └── util.py
    ├── main.py
    ├── mlperf_logging_utils.py
    ├── requirements.txt
    ├── run_and_time.sh
    ├── scripts
        ├── checkpoints
        │   ├── download_clip.sh
        │   ├── download_inception.sh
        │   └── download_sd.sh
        ├── datasets
        │   ├── coco-2014-validation-download.sh
        │   ├── coco-2014-validation-split-resize.sh
        │   ├── coco-split-resize.py
        │   ├── coco2014-validation-download-prompts.sh
        │   ├── coco2014-validation-download-stats.sh
        │   ├── filter-metadata.py
        │   ├── generate-fid-statistics.sh
        │   ├── laion400m-convert-images-to-moments.sh
        │   ├── laion400m-download-dataset.sh
        │   ├── laion400m-download-metadata.sh
        │   ├── laion400m-filter-metadata.sh
        │   ├── laion400m-filtered-download-images.sh
        │   └── laion400m-filtered-download-moments.sh
        ├── docker
        │   ├── build.sh
        │   └── launch.sh
        └── slurm
        │   ├── sbatch.sh
        │   └── srun.sh
    └── webdataset_images2latents.py


/.github/CODEOWNERS:
--------------------------------------------------------------------------------
1 | # These owners will be the default owners for everything in the repo.
2 | # Unless a later match takes precedence, they will be requested for review when someone opens a pull request.
3 | * @mlcommons/wg-training
4 | 
5 | /CODEOWNERS @mlcommons/staff
6 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 | single_stage_detector/mlcube/workspace/*
6 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "recommendation/dlrm"]
2 | 	path = retired_benchmarks/dlrm/dlrm
3 | 	url = https://github.com/facebookresearch/dlrm.git
4 |         branch = mlperf
5 | 


--------------------------------------------------------------------------------
/graph_neural_network/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM pytorch/pytorch:1.13.0-cuda11.6-cudnn8-devel
 2 | 
 3 | WORKDIR /workspace/repository
 4 | 
 5 | RUN pip install torch==1.13.0+cu117 torchvision==0.14.0+cu117 torchaudio==0.13.0 --extra-index-url https://download.pytorch.org/whl/cu117
 6 | RUN pip install scikit-learn==0.24.2
 7 | RUN pip install torch_geometric==2.4.0
 8 | RUN pip install --no-index  torch_scatter==2.1.1 torch_sparse==0.6.17 -f https://data.pyg.org/whl/torch-1.13.0+cu117.html
 9 | RUN pip install graphlearn-torch==0.2.2
10 | 
11 | RUN apt update
12 | RUN apt install -y git
13 | RUN pip install git+https://github.com/mlcommons/logging.git
14 | 
15 | # TF32 instead of FP32 for faster compute
16 | ENV NVIDIA_TF32_OVERRIDE=1
17 | 
18 | COPY ..
19 | WORKDIR /workspace/repository
20 | 


--------------------------------------------------------------------------------
/graph_neural_network/Dockerfile.h100:
--------------------------------------------------------------------------------
 1 | FROM nvcr.io/nvidia/pytorch:22.12-py3
 2 | 
 3 | WORKDIR /workspace/repository
 4 | 
 5 | RUN pip install scikit-learn==0.24.2
 6 | RUN pip install torch_geometric==2.4.0
 7 | RUN pip install torch_scatter==2.1.1 torch_sparse==0.6.17
 8 | RUN pip install graphlearn-torch==0.2.2
 9 | 
10 | RUN apt update
11 | RUN apt install -y git
12 | RUN pip install git+https://github.com/mlcommons/logging.git
13 | 
14 | # TF32 instead of FP32 for faster compute
15 | ENV NVIDIA_TF32_OVERRIDE=1
16 | 
17 | COPY . .
18 | WORKDIR /workspace/repository
19 | 
20 | RUN git clone https://github.com/alibaba/graphlearn-for-pytorch.git
21 | WORKDIR /workspace/repository/graphlearn-for-pytorch
22 | RUN git checkout 910cb55
23 | RUN git submodule update --init
24 | 


--------------------------------------------------------------------------------
/graph_neural_network/utilities.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import time
 3 | 
 4 | def create_ckpt_folder(base_dir, prefix="ckpt"):
 5 |     timestamp = time.strftime("%Y%m%d-%H%M%S")
 6 |     folder_name = f"{prefix}_{timestamp}" if prefix else timestamp
 7 |     full_path = os.path.join(base_dir, folder_name)
 8 |     if not os.path.exists(full_path):
 9 |         os.makedirs(full_path)
10 |     return full_path
11 | 
12 | 


--------------------------------------------------------------------------------
/language_model/tensorflow/bert/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/large_language_model_pretraining/nemo/utils/launch_nemo_convert.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #SBATCH -N 1
 3 | #SBATCH --gpus-per-node 1
 4 | #SBATCH -t 02:00:00
 5 | #SBATCH --mem=0
 6 | 
 7 | set -e
 8 | 
 9 | : "${CONT_IMAGE_URL:?CONT_IMAGE_URL not set}"
10 | : "${SRC_PATH:?SRC_PATH not set}"
11 | : "${DST_PATH:?DST_PATH not set}"
12 | 
13 | working_dir=$(dirname -- ${BASH_SOURCE[0]})
14 | 
15 | if [ ! -d $DST_PATH ]; then
16 |     mkdir -p $DST_PATH
17 | fi
18 | 
19 | container_maps="${SRC_PATH}:/source,${DST_PATH}:/destination,${working_dir}:/workspace/utils"
20 | 
21 | srun --nodes=1 --ntasks-per-node=1 \
22 | --container-image=$CONT_IMAGE_URL --container-mounts $container_maps --no-container-entrypoint \
23 | python3 /workspace/utils/convert.py --source /source --destination /destination
24 | 


--------------------------------------------------------------------------------
/large_language_model_pretraining/nemo/utils/nemo_convert.py:
--------------------------------------------------------------------------------
 1 | if __name__ == "__main__":
 2 |     import argparse
 3 |     from nemo.collections.llm.gpt.model.llama import HFLlamaImporter
 4 |     parser = argparse.ArgumentParser()
 5 |     parser.add_argument("--source", default="/source", type=str)
 6 |     parser.add_argument("--destination", default="/destination", type=str)
 7 |     args = parser.parse_args()
 8 |     
 9 |     importer = HFLlamaImporter(args.source)
10 |     importer.apply(args.destination)
11 | 


--------------------------------------------------------------------------------
/llama2_70b_lora/Dockerfile:
--------------------------------------------------------------------------------
# Base image; override with --build-arg FROM_IMAGE_NAME=<image>.
ARG FROM_IMAGE_NAME=nvcr.io/nvidia/pytorch:24.01-py3
FROM ${FROM_IMAGE_NAME}

WORKDIR /workspace/ft-llm
# COPY instead of ADD: this is a plain copy of the build context, and ADD's
# extra features (URL fetch, archive extraction) are not wanted here.
COPY . /workspace/ft-llm

# --no-cache-dir keeps pip's download cache out of the image layers.
RUN pip install --no-cache-dir -r requirements.txt
# --no-build-isolation makes the flash-attn build use the packages already
# installed in the image instead of a fresh isolated build environment.
RUN pip install --no-cache-dir flash-attn==2.4.1 --no-build-isolation
9 | 


--------------------------------------------------------------------------------
/llama2_70b_lora/configs/default_config.yaml:
--------------------------------------------------------------------------------
 1 | compute_environment: LOCAL_MACHINE
 2 | debug: false
 3 | deepspeed_config:
 4 |   gradient_clipping: 0.3
 5 |   gradient_accumulation_steps: 1
 6 |   offload_optimizer_device: none
 7 |   offload_param_device: none
 8 |   zero3_init_flag: true
 9 |   zero3_save_16bit_model: true
10 |   zero_stage: 3
11 | distributed_type: DEEPSPEED
12 | downcast_bf16: 'no'
13 | machine_rank: 0
14 | main_training_function: main
15 | mixed_precision: bf16
16 | num_machines: 1
17 | num_processes: 8
18 | rdzv_backend: static
19 | same_network: true
20 | tpu_env: []
21 | tpu_use_cluster: false
22 | tpu_use_sudo: false
23 | use_cpu: false
24 | 


--------------------------------------------------------------------------------
/llama2_70b_lora/requirements.txt:
--------------------------------------------------------------------------------
1 | git+https://github.com/mlcommons/logging.git
2 | transformers==4.38.1
3 | accelerate==0.27.2
4 | peft==0.8.2
5 | datasets==2.17.1
6 | deepspeed==0.13.2


--------------------------------------------------------------------------------
/llama2_70b_lora/run_docker.sh:
--------------------------------------------------------------------------------
# Pull the PyTorch container image and start an interactive GPU container with
# a host folder mounted at /root/workspace.
# Replace path_to_my_folder with the host directory to mount.
image=nvcr.io/nvidia/pytorch:23.09-py3

docker pull "$image"
docker run \
    -v path_to_my_folder:/root/workspace \
    --workdir /root/workspace \
    --gpus all -it --rm --ipc=host \
    --ulimit memlock=-1 --ulimit stack=67108864 \
    "$image"
3 | 


--------------------------------------------------------------------------------
/llama2_70b_lora/run_llama_70B_scrolls_r16.sh:
--------------------------------------------------------------------------------
 1 | accelerate launch --config_file configs/default_config.yaml scripts/train.py \
 2 | --dataset_path "./dataset" \
 3 | --model_path "./models/llama-v2-fused-qkv" \
 4 | --max_seq_len 8192 \
 5 | --bf16 True \
 6 | --logging_steps 24 \
 7 | --eval_steps 48 \
 8 | --output_dir "./results/llama-70b_scrolls_gov_report_r16_$1" \
 9 | --per_device_train_batch_size 1 \
10 | --gradient_accumulation_steps 1 \
11 | --lr_scheduler_type "cosine" \
12 | --learning_rate 4e-4 \
13 | --weight_decay 0.0001 \
14 | --warmup_ratio 0 \
15 | --max_grad_norm 0.3 \
16 | --use_gradient_checkpointing True \
17 | --target_eval_loss 0.925 \
18 | --use_peft_lora True \
19 | --lora_r 16 \
20 | --lora_alpha 32 \
21 | --lora_dropout 0.1 \
22 | --max_steps 1024 \
23 | --use_flash_attn \
24 | --seed "$1" \
25 | --lora_target_modules "qkv_proj,o_proj"
26 | 


--------------------------------------------------------------------------------
/recommendation_v2/torchrec_dlrm/Dockerfile:
--------------------------------------------------------------------------------
 1 | ARG FROM_IMAGE_NAME=pytorch/pytorch:1.13.1-cuda11.6-cudnn8-runtime
 2 | FROM ${FROM_IMAGE_NAME}
 3 | 
 4 | RUN apt-get -y update && \
 5 |     apt-get -y install git
 6 | 
 7 | WORKDIR /workspace/torchrec_dlrm
 8 | COPY . .
 9 | 
10 | RUN pip install --no-cache-dir -r requirements.txt
11 | 


--------------------------------------------------------------------------------
/recommendation_v2/torchrec_dlrm/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/training/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/recommendation_v2/torchrec_dlrm/__init__.py


--------------------------------------------------------------------------------
/recommendation_v2/torchrec_dlrm/data/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/training/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/recommendation_v2/torchrec_dlrm/data/__init__.py


--------------------------------------------------------------------------------
/recommendation_v2/torchrec_dlrm/mlperf_logging_utils.py:
--------------------------------------------------------------------------------
 1 | from mlperf_logging.mllog import constants
 2 | from mlperf_logging.mllog.mllog import MLLogger
 3 | 
 4 | 
 5 | def submission_info(mllogger: MLLogger, benchmark_name: str, submitter_name: str):
 6 |     """Logs required for a valid MLPerf submission."""
 7 |     mllogger.event(
 8 |         key=constants.SUBMISSION_BENCHMARK,
 9 |         value=benchmark_name,
10 |     )
11 |     mllogger.event(
12 |         key=constants.SUBMISSION_ORG,
13 |         value=submitter_name,
14 |     )
15 |     mllogger.event(
16 |         key=constants.SUBMISSION_DIVISION,
17 |         value=constants.CLOSED,
18 |     )
19 |     mllogger.event(
20 |         key=constants.SUBMISSION_STATUS,
21 |         value=constants.ONPREM,
22 |     )
23 |     mllogger.event(
24 |         key=constants.SUBMISSION_PLATFORM,
25 |         value=submitter_name,
26 |     )
27 | 


--------------------------------------------------------------------------------
/recommendation_v2/torchrec_dlrm/requirements.txt:
--------------------------------------------------------------------------------
1 | fbgemm-gpu==0.3.2
2 | git+https://github.com/mlperf/logging.git
3 | torchmetrics==0.11.0
4 | torchrec==0.3.2
5 | 


--------------------------------------------------------------------------------
/reference_results.md:
--------------------------------------------------------------------------------
 1 | The following table shows reference results, to be used to normalize benchmark results.
 2 | **These results are NOT for optimized code and do NOT measure framework or hardware performance.**
 3 | Individual seed results are provided only to show variance and should not be used for normalization.
 4 | 
 5 | Benchmark|Reference Result|Seed 1|Seed 2|Seed 3|Seed 4|Seed 5
 6 | ---|---|---|---|---|---|---
 7 | Image classification|529877|530571|529438|530373|523480|529877
 8 | Object detection|299971|298906|299971|300471|298915|327101
 9 | Translation|112187|111790|112187|111760|112879|149175
10 | Speech recognition|412417|405780|532790|450667|412417|344806
11 | Recommendation|2803|2823|2802|2803|2912|2541
12 | Sentiment analysis|214|287|207|210|214|324
13 | Reinforcement learning|263322|279252|277831|223113|260951|263322
14 | 


--------------------------------------------------------------------------------
/retired_benchmarks/dlrm/download_dataset.sh:
--------------------------------------------------------------------------------
 1 | function download_20m {
 2 | 	echo "Download ml-20m"
 3 | 	curl -O http://files.grouplens.org/datasets/movielens/ml-20m.zip
 4 | }
 5 | 
 6 | function download_1m {
 7 | 	echo "Downloading ml-1m"
 8 | 	curl -O http://files.grouplens.org/datasets/movielens/ml-1m.zip
 9 | }
10 | 
11 | if [[ $1 == "ml-1m" ]]
12 | then
13 | 	download_1m
14 | else
15 | 	download_20m
16 | fi
17 | 


--------------------------------------------------------------------------------
/retired_benchmarks/dlrm/verify_dataset.sh:
--------------------------------------------------------------------------------
 1 | function get_checker {
 2 |     if [[ "$OSTYPE" == "darwin"* ]]; then
 3 |         checkmd5=md5
 4 |     else
 5 |         checkmd5=md5sum
 6 |     fi
 7 | 
 8 |     echo $checkmd5
 9 | }
10 | 
11 | 
12 | function verify_1m {
13 |     # From: curl -O http://files.grouplens.org/datasets/movielens/ml-1m.zip.md5
14 |     hash=<(echo "MD5 (ml-1m.zip) = c4d9eecfca2ab87c1945afe126590906")
15 |     local checkmd5=$(get_checker)
16 |     if diff <($checkmd5 ml-1m.zip) $hash &> /dev/null
17 |     then
18 |         echo "PASSED"
19 |     else
20 |         echo "FAILED"
21 |     fi
22 | }
23 | 
24 | function verify_20m {
25 |     # From: curl -O http://files.grouplens.org/datasets/movielens/ml-20m.zip.md5
26 |     hash=<(echo "MD5 (ml-20m.zip) = cd245b17a1ae2cc31bb14903e1204af3")
27 |     local checkmd5=$(get_checker)
28 | 
29 |     if diff <($checkmd5 ml-20m.zip) $hash &> /dev/null
30 |     then
31 |         echo "PASSED"
32 |     else
33 |         echo "FAILED"
34 |     fi
35 | 
36 | }
37 | 
38 | 
39 | if [[ $1 == "ml-1m" ]]
40 | then
41 |     verify_1m
42 | else
43 |     verify_20m
44 | fi
45 | 


--------------------------------------------------------------------------------
/retired_benchmarks/gnmt/.dockerignore:
--------------------------------------------------------------------------------
1 | __pycache__/
2 | tags
3 | /data
4 | /results
5 | *.log
6 | 


--------------------------------------------------------------------------------
/retired_benchmarks/gnmt/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__/
2 | tags
3 | /data
4 | /results
5 | *.log
6 | 


--------------------------------------------------------------------------------
/retired_benchmarks/gnmt/pytorch/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM pytorch/pytorch:1.0.1-cuda10.0-cudnn7-runtime
 2 | 
 3 | ENV LANG C.UTF-8
 4 | ENV LC_ALL C.UTF-8
 5 | 
 6 | ADD . /workspace/pytorch
 7 | WORKDIR /workspace/pytorch
 8 | 
 9 | RUN pip install -r requirements.txt
10 | 


--------------------------------------------------------------------------------
/retired_benchmarks/gnmt/pytorch/requirements.txt:
--------------------------------------------------------------------------------
sacrebleu==1.2.10
# https instead of git://: GitHub no longer serves the unauthenticated git protocol.
git+https://github.com/NVIDIA/apex.git@9041a868a1a253172d94b113a963375b9badd030#egg=apex
mlperf-compliance==0.0.10
4 | 


--------------------------------------------------------------------------------
/retired_benchmarks/gnmt/pytorch/run.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -e
 4 | 
 5 | DATASET_DIR='/data'
 6 | 
 7 | SEED=${1:-"1"}
 8 | TARGET=${2:-"24.00"}
 9 | 
10 | # run training
11 | python3 train.py \
12 |   --dataset-dir ${DATASET_DIR} \
13 |   --seed $SEED \
14 |   --target-bleu $TARGET
15 | 


--------------------------------------------------------------------------------
/retired_benchmarks/gnmt/pytorch/run_and_time.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # runs benchmark and reports time to convergence
 4 | # to use the script:
 5 | #   run_and_time.sh
 6 | 
 7 | set -e
 8 | 
 9 | # start timing
10 | start=$(date +%s)
11 | start_fmt=$(date +%Y-%m-%d\ %r)
12 | echo "STARTING TIMING RUN AT $start_fmt"
13 | 
14 | # run benchmark
15 | seed=${1:-"1"}
16 | target=24.00
17 | 
18 | echo "running benchmark"
19 | ./run.sh $seed $target
20 | 
21 | sleep 3
22 | ret_code=$?; if [[ $ret_code != 0 ]]; then exit $ret_code; fi
23 | 
24 | # end timing
25 | end=$(date +%s)
26 | end_fmt=$(date +%Y-%m-%d\ %r)
27 | echo "ENDING TIMING RUN AT $end_fmt"
28 | 
29 | # report result
30 | result=$(( $end - $start ))
31 | result_name="RNN_TRANSLATOR"
32 | 
33 | echo "RESULT,$result_name,$seed,$result,$USER,$start_fmt"
34 | 


--------------------------------------------------------------------------------
/retired_benchmarks/gnmt/pytorch/scripts/docker/build.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | docker build . --rm -t gnmt:latest
4 | 


--------------------------------------------------------------------------------
/retired_benchmarks/gnmt/pytorch/scripts/docker/interactive.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | nvidia-docker run -it --rm --ipc=host -v $PWD:/workspace/gnmt/ gnmt bash
4 | 


--------------------------------------------------------------------------------
/retired_benchmarks/gpt3/megatron-lm/.gitignore:
--------------------------------------------------------------------------------
1 | megatron/__pycache__/
2 | megatron/data/__pycache__/
3 | megatron/model/__pycache__/
4 | megatron/mpu/__pycache__/
5 | megatron/optimizer/__pycache__/
6 | megatron/tokenizer/__pycache__/
7 | megatron/fused_kernels/__pycache__/
8 | megatron/fused_kernels/build/
9 | 


--------------------------------------------------------------------------------
/retired_benchmarks/gpt3/megatron-lm/Dockerfile:
--------------------------------------------------------------------------------
# Base image; override with --build-arg FROM_IMAGE_NAME=<image>.
ARG FROM_IMAGE_NAME=nvcr.io/nvidia/pytorch:24.04-py3
FROM ${FROM_IMAGE_NAME}

# Copy code
WORKDIR /workspace/llm
COPY . .
# --no-cache-dir keeps pip's download cache out of the image layers.
RUN pip install --no-cache-dir -r requirements.txt
# key=value form; the space-separated "ENV key value" form is legacy syntax.
ENV PYTHONPATH="/workspace/llm:${PYTHONPATH}"
9 | 


--------------------------------------------------------------------------------
/retired_benchmarks/gpt3/megatron-lm/checksums/additional_checkpoint_files/common.pt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/training/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/retired_benchmarks/gpt3/megatron-lm/checksums/additional_checkpoint_files/common.pt


--------------------------------------------------------------------------------
/retired_benchmarks/gpt3/megatron-lm/checksums/additional_checkpoint_files/metadata.json:
--------------------------------------------------------------------------------
1 | {"sharded_backend": "zarr"}
2 | 


--------------------------------------------------------------------------------
/retired_benchmarks/gpt3/megatron-lm/checksums/dataset_checksum.log:
--------------------------------------------------------------------------------
1 | 317a1c1b0b17fbd658e3e0b09d118ce9  c4_en_6_c4_spm_text_document.bin
2 | 5c8cfe37a26f919fb3998e84d1d07d8e  c4_en_6_c4_spm_text_document.idx
3 | 5a84af04d55765993ecb5461af56b718  c4_en_7_c4_spm_text_document.bin
4 | 35b23332069840094e1a75332cdeab62  c4_en_7_c4_spm_text_document.idx
5 | 20d868f6cb865ce616ce7b9cf8312be0  c4_en_validation_subset_c4_spm_text_document.bin
6 | f76050809d0b42611eeef31d67d04224  c4_en_validation_subset_c4_spm_text_document.idx


--------------------------------------------------------------------------------
/retired_benchmarks/gpt3/megatron-lm/megatron/core/dist_checkpointing/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
2 | 
3 | from .core import check_is_distributed_checkpoint
4 | from .mapping import ShardedTensor, LocalNonpersitentObject
5 | from .serialization import load, save, load_common_state_dict


--------------------------------------------------------------------------------
/retired_benchmarks/gpt3/megatron-lm/megatron/core/dist_checkpointing/strategies/__init__.py:
--------------------------------------------------------------------------------
1 | """ Various loading and saving strategies """
2 | 
3 | try:
4 |     import zarr
5 |     import tensorstore
6 |     from .zarr import _import_trigger
7 | except ImportError:
8 |     print('Zarr strategies will not be registered because of missing packages')
9 | 


--------------------------------------------------------------------------------
/retired_benchmarks/gpt3/megatron-lm/megatron/core/dist_checkpointing/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/training/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/retired_benchmarks/gpt3/megatron-lm/megatron/core/dist_checkpointing/tests/__init__.py


--------------------------------------------------------------------------------
/retired_benchmarks/gpt3/megatron-lm/megatron/data/Makefile:
--------------------------------------------------------------------------------
 1 | CXXFLAGS += -O3 -Wall -shared -std=c++11 -fPIC -fdiagnostics-color
 2 | CPPFLAGS += $(shell python3 -m pybind11 --includes)
 3 | LIBNAME = helpers
 4 | LIBEXT = $(shell python3-config --extension-suffix)
 5 | 
 6 | default: $(LIBNAME)$(LIBEXT)
 7 | 
 8 | %$(LIBEXT): %.cpp
 9 | 	$(CXX) $(CXXFLAGS) $(CPPFLAGS) $< -o $@
10 | 


--------------------------------------------------------------------------------
/retired_benchmarks/gpt3/megatron-lm/megatron/data/__init__.py:
--------------------------------------------------------------------------------
1 | from . import indexed_dataset
2 | 


--------------------------------------------------------------------------------
/retired_benchmarks/gpt3/megatron-lm/megatron/data/test/test_preprocess_data.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | IMPL=cached
 4 | python ../preprocess_data.py \
 5 |        --input test_samples.json \
 6 |        --vocab vocab.txt \
 7 |        --dataset-impl ${IMPL} \
 8 |        --output-prefix test_samples_${IMPL} \
 9 |        --workers 1 \
10 |        --log-interval 2
11 | 


--------------------------------------------------------------------------------
/retired_benchmarks/gpt3/megatron-lm/megatron/fused_kernels/compat.h:
--------------------------------------------------------------------------------
 1 | /* coding=utf-8
 2 |  * Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | /*This code is copied from NVIDIA apex:
18 |  *     https://github.com/NVIDIA/apex
19 |  *     with minor changes. */
20 | 
21 | 
22 | 
23 | #ifndef TORCH_CHECK
24 | #define TORCH_CHECK AT_CHECK
25 | #endif
26 | 
27 | #ifdef VERSION_GE_1_3
28 | #define DATA_PTR data_ptr
29 | #else
30 | #define DATA_PTR data
31 | #endif
32 | 


--------------------------------------------------------------------------------
/retired_benchmarks/gpt3/megatron-lm/megatron/fused_kernels/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/training/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/retired_benchmarks/gpt3/megatron-lm/megatron/fused_kernels/tests/__init__.py


--------------------------------------------------------------------------------
/retired_benchmarks/gpt3/megatron-lm/megatron/model/__init__.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | #from .fused_layer_norm import MixedFusedLayerNorm as LayerNorm
17 | from .fused_layer_norm import MixedFusedLayerNorm1P as LayerNorm
18 | 
19 | from .distributed import DistributedDataParallel
20 | from .gpt_model import GPTModel
21 | from .language_model import get_language_model
22 | from .module import Float16Module
23 | from .enums import ModelType
24 | 


--------------------------------------------------------------------------------
/retired_benchmarks/gpt3/megatron-lm/megatron/model/enums.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | import enum
17 | 
class ModelType(enum.Enum):
    """Model layout selector with two members."""

    encoder_or_decoder = 1   # a single stack: encoder or decoder
    encoder_and_decoder = 2  # both an encoder and a decoder
21 | 
class LayerType(enum.Enum):
    """Kind of layer: encoder or decoder."""

    encoder = 1
    decoder = 2
25 |  
class AttnType(enum.Enum):
    """Kind of attention: self-attention or cross-attention."""

    self_attn = 1
    cross_attn = 2
29 | 
class AttnMaskType(enum.Enum):
    """Kind of attention mask: padding or causal."""

    padding = 1
    causal = 2
33 | 


--------------------------------------------------------------------------------
/retired_benchmarks/gpt3/megatron-lm/megatron/mpu/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/training/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/retired_benchmarks/gpt3/megatron-lm/megatron/mpu/tests/__init__.py


--------------------------------------------------------------------------------
/retired_benchmarks/gpt3/megatron-lm/megatron/text_generation/__init__.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | 
17 | from .api import (
18 |     generate,
19 |     generate_and_post_process,
20 |     beam_search_and_post_process)
21 | 


--------------------------------------------------------------------------------
/retired_benchmarks/gpt3/megatron-lm/megatron/tokenizer/__init__.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | 
17 | from .tokenizer import build_tokenizer
18 | 


--------------------------------------------------------------------------------
/retired_benchmarks/gpt3/megatron-lm/requirements.txt:
--------------------------------------------------------------------------------
1 | git+https://github.com/mlcommons/logging.git@2.1.0-rc1
2 | git+https://github.com/NVIDIA/mlperf-common.git
3 | zarr==2.13
4 | tensorstore==0.1.27
5 | 


--------------------------------------------------------------------------------
/retired_benchmarks/gpt3/megatron-lm/scripts/common_bf16.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "optimizer": {
 3 |     "optimizer": {
 4 |       "param_groups": [
 5 |         {
 6 |           "wd_mult": 1.0,
 7 |           "lr_mult": 1.0,
 8 |           "lr": 5.984178321979609e-05,
 9 |           "bias_correction": true,
10 |           "betas": [
11 |             0.9,
12 |             0.95
13 |           ],
14 |           "eps": 1e-08,
15 |           "weight_decay": 0.1,
16 |           "step": 4000
17 |         }
18 |       ]
19 |     }
20 |   }
21 | }


--------------------------------------------------------------------------------
/retired_benchmarks/gpt3/megatron-lm/scripts/common_fp32.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "optimizer": {
 3 |     "param_groups": [
 4 |       {
 5 |         "wd_mult": 1.0,
 6 |         "lr_mult": 1.0,
 7 |         "lr": 5.984178321979609e-05,
 8 |         "bias_correction": true,
 9 |         "betas": [
10 |           0.9,
11 |           0.95
12 |         ],
13 |         "eps": 1e-08,
14 |         "weight_decay": 0.1,
15 |         "step": 4000
16 |       }
17 |     ]
18 |   }
19 | }


--------------------------------------------------------------------------------
/retired_benchmarks/gpt3/megatron-lm/scripts/load_checkpoint.md:
--------------------------------------------------------------------------------
 1 | # Load checkpoint
 2 | 
 3 | This is an example script to load the checkpoint using PyTorch for the LLM benchmark.
 4 | 
 5 | ## Requirement
 6 | 
 7 | - Megatron
 8 | - PyTorch
 9 | 
10 | ## Usage
11 | 
12 | Assuming that the checkpoint has been downloaded to `/data`, the following command 
13 | will load the state_dict for all model parallel units.
14 | 
15 | ```
16 | python3 scripts/load_checkpoint.py \
17 |     --input_path /data/iter_0000300 \
18 |     --tensor-model-parallel-size 8 \
19 |     --pipeline-model-parallel-size 8
20 | ```
21 | 
22 | Each pickle file is ~37GB and the data is loaded into a list of state_dicts for each model parallel unit.
23 | 
24 | The script has been tested using Python 3.8.12 and PyTorch 1.11.0
25 | 


--------------------------------------------------------------------------------
/retired_benchmarks/gpt3/megatron-lm/scripts/preprocess.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #SBATCH -N 1
 3 | #SBATCH --exclusive
 4 | #SBATCH --dependency=singleton
 5 | #SBATCH --mem=0
 6 | #SBATCH --array=6-7%8
 7 | #SBATCH --requeue
 8 | 
 9 | C4_PATH=$1
10 | 
11 | srun --container-image nvcr.io/nvidia/pytorch:21.12-py3 \
12 |  --container-mounts ${C4_PATH}:${C4_PATH} \
13 |  bash -c \
14 |  " git clone https://github.com/NVIDIA/NeMo.git; \
15 |    cd NeMo && git checkout f3ad584b94170bc3ea197df29eb9ef9c96061730 && bash ./reinstall.sh; \
16 |    python /workspace/NeMo/scripts/nlp_language_modeling/preprocess_data_for_megatron.py \
17 |     --input ${C4_PATH}/en_merge/c4-train.en_\${SLURM_ARRAY_TASK_ID}.json.gz \
18 |     --tokenizer-library sentencepiece \
19 |     --tokenizer-model ${C4_PATH}/tokenizers/c4_spm/sentencepiece.model \
20 |     --output-prefix ${C4_PATH}/preprocessed_c4_spm/c4_en_\${SLURM_ARRAY_TASK_ID}_c4_spm_text_document \
21 |     --dataset-impl mmap \
22 |     --workers 128 "
23 | 


--------------------------------------------------------------------------------
/retired_benchmarks/gpt3/megatron-lm/scripts/preprocess_val.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #SBATCH -N 1
 3 | #SBATCH --exclusive
 4 | #SBATCH --dependency=singleton
 5 | #SBATCH --mem=0
 6 | #SBATCH --requeue
 7 | 
 8 | C4_PATH=$1
 9 | VALIDATION_JSON_PATH=$2
10 | 
11 | srun --container-image nvcr.io/nvidia/pytorch:21.12-py3 \
12 |  --container-mounts ${C4_PATH}:${C4_PATH} \
13 |  bash -c \
14 |  " git clone https://github.com/NVIDIA/NeMo.git; \
15 |    cd NeMo && git checkout f3ad584b94170bc3ea197df29eb9ef9c96061730 && bash ./reinstall.sh; \
16 |    python /workspace/NeMo/scripts/nlp_language_modeling/preprocess_data_for_megatron.py \
17 |     --input ${VALIDATION_JSON_PATH} \
18 |     --tokenizer-library sentencepiece \
19 |     --tokenizer-model ${C4_PATH}/tokenizers/c4_spm/sentencepiece.model \
20 |     --output-prefix ${C4_PATH}/preprocessed_c4_spm/c4_en_validation_subset_c4_spm_text_document \
21 |     --dataset-impl mmap \
22 |     --workers 128 "
23 | 


--------------------------------------------------------------------------------
/retired_benchmarks/gpt3/paxml/utils/load_ts_ckpt.py:
--------------------------------------------------------------------------------
 1 | # Lint as: python3
 2 | """Script to load layer(s) of the LLM checkpoint using TensorStore.
 3 | For more details about TensorStore, please visit
 4 | https://github.com/google/tensorstore .
 5 | """
 6 | 
 7 | import argparse
 8 | import tensorstore as ts
 9 | 
def build_zarr_spec(input_path):
  """Return the TensorStore spec dict for a zarr array stored under input_path.

  Args:
    input_path: Directory of the saved checkpoint layer; it becomes the path
      of the 'file' key-value store.

  Returns:
    A dict with the 'zarr' driver and a 'file' kvstore pointing at input_path.
  """
  return {
      'driver': 'zarr',
      'kvstore': {
          'driver': 'file',
          'path': input_path,
      },
  }


if __name__ == '__main__':
  # Arguments are parsed only when run as a script, so importing this module
  # does not consume sys.argv.
  parser = argparse.ArgumentParser(description='Checkpoint loading for LLM.')
  parser.add_argument(
      '--input_path',
      type=str,
      default='',
      help='Input directory for layer(s) of the saved checkpoint.')
  args = parser.parse_args()

  input_path = args.input_path
  # Open the existing array and read it fully into memory.
  t = ts.open(ts.Spec(build_zarr_spec(input_path)), open=True).result()
  t_v = t.read().result()

  print("path = ", input_path,
        ", type = ", type(t_v),
        ", shape = ", t_v.shape)
31 | 


--------------------------------------------------------------------------------
/retired_benchmarks/maskrcnn/download_dataset.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Get COCO 2017 data sets
 4 | mkdir -p pytorch/datasets/coco
 5 | pushd pytorch/datasets/coco
 6 | 
 7 | curl -O https://dl.fbaipublicfiles.com/detectron/coco/coco_annotations_minival.tgz
 8 | tar xzf coco_annotations_minival.tgz
 9 | 
10 | curl -O http://images.cocodataset.org/zips/train2017.zip
11 | unzip train2017.zip
12 | 
13 | curl -O http://images.cocodataset.org/zips/val2017.zip
14 | unzip val2017.zip
15 | 
16 | curl -O http://images.cocodataset.org/annotations/annotations_trainval2017.zip
17 | unzip annotations_trainval2017.zip
18 | 
19 | # TBD: MD5 verification
20 | # $md5sum *.zip *.tgz
21 | #f4bbac642086de4f52a3fdda2de5fa2c  annotations_trainval2017.zip
22 | #cced6f7f71b7629ddf16f17bbcfab6b2  train2017.zip
23 | #442b8da7639aecaf257c1dceb8ba8c80  val2017.zip
24 | #2d2b9d2283adb5e3b8d25eec88e65064  coco_annotations_minival.tgz
25 | 
26 | popd
27 | 


--------------------------------------------------------------------------------
/retired_benchmarks/maskrcnn/pytorch/.flake8:
--------------------------------------------------------------------------------
1 | # This is an example .flake8 config, used when developing *Black* itself.
2 | # Keep in sync with setup.cfg which is used for source packages.
3 | 
4 | [flake8]
5 | ignore = E203, E266, E501, W503
6 | max-line-length = 80
7 | max-complexity = 18
8 | select = B,C,E,F,W,T4,B9
9 | 


--------------------------------------------------------------------------------
/retired_benchmarks/maskrcnn/pytorch/.github/ISSUE_TEMPLATE/feature-request.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: "\U0001F680Feature Request"
 3 | about: Submit a proposal/request for a new Mask R-CNN Benchmark feature
 4 | 
 5 | ---
 6 | 
 7 | ## 🚀 Feature
 8 | <!-- A clear and concise description of the feature proposal -->
 9 | 
10 | ## Motivation
11 | 
12 | <!-- Please outline the motivation for the proposal. Is your feature request related to a problem? e.g., I'm always frustrated when [...]. If this is related to another GitHub issue, please link here too -->
13 | 
14 | ## Pitch
15 | 
16 | <!-- A clear and concise description of what you want to happen. -->
17 | 
18 | ## Alternatives
19 | 
20 | <!-- A clear and concise description of any alternative solutions or features you've considered, if any. -->
21 | 
22 | ## Additional context
23 | 
24 | <!-- Add any other context or screenshots about the feature request here. -->
25 | 


--------------------------------------------------------------------------------
/retired_benchmarks/maskrcnn/pytorch/.github/ISSUE_TEMPLATE/questions-help-support.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: "❓Questions/Help/Support"
3 | about: Do you need support?
4 | 
5 | ---
6 | 
7 | ## ❓ Questions and Help
8 | 


--------------------------------------------------------------------------------
/retired_benchmarks/maskrcnn/pytorch/.gitignore:
--------------------------------------------------------------------------------
 1 | # compilation and distribution
 2 | __pycache__
 3 | _ext
 4 | *.pyc
 5 | *.so
 6 | maskrcnn_benchmark.egg-info/
 7 | build/
 8 | dist/
 9 | 
10 | # pytorch/python/numpy formats
11 | *.pth
12 | *.pkl
13 | *.npy
14 | 
15 | # ipython/jupyter notebooks
16 | *.ipynb
17 | **/.ipynb_checkpoints/
18 | 
19 | # Editor temporaries
20 | *.swn
21 | *.swo
22 | *.swp
23 | *~
24 | 
25 | # Pycharm editor settings
26 | .idea
27 | 
28 | # project dirs
29 | /datasets
30 | /models
31 | 


--------------------------------------------------------------------------------
/retired_benchmarks/maskrcnn/pytorch/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Code of Conduct
2 | 
3 | Facebook has adopted a Code of Conduct that we expect project participants to adhere to.
4 | Please read the [full text](https://code.fb.com/codeofconduct/)
5 | so that you can understand what actions will and will not be tolerated.
6 | 


--------------------------------------------------------------------------------
/retired_benchmarks/maskrcnn/pytorch/demo/demo_e2e_mask_rcnn_R_50_FPN_1x.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/training/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/retired_benchmarks/maskrcnn/pytorch/demo/demo_e2e_mask_rcnn_R_50_FPN_1x.png


--------------------------------------------------------------------------------
/retired_benchmarks/maskrcnn/pytorch/demo/demo_e2e_mask_rcnn_X_101_32x8d_FPN_1x.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/training/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/retired_benchmarks/maskrcnn/pytorch/demo/demo_e2e_mask_rcnn_X_101_32x8d_FPN_1x.png


--------------------------------------------------------------------------------
/retired_benchmarks/maskrcnn/pytorch/maskrcnn_benchmark/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
15 | 


--------------------------------------------------------------------------------
/retired_benchmarks/maskrcnn/pytorch/maskrcnn_benchmark/config/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
15 | from .defaults import _C as cfg
16 | 


--------------------------------------------------------------------------------
/retired_benchmarks/maskrcnn/pytorch/maskrcnn_benchmark/csrc/cpu/vision.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | #pragma once
 3 | #include <torch/extension.h>
 4 | 
 5 | 
// CPU ROIAlign forward pass. Declaration only: the definition is not part of
// this header.
// NOTE(review): the argument meanings are not visible here; from the names,
// `rois` presumably holds the regions to pool, `spatial_scale` maps ROI
// coordinates onto `input`, and `pooled_height` / `pooled_width` give the
// output grid size. Confirm against the implementation.
at::Tensor ROIAlign_forward_cpu(const at::Tensor& input,
                                const at::Tensor& rois,
                                const float spatial_scale,
                                const int pooled_height,
                                const int pooled_width,
                                const int sampling_ratio);


// CPU non-maximum suppression. Declaration only: the definition is not part
// of this header.
// NOTE(review): presumably returns the indices of the boxes in `dets` that are
// kept, with `threshold` being an overlap threshold. Confirm against the
// implementation.
at::Tensor nms_cpu(const at::Tensor& dets,
                   const at::Tensor& scores,
                   const float threshold);
17 | 


--------------------------------------------------------------------------------
/retired_benchmarks/maskrcnn/pytorch/maskrcnn_benchmark/csrc/nms.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | #pragma once
 3 | #include "cpu/vision.h"
 4 | 
 5 | #ifdef WITH_CUDA
 6 | #include "cuda/vision.h"
 7 | #endif
 8 | 
 9 | 
10 | at::Tensor nms(const at::Tensor& dets,
11 |                const at::Tensor& scores,
12 |                const float threshold) {
13 | 
14 |   if (dets.type().is_cuda()) {
15 | #ifdef WITH_CUDA
16 |     // TODO raise error if not compiled with CUDA
17 |     if (dets.numel() == 0)
18 |       return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU));
19 |     auto b = at::cat({dets, scores.unsqueeze(1)}, 1);
20 |     return nms_cuda(b, threshold);
21 | #else
22 |     AT_ERROR("Not compiled with GPU support");
23 | #endif
24 |   }
25 | 
26 |   at::Tensor result = nms_cpu(dets, scores, threshold);
27 |   return result;
28 | }
29 | 


--------------------------------------------------------------------------------
/retired_benchmarks/maskrcnn/pytorch/maskrcnn_benchmark/csrc/vision.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | #include "nms.h"
 3 | #include "ROIAlign.h"
 4 | #include "ROIPool.h"
 5 | #include "SigmoidFocalLoss.h"
 6 | 
// Python bindings for the extension module. TORCH_EXTENSION_NAME is supplied
// by the build, so the module name is not fixed in this file. Each m.def()
// exposes one C++ function (declared in the headers included above) under the
// given Python name, with the last argument used as its docstring.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  // Non-maximum suppression (see nms.h).
  m.def("nms", &nms, "non-maximum suppression");
  // ROIAlign forward / backward.
  m.def("roi_align_forward", &ROIAlign_forward, "ROIAlign_forward");
  m.def("roi_align_backward", &ROIAlign_backward, "ROIAlign_backward");
  // ROIPool forward / backward.
  m.def("roi_pool_forward", &ROIPool_forward, "ROIPool_forward");
  m.def("roi_pool_backward", &ROIPool_backward, "ROIPool_backward");
  // Sigmoid focal loss forward / backward.
  m.def("sigmoid_focalloss_forward", &SigmoidFocalLoss_forward, "SigmoidFocalLoss_forward");
  m.def("sigmoid_focalloss_backward", &SigmoidFocalLoss_backward, "SigmoidFocalLoss_backward");
}
16 | 


--------------------------------------------------------------------------------
/retired_benchmarks/maskrcnn/pytorch/maskrcnn_benchmark/data/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
15 | from .build import make_data_loader
16 | 


--------------------------------------------------------------------------------
/retired_benchmarks/maskrcnn/pytorch/maskrcnn_benchmark/data/datasets/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
15 | from .coco import COCODataset
16 | from .voc import PascalVOCDataset
17 | from .concat_dataset import ConcatDataset
18 | 
19 | __all__ = ["COCODataset", "ConcatDataset", "PascalVOCDataset"]
20 | 


--------------------------------------------------------------------------------
/retired_benchmarks/maskrcnn/pytorch/maskrcnn_benchmark/data/samplers/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
15 | from .distributed import DistributedSampler
16 | from .grouped_batch_sampler import GroupedBatchSampler
17 | from .iteration_based_batch_sampler import IterationBasedBatchSampler
18 | 
19 | __all__ = ["DistributedSampler", "GroupedBatchSampler", "IterationBasedBatchSampler"]
20 | 


--------------------------------------------------------------------------------
/retired_benchmarks/maskrcnn/pytorch/maskrcnn_benchmark/data/transforms/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
15 | from .transforms import Compose
16 | from .transforms import Resize
17 | from .transforms import RandomHorizontalFlip
18 | from .transforms import ToTensor
19 | from .transforms import Normalize
20 | 
21 | from .build import build_transforms
22 | 


--------------------------------------------------------------------------------
/retired_benchmarks/maskrcnn/pytorch/maskrcnn_benchmark/engine/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
15 | 


--------------------------------------------------------------------------------
/retired_benchmarks/maskrcnn/pytorch/maskrcnn_benchmark/layers/nms.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
15 | # from ._utils import _C
16 | from maskrcnn_benchmark import _C
17 | 
18 | nms = _C.nms
19 | # nms.__doc__ = """
20 | # This function performs Non-maximum suppression"""
21 | 


--------------------------------------------------------------------------------
/retired_benchmarks/maskrcnn/pytorch/maskrcnn_benchmark/modeling/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/retired_benchmarks/maskrcnn/pytorch/maskrcnn_benchmark/modeling/backbone/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
15 | from .backbone import build_backbone
16 | 


--------------------------------------------------------------------------------
/retired_benchmarks/maskrcnn/pytorch/maskrcnn_benchmark/modeling/detector/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
15 | from .detectors import build_detection_model
16 | 


--------------------------------------------------------------------------------
/retired_benchmarks/maskrcnn/pytorch/maskrcnn_benchmark/modeling/detector/detectors.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
15 | from .generalized_rcnn import GeneralizedRCNN
16 | 
17 | 
18 | _DETECTION_META_ARCHITECTURES = {"GeneralizedRCNN": GeneralizedRCNN}
19 | 
20 | 
def build_detection_model(cfg):
    """Instantiate the detection model selected by the configuration.

    The name stored in ``cfg.MODEL.META_ARCHITECTURE`` is looked up in
    ``_DETECTION_META_ARCHITECTURES`` and the class found there is called
    with ``cfg`` as its only argument.

    Args:
        cfg: configuration object exposing ``MODEL.META_ARCHITECTURE``.

    Returns:
        The object produced by calling the registered architecture with ``cfg``.

    Raises:
        KeyError: if the configured name is not in the registry.
    """
    arch_name = cfg.MODEL.META_ARCHITECTURE
    model_cls = _DETECTION_META_ARCHITECTURES[arch_name]
    return model_cls(cfg)
24 | 


--------------------------------------------------------------------------------
/retired_benchmarks/maskrcnn/pytorch/maskrcnn_benchmark/modeling/registry.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
15 | 
16 | from maskrcnn_benchmark.utils.registry import Registry
17 | 
18 | BACKBONES = Registry()
19 | ROI_BOX_FEATURE_EXTRACTORS = Registry()
20 | ROI_BOX_PREDICTOR = Registry()
21 | RPN_HEADS = Registry()
22 | 


--------------------------------------------------------------------------------
/retired_benchmarks/maskrcnn/pytorch/maskrcnn_benchmark/modeling/roi_heads/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/retired_benchmarks/maskrcnn/pytorch/maskrcnn_benchmark/modeling/roi_heads/box_head/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/retired_benchmarks/maskrcnn/pytorch/maskrcnn_benchmark/modeling/roi_heads/keypoint_head/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/retired_benchmarks/maskrcnn/pytorch/maskrcnn_benchmark/modeling/roi_heads/mask_head/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/retired_benchmarks/maskrcnn/pytorch/maskrcnn_benchmark/modeling/rpn/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
15 | # from .rpn import build_rpn
16 | 


--------------------------------------------------------------------------------
/retired_benchmarks/maskrcnn/pytorch/maskrcnn_benchmark/modeling/rpn/retinanet/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/retired_benchmarks/maskrcnn/pytorch/maskrcnn_benchmark/solver/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
15 | from .build import make_optimizer
16 | from .build import make_lr_scheduler
17 | from .lr_scheduler import WarmupMultiStepLR
18 | 


--------------------------------------------------------------------------------
/retired_benchmarks/maskrcnn/pytorch/maskrcnn_benchmark/structures/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/retired_benchmarks/maskrcnn/pytorch/maskrcnn_benchmark/utils/README.md:
--------------------------------------------------------------------------------
1 | # Utility functions
2 | 
3 | This folder contains utility functions that are not used in the
4 | core library, but are useful for building models or training
5 | code using the config system.
6 | 


--------------------------------------------------------------------------------
/retired_benchmarks/maskrcnn/pytorch/maskrcnn_benchmark/utils/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/retired_benchmarks/maskrcnn/pytorch/maskrcnn_benchmark/utils/miscellaneous.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
15 | import errno
16 | import os
17 | 
18 | 
def mkdir(path):
    """Create directory ``path`` (and missing parents) if it does not exist.

    Calling this for a directory that already exists is a no-op, which makes
    it safe when several processes try to create the same directory.

    Args:
        path: directory to create.

    Raises:
        OSError: if creation fails for any reason other than the directory
            already existing. This includes the case where ``path`` exists
            but is not a directory (e.g. a regular file).
    """
    try:
        os.makedirs(path)
    except OSError as e:
        # EEXIST is only harmless when the existing entry really is a
        # directory; a file at that path would otherwise be silently taken
        # for a usable directory by the caller.
        if e.errno != errno.EEXIST or not os.path.isdir(path):
            raise
25 | 


--------------------------------------------------------------------------------
/retired_benchmarks/maskrcnn/run_and_time.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Runs benchmark and reports time to convergence.
 4 | # Exits with the training command's status so callers can detect a failed run.
 5 | 
 6 | # Abort rather than launch training from the wrong directory.
 7 | pushd pytorch || exit 1
 8 | 
 9 | # Single GPU training
10 | time python tools/train_mlperf.py --config-file "configs/e2e_mask_rcnn_R_50_FPN_1x.yaml" \
11 |        SOLVER.IMS_PER_BATCH 2 TEST.IMS_PER_BATCH 1 SOLVER.MAX_ITER 720000 SOLVER.STEPS "(480000, 640000)" SOLVER.BASE_LR 0.0025
12 | # Save the status now; the popd below would otherwise overwrite $?.
13 | ret_code=$?
14 | 
15 | popd
16 | 
17 | exit $ret_code
18 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04
 2 | WORKDIR /research
 3 | # Keep `apt-get update` in the same RUN as each `apt-get install`: a
 4 | # standalone update layer can be served from the build cache, leaving later
 5 | # installs to resolve against a stale package index.
 6 | RUN apt-get update && apt-get install -y --no-install-recommends \
 7 |     ca-certificates \
 8 |     build-essential \
 9 |     git \
10 |     python \
11 |     python-pip
12 | ENV HOME /research
13 | ENV PYENV_ROOT $HOME/.pyenv
14 | ENV PATH $PYENV_ROOT/shims:$PYENV_ROOT/bin:$PATH
15 | # python-pip is already installed by the step above, so it is not repeated.
16 | RUN apt-get update && apt-get install -y \
17 |     python-setuptools \
18 |     python3-pip \
19 |     virtualenv \
20 |     htop
21 | RUN pip3 install virtualenv
22 | RUN pip3 install virtualenvwrapper
23 | RUN pip3 install --upgrade numpy scipy sklearn tf-nightly-gpu
24 | #RUN pip3 install --upgrade numpy scipy sklearn tf-nightly-gpu
25 | # Copy the build context (the benchmark sources) into the image
26 | ADD . /research/reinforcement
27 | WORKDIR /research/reinforcement
28 | # RUN /bin/bash env_setup.sh
29 | 
30 | RUN pip3 install --upgrade pip
31 | RUN pip3 install --upgrade setuptools
32 | RUN pip3 install -r minigo/requirements.txt
33 | #RUN pip3 install "tensorflow-gpu>=1.5,<1.6"
34 | 
35 | ENTRYPOINT ["/bin/bash"]
36 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/.bazelrc:
--------------------------------------------------------------------------------
 1 | build --define=tf=1
 2 | test -c dbg
 3 | 
 4 | # Some of the Bazel rules used to precompile TensorFlow don't respect Bazel's
 5 | # "manual" tag. The following hack prevents Bazel from compiling TensorFlow
 6 | # from source when executing a command such as: bazel test cc/...
 7 | test //cc/... -- -//cc/tensorflow/...
 8 | 
 9 | # These .bazelrc files are generated by the cc/configure_tensorflow.sh script.
10 | try-import %workspace%/tf_configure.bazelrc
11 | try-import %workspace%/tensorflow.bazelrc
12 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/.gitignore:
--------------------------------------------------------------------------------
 1 | lib
 2 | lib64
 3 | bin
 4 | data
 5 | *__pycache__
 6 | pip-selfcheck.json
 7 | *.pyc
 8 | sgf
 9 | pyvenv.cfg
10 | # macOS Finder metadata. The file is spelled ".DS_Store"; ignore patterns are
11 | # case-sensitive unless core.ignorecase is enabled.
12 | .DS_Store
13 | logs/
14 | 
15 | # Vim temp files
16 | *.swp
17 | *.swo
18 | *~
19 | 
20 | .mypy_cache
21 | 
22 | # Ignore any staging directory. We use this directory for docker-file creation.
23 | staging/
24 | 
25 | bazel-*
26 | cc/tensorflow/
27 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/training/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/retired_benchmarks/minigo/tensorflow/minigo/__init__.py


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/cc/.clang-format:
--------------------------------------------------------------------------------
1 | BasedOnStyle: Google
2 | 
3 | Cpp11BracedListStyle: true
4 | DerivePointerAlignment: false
5 | PointerAlignment: Left
6 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/cc/CPPLINT.cfg:
--------------------------------------------------------------------------------
1 | # Stop cpplint complaining about including <thread>
2 | filter=-build/c++11
3 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/cc/benchmark.BUILD:
--------------------------------------------------------------------------------
 1 | # Bazel target that compiles every src/*.cc file (minus the excluded regex
 2 | # sources) into one public cc_library named "benchmark".
 3 | cc_library(
 4 |     name = "benchmark",
 5 |     srcs = glob(
 6 |         ["src/*.cc"],
 7 |         # NOTE(review): presumably excluded because -DHAVE_STD_REGEX below
 8 |         # selects a different regex backend -- confirm against the library.
 9 |         exclude = [
10 |             "src/re_posix.cc",
11 |             "src/gnuregex.cc",
12 |         ],
13 |     ),
14 |     hdrs = glob(
15 |         [
16 |             "src/*.h",
17 |             "include/benchmark/*.h",
18 |         ],
19 |         exclude = [
20 |             "src/re_posix.h",
21 |             "src/gnuregex.h",
22 |         ],
23 |     ),
24 |     copts = [
25 |         "-DHAVE_STD_REGEX",
26 |     ],
27 |     # Lets dependents include headers relative to the "include" directory.
28 |     includes = [
29 |         "include",
30 |     ],
31 |     linkopts = ["-pthread"],
32 |     visibility = ["//visibility:public"],
33 | )
34 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/cc/dual_net/test_lite.minigo:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/training/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/retired_benchmarks/minigo/tensorflow/minigo/cc/dual_net/test_lite.minigo


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/cc/dual_net/test_model.pb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/training/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/retired_benchmarks/minigo/tensorflow/minigo/cc/dual_net/test_model.pb


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/cc/dual_net/test_model.tflite:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/training/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/retired_benchmarks/minigo/tensorflow/minigo/cc/dual_net/test_model.tflite


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/cc/dual_net/test_model.uff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/training/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/retired_benchmarks/minigo/tensorflow/minigo/cc/dual_net/test_model.uff


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/cc/dual_net/test_tf.minigo:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/training/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/retired_benchmarks/minigo/tensorflow/minigo/cc/dual_net/test_tf.minigo


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/cc/dual_net/trt_dual_net.h:
--------------------------------------------------------------------------------
 1 | #ifndef CC_DUAL_NET_TRT_DUAL_NET_H_
 2 | #define CC_DUAL_NET_TRT_DUAL_NET_H_
 3 | 
 4 | // NOTE(review): std::unique_ptr and std::string are used below without
 5 | // including <memory>/<string> here; they are assumed to arrive through
 6 | // dual_net.h.
 7 | #include "cc/dual_net/dual_net.h"
 8 | 
 9 | namespace minigo {
10 | 
11 | // DualNetFactory implementation that overrides GetBufferCount and NewDualNet.
12 | // NOTE(review): the "Trt" prefix suggests a TensorRT backend -- confirm in
13 | // the corresponding .cc file.
14 | class TrtDualNetFactory : public DualNetFactory {
15 |  public:
16 |   TrtDualNetFactory();
17 | 
18 |   int GetBufferCount() const override;
19 | 
20 |   // Returns a newly created DualNet for `model`; ownership passes to the
21 |   // caller through the unique_ptr.
22 |   std::unique_ptr<DualNet> NewDualNet(const std::string& model) override;
23 | 
24 |  private:
25 |   // NOTE(review): presumably the number of devices found at construction --
26 |   // confirm in the .cc file.
27 |   int device_count_;
28 | };
29 | 
30 | }  // namespace minigo
31 | 
32 | #endif  // CC_DUAL_NET_TRT_DUAL_NET_H_
33 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/cc/group.cc:
--------------------------------------------------------------------------------
 1 | // Copyright 2018 Google LLC
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //      http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #include "cc/group.h"
16 | 
17 | namespace minigo {
18 | 
19 | // Out-of-class definition of the static constexpr member (presumably declared
20 | // in cc/group.h). Before C++17 this definition is required whenever the
21 | // constant is ODR-used (e.g. bound to a const reference); from C++17 on the
22 | // member is implicitly inline and this line is redundant but harmless.
23 | constexpr int Group::kMaxNumGroups;
24 | 
25 | }  // namespace minigo
26 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/cc/init.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2018 Google LLC
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //      http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #ifndef CC_INIT_H_
16 | #define CC_INIT_H_
17 | 
18 | namespace minigo {
19 | 
20 | // Initializes base libraries like gflags and symbolizer.
21 | // Call at the very top of main.
22 | void Init(int* pargc, char*** pargv);
23 | 
24 | }  // namespace minigo
25 | 
26 | #endif  // CC_INIT_H_
27 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/cc/json.BUILD:
--------------------------------------------------------------------------------
1 | # Header-only target exposing single_include/nlohmann/json.hpp.
2 | cc_library(
3 |     name = "json",
4 |     hdrs = ["single_include/nlohmann/json.hpp"],
5 |     includes = ["single_include"],
6 |     visibility = ["//visibility:public"],
7 | )
8 | 
9 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/cc/model/types.cc:
--------------------------------------------------------------------------------
 1 | // Copyright 2019 Google LLC
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //      http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #include "cc/model/types.h"
16 | 
17 | namespace minigo {
18 | 
19 | // Streams `shape` as a bracketed, comma-separated list of its elements;
20 | // an empty shape is written as "[" immediately followed by "]".
21 | std::ostream& operator<<(std::ostream& os, const TensorShape& shape) {
22 |   os << "[";
23 |   if (shape.empty()) {
24 |     os << "]";
25 |     return os;
26 |   }
27 |   // The first element carries no separator; every later one is prefixed.
28 |   os << shape[0];
29 |   int dim = 1;
30 |   while (dim < shape.size()) {
31 |     os << ", " << shape[dim];
32 |     ++dim;
33 |   }
34 |   os << "]";
35 |   return os;
36 | }
37 | 
38 | }  // namespace minigo
39 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/cc/move.cc:
--------------------------------------------------------------------------------
 1 | // Copyright 2018 Google LLC
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //      http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #include "cc/move.h"
16 | 
17 | #include "absl/strings/str_cat.h"
18 | 
19 | namespace minigo {
20 | 
21 | // Builds the string ColorToCode(color) + "[" + c.ToSgf() + "]".
22 | std::string Move::ToSgf() const {
23 |   const auto color_code = ColorToCode(color);
24 |   const auto coord_text = c.ToSgf();
25 |   return absl::StrCat(color_code, "[", coord_text, "]");
26 | }
27 | 
28 | }  // namespace minigo
29 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/cc/platform/BUILD:
--------------------------------------------------------------------------------
 1 | # Targets in this package are visible only to packages under //cc.
 2 | package(default_visibility = [
 3 |     "//cc:__subpackages__",
 4 | ])
 5 | 
 6 | licenses(["notice"])  # Apache License 2.0
 7 | 
 8 | # Library exposing utils.h, with the implementation file chosen per OS.
 9 | cc_library(
10 |     name = "platform",
11 |     # Exactly one source is compiled: the macOS or Windows variant when the
12 |     # matching condition holds, otherwise the Linux one.
13 |     srcs = select({
14 |         "@bazel_tools//src/conditions:darwin": ["utils_osx.cc"],
15 |         "@bazel_tools//src/conditions:windows": ["utils_windows.cc"],
16 |         "//conditions:default": ["utils_linux.cc"],
17 |     }),
18 |     hdrs = [
19 |         "utils.h",
20 |     ],
21 |     deps = [
22 |         "//cc:logging",
23 |     ],
24 | )
25 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/cc/wtf.BUILD:
--------------------------------------------------------------------------------
 1 | # Public cc_library built from the C++ binding sources under bindings/cpp.
 2 | cc_library(
 3 |     name = "wtf",
 4 |     srcs = glob(["bindings/cpp/include/wtf/platform/*.h"]) + [
 5 |         "bindings/cpp/buffer.cc",
 6 |         "bindings/cpp/event.cc",
 7 |         "bindings/cpp/platform.cc",
 8 |         "bindings/cpp/runtime.cc",
 9 |     ],
10 |     hdrs = [
11 |         "bindings/cpp/include/wtf/argtypes.h",
12 |         "bindings/cpp/include/wtf/buffer.h",
13 |         "bindings/cpp/include/wtf/config.h",
14 |         "bindings/cpp/include/wtf/event.h",
15 |         "bindings/cpp/include/wtf/macros.h",
16 |         "bindings/cpp/include/wtf/platform.h",
17 |         "bindings/cpp/include/wtf/runtime.h",
18 |     ],
19 |     copts = ["-O3"],
20 |     includes = ["bindings/cpp/include/"],
21 |     visibility = ["//visibility:public"],
22 | )
23 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/cluster/.gitignore:
--------------------------------------------------------------------------------
1 | *.json
2 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/cluster/base/Makefile:
--------------------------------------------------------------------------------
 1 | # Builds and pushes the gcr.io/$PROJECT/cc-base image.
 2 | #
 3 | # See existing images with
 4 | #     gcloud container images list-tags gcr.io/$PROJECT/cc-base
 5 | #
 6 | # Usage (the recipes below read both PROJECT and VERSION_TAG):
 7 | #     PROJECT=(gcp project id) VERSION_TAG=0.XY make target
 8 | 
 9 | # Copies the files the image build needs into staging/, builds the image,
10 | # then deletes staging/ again.
11 | base-image:
12 | 		mkdir -p staging/cc/tensorflow
13 | 		cp ../../WORKSPACE staging/
14 | 		cp ../../.bazelrc staging/
15 | 		cp ../../cc/configure_tensorflow.sh staging/cc/
16 | 		cp ../../cc/tensorflow/BUILD staging/cc/tensorflow/
17 | 		cp ../../cc/tensorflow/copy_outputs.sh staging/cc/tensorflow/
18 | 		cp ../../requirements.txt staging/
19 | 		docker build --quiet -t "gcr.io/${PROJECT}/cc-base:${VERSION_TAG}" .
20 | 		rm -rfd staging/
21 | 
22 | # Rebuilds the image (prerequisite) and pushes the resulting tag.
23 | base-push: base-image
24 | 		gcloud docker --verbosity=error -- push "gcr.io/${PROJECT}/cc-base:${VERSION_TAG}"
25 | 
26 | .PHONY: base-image base-push
27 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/cluster/calibrator/Dockerfile:
--------------------------------------------------------------------------------
 1 | # PROJECT is declared before FROM so it can be used in the base-image
 2 | # reference; pass it with --build-arg PROJECT=...
 3 | ARG PROJECT
 4 | FROM gcr.io/$PROJECT/cc-base:latest
 5 | 
 6 | RUN pip3 install tensorflow==1.15.0
 7 | WORKDIR /app
 8 | 
 9 | ENV BOARD_SIZE="19"
10 | 
11 | # The first COPY places everything from staging/ under /app (including
12 | # /app/rl_loop); the second additionally copies the contents of rl_loop
13 | # directly into /app.
14 | COPY staging /app
15 | COPY staging/rl_loop/ /app
16 | 
17 | # Run through `sh -c` so $BUCKET_NAME is expanded from the container's
18 | # environment at start-up.
19 | # NOTE(review): packages are installed with pip3 but the script is started
20 | # with `python` -- confirm that `python` is Python 3 in the base image.
21 | CMD ["sh", "-c", "python rl_loop/update_resign_threshold.py --bucket_name=$BUCKET_NAME --flagfile=rl_loop/distributed_flags"]
22 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/cluster/calibrator/Makefile:
--------------------------------------------------------------------------------
 1 | # Builds and pushes the gcr.io/$(PROJECT)/minigo-calibrator image.
 2 | #
 3 | # See existing images with
 4 | #     gcloud container images list
 5 | #
 6 | # Usage:
 7 | #     PROJECT=(gcp project id) VERSION_TAG=0.XY make target
 8 | 
 9 | # Multi-line variable holding the commands that populate staging/; it is
10 | # expanded as recipe lines in the `image` target. The `define NAME =` form
11 | # needs GNU make 3.82 or newer.
12 | define staging =
13 | 	mkdir -p staging
14 | 	cp -r -p ../../rl_loop/ staging/
15 | 	cp ../../*.py staging/
16 | 	cp ../../requirements.txt staging/
17 | endef
18 | 
19 | 
20 | # Stages the sources, then builds the image; staging/ is left in place
21 | # (remove it with `make clean`).
22 | image:
23 | 	$(staging)
24 | 	docker build --quiet --build-arg PROJECT=$(PROJECT) -f Dockerfile -t "gcr.io/$(PROJECT)/minigo-calibrator:$(VERSION_TAG)" .
25 | 
26 | push: image
27 | 	gcloud docker -- push "gcr.io/$(PROJECT)/minigo-calibrator:$(VERSION_TAG)"
28 | 
29 | clean:
30 | 	rm -rfd staging
31 | 
32 | .PHONY: image push clean
33 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/cluster/calibrator/calibrator-deployment.yaml:
--------------------------------------------------------------------------------
 1 | # Single-replica Deployment for the minigo-calibrator container.
 2 | # $BUCKET_NAME and $SERVICE_ACCOUNT are placeholders that must be substituted
 3 | # before the manifest is applied (presumably with envsubst -- confirm against
 4 | # the deploy script).
 5 | apiVersion: apps/v1
 6 | kind: Deployment
 7 | metadata:
 8 |   name: minigo-calibrator
 9 | spec:
10 |   replicas: 1
11 |   selector:
12 |     matchLabels:
13 |       app: minigo-calibrator
14 |   template:
15 |     metadata:
16 |       labels:
17 |         app: minigo-calibrator
18 |     spec:
19 |       containers:
20 |       - name: minigo-calibrator-container
21 |         # NOTE(review): project and tag are hard-coded here rather than
22 |         # templated like the other values.
23 |         image: gcr.io/tensor-go/minigo-calibrator:v17
24 |         imagePullPolicy: Always
25 |         volumeMounts:
26 |         - name: service-credentials
27 |           mountPath: /etc/credentials
28 |         env:
29 |         - name: GCS_READ_CACHE_MAX_SIZE_MB
30 |           value: "0"
31 |         # Points at the key file provided by the service-credentials volume.
32 |         - name: GOOGLE_APPLICATION_CREDENTIALS
33 |           value: /etc/credentials/service-account.json
34 |         - name: BUCKET_NAME
35 |           value: $BUCKET_NAME
36 |       volumes:
37 |       - name: service-credentials
38 |         secret:
39 |           secretName: $SERVICE_ACCOUNT-creds
40 |       restartPolicy: Always
41 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/cluster/cgos/Dockerfile:
--------------------------------------------------------------------------------
 1 | # Image that runs the cgosGtp client against a model from gs://minigo-pub.
 2 | #
 3 | # Build args: PROJECT (project hosting the cc-base image), MODEL, MODEL_NUM.
 4 | # PROJECT has to be declared before FROM to be usable in the FROM line;
 5 | # without the declaration ${PROJECT} expands to an empty string.
 6 | ARG PROJECT
 7 | FROM gcr.io/${PROJECT}/cc-base:v14
 8 | 
 9 | RUN apt-get update && apt-get install gettext -y
10 | 
11 | COPY cgosGtp-linux-x86_64 /app
12 | 
13 | ARG MODEL
14 | ARG MODEL_NUM
15 | 
16 | ENV MODEL=$MODEL
17 | ENV MODEL_NUM=$MODEL_NUM
18 | 
19 | WORKDIR /app
20 | 
21 | RUN gsutil cp gs://minigo-pub/v7-19x19/models/$MODEL /app
22 | COPY config.txt /app
23 | 
24 | # config.txt should set up the username and password for cgos;
25 | # it's not checked in for obvious reasons.
26 | # Render through a temporary file: redirecting envsubst's output onto its own
27 | # input truncates config.txt before it is read and leaves it empty.
28 | RUN envsubst < config.txt > config.txt.tmp && mv config.txt.tmp config.txt
29 | 
30 | # NOTE(review): this prints the rendered config, including the cgos
31 | # credentials, into the build log -- consider removing.
32 | RUN cat config.txt
33 | CMD ["./cgosGtp-linux-x86_64", "-c", "config.txt"]
34 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/cluster/cgos/Makefile:
--------------------------------------------------------------------------------
 1 | # See existing images with
 2 | #     gcloud container images list-tags gcr.io/$PROJECT/cgos-player-$MODEL_NUM
 3 | #
 4 | # Usage:
 5 | # Set environment variables as follows:
 6 | #   - PROJECT: the GCP project id; used in the image name and passed to the
 7 | #     Dockerfile, whose FROM line references gcr.io/${PROJECT}/cc-base.
 8 | #   - MODEL: the basename of the .pb of the model to play (e.g. 000303-olympus.pb)
 9 | #   - MODEL_NUM: the number of the model, e.g. '303'. This is used in the image name, CGOS username, and pod name.
10 | #   - VERSION_TAG: as in the other cluster/ dockerfiles.
11 | #
12 | #     PROJECT=my-project VERSION_TAG=0.XY MODEL=000123-foo.pb MODEL_NUM=123 make target
13 | 
14 | cgos-image:
15 | 	docker build --build-arg PROJECT=${PROJECT} --build-arg MODEL=${MODEL} --build-arg MODEL_NUM=${MODEL_NUM} -t "gcr.io/${PROJECT}/cgos-player-${MODEL_NUM}:${VERSION_TAG}" .
16 | 
17 | cgos-push: cgos-image
18 | 	gcloud docker -- push "gcr.io/${PROJECT}/cgos-player-${MODEL_NUM}:${VERSION_TAG}"
19 | 
20 | .PHONY: cgos-image cgos-push
21 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/cluster/cgos/cgos-player.yaml:
--------------------------------------------------------------------------------
 1 | # Pod running one CGOS player. $MODEL_NUM, $PROJECT, $VERSION_TAG and
 2 | # $SERVICE_ACCOUNT are placeholders to substitute before applying.
 3 | apiVersion: v1
 4 | kind: Pod
 5 | metadata:
 6 |   name: minigo-cgos-player-$MODEL_NUM
 7 | spec:
 8 |   containers:
 9 |   - name: cgos-player-$MODEL_NUM
10 |     # Must match the tag built by the Makefile in this directory, which
11 |     # names the image cgos-player-${MODEL_NUM}.
12 |     image: gcr.io/$PROJECT/cgos-player-$MODEL_NUM:$VERSION_TAG
13 |     imagePullPolicy: Always
14 |     resources:
15 |       limits:
16 |         nvidia.com/gpu: 1
17 |       requests:
18 |         nvidia.com/gpu: 1
19 |     volumeMounts:
20 |     - name: service-credentials
21 |       mountPath: /etc/credentials
22 |   volumes:
23 |   - name: service-credentials
24 |     secret:
25 |       secretName: $SERVICE_ACCOUNT-creds
26 |   restartPolicy: OnFailure
27 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/cluster/cluster-down.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Copyright 2018 Google LLC
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #      http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | # Deletes the GKE cluster named by $CLUSTER_NAME (asynchronously).
17 | # CLUSTER_NAME, PROJECT and ZONE are expected to come from common.sh.
18 | SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
19 | 
20 | # Quoted so the script also works from a checkout path containing spaces.
21 | source "${SCRIPT_DIR}/common.sh"
22 | source "${SCRIPT_DIR}/utils.sh"
23 | 
24 | check_gcloud_exists
25 | 
26 | gcloud container clusters delete "$CLUSTER_NAME" --project="$PROJECT" --zone="$ZONE" --async
27 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/cluster/create_table.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Copyright 2018 Google LLC
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #      http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | # Creates the game, eval-game and model tables by calling the helper
17 | # functions loaded below; stops at the first failing command.
18 | set -e
19 | 
20 | SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
21 | 
22 | # Quoted so the script also works from a checkout path containing spaces.
23 | source "${SCRIPT_DIR}/common.sh"
24 | source "${SCRIPT_DIR}/utils.sh"
25 | 
26 | create_cbt_game_table
27 | create_cbt_eval_game_table
28 | create_cbt_model_table
29 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/cluster/destroy.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Copyright 2018 Google LLC
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #      http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | # Tears the cluster down by delegating to cluster-down.sh.
17 | #
18 | # Runs under bash rather than sh: `source` is not a POSIX sh builtin, so the
19 | # script failed on systems where /bin/sh is not bash. Paths are resolved
20 | # relative to this file so it can be invoked from any working directory.
21 | SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
22 | 
23 | source "${SCRIPT_DIR}/common.sh"
24 | 
25 | "${SCRIPT_DIR}/cluster-down.sh"
26 | 
27 | # Left here for documentation, if you really wish to delete more things:
28 | #
29 | # gcloud iam service-accounts delete $SERVICE_ACCOUNT_EMAIL
30 | 
31 | # gsutil -m rm -r gs://$BUCKET_NAME
32 | 
25 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/cluster/evaluator/Dockerfile-cc:
--------------------------------------------------------------------------------
 1 | # Image that builds the cc/eval target and runs evaluator_cc_wrapper.sh.
 2 | # NOTE(review): `project` is only referenced by the commented-out FROM line.
 3 | ARG project
 4 | #FROM gcr.io/$project/cc-base:v17-testing
 5 | FROM base-build-manual2
 6 | 
 7 | RUN export CLOUD_SDK_REPO="cloud-sdk-$(lsb_release -c -s)" && \
 8 |     echo "deb http://packages.cloud.google.com/apt $CLOUD_SDK_REPO main" | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list && \
 9 |     curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add - && \
10 |     apt-get update -y && apt-get install google-cloud-sdk -y
11 | 
12 | # Refresh the index in the same layer as the install so a cached earlier
13 | # layer cannot leave this step with a stale package list.
14 | RUN apt-get update -y && apt-get install python3 python3-pip -y
15 | RUN pip3 install absl-py
16 | 
17 | COPY staging/ /app
18 | WORKDIR /app
19 | RUN bazel build -c opt --define=tf=1 --define=bt=1 cc/eval
20 | 
21 | COPY evaluator_cc_wrapper.sh /app
22 | 
23 | CMD ["/bin/bash", "evaluator_cc_wrapper.sh"]
24 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/cluster/evaluator/Dockerfile-ringmaster:
--------------------------------------------------------------------------------
 1 | # Image that runs evaluator_ringmaster_wrapper.py from /app.
 2 | # NOTE(review): `project` is declared but not referenced below.
 3 | ARG project
 4 | FROM base-build-manual2
 5 | 
 6 | RUN export CLOUD_SDK_REPO="cloud-sdk-$(lsb_release -c -s)" && \
 7 |     echo "deb http://packages.cloud.google.com/apt $CLOUD_SDK_REPO main" | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list && \
 8 |     curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add - && \
 9 |     apt-get update -y && apt-get install google-cloud-sdk -y
10 | 
11 | # Refresh the index in the same layer as the install so a cached earlier
12 | # layer cannot leave this step with a stale package list.
13 | RUN apt-get update -y && apt-get install python3 python3-pip -y
14 | # TODO(AMJ): Get this to compile, determine base & pip requirements
15 | RUN pip3 install absl-py
16 | 
17 | COPY staging/ /app
18 | WORKDIR /app
19 | 
20 | COPY evaluator_ringmaster_wrapper.py /app
21 | 
22 | # long series of args here.
23 | CMD ["python3", "evaluator_ringmaster_wrapper.py"]
24 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/cluster/evaluator/deploy-cc-evaluator.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Copyright 2018 Google LLC
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #      http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | # Renders cc-evaluator.yaml with the current environment and applies it.
17 | SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
18 | 
19 | # Quoted so the script also works from a checkout path containing spaces.
20 | source "${SCRIPT_DIR}/../common.sh"
21 | source "${SCRIPT_DIR}/../utils.sh"
22 | 
23 | check_envsubst
24 | 
25 | # Show which models the rendered manifest will reference.
26 | echo "$MODEL_BLACK"
27 | echo "$MODEL_WHITE"
28 | envsubst < "${SCRIPT_DIR}/cc-evaluator.yaml" | kubectl apply -f -
29 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/cluster/minigui/minigui-pod.yaml:
--------------------------------------------------------------------------------
 1 | # Pod serving minigui on container port 5001.
 2 | apiVersion: v1
 3 | kind: Pod
 4 | metadata:
 5 |   name: minigui
 6 |   # simple-service.yaml in this directory selects pods by `app: minigui`;
 7 |   # without this label the Service has no endpoints.
 8 |   labels:
 9 |     app: minigui
10 | spec:
11 |   containers:
12 |   - name: minigui
13 |     # NOTE(review): $(VAR) placeholders are not expanded by envsubst, and
14 |     # Kubernetes does not expand them in the image field -- confirm which
15 |     # tool renders this manifest.
16 |     image: gcr.io/$(PROJECT)/$(MINIGUI_PY_CPU_CONTAINER):$(VERSION_TAG)
17 |     ports:
18 |     - containerPort: 5001
19 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/cluster/minigui/simple-service.yaml:
--------------------------------------------------------------------------------
 1 | # Service forwarding TCP port 80 to port 5001 on pods labelled `app: minigui`.
 2 | kind: Service
 3 | apiVersion: v1
 4 | metadata:
 5 |   name: minigo-service
 6 | spec:
 7 |   # Only pods carrying this exact label receive traffic.
 8 |   selector:
 9 |     app: minigui
10 |   ports:
11 |   - protocol: TCP
12 |     port: 80
13 |     targetPort: 5001
14 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/cluster/ringmaster/Makefile:
--------------------------------------------------------------------------------
 1 | # Builds and pushes the mg-gtp and mg-ring images.
 2 | # The recipes read PROJECT and VERSION_TAG from the environment.
 3 | 
 4 | # Stages the sources into staging/ and builds the mg-gtp image.
 5 | gtp:
 6 | 		mkdir -p staging/cc
 7 | 		cp ../../WORKSPACE staging/
 8 | 		cp ../../.bazelrc staging/
 9 | 		cp -r -p ../../cc/ staging/
10 | 		cp p100-lz-tuning staging/
11 | 		rm -rfd staging/cc/tensorflow
12 | 		docker build --build-arg PROJECT=${PROJECT} -f mggtp-Dockerfile -t "gcr.io/$(PROJECT)/mg-gtp:$(VERSION_TAG)" .
13 | 
14 | # Same staging as `gtp` plus ringmaster_wrapper.sh; builds the mg-ring image.
15 | ring:
16 | 		mkdir -p staging/cc
17 | 		cp ../../WORKSPACE staging/
18 | 		cp ../../.bazelrc staging/
19 | 		cp -r -p ../../cc/ staging/
20 | 		cp p100-lz-tuning staging/
21 | 		cp ringmaster_wrapper.sh staging/
22 | 		rm -rfd staging/cc/tensorflow
23 | 		docker build --build-arg PROJECT=${PROJECT} -f lz-Dockerfile -t "gcr.io/$(PROJECT)/mg-ring:$(VERSION_TAG)" .
24 | 
25 | 
26 | gtp-push: gtp
27 | 	gcloud docker --verbosity=error -- push "gcr.io/$(PROJECT)/mg-gtp:$(VERSION_TAG)"
28 | 
29 | ring-push: ring
30 | 	gcloud docker --verbosity=error -- push "gcr.io/$(PROJECT)/mg-ring:$(VERSION_TAG)"
31 | 
32 | clean:
33 | 		rm -rfd staging/
34 | 
35 | # Every target is a command, not a file; list them all so a stray file named
36 | # e.g. "ring" cannot make the target look up to date.
37 | .PHONY: gtp ring gtp-push ring-push clean
38 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/cluster/ringmaster/example.ctl:
--------------------------------------------------------------------------------
 1 | competition_type = 'playoff'
 2 | description = """Example control file"""
 3 | 
 4 | board_size = 19
 5 | komi = 7.5
 6 | 
 7 | record_games = True
 8 | stderr_to_log = True
 9 | 
10 | def LeelaPlayer(model, playouts):
11 |     return Player(
12 |         "./leelaz -g --noponder -w {} -t 1 -p {} --timemanage off ".format(
13 |             model, playouts),
14 |         startup_gtp_commands=["time_settings 0 1 0"])
15 | 
16 | matchups = []
17 | players = {}
18 | for playouts in ["10000"]:
19 |   p1 = LeelaPlayer('lz202.gz', playouts)
20 |   p2 = LeelaPlayer('mg-16-833.gz', playouts)
21 |   p1_name = "lz202_p{}".format(playouts)
22 |   p2_name = "mg-16-833{}".format(playouts)
23 |   players[p1_name] = p1
24 |   players[p2_name] = p2
25 |   matchup_name = "lz202_vs_mg-16-833_p{}".format(playouts)
26 |   matchups.append(Matchup(
27 |      p1_name, p2_name, id=matchup_name, number_of_games=2,
28 |      alternating=True, scorer='players'))
29 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/cluster/ringmaster/mggtp-Dockerfile:
--------------------------------------------------------------------------------
1 | ARG PROJECT
2 | FROM gcr.io/$PROJECT/cc-base:latest
3 | 
4 | COPY staging/ /app
5 | RUN bazel build -c opt --define=tf=1 --define=gpu=1 --define=bt=1 cc/gtp
6 | 
7 | ENTRYPOINT ["bazel-bin/cc/gtp"]
8 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/cluster/ringmaster/p100-lz-tuning:
--------------------------------------------------------------------------------
1 | 0;XgemmBatched;256;25;256;36; -DKWG=32 -DKWI=8 -DMDIMA=8 -DMDIMC=8 -DMWG=32 -DNDIMB=8 -DNDIMC=8 -DNWG=32 -DSA=1 -DSB=1 -DSTRM=0 -DSTRN=0 -DVWM=4 -DVWN=4;OpenCL: NVIDIA Corporation Tesla P100-PCIE-16GB @ 1328MHz
2 | 0;XgemmBatchedHalf;256;25;256;36; -DKWG=16 -DKWI=8 -DMDIMA=16 -DMDIMC=16 -DMWG=64 -DNDIMB=8 -DNDIMC=8 -DNWG=32 -DSA=1 -DSB=1 -DSTRM=0 -DSTRN=0 -DVWM=4 -DVWN=2;OpenCL: NVIDIA Corporation Tesla P100-PCIE-16GB @ 1328MHz
3 | 1;XgemmBatched;256;25;256;36; -DKWG=32 -DKWI=8 -DMDIMA=8 -DMDIMC=8 -DMWG=32 -DNDIMB=8 -DNDIMC=8 -DNWG=32 -DSA=1 -DSB=1 -DSTRM=0 -DSTRN=0 -DTCE=0 -DVWM=4 -DVWN=4;OpenCL: NVIDIA Corporation Tesla P100-PCIE-16GB @ 1328MHz
4 | 1;XgemmBatchedHalf;256;25;256;36; -DKWG=16 -DKWI=8 -DMDIMA=16 -DMDIMC=16 -DMWG=64 -DNDIMB=8 -DNDIMC=8 -DNWG=32 -DSA=1 -DSB=1 -DSTRM=0 -DSTRN=0 -DTCE=0 -DVWM=4 -DVWN=2;OpenCL: NVIDIA Corporation Tesla P100-PCIE-16GB @ 1328MHz
5 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/cluster/ringmaster/ringmaster_wrapper.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #
 3 | # Downloads a ringmaster control file and two model files with gsutil, runs
 4 | # the ringmaster competition, then uploads every file named
 5 | # <control-file-basename>.* to $OUT_PATH/<pod name>/.
 6 | 
 7 | set -e
 8 | 
 9 | # Abort with a message when a required variable is unset.
10 | : "${RINGMASTER_CONTROL_PATH?Need to set RINGMASTER_CONTROL_PATH}"
11 | : "${OUT_PATH?Need to set OUT_PATH}"
12 | : "${MODEL_ONE?Need to set MODEL_ONE}"
13 | : "${MODEL_TWO?Need to set MODEL_TWO}"
14 | : "${GOOGLE_APPLICATION_CREDENTIALS?Need to set GOOGLE_APPLICATION_CREDENTIALS}"
15 | 
16 | # All expansions are quoted so paths are never word-split or glob-expanded.
17 | gcloud auth activate-service-account --key-file="$GOOGLE_APPLICATION_CREDENTIALS"
18 | gsutil cp "$RINGMASTER_CONTROL_PATH" .
19 | gsutil cp "$MODEL_ONE" .
20 | gsutil cp "$MODEL_TWO" .
21 | 
22 | 
23 | RING_BASENAME=$(basename "$RINGMASTER_CONTROL_PATH")
24 | 
25 | # if your control file doesn't end in .ctl, things are bad.
26 | RING_FILES=$(basename "$RINGMASTER_CONTROL_PATH" .ctl)
27 | 
28 | date
29 | echo "Running Ringmaster: $RING_BASENAME"
30 | 
31 | #/mg_venv/bin/ringmaster $RING_BASENAME check
32 | /mg_venv/bin/ringmaster "$RING_BASENAME" run
33 | 
34 | echo "Ringmaster all done"
35 | # Last '-'-separated field of the hostname.
36 | POD_NAME=$(hostname | rev | cut -d'-' -f 1 | rev)
37 | 
38 | # The glob stays outside the quotes so it still expands to the output files.
39 | gsutil -m cp -r "$RING_FILES".* "$OUT_PATH/$POD_NAME/"
40 | 
41 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/cluster/selfplay/Dockerfile-cc:
--------------------------------------------------------------------------------
 1 | ARG PROJECT
 2 | FROM gcr.io/$PROJECT/cc-base:latest
 3 | 
 4 | WORKDIR /app
 5 | # Now bring in the rest of our code; changing our code will only trigger rebuilds below here
 6 | COPY staging /app
 7 | COPY staging/rl_loop/ /app
 8 | COPY staging/mask_flags.py /app
 9 | 
10 | RUN bazel build -c opt cc/selfplay --define=tf=1 --define=tpu=0 --define=bt=1
11 | CMD ["sh", "-c", "python rl_loop/selfplay.py --bucket_name=$BUCKET_NAME --mode=cc"]
12 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/cluster/selfplay/Dockerfile-tpu:
--------------------------------------------------------------------------------
 1 | ARG PROJECT
 2 | FROM gcr.io/$PROJECT/cc-base:latest
 3 | 
 4 | # Tensorflow is needed for gfile
 5 | RUN pip3 install tensorflow==1.15.0
 6 | WORKDIR /app
 7 | 
 8 | ARG RUNMODE
 9 | 
10 | ENV RUNMODE=$RUNMODE
11 | ENV BOARD_SIZE="19"
12 | 
13 | COPY staging /app
14 | 
15 | COPY staging/rl_loop/ /app
16 | COPY staging/mask_flags.py /app
17 | 
18 | RUN bazel build -c opt cc/selfplay --define=tf=1 --define=tpu=1 --define=bt=1
19 | CMD ["sh", "-c", "python rl_loop/selfplay.py --bucket_name=$BUCKET_NAME --mode=$RUNMODE"]
20 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/cluster/selfplay/deploy-cc-player.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Copyright 2018 Google LLC
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #      http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
17 | # Paths are quoted so a checkout directory containing spaces still works.
18 | source "${SCRIPT_DIR}/../common.sh"
19 | source "${SCRIPT_DIR}/../utils.sh"
20 | 
21 | check_envsubst
22 | # Substitute environment variables into the manifest and apply the result.
23 | envsubst < "${SCRIPT_DIR}/cc-player.yaml" | kubectl apply -f -
24 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/cluster/selfplay/deploy-cpu-player.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Copyright 2018 Google LLC
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #      http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
17 | source "${SCRIPT_DIR}/../common.sh"
18 | source "${SCRIPT_DIR}/../utils.sh"
19 | 
20 | check_envsubst
21 | # Substitute environment variables into the manifest and apply it; paths are quoted so directories with spaces work.
22 | envsubst < "${SCRIPT_DIR}/cpu-player.yaml" | kubectl apply -f -
23 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/cluster/trainer/Dockerfile:
--------------------------------------------------------------------------------
 1 | ARG PROJECT
 2 | FROM gcr.io/$PROJECT/cc-base:latest
 3 | 
 4 | RUN pip3 install tensorflow==1.15.0
 5 | WORKDIR /app
 6 | 
 7 | ENV BOARD_SIZE="19"
 8 | 
 9 | COPY staging /app
10 | COPY staging/rl_loop/ /app
11 | COPY staging/mask_flags.py /app
12 | 
13 | CMD ["sh", "-c", "python rl_loop/train_and_validate.py --bucket_name=$BUCKET_NAME --pro_dataset=$PRO_DATASET"]
14 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/cluster/trainer/Makefile:
--------------------------------------------------------------------------------
 1 | # See existing images with
 2 | #     gcloud container images list
 3 | #
 4 | # Usage:
 5 | #  	  PROJECT=(gcp project id) VERSION_TAG=0.XY make target
 6 | 
 7 | define staging =
 8 | 	mkdir -p staging
 9 | 	cp ../../WORKSPACE staging/
10 | 	cp ../../.bazelrc staging/
11 | 	cp -r -p ../../rl_loop/ staging/
12 | 	cp -r -p ../../cc/ staging/
13 | 	rm -rfd staging/cc/tensorflow
14 | 	cp ../../*.py staging/
15 | 	cp ../../requirements.txt staging/
16 | endef
17 | 
18 | 
19 | image:
20 | 	$(staging)
21 | 	docker build --quiet --build-arg PROJECT=$(PROJECT) -f Dockerfile -t "gcr.io/$(PROJECT)/minigo-tpu-trainer:$(VERSION_TAG)" .
22 | 
23 | push: image
24 | 	gcloud docker --verbosity=error -- push "gcr.io/$(PROJECT)/minigo-tpu-trainer:$(VERSION_TAG)"
25 | 
26 | clean:
27 | 	rm -rfd staging
28 | 
29 | .PHONY: image push clean
30 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/cluster/trainer/deploy-trainer.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Copyright 2018 Google LLC
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #      http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
17 | 
18 | source "${SCRIPT_DIR}/../common.sh"
19 | source "${SCRIPT_DIR}/../utils.sh"
20 | 
21 | check_envsubst
22 | # `return` only works when this file is sourced; fall back to `exit` when it is executed so the deploy really stops.
23 | if [[ -z "${BUCKET_NAME}" ]]; then
24 |   echo >&2 "BUCKET_NAME is not defined"
25 |   return 1 2>/dev/null || exit 1
26 | fi
27 | 
28 | envsubst < "${SCRIPT_DIR}/tpu-trainer-deployment.yaml" | kubectl apply -f -
29 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/minigui/control/leelaz.ctl:
--------------------------------------------------------------------------------
 1 | board_size = 19
 2 | 
 3 | players = {
 4 |   "leelaz" : Player("../leela-zero/build/leelaz"
 5 |                     " --weights best-network"
 6 |                     " --timemanage fast"
 7 |                     " -g",
 8 |                     startup_gtp_commands=[],
 9 | 		    cwd="../leela-zero/build"),
10 | }
11 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/minigui/control/minigo_edgetpu.ctl:
--------------------------------------------------------------------------------
 1 | board_size = 19
 2 | 
 3 | players = {
 4 |   "minigo_edgetpu" : Player("python"
 5 |                             " -u"
 6 |                             " gtp.py"
 7 |                             " --load_file=saved_models/v17-2019-04-29-edgetpu.tflite"
 8 |                             " --minigui_mode=true"
 9 |                             " --num_readouts=800"
10 |                             " --resign_threshold=-0.8"
11 |                             " --parallel_readouts=1"
12 |                             " --verbose=2",
13 |                             startup_gtp_commands=[],
14 |                             environ={"BOARD_SIZE": str(board_size)}),
15 | }
16 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/minigui/control/minigo_py.ctl:
--------------------------------------------------------------------------------
 1 | board_size = 19
 2 | 
 3 | players = {
 4 |   "minigo_py" : Player("python"
 5 |                        " -u"
 6 |                        " gtp.py"
 7 |                        " --load_file=saved_models/000990-cormorant"
 8 |                        " --minigui_mode=true"
 9 |                        " --num_readouts=64"
10 |                        " --conv_width=256"
11 |                        " --resign_threshold=-0.8"
12 |                        " --verbose=2",
13 |                        startup_gtp_commands=[],
14 |                        environ={"BOARD_SIZE": str(board_size)}),
15 | }
16 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/minigui/control/minigo_tf.ctl:
--------------------------------------------------------------------------------
 1 | board_size = 19
 2 | 
 3 | players = {
 4 |   "minigo_tf" : Player("bazel-bin/cc/gtp"
 5 |                        " --minigui=true"
 6 |                        " --engine=tf"
 7 |                        " --model=saved_models/000990-cormorant.pb"
 8 |                        " --num_readouts=64"
 9 |                        " --value_init_penalty=0"
10 |                        " --courtesy_pass=true"
11 |                        " --virtual_losses=8"
12 |                        " --resign_threshold=-0.8",
13 |                        startup_gtp_commands=[
14 |                          "report_search_interval 100",
15 |                        ]),
16 | }
17 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/minigui/control/vs.ctl:
--------------------------------------------------------------------------------
 1 | players = {
 2 |   "leelaz" : Player("../leela-zero/build/leelaz"
 3 |                     " --weights best-network"
 4 |                     " --timemanage off -r 3"
 5 |                     " --noponder"
 6 |                     " -g",
 7 |                     startup_gtp_commands=[
 8 |                       "time_settings 0 5 1",
 9 |                     ],
10 |                     cwd="../leela-zero/build"),
11 | 
12 |   "minigo" : Player("bazel-bin/cc/gtp"
13 |                     " --minigui=true"
14 |                     " --model=tf,saved_models/000990-cormorant.pb"
15 |                     " --num_readouts=200"
16 |                     " --value_init_penalty=0"
17 |                     " --courtesy_pass=true"
18 |                     " --virtual_losses=8"
19 |                     " --resign_threshold=-0.8",
20 |                     startup_gtp_commands=[
21 |                       "report_search_interval 100",
22 |                     ]),
23 | }
24 | 
25 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/minigui/edgetpu/start_chromium.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Copyright 2019 Google LLC
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #      http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | # Restarts chromium in incognito mode on display :0, sends it two key
17 | # sequences with xte, and waits for the browser to exit.
18 | # You can pass a parameter as the start URL.
19 | killall chromium
20 | export DISPLAY=:0
21 | export GDK_BACKEND=x11
22 | # Quote the URL so characters such as ? and * are not glob-expanded or split;
23 | # ${1:+...} passes no argument at all when no URL was given.
24 | chromium --incognito ${1:+"$1"} &
25 | CHROMIUM_PID=$!
26 | # Presumably gives the browser time to start before key events are sent.
27 | sleep 5
28 | xte -x :0 "key F11"
29 | xte -x :0 "keydown Control_L" "key 0" "keyup Control_L"
30 | wait "${CHROMIUM_PID}"
31 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/minigui/requirements.txt:
--------------------------------------------------------------------------------
1 | absl-py
2 | numpy
3 | flask
4 | flask-socketio
5 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/minigui/static/view.js:
--------------------------------------------------------------------------------
 1 | define(["require", "exports"], function (require, exports) {
 2 |     "use strict";
 3 |     Object.defineProperty(exports, "__esModule", { value: true });
 4 |     class ViewPainter {
 5 |         constructor() {
 6 |             this.pendingViews = [];
 7 |         }
 8 |         draw(view) {
 9 |             if (this.pendingViews.length == 0) {
10 |                 window.requestAnimationFrame(() => {
11 |                     for (let view of this.pendingViews) {
12 |                         view.drawImpl();
13 |                     }
14 |                     this.pendingViews = [];
15 |                 });
16 |             }
17 |             if (this.pendingViews.indexOf(view) == -1) {
18 |                 this.pendingViews.push(view);
19 |             }
20 |         }
21 |     }
22 |     let painter = new ViewPainter();
23 |     class View {
24 |         draw() { painter.draw(this); }
25 |     }
26 |     exports.View = View;
27 | });
28 | //# sourceMappingURL=view.js.map


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/minigui/unset-minigui-common.sh:
--------------------------------------------------------------------------------
 1 | # Copyright 2018 Google LLC
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | unset MINIGUI_PYTHON
16 | unset MINIGUI_BUCKET_NAME
17 | unset MINIGUI_GCS_DIR
18 | unset MINIGUI_MODEL
19 | unset MINIGUI_MODEL_TMPDIR
20 | unset MINIGUI_BOARD_SIZE
21 | unset MINIGUI_PORT
22 | unset MINIGUI_HOST
23 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/ml_perf/.gitignore:
--------------------------------------------------------------------------------
1 | checkpoint/
2 | results/
3 | target/
4 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/ml_perf/flags/19/architecture.flags:
--------------------------------------------------------------------------------
 1 | # architecture.flags: Flags that control the model architecture.
 2 | 
 3 | --conv_width=64
 4 | --fc_width=64
 5 | --trunk_layers=6
 6 | --value_cost_weight=0.5
 7 | --summary_steps=128
 8 | 
 9 | --bool_features=1
10 | 
11 | # --input_features=$FEATURES and --input_layout=$LAYOUT must match
12 | # --model=random:$FEATURES:$LAYOUT:0 in bootstrap.flags.
13 | --input_features=mlperf07
14 | --input_layout=nchw
15 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/ml_perf/flags/19/bootstrap.flags:
--------------------------------------------------------------------------------
 1 | # bootstrap.flags
 2 | # Flags for the first bootstrap round of selfplay.
 3 | 
 4 | --flagfile=ml_perf/flags/19/selfplay.flags
 5 | 
 6 | --num_readouts=80
 7 | --fastplay_frequency=0
 8 | 
 9 | --holdout_pct=0
10 | --device=0
11 | --cache_size_mb=0
12 | 
13 | --output_threads=4
14 | --selfplay_threads=48
15 | --parallel_search=1
16 | --parallel_inference=48
17 | --concurrent_games_per_thread=64
18 | 
19 | --min_resign_threshold=-1.00
20 | --max_resign_threshold=-0.99
21 | 
22 | --allow_pass=0
23 | --target_pruning=0
24 | --restrict_pass_alive_play_threshold=4
25 | 
26 | # --model=random:$FEATURES:$LAYOUT:0 must match --input_features=$FEATURES and
27 | # --input_layout=$LAYOUT in architecture.flags.
28 | --model=random:mlperf07:nchw:0
29 | 
30 | --num_games=8192
31 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/ml_perf/flags/19/eval.flags:
--------------------------------------------------------------------------------
1 | # eval.flags: Flags for playing eval games.
2 | 
3 | --flagfile=ml_perf/flags/19/selfplay.flags
4 | 
5 | --value_init_penalty=0.2
6 | --num_readouts=100
7 | --fastplay_frequency=0
8 | --resign_enabled=false
9 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/ml_perf/flags/19/rl_loop.flags:
--------------------------------------------------------------------------------
 1 | --flags_dir=ml_perf/flags/19/
 2 | --checkpoint_dir=ml_perf/checkpoint/19/
 3 | 
 4 | --iterations=200
 5 | --gating_win_rate=0.49
 6 | --window_size=5
 7 | 
 8 | --train_devices=0
 9 | --eval_device=1
10 | --selfplay_devices=0,1,2,3,4,5,6,7
11 | 
12 | --train_filter=0.3
13 | 
14 | --bootstrap_target_win_rate=0.05
15 | 
16 | --eval_num_games=100
17 | 
18 | --validate=1
19 | 
20 | # Consider also updating num_games in bootstrap.flags if updating
21 | # min_games_per_iteration.
22 | --min_games_per_iteration=8192
23 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/ml_perf/flags/19/selfplay.flags:
--------------------------------------------------------------------------------
 1 | # selfplay.flags: Flags for selfplay.
 2 | 
 3 | # This flagfile also serves as the base for the bootstrap & eval stages of
 4 | # the RL loop.
 5 | 
 6 | --num_readouts=800
 7 | --fastplay_frequency=0.75
 8 | --fastplay_readouts=80
 9 | --value_init_penalty=0.2
10 | --holdout_pct=0.0
11 | --disable_resign_pct=0.0
12 | --min_resign_threshold=-1.0
13 | --max_resign_threshold=-0.9
14 | --virtual_losses=4
15 | 
16 | --dirichlet_alpha=0.03
17 | --noise_mix=0.3
18 | 
19 | --cache_size_mb=8192
20 | --verbose=false
21 | 
22 | --selfplay_threads=3
23 | --parallel_search=4
24 | --parallel_inference=2
25 | --concurrent_games_per_thread=32
26 | 
27 | --target_pruning=1
28 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/ml_perf/flags/19/train.flags:
--------------------------------------------------------------------------------
 1 | # train.flags: Flags for training.
 2 | 
 3 | --flagfile=ml_perf/flags/19/architecture.flags
 4 | 
 5 | --shuffle_buffer_size=0
 6 | --shuffle_examples=false
 7 | 
 8 | --keep_checkpoint_max=100
 9 | 
10 | # Device specific hyperparameters re: batch size and LR schedules.
11 | --train_batch_size=4096
12 | 
13 | --lr_rates=0.016
14 | --lr_rates=0.16
15 | --lr_rates=0.016
16 | --lr_rates=0.0016
17 | 
18 | --lr_boundaries=128
19 | --lr_boundaries=10000
20 | --lr_boundaries=20000
21 | --l2_strength=0.0001
22 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/ml_perf/flags/19/train_loop.flags:
--------------------------------------------------------------------------------
 1 | --iterations=100
 2 | 
 3 | --window_size=5
 4 | --train_filter=0.3
 5 | 
 6 | --validate=0
 7 | 
 8 | # --num_games in bootstrap.flags must be >= --min_games_per_iteration.
 9 | --min_games_per_iteration=8192
10 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/ml_perf/flags/19/validate.flags:
--------------------------------------------------------------------------------
1 | # validate.flags: Flags for validation.
2 | 
3 | --flagfile=ml_perf/flags/19/architecture.flags
4 | 
5 | --examples_to_validate=512
6 | --train_batch_size=64
7 | --summary_steps=8
8 | --l2_strength=0.0001
9 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/ml_perf/flags/9/architecture.flags:
--------------------------------------------------------------------------------
 1 | # architecture.flags: Flags that control the model architecture.
 2 | 
 3 | --conv_width=64
 4 | --fc_width=64
 5 | --trunk_layers=9
 6 | --value_cost_weight=0.5
 7 | --summary_steps=64
 8 | 
 9 | --bool_features=1
10 | 
11 | # --input_features=$FEATURES and --input_layout=$LAYOUT must match
12 | # --model=random:$FEATURES:$LAYOUT:0 in bootstrap.flags.
13 | --input_features=mlperf07
14 | --input_layout=nhwc
15 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/ml_perf/flags/9/bootstrap.flags:
--------------------------------------------------------------------------------
 1 | # bootstrap.flags
 2 | # Flags for the first bootstrap round of selfplay.
 3 | 
 4 | --flagfile=ml_perf/flags/9/selfplay.flags
 5 | 
 6 | --num_readouts=80
 7 | --fastplay_frequency=0
 8 | 
 9 | --holdout_pct=0
10 | --device=0
11 | --cache_size_mb=0
12 | 
13 | --output_threads=4
14 | --selfplay_threads=48
15 | --parallel_search=1
16 | --parallel_inference=48
17 | --concurrent_games_per_thread=64
18 | 
19 | --min_resign_threshold=-1.00
20 | --max_resign_threshold=-0.99
21 | 
22 | --allow_pass=0
23 | --target_pruning=0
24 | --restrict_pass_alive_play_threshold=4
25 | 
26 | # --model=random:$FEATURES:$LAYOUT:0 must match --input_features=$FEATURES and
27 | # --input_layout=$LAYOUT in architecture.flags.
28 | --model=random:mlperf07:nhwc:0
29 | 
30 | --num_games=4096
31 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/ml_perf/flags/9/eval.flags:
--------------------------------------------------------------------------------
 1 | # eval.flags: Flags for playing eval games.
 2 | 
 3 | --flagfile=ml_perf/flags/9/selfplay.flags
 4 | 
 5 | # --num_readouts=240
 6 | --fastplay_frequency=0
 7 | --resign_enabled=false
 8 | 
 9 | --parallel_games=100
10 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/ml_perf/flags/9/rl_loop.flags:
--------------------------------------------------------------------------------
 1 | --flags_dir=ml_perf/flags/9/
 2 | --checkpoint_dir=ml_perf/checkpoint/9/
 3 | 
 4 | --iterations=50
 5 | --gating_win_rate=0.49
 6 | --window_size=5
 7 | 
 8 | --train_devices=0
 9 | --eval_device=1
10 | --selfplay_devices=0,1,2,3,4,5,6,7
11 | 
12 | --train_filter=0.3
13 | 
14 | --bootstrap_target_win_rate=0.05
15 | 
16 | --eval_num_games=100
17 | 
18 | --validate=1
19 | 
20 | # Consider also updating num_games in bootstrap.flags if updating
21 | # min_games_per_iteration.
22 | --min_games_per_iteration=4096
23 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/ml_perf/flags/9/selfplay.flags:
--------------------------------------------------------------------------------
 1 | # selfplay.flags: Flags for selfplay.
 2 | 
 3 | # This flagfile also serves as the base for the bootstrap & eval stages of
 4 | # the RL loop.
 5 | 
 6 | --num_readouts=240
 7 | --fastplay_frequency=0
 8 | --value_init_penalty=0.2
 9 | --holdout_pct=0.025
10 | --disable_resign_pct=1.0
11 | --min_resign_threshold=-1.0
12 | --max_resign_threshold=-0.8
13 | --virtual_losses=2
14 | 
15 | --dirichlet_alpha=0.135
16 | --noise_mix=0.3
17 | 
18 | --cache_size_mb=2048
19 | --verbose=false
20 | 
21 | --selfplay_threads=3
22 | --parallel_search=2
23 | --parallel_inference=3
24 | --concurrent_games_per_thread=128
25 | 
26 | --target_pruning=1
27 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/ml_perf/flags/9/train.flags:
--------------------------------------------------------------------------------
 1 | # train.flags: Flags for training.
 2 | 
 3 | --flagfile=ml_perf/flags/9/architecture.flags
 4 | 
 5 | --shuffle_buffer_size=0
 6 | --shuffle_examples=false
 7 | 
 8 | --keep_checkpoint_max=100
 9 | 
10 | # see --train_filter in rl_loop.flags
11 | # --filter_amount=1
12 | 
13 | # Device specific hyperparameters re: batch size and LR schedules.
14 | --train_batch_size=4096
15 | --lr_rates=0.16
16 | --lr_rates=0.016
17 | --lr_rates=0.0016
18 | --lr_boundaries=25000
19 | --lr_boundaries=37500
20 | --l2_strength=0.0001
21 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/ml_perf/flags/9/train_loop.flags:
--------------------------------------------------------------------------------
 1 | --iterations=50
 2 | 
 3 | --window_size=5
 4 | --train_filter=0.3
 5 | 
 6 | --validate=1
 7 | 
 8 | # Consider also updating num_games in bootstrap.flags if updating min_games_per_iteration.
 9 | --min_games_per_iteration=4096
10 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/ml_perf/flags/9/validate.flags:
--------------------------------------------------------------------------------
1 | # validate.flags: Flags for validation.
2 | 
3 | --flagfile=ml_perf/flags/9/architecture.flags
4 | 
5 | --examples_to_validate=512
6 | --train_batch_size=64
7 | --summary_steps=8
8 | --l2_strength=0.0001
9 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/ml_perf/scripts/stop_selfplay.sh:
--------------------------------------------------------------------------------
 1 | # Copyright 2019 Google LLC
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | # Stops selfplay processes by creating an "abort file" at a location monitored
16 | # by the selfplay processes.
17 | # This script is called automatically by train.sh on exit.
18 | 
19 | 
20 | source ml_perf/scripts/common.sh
21 | 
22 | 
23 | touch "${abort_file}"
24 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/oneoffs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/training/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/retired_benchmarks/minigo/tensorflow/minigo/oneoffs/__init__.py


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/requirements-analysis.txt:
--------------------------------------------------------------------------------
1 | matplotlib
2 | pandas
3 | choix
4 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/requirements-colab.txt:
--------------------------------------------------------------------------------
 1 | # Copy of requirements.txt with packages in colab commented.
 2 | 
 3 | #absl-py
 4 | autopep8>=1.3
 5 | fire
 6 | google.cloud.logging
 7 | google.cloud.bigtable
 8 | #grpcio-tools
 9 | #keras
10 | #numpy>=1.14.0
11 | #protobuf
12 | pylint
13 | sgf==0.5
14 | #six
15 | #tqdm>=4.17
16 | 
17 | #oauth2client==4.1
18 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/requirements.txt:
--------------------------------------------------------------------------------
 1 | absl-py
 2 | autopep8>=1.3
 3 | choix>=0.3.3
 4 | fire
 5 | google.cloud.logging
 6 | google.cloud.bigtable
 7 | grpcio-tools
 8 | keras
 9 | numpy>=1.14.0
10 | protobuf
11 | pylint
12 | sgf==0.5
13 | six
14 | tqdm>=4.17
15 | pyasn1>=0.4.1
16 | setuptools>=34.0.0
17 | 
18 | oauth2client==4.1
19 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/rl_loop/distributed_flags_nr:
--------------------------------------------------------------------------------
 1 | # this file is the subset of flags used by the set of clusters that will do
 2 | # selfplay with resignation disabled (AKA 'calibration' games).
 3 | #
 4 | # Network architecture flags
 5 | --conv_width=256
 6 | --fc_width=256
 7 | --trunk_layers=19
 8 | --use_SE
 9 | --use_SE_bias
10 | 
11 | # Selfplay related flags.
12 | # These flags can be overwritten by --flags_path (see cc/main.cc for details)
13 | --run_forever=true
14 | --inject_noise=true
15 | --soft_pick=true
16 | --random_symmetry=true
17 | --virtual_losses=8
18 | --parallel_games=8
19 | --num_readouts=800
20 | --disable_resign_pct=1.00
21 | --resign_threshold=-1.0
22 | --value_init_penalty=2.0
23 | --output_bigtable=tensor-go,minigo-instance,v17-games-nr
24 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/rl_loop/local_flags:
--------------------------------------------------------------------------------
 1 | # This file is an enumeration of all hyperparameters required by the
 2 | # AGZ pipeline. The flags in this file get passed into all of the scripts
 3 | # in the rl_loop/ directory via the mask_flags.py helper library.
 4 | 
 5 | --conv_width=8
 6 | --fc_width=16
 7 | --trunk_layers=1
 8 | --train_batch_size=16
 9 | --shuffle_buffer_size=1000
10 | --num_evaluation_games=1
11 | --verbose=0
12 | --num_readouts=10
13 | --value_init_penalty=2.0
14 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/training/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/retired_benchmarks/minigo/tensorflow/minigo/tests/__init__.py


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/tests/test_flags:
--------------------------------------------------------------------------------
1 | # One line per flag
2 | # For example --someflag=value
3 | --conv_width=8
4 | --fc_width=2
5 | --trunk_layers=1
6 | --cbt_project=foo
7 | --cbt_instance=bar
8 | --cbt_table=baz
9 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/minigo/tsconfig.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "compilerOptions": {
 3 |         "target": "ESNEXT",
 4 |         "noImplicitAny": true,
 5 |         "noImplicitThis": true,
 6 |         "strictNullChecks": true,
 7 |         "removeComments": true,
 8 |         "preserveConstEnums": true,
 9 |         "outDir": "minigui/static/",
10 |         "sourceMap": true,
11 |         "module": "amd"
12 |     },
13 |     "include": [
14 |         "minigui/*.ts"
15 |     ],
16 |     "exclude": [
17 |     ]
18 | }
19 | 
20 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/run.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # This script should be only executed in docker.
 3 | # Run minigo... stop when it converges.
 4 | set -e  # abort as soon as any command below fails
 5 | 
 6 | SEED=$1  # first positional argument; forwarded to loop_main.sh below
 7 | mkdir -p /research/results/minigo/final/
 8 | cd /research/reinforcement/minigo
 9 | bash loop_main.sh params/final.json $SEED  # $SEED is unquoted: an empty seed passes no argument
10 | 


--------------------------------------------------------------------------------
/retired_benchmarks/minigo/tensorflow/run_and_time.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # runs benchmark and reports time to convergence
 3 | # to use the script:
 4 | #   run_and_time.sh <random seed 1-5>
 5 | # The seed defaults to 1 when no argument is given.
 6 | 
 7 | set -e
 8 | 
 9 | # start timing
10 | start=$(date +%s)
11 | start_fmt=$(date +%Y-%m-%d\ %r)
12 | echo "STARTING TIMING RUN AT $start_fmt"
13 | 
14 | 
15 | # run benchmark
16 | 
17 | seed=${1:-1}
18 | 
19 | echo "running benchmark with seed $seed"
20 | # The termination quality is set in params/final.json. See README.md.
21 | # Capture run.sh's own status via `||` so `set -e` does not abort before the sleep.
22 | ret_code=0; ./run.sh "$seed" || ret_code=$?
23 | sleep 3
24 | if [[ $ret_code != 0 ]]; then exit $ret_code; fi
25 | 
26 | # end timing
27 | end=$(date +%s)
28 | end_fmt=$(date +%Y-%m-%d\ %r)
29 | echo "ENDING TIMING RUN AT $end_fmt"
30 | 
31 | 
32 | # report result (elapsed wall-clock seconds)
33 | result=$(( $end - $start ))
34 | result_name="reinforcement"
35 | 
36 | 
37 | echo "RESULT,$result_name,$seed,$result,$USER,$start_fmt"
38 | 


--------------------------------------------------------------------------------
/retired_benchmarks/mixtral8x22b/config/dataset/c4_mlperf.yaml:
--------------------------------------------------------------------------------
 1 | dataset_name: c4_mlperf
 2 | train_dataset_path: gs://mlperf-llm-public2/c4/en_json/3.0.1
 3 | eval_dataset_path: gs://mlperf-llm-public2/c4/en_val_subset_json
 4 | streaming: True
 5 | 
 6 | # number of processes used for data processing
 7 | num_proc: 1
 8 | 
 9 | # whether to load dataset from cache
10 | load_from_cache_file: True
11 | 
12 | shuffle_buffer_size: 256


--------------------------------------------------------------------------------
/retired_benchmarks/mixtral8x22b/config/dataset/wikitext.yaml:
--------------------------------------------------------------------------------
 1 | dataset_name: wikitext
 2 | dataset_config_name: wikitext-2-raw-v1
 3 | streaming: False
 4 | 
 5 | # number of processes used for data processing
 6 | num_proc: 1
 7 | 
 8 | # whether to load dataset from cache
 9 | load_from_cache_file: True
10 | 
11 | shuffle_buffer_size: 256


--------------------------------------------------------------------------------
/retired_benchmarks/mixtral8x22b/config/model/blank_model.yaml:
--------------------------------------------------------------------------------
 1 | config_path: null
 2 | name_or_path: mistralai/Mixtral-8x7B-v0.1
 3 | dtype: bfloat16
 4 | flash_attention: True
 5 | capacity_factor: 0  # dropped implementation with a positive number
 6 | max_sequence_length: ${max_length}
 7 | 
 8 | fsdp_config: 
 9 |   fsdp_transformer_layer_cls_to_wrap: ["MixtralDecoderLayer"]
10 |   min_num_params: 0
11 |   xla_fsdp_grad_ckpt: true
12 | 


--------------------------------------------------------------------------------
/retired_benchmarks/mixtral8x22b/config/sched/CosineAnnealing.yaml:
--------------------------------------------------------------------------------
1 | name: CosineAnnealing
2 | warmup_ratio: 0.25
3 | # warmup_steps: 150
4 | min_lr: ${multiply:0.1,${lr}}
5 | max_steps: ${max_steps}
6 | 


--------------------------------------------------------------------------------
/retired_benchmarks/mixtral8x22b/config/sched/WarmupHoldPolicy.yaml:
--------------------------------------------------------------------------------
1 | name: WarmupHoldPolicy
2 | warmup_ratio: 0.25
3 | # warmup_steps: 150
4 | hold_steps: 10000000000000 # Incredibly large value to hold the lr as constant
5 | max_steps: ${max_steps}
6 | 


--------------------------------------------------------------------------------
/retired_benchmarks/mixtral8x22b/docker/gpu/build_and_push_image.sh:
--------------------------------------------------------------------------------
 1 | set -euox pipefail
 2 | SCRIPTS_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" && pwd )"
 3 | DATE=$(date +%Y%m%d)
 4 | : ${PROJECT_ID:=cloud-tpu-multipod-dev}
 5 | : ${IMAGE:=gcr.io/${PROJECT_ID}/${USER}-pytorch-nemo-moe-${DATE}}
 6 | : ${DOCKER_BUILD_ARGS:=""}
 7 | 
 8 | pushd ${SCRIPTS_DIR}
 9 | 
10 | docker build --network host \
11 |   --file Dockerfile \
12 |   --tag ${IMAGE}-base \
13 |   ${DOCKER_BUILD_ARGS} \
14 |   .
15 | 
16 | docker build --network host \
17 |   --file Dockerfile.GCP \
18 |   --tag ${IMAGE} \
19 |   --build-arg FROM_BASE_IMAGE=${IMAGE}-base \
20 |   .
21 | 
22 | popd
23 | 
24 | docker push ${IMAGE}
25 | 


--------------------------------------------------------------------------------
/retired_benchmarks/mixtral8x22b/docker/tpu/build_and_push_image.sh:
--------------------------------------------------------------------------------
 1 | set -euox pipefail
 2 | SCRIPTS_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" && pwd )"
 3 | DATE=$(date +%Y%m%d)
 4 | : ${PROJECT_ID:=cloud-tpu-multipod-dev}
 5 | : ${IMAGE:=gcr.io/${PROJECT_ID}/${USER}-pytorch-xla-moe-${DATE}}
 6 | : ${DOCKER_BUILD_ARGS:=""}
 7 | 
 8 | pushd ${SCRIPTS_DIR}
 9 | 
10 | docker build --network host \
11 |   --file Dockerfile \
12 |   --tag ${IMAGE} \
13 |   ${DOCKER_BUILD_ARGS} \
14 |   .
15 | popd
16 | 
17 | docker push ${IMAGE}
18 | 


--------------------------------------------------------------------------------
/retired_benchmarks/mixtral8x22b/download_dataset.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Copyright 2024 Google LLC
 3 | 
 4 | Licensed under the Apache License, Version 2.0 (the "License");
 5 | you may not use this file except in compliance with the License.
 6 | You may obtain a copy of the License at
 7 | 
 8 |      https://www.apache.org/licenses/LICENSE-2.0
 9 | 
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | """
16 | 
17 | import sys
18 | from huggingface_hub import snapshot_download
19 | 
20 | snapshot_download(repo_id=sys.argv[1], local_dir=sys.argv[2])
21 | 


--------------------------------------------------------------------------------
/retired_benchmarks/mixtral8x22b/helm_context/Chart.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v2
2 | name: megatron_moe_benchmark
3 | description: megatron_moe_benchmark
4 | type: application
5 | version: 0.1.0
6 | appVersion: "1.16.0"


--------------------------------------------------------------------------------
/retired_benchmarks/mixtral8x22b/mixtral80.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "architectures": [
 3 |     "MixtralForCausalLM"
 4 |   ],
 5 |   "attention_dropout": 0.0,
 6 |   "bos_token_id": 1,
 7 |   "eos_token_id": 2,
 8 |   "hidden_act": "silu",
 9 |   "hidden_size": 4096,
10 |   "initializer_range": 0.02,
11 |   "intermediate_size": 14336,
12 |   "max_position_embeddings": 32768,
13 |   "model_type": "mixtral",
14 |   "num_attention_heads": 32,
15 |   "num_experts_per_tok": 2,
16 |   "num_hidden_layers": 1,
17 |   "num_key_value_heads": 8,
18 |   "num_local_experts": 8,
19 |   "output_router_logits": false,
20 |   "rms_norm_eps": 1e-05,
21 |   "rope_theta": 1000000.0,
22 |   "router_aux_loss_coef": 0.02,
23 |   "sliding_window": null,
24 |   "tie_word_embeddings": false,
25 |   "torch_dtype": "bfloat16",
26 |   "transformers_version": "4.36.0.dev0",
27 |   "use_cache": true,
28 |   "vocab_size": 32000
29 | }
30 | 
31 | 


--------------------------------------------------------------------------------
/retired_benchmarks/mixtral8x22b/mixtral822-instruct.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "architectures": [
 3 |     "MixtralForCausalLM"
 4 |   ],
 5 |   "attention_dropout": 0.0,
 6 |   "bos_token_id": 1,
 7 |   "eos_token_id": 2,
 8 |   "hidden_act": "silu",
 9 |   "hidden_size": 6144,
10 |   "initializer_range": 0.02,
11 |   "intermediate_size": 16384,
12 |   "max_position_embeddings": 65536,
13 |   "model_type": "mixtral",
14 |   "num_attention_heads": 48,
15 |   "num_experts_per_tok": 2,
16 |   "num_hidden_layers": 56,
17 |   "num_key_value_heads": 8,
18 |   "num_local_experts": 8,
19 |   "output_router_logits": false,
20 |   "rms_norm_eps": 1e-05,
21 |   "rope_theta": 1000000,
22 |   "router_aux_loss_coef": 0.001,
23 |   "sliding_window": null,
24 |   "tie_word_embeddings": false,
25 |   "torch_dtype": "bfloat16",
26 |   "transformers_version": "4.38.0",
27 |   "use_cache": true,
28 |   "vocab_size": 32768
29 | }
30 | 
31 | 


--------------------------------------------------------------------------------
/retired_benchmarks/mixtral8x22b/mixtral822.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "architectures": [
 3 |     "MixtralForCausalLM"
 4 |   ],
 5 |   "attention_dropout": 0.0,
 6 |   "bos_token_id": 1,
 7 |   "eos_token_id": 2,
 8 |   "hidden_act": "silu",
 9 |   "hidden_size": 6144,
10 |   "initializer_range": 0.02,
11 |   "intermediate_size": 16384,
12 |   "max_position_embeddings": 65536,
13 |   "model_type": "mixtral",
14 |   "num_attention_heads": 48,
15 |   "num_experts_per_tok": 2,
16 |   "num_hidden_layers": 56,
17 |   "num_key_value_heads": 8,
18 |   "num_local_experts": 8,
19 |   "output_router_logits": false,
20 |   "rms_norm_eps": 1e-05,
21 |   "rope_theta": 1000000,
22 |   "router_aux_loss_coef": 0.001,
23 |   "sliding_window": null,
24 |   "tie_word_embeddings": false,
25 |   "torch_dtype": "bfloat16",
26 |   "transformers_version": "4.38.0",
27 |   "use_cache": true,
28 |   "vocab_size": 32000
29 | }
30 | 
31 | 


--------------------------------------------------------------------------------
/retired_benchmarks/mixtral8x22b/mixtral87.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "architectures": [
 3 |     "MixtralForCausalLM"
 4 |   ],
 5 |   "attention_dropout": 0.0,
 6 |   "bos_token_id": 1,
 7 |   "eos_token_id": 2,
 8 |   "hidden_act": "silu",
 9 |   "hidden_size": 4096,
10 |   "initializer_range": 0.02,
11 |   "intermediate_size": 14336,
12 |   "max_position_embeddings": 32768,
13 |   "model_type": "mixtral",
14 |   "num_attention_heads": 32,
15 |   "num_experts_per_tok": 2,
16 |   "num_hidden_layers": 32,
17 |   "num_key_value_heads": 8,
18 |   "num_local_experts": 8,
19 |   "output_router_logits": false,
20 |   "rms_norm_eps": 1e-05,
21 |   "rope_theta": 1000000.0,
22 |   "router_aux_loss_coef": 0.02,
23 |   "sliding_window": null,
24 |   "tie_word_embeddings": false,
25 |   "torch_dtype": "bfloat16",
26 |   "transformers_version": "4.36.0.dev0",
27 |   "use_cache": true,
28 |   "vocab_size": 32000
29 | }
30 | 


--------------------------------------------------------------------------------
/retired_benchmarks/ncf/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__
2 | 


--------------------------------------------------------------------------------
/retired_benchmarks/ncf/Dockerfile:
--------------------------------------------------------------------------------
 1 | ARG FROM_IMAGE_NAME=pytorch/pytorch:1.0.1-cuda10.0-cudnn7-runtime
 2 | FROM ${FROM_IMAGE_NAME}
 3 | 
 4 | # Install Python dependencies
 5 | WORKDIR /workspace/recommendation
 6 | 
 7 | COPY requirements.txt .
 8 | RUN pip install -r requirements.txt
 9 | 
10 | COPY negative_sampling_cpp ./negative_sampling_cpp
11 | WORKDIR /workspace/recommendation/negative_sampling_cpp
12 | RUN python setup.py install
13 | 
14 | # Copy NCF code and build
15 | WORKDIR /workspace/recommendation
16 | COPY . .
17 | 


--------------------------------------------------------------------------------
/retired_benchmarks/ncf/negative_sampling_cpp/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup
2 | from torch.utils.cpp_extension import CppExtension, BuildExtension
3 | 
4 | setup(name='negative_sampling',
5 |       ext_modules=[CppExtension('negative_sampling', ['negative_sampling.cpp'])],
6 |       cmdclass={'build_ext': BuildExtension})
7 | 


--------------------------------------------------------------------------------
/retired_benchmarks/ncf/negative_sampling_cpp/test.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import negative_sampling
 3 | 
 4 | n_positives = 1000
 5 | n_users = 10
 6 | n_items = 500
 7 | users = torch.randint(size=[n_positives, 1], low=0, high=n_users)
 8 | items = torch.randint(size=[n_positives, 1], low=0, high=n_items)
 9 | 
10 | positives = torch.cat([users, items], dim=1)
11 | positives, _ = torch.sort(positives, dim=1)
12 | positives, _ = torch.sort(positives, dim=0)
13 | 
14 | print("positives: ", positives)
15 | 
16 | 
17 | sampler = negative_sampling.NegativeSampler(positives, n_users, n_items)
18 | train_negatives = sampler.generate_train(4)
19 | test_negatives = sampler.generate_test(20)
20 | 
21 | print(train_negatives)
22 | print(test_negatives)
23 | 
24 | 


--------------------------------------------------------------------------------
/retired_benchmarks/ncf/requirements.txt:
--------------------------------------------------------------------------------
1 | tqdm==4.20.0
2 | scipy
3 | torch
4 | numpy
5 | numpy_indexed
6 | pandas
7 | mlperf_compliance==0.0.10
8 | 


--------------------------------------------------------------------------------
/retired_benchmarks/never-adopted/sentiment_analysis/download_dataset.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | # Use the python script to download the IMDB dataset
4 | python download.py
5 | 


--------------------------------------------------------------------------------
/retired_benchmarks/never-adopted/sentiment_analysis/paddle/run_and_time.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Start timing
 4 | start_time=$(date +%s)
 5 | start_fmt=$(date +%Y-%m-%d\ %r)
 6 | echo "STARTING TIMING RUN AT $start_fmt"
 7 | 
 8 | seed=$1
 9 | echo "Running sentiment benchmark with seed $seed"
10 | 
11 | # Train a sentiment_analysis model (default: conv model), with a user
12 | # specified seed
13 | python train.py -s ${seed}
14 | 
15 | # End timing
16 | end_time=$(date +%s)
17 | end_fmt=$(date +%Y-%m-%d\ %r)
18 | echo "ENDING TIMING RUN AT $end_fmt"
19 | 
20 | # Report result
21 | result=$(( ${end_time} - ${start_time} ))
22 | result_name="sentiment"
23 | 
24 | echo "RESULT,$result_name,$seed,$result,$USER,$start_fmt"
25 | 


--------------------------------------------------------------------------------
/retired_benchmarks/never-adopted/sentiment_analysis/verify.py:
--------------------------------------------------------------------------------
 1 | from __future__ import print_function
 2 | 
 3 | import hashlib
 4 | import os
 5 | 
 6 | URL = 'http://ai.stanford.edu/%7Eamaas/data/sentiment/aclImdb_v1.tar.gz'
 7 | MD5 = '7c2ac02c03563afcf9b574c7e56c153a'
 8 | DIR = os.path.expanduser('~/.cache/paddle/dataset/imdb')
 9 | PATH = os.path.join(DIR, URL.split('/')[-1])
10 | CHUNK_SIZE = 4096
11 | 
12 | def md5content(fname):
13 |     hash_md5 = hashlib.md5()
14 |     f = open(fname, "rb")
15 |     for chunk in iter(lambda: f.read(CHUNK_SIZE), b""):
16 |         hash_md5.update(chunk)
17 |     f.close()
18 |     return hash_md5.hexdigest()
19 | 
20 | # Verify MD5 checksum
21 | def verify():
22 |     if md5content(PATH) == MD5:
23 |         print("PASSED!")
24 |     else:
25 |         print("FAILED")
26 | 
27 | if __name__ == "__main__":
28 |     verify()
29 | 


--------------------------------------------------------------------------------
/retired_benchmarks/never-adopted/sentiment_analysis/verify_dataset.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | # Use the python script to verify the MD5 checksum 
4 | # of the downloaded dataset.
5 | python verify.py
6 | 


--------------------------------------------------------------------------------
/retired_benchmarks/never-adopted/speech_recognition/__init__.py:
--------------------------------------------------------------------------------
1 | import data
2 | 


--------------------------------------------------------------------------------
/retired_benchmarks/never-adopted/speech_recognition/data/.gitignore:
--------------------------------------------------------------------------------
1 | an4_dataset/
2 | 


--------------------------------------------------------------------------------
/retired_benchmarks/never-adopted/speech_recognition/data/__init__.py:
--------------------------------------------------------------------------------
1 | from . import data_loader
2 | 


--------------------------------------------------------------------------------
/retired_benchmarks/never-adopted/speech_recognition/data/data-LibriSpeech-ref-cksum.out:
--------------------------------------------------------------------------------
1 | 2730530160 113699829760 data-LibriSpeech-ref.tar
2 | 


--------------------------------------------------------------------------------
/retired_benchmarks/never-adopted/speech_recognition/download_dataset.sh:
--------------------------------------------------------------------------------
1 | # Script to download Librispeech Dataset
2 | #for script testing, only fetch minimal, clean dataset
3 | 
4 | python data/librispeech.py #--files_to_use train-clean-100.tar.gz,dev-clean.tar.gz,test-clean.tar.gz
5 | 


--------------------------------------------------------------------------------
/retired_benchmarks/never-adopted/speech_recognition/labels.json:
--------------------------------------------------------------------------------
 1 | [
 2 |   "_",
 3 |   "'",
 4 |   "A",
 5 |   "B",
 6 |   "C",
 7 |   "D",
 8 |   "E",
 9 |   "F",
10 |   "G",
11 |   "H",
12 |   "I",
13 |   "J",
14 |   "K",
15 |   "L",
16 |   "M",
17 |   "N",
18 |   "O",
19 |   "P",
20 |   "Q",
21 |   "R",
22 |   "S",
23 |   "T",
24 |   "U",
25 |   "V",
26 |   "W",
27 |   "X",
28 |   "Y",
29 |   "Z",
30 |   " "
31 | ]


--------------------------------------------------------------------------------
/retired_benchmarks/never-adopted/speech_recognition/pytorch/.gitignore:
--------------------------------------------------------------------------------
1 | logs/
2 | models/
3 | 


--------------------------------------------------------------------------------
/retired_benchmarks/never-adopted/speech_recognition/pytorch/docker/base.gpu:
--------------------------------------------------------------------------------
 1 | FROM nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04
 2 | 
 3 | WORKDIR /tmp
 4 | 
 5 | # Generic python installations
 6 | # PyTorch Audio for DeepSpeech: https://github.com/SeanNaren/deepspeech.pytorch/releases
 7 | # Development environment installations
 8 | RUN apt-get update && apt-get install -y \
 9 |   apt-utils \
10 |   python \
11 |   python-pip \
12 |   sox \
13 |   libsox-dev \
14 |   libsox-fmt-all \
15 |   git \
16 |   cmake \
17 |   tree \
18 |   htop \
19 |   bmon \
20 |   iotop \
21 |   tmux \
22 |   vim \
23 |   g++
24 | 
25 | RUN which g++
26 | 
27 | 


--------------------------------------------------------------------------------
/retired_benchmarks/never-adopted/speech_recognition/pytorch/docker/build-docker.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | nvidia-docker build . --rm -f Dockerfile.gpu -t ds2-cuda9cudnn7:gpu
4 | 


--------------------------------------------------------------------------------
/retired_benchmarks/never-adopted/speech_recognition/pytorch/docker/run-dev.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | nvidia-docker run \
3 |   -v /mnt/disk/mnt_dir:/mnt/disk/mnt_dir:rw \
4 |   -v /etc/passwd:/etc/passwd:ro \
5 |   -it --rm --user $(id -u) ds2-cuda9cudnn7:gpu
6 | 


--------------------------------------------------------------------------------
/retired_benchmarks/never-adopted/speech_recognition/pytorch/run_and_time.sh:
--------------------------------------------------------------------------------
1 | # Script to train and time DeepSpeech 2 implementation
2 | 
3 | RANDOM_SEED=1
4 | TARGET_ACC=23
5 | 
6 | python train.py --model_path models/deepspeech_t$RANDOM_SEED.pth.tar --seed $RANDOM_SEED --acc $TARGET_ACC
7 | 


--------------------------------------------------------------------------------
/retired_benchmarks/never-adopted/speech_recognition/verify_dataset.sh:
--------------------------------------------------------------------------------
 1 | # Script to verify the dataset
 2 | 
 3 | #generate tar, this takes a few minutes
 4 | tar -cf data-LibriSpeech-ref.tar LibriSpeech_dataset
 5 | 
 6 | #generate checksum on tar, this takes a few minutes
 7 | cksum data-LibriSpeech-ref.tar > data-LibriSpeech-cksum.out
 8 | 
 9 | #check against ref checksum and report success/failure
10 | cmp --silent data-LibriSpeech-cksum.out data/data-LibriSpeech-ref-cksum.out && echo 'Dataset Checksum Passed.' || echo 'WARNING: Dataset Checksum Failed.'
11 | 
12 | #remove generated checksum and tar
13 | rm data-LibriSpeech-ref.tar
14 | rm data-LibriSpeech-cksum.out
15 | 


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04
 2 | 
 3 | 
 4 | WORKDIR /research
 5 | 
 6 | RUN apt-get update
 7 | 
 8 | RUN apt-get update && apt-get install -y --no-install-recommends \
 9 |     ca-certificates \
10 |     build-essential \
11 |     git \
12 |     python \
13 |     python-pip
14 | 
15 | 
16 | ENV HOME /research
17 | ENV PYENV_ROOT $HOME/.pyenv
18 | ENV PATH $PYENV_ROOT/shims:$PYENV_ROOT/bin:$PATH
19 | 
20 | 
21 | RUN apt-get install -y python-setuptools
22 | 
23 | RUN apt-get install -y python-pip python3-pip virtualenv htop
24 | RUN pip3 install --upgrade numpy scipy sklearn tf-nightly-gpu
25 | 
26 | 
27 | # Mount data into the docker
28 | ADD . /research/resnet
29 | 
30 | 
31 | WORKDIR /research/resnet
32 | RUN pip3 install -r official/requirements.txt
33 | 
34 | 
35 | ENTRYPOINT ["/bin/bash"]
36 | 
37 | 


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/README.md:
--------------------------------------------------------------------------------
 1 | Install
 2 | ==========
 3 | 
 4 | In order to run this, you must first set stuff up... for now see Transformer's README.
 5 | 
 6 | 
 7 | Downloading Data
 8 | ==========
 9 | 
10 | Downloading data is TBD.
11 | 
12 | 
13 | Processing Data
14 | =============
15 | 
16 | TBD.
17 | 
18 | 
19 | Running the Benchmark
20 | ============
21 | 
22 | You must first build the Docker image:
23 | 
24 |     docker build .
25 | 
26 | 
27 | Remember the image name/number.
28 | 
29 | 
30 | 1. Make sure /imn on the host contains the pre-processed data. (Scripts for this TODO).
31 | 2. Choose your random seed (below we use 77)
32 | 3. Enter your docker's image name (below we use 5ca81979cbc2 which you don't have)
33 | 
34 | Then, execute the following:
35 | 
36 |     sudo docker run -v /imn:/imn --runtime=nvidia -t -i 5ca81979cbc2 "./run_and_time.sh" 77 | tee benchmark.log
37 | 
38 | 
39 | 


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/official/.gitignore:
--------------------------------------------------------------------------------
1 | MNIST-data
2 | labels.txt
3 | 


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/official/Dockerfile.cpu:
--------------------------------------------------------------------------------
 1 | # Docker image for running examples in Tensorflow models.
 2 | # base_image depends on whether we are running on GPUs or non-GPUs
 3 | FROM ubuntu:latest
 4 | 
 5 | RUN apt-get update && apt-get install -y --no-install-recommends \
 6 |     ca-certificates \
 7 |     build-essential \
 8 |     git \
 9 |     python \
10 |     python-pip \
11 |     python-setuptools
12 | 
13 | RUN pip install tf-nightly
14 | 
15 | # Checkout tensorflow/models at HEAD
16 | RUN git clone https://github.com/tensorflow/models.git /tensorflow_models
17 | 
18 | 


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/official/Dockerfile.gpu:
--------------------------------------------------------------------------------
 1 | # Docker image for running examples in Tensorflow models.
 2 | # base_image depends on whether we are running on GPUs or non-GPUs
 3 | FROM nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04
 4 | 
 5 | RUN apt-get update && apt-get install -y --no-install-recommends \
 6 |     ca-certificates \
 7 |     build-essential \
 8 |     git \
 9 |     python \
10 |     python-pip \
11 |     python-setuptools
12 | 
13 | RUN pip install tf-nightly-gpu
14 | 
15 | # Checkout tensorflow/models at HEAD
16 | RUN git clone https://github.com/tensorflow/models.git /tensorflow_models
17 | 
18 | 
19 | 


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/official/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/training/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/retired_benchmarks/resnet-tf1/official/__init__.py


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/official/requirements.txt:
--------------------------------------------------------------------------------
1 | psutil>=5.4.3
2 | py-cpuinfo>=3.3.0
3 | google-cloud-bigquery>=0.31.0


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/official/resnet/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/training/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/retired_benchmarks/resnet-tf1/official/resnet/__init__.py


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/official/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/training/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/retired_benchmarks/resnet-tf1/official/utils/__init__.py


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/official/utils/arg_parsers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/training/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/retired_benchmarks/resnet-tf1/official/utils/arg_parsers/__init__.py


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/official/utils/export/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/training/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/retired_benchmarks/resnet-tf1/official/utils/export/__init__.py


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/official/utils/logs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/training/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/retired_benchmarks/resnet-tf1/official/utils/logs/__init__.py


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/official/utils/misc/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/training/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/retired_benchmarks/resnet-tf1/official/utils/misc/__init__.py


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/official/utils/testing/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/training/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/retired_benchmarks/resnet-tf1/official/utils/testing/__init__.py


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/reference_data_test/dense/expected_graph:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/training/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/reference_data_test/dense/expected_graph


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/reference_data_test/dense/model.ckpt.data-00000-of-00001:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/training/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/reference_data_test/dense/model.ckpt.data-00000-of-00001


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/reference_data_test/dense/model.ckpt.index:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/training/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/reference_data_test/dense/model.ckpt.index


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/reference_data_test/dense/results.json:
--------------------------------------------------------------------------------
1 | [1, 1, 0.4701630473136902, 0.4701630473136902, 0.4701630473136902]


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/reference_data_test/dense/tf_version.json:
--------------------------------------------------------------------------------
1 | ["1.8.0-dev20180325", "v1.7.0-rc1-750-g6c1737e6c8"]


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/reference_data_test/uniform_random/expected_graph:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/training/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/reference_data_test/uniform_random/expected_graph


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/reference_data_test/uniform_random/model.ckpt.data-00000-of-00001:
--------------------------------------------------------------------------------
1 | ʼ|?


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/reference_data_test/uniform_random/model.ckpt.index:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/training/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/reference_data_test/uniform_random/model.ckpt.index


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/reference_data_test/uniform_random/results.json:
--------------------------------------------------------------------------------
1 | [0.9872556924819946]


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/reference_data_test/uniform_random/tf_version.json:
--------------------------------------------------------------------------------
1 | ["1.8.0-dev20180325", "v1.7.0-rc1-750-g6c1737e6c8"]


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch-size-32_bottleneck_projection_version-1_width-8_channels-4/expected_graph:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/training/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch-size-32_bottleneck_projection_version-1_width-8_channels-4/expected_graph


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch-size-32_bottleneck_projection_version-1_width-8_channels-4/model.ckpt.data-00000-of-00001:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/training/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch-size-32_bottleneck_projection_version-1_width-8_channels-4/model.ckpt.data-00000-of-00001


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch-size-32_bottleneck_projection_version-1_width-8_channels-4/model.ckpt.index:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/training/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch-size-32_bottleneck_projection_version-1_width-8_channels-4/model.ckpt.index


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch-size-32_bottleneck_projection_version-1_width-8_channels-4/results.json:
--------------------------------------------------------------------------------
1 | [32, 8, 8, 4, 0.08920872211456299, 0.8918969631195068, 4064.7060546875, 32, 4, 4, 8, 0.0, 0.10715862363576889, 2344.4775390625]


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch-size-32_bottleneck_projection_version-1_width-8_channels-4/tf_version.json:
--------------------------------------------------------------------------------
1 | ["1.8.0-dev20180408", "v1.7.0-1345-gb874783ccd"]


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch-size-32_bottleneck_projection_version-2_width-8_channels-4/expected_graph:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/training/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch-size-32_bottleneck_projection_version-2_width-8_channels-4/expected_graph


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch-size-32_bottleneck_projection_version-2_width-8_channels-4/model.ckpt.data-00000-of-00001:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/training/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch-size-32_bottleneck_projection_version-2_width-8_channels-4/model.ckpt.data-00000-of-00001


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch-size-32_bottleneck_projection_version-2_width-8_channels-4/model.ckpt.index:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/training/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch-size-32_bottleneck_projection_version-2_width-8_channels-4/model.ckpt.index


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch-size-32_bottleneck_projection_version-2_width-8_channels-4/results.json:
--------------------------------------------------------------------------------
1 | [32, 8, 8, 4, 0.918815016746521, 0.1826801300048828, 4064.4677734375, 32, 4, 4, 8, -1.3153012990951538, 0.011247094720602036, 261.84716796875]


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch-size-32_bottleneck_projection_version-2_width-8_channels-4/tf_version.json:
--------------------------------------------------------------------------------
1 | ["1.8.0-dev20180408", "v1.7.0-1345-gb874783ccd"]


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch-size-32_bottleneck_version-1_width-8_channels-4/expected_graph:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/training/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch-size-32_bottleneck_version-1_width-8_channels-4/expected_graph


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch-size-32_bottleneck_version-1_width-8_channels-4/model.ckpt.data-00000-of-00001:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/training/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch-size-32_bottleneck_version-1_width-8_channels-4/model.ckpt.data-00000-of-00001


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch-size-32_bottleneck_version-1_width-8_channels-4/model.ckpt.index:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/training/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch-size-32_bottleneck_version-1_width-8_channels-4/model.ckpt.index


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch-size-32_bottleneck_version-1_width-8_channels-4/results.json:
--------------------------------------------------------------------------------
1 | [32, 8, 8, 4, 0.1677999496459961, 0.7767924070358276, 4089.44189453125, 32, 8, 8, 4, 0.8615571856498718, 1.1359407901763916, 5806.876953125]


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch-size-32_bottleneck_version-1_width-8_channels-4/tf_version.json:
--------------------------------------------------------------------------------
1 | ["1.8.0-dev20180408", "v1.7.0-1345-gb874783ccd"]


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch-size-32_bottleneck_version-2_width-8_channels-4/expected_graph:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/training/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch-size-32_bottleneck_version-2_width-8_channels-4/expected_graph


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch-size-32_bottleneck_version-2_width-8_channels-4/model.ckpt.data-00000-of-00001:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/training/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch-size-32_bottleneck_version-2_width-8_channels-4/model.ckpt.data-00000-of-00001


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch-size-32_bottleneck_version-2_width-8_channels-4/model.ckpt.index:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/training/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch-size-32_bottleneck_version-2_width-8_channels-4/model.ckpt.index


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch-size-32_bottleneck_version-2_width-8_channels-4/results.json:
--------------------------------------------------------------------------------
1 | [32, 8, 8, 4, 0.8239736557006836, 0.3485994338989258, 4108.87548828125, 32, 8, 8, 4, 0.16798323392868042, -0.2975311279296875, 2860.068359375]


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch-size-32_bottleneck_version-2_width-8_channels-4/tf_version.json:
--------------------------------------------------------------------------------
1 | ["1.8.0-dev20180408", "v1.7.0-1345-gb874783ccd"]


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch-size-32_building_projection_version-1_width-8_channels-4/expected_graph:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/training/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch-size-32_building_projection_version-1_width-8_channels-4/expected_graph


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch-size-32_building_projection_version-1_width-8_channels-4/model.ckpt.data-00000-of-00001:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/training/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch-size-32_building_projection_version-1_width-8_channels-4/model.ckpt.data-00000-of-00001


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch-size-32_building_projection_version-1_width-8_channels-4/model.ckpt.index:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/training/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch-size-32_building_projection_version-1_width-8_channels-4/model.ckpt.index


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch-size-32_building_projection_version-1_width-8_channels-4/results.json:
--------------------------------------------------------------------------------
1 | [32, 8, 8, 4, 0.5349493026733398, 0.5126370191574097, 4070.01220703125, 32, 4, 4, 8, 0.0, 2.7680201530456543, 2341.23486328125]


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch-size-32_building_projection_version-1_width-8_channels-4/tf_version.json:
--------------------------------------------------------------------------------
1 | ["1.8.0-dev20180408", "v1.7.0-1345-gb874783ccd"]


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch-size-32_building_projection_version-2_width-8_channels-4/expected_graph:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/training/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch-size-32_building_projection_version-2_width-8_channels-4/expected_graph


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch-size-32_building_projection_version-2_width-8_channels-4/model.ckpt.data-00000-of-00001:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/training/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch-size-32_building_projection_version-2_width-8_channels-4/model.ckpt.data-00000-of-00001


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch-size-32_building_projection_version-2_width-8_channels-4/model.ckpt.index:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/training/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch-size-32_building_projection_version-2_width-8_channels-4/model.ckpt.index


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch-size-32_building_projection_version-2_width-8_channels-4/results.json:
--------------------------------------------------------------------------------
1 | [32, 8, 8, 4, 0.7820245027542114, 0.8173515796661377, 4095.256591796875, 32, 4, 4, 8, 0.0679062008857727, 0.009305447340011597, -137.36178588867188]


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch-size-32_building_projection_version-2_width-8_channels-4/tf_version.json:
--------------------------------------------------------------------------------
1 | ["1.8.0-dev20180408", "v1.7.0-1345-gb874783ccd"]


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch-size-32_building_version-1_width-8_channels-4/expected_graph:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/training/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch-size-32_building_version-1_width-8_channels-4/expected_graph


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch-size-32_building_version-1_width-8_channels-4/model.ckpt.data-00000-of-00001:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/training/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch-size-32_building_version-1_width-8_channels-4/model.ckpt.data-00000-of-00001


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch-size-32_building_version-1_width-8_channels-4/model.ckpt.index:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/training/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch-size-32_building_version-1_width-8_channels-4/model.ckpt.index


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch-size-32_building_version-1_width-8_channels-4/results.json:
--------------------------------------------------------------------------------
1 | [32, 8, 8, 4, 0.23128163814544678, 0.22117376327514648, 4100.51806640625, 32, 8, 8, 4, 1.1768392324447632, 0.2728465795516968, 5832.6416015625]


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch-size-32_building_version-1_width-8_channels-4/tf_version.json:
--------------------------------------------------------------------------------
1 | ["1.8.0-dev20180408", "v1.7.0-1345-gb874783ccd"]


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch-size-32_building_version-2_width-8_channels-4/expected_graph:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/training/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch-size-32_building_version-2_width-8_channels-4/expected_graph


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch-size-32_building_version-2_width-8_channels-4/model.ckpt.data-00000-of-00001:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/training/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch-size-32_building_version-2_width-8_channels-4/model.ckpt.data-00000-of-00001


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch-size-32_building_version-2_width-8_channels-4/model.ckpt.index:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/training/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch-size-32_building_version-2_width-8_channels-4/model.ckpt.index


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch-size-32_building_version-2_width-8_channels-4/results.json:
--------------------------------------------------------------------------------
1 | [32, 8, 8, 4, 0.7616699934005737, 0.5485763549804688, 4106.8720703125, 32, 8, 8, 4, -0.056346118450164795, 0.5792689919471741, 2972.37255859375]


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch-size-32_building_version-2_width-8_channels-4/tf_version.json:
--------------------------------------------------------------------------------
1 | ["1.8.0-dev20180408", "v1.7.0-1345-gb874783ccd"]


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch_norm/expected_graph:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/training/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch_norm/expected_graph


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch_norm/model.ckpt.data-00000-of-00001:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/training/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch_norm/model.ckpt.data-00000-of-00001


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch_norm/model.ckpt.index:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/training/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch_norm/model.ckpt.index


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch_norm/results.json:
--------------------------------------------------------------------------------
1 | [32, 16, 16, 3, 0.9722558259963989, 0.18413543701171875, 12374.20703125, 32, 16, 16, 3, 1.6126631498336792, -1.096894383430481, -0.041595458984375]


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/official/utils/testing/reference_data/resnet/batch_norm/tf_version.json:
--------------------------------------------------------------------------------
1 | ["1.8.0-dev20180408", "v1.7.0-1345-gb874783ccd"]


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/preprocess.sh:
--------------------------------------------------------------------------------
1 | # TODO
2 | 


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/requirements.txt:
--------------------------------------------------------------------------------
1 | psutil>=5.4.3
2 | py-cpuinfo>=3.3.0
3 | google-cloud-bigquery>=0.31.0
4 | mlperf_compliance==0.0.6
5 | 


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf1/run.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #
 3 | # Launch official/resnet/imagenet_main.py for ResNet size 50, version 1.
 4 | #
 5 | # Usage: run.sh RANDOM_SEED QUALITY
 6 | #   RANDOM_SEED  passed as the first positional argument to imagenet_main.py
 7 | #                and used as the suffix of the model directory name
 8 | #   QUALITY      passed to imagenet_main.py as --stop_threshold
 9 | 
10 | RANDOM_SEED=$1
11 | QUALITY=$2
12 | # Abort the script as soon as any following command fails.
13 | set -e
14 | 
15 | # Register the model as a source root
16 | export PYTHONPATH="$(pwd):${PYTHONPATH}"
17 | 
18 | # One model directory per seed, so runs with different seeds do not share it.
19 | MODEL_DIR="/tmp/resnet_imagenet_${RANDOM_SEED}"
20 | 
21 | python3 official/resnet/imagenet_main.py $RANDOM_SEED --data_dir /imn/imagenet/combined/  \
22 |   --model_dir $MODEL_DIR --train_epochs 10000 --stop_threshold $QUALITY --batch_size 64 \
23 |   --version 1 --resnet_size 50 --epochs_between_evals 4
24 | 
25 | # To run on 8xV100s, instead run:
26 | #python3 official/resnet/imagenet_main.py $RANDOM_SEED --data_dir /imn/imagenet/combined/ \
27 | #   --model_dir $MODEL_DIR --train_epochs 10000 --stop_threshold $QUALITY --batch_size 1024 \
28 | #   --version 1 --resnet_size 50 --dtype fp16 --num_gpus 8 \
29 | #   --epochs_between_evals 4
30 | 


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf2/download_dataset.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | # TODO
4 | 


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf2/tensorflow2/tf2_common/utils/flags/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/training/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/retired_benchmarks/resnet-tf2/tensorflow2/tf2_common/utils/flags/__init__.py


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf2/tensorflow2/tf2_common/utils/logs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/training/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/retired_benchmarks/resnet-tf2/tensorflow2/tf2_common/utils/logs/__init__.py


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf2/tensorflow2/tf2_common/utils/mlp_log/__init__.py:
--------------------------------------------------------------------------------
1 | """MLP Logging."""
2 | 


--------------------------------------------------------------------------------
/retired_benchmarks/resnet-tf2/verify_dataset.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | # TODO
4 | 


--------------------------------------------------------------------------------
/retired_benchmarks/rnnt/pytorch/.dockerignore:
--------------------------------------------------------------------------------
1 | checkpoints/
2 | tb_*/
3 | results/
4 | __pycache__
5 | _legacy/
6 | lightning_logs/
7 | 


--------------------------------------------------------------------------------
/retired_benchmarks/rnnt/pytorch/NOTICE:
--------------------------------------------------------------------------------
1 | RNN-T in PyTorch
2 | 
3 | This repository includes source code (in "rnnt/") from:
4 | * https://github.com/keithito/tacotron and https://github.com/ryanleary/patter licensed under MIT license.
5 | 
6 | 


--------------------------------------------------------------------------------
/retired_benchmarks/rnnt/pytorch/common/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/training/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/retired_benchmarks/rnnt/pytorch/common/__init__.py


--------------------------------------------------------------------------------
/retired_benchmarks/rnnt/pytorch/common/data/__init__.py:
--------------------------------------------------------------------------------
1 | from .helpers import *
2 | 


--------------------------------------------------------------------------------
/retired_benchmarks/rnnt/pytorch/common/data/dali/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #           http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/retired_benchmarks/rnnt/pytorch/common/data/helpers.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #           http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from common.data.dali.data_loader import DaliDataLoader
16 | 
17 | 
18 | def dataset_size(dataset):
19 |     if isinstance(dataset, DaliDataLoader): # DALI
20 |         return dataset.dataset_size
21 |     else: # PyTorch
22 |         return dataset.sampler.num_samples
23 | 


--------------------------------------------------------------------------------
/retired_benchmarks/rnnt/pytorch/common/text/symbols.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2017 Keith Ito
 2 | """ from https://github.com/keithito/tacotron """
 3 | 
 4 | '''
 5 | Defines the set of symbols used in text input to the model.
 6 | 
 7 | The default is a set of ASCII characters that works well for English or text that has been run through Unidecode. For other data, you can modify _characters. See TRAINING_DATA.md for details. '''
 8 | from . import cmudict
 9 | 
10 | _pad        = '_'
11 | _punctuation = '!\'(),.:;? '
12 | _special = '-'
13 | _letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
14 | 
15 | # Prepend "@" to ARPAbet symbols to ensure uniqueness (some are the same as uppercase letters):
16 | _arpabet = ['@' + s for s in cmudict.valid_symbols]
17 | 
18 | # Export all symbols:
19 | symbols = [_pad] + list(_special) + list(_punctuation) + list(_letters) + _arpabet
20 | 


--------------------------------------------------------------------------------
/retired_benchmarks/rnnt/pytorch/docker-compose.yaml:
--------------------------------------------------------------------------------
 1 | version: '3.3'
 2 | services:
 3 |     # Interactive container built from tests/Dockerfile.
 4 |     test:
 5 |         deploy:
 6 |             resources:
 7 |                 reservations:
 8 |                     devices:
 9 |                         # Reserve devices that advertise the "gpu" capability.
10 |                         - capabilities:
11 |                             - gpu
12 |         build:
13 |             context: .
14 |             dockerfile: tests/Dockerfile
15 |         volumes:
16 |             # The directory holding this compose file, mounted at /code.
17 |             - .:/code
18 |             # Host LibriSpeech location. Set LIBRISPEECH_DIR to point at a local copy;
19 |             # when it is unset or empty the previous hard-coded path is used.
20 |             - ${LIBRISPEECH_DIR:-/mnt/mwawrzos/storage/datasets/LibriSpeech/LibriSpeech}:/datasets/LibriSpeech
21 |         stdin_open: true
22 |         tty: true
23 | 


--------------------------------------------------------------------------------
/retired_benchmarks/rnnt/pytorch/mlperf/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/training/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/retired_benchmarks/rnnt/pytorch/mlperf/__init__.py


--------------------------------------------------------------------------------
/retired_benchmarks/rnnt/pytorch/requirements.txt:
--------------------------------------------------------------------------------
 1 | https://github.com/NVIDIA/dllogger/archive/26a0f8f1958de2c0c460925ff6102a4d2486d6cc.zip
 2 | https://github.com/mlcommons/logging/archive/d08740cadb4188a5ebeb84ad6c68f98c1e129805.zip
 3 | tensorboard==2.3.0
 4 | unidecode==1.1.1
 5 | inflect==4.1.0
 6 | soundfile==0.10.3.post1
 7 | librosa==0.8.0
 8 | sox==1.4.1
 9 | sentencepiece==0.1.94
10 | pandas==1.1.5
11 | 


--------------------------------------------------------------------------------
/retired_benchmarks/rnnt/pytorch/scripts/docker/build.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | # Build an image from the current directory and tag it mlperf/rnn_speech_recognition.
4 | docker build --rm --tag mlperf/rnn_speech_recognition .
5 | 


--------------------------------------------------------------------------------
/retired_benchmarks/rnnt/pytorch/scripts/inference_benchmark.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #     http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | # Export every variable assigned below so the child script inherits it.
18 | set -o allexport
19 | 
20 | # Benchmark defaults; a non-empty value already set by the caller is kept.
21 | : "${CUDNN_BENCHMARK:=true}"
22 | : "${MAX_DURATION:=36}"
23 | : "${PAD_TO_MAX_DURATION:=true}"
24 | 
25 | # Hand over to the regular inference script, forwarding all arguments.
26 | bash ./scripts/inference.sh "$@"
27 | 


--------------------------------------------------------------------------------
/retired_benchmarks/rnnt/pytorch/tests/Dockerfile:
--------------------------------------------------------------------------------
 1 | # Test image built on top of nvcr.io/nvidia/pytorch:20.10-py3.
 2 | FROM nvcr.io/nvidia/pytorch:20.10-py3
 3 | 
 4 | # Upgrade pip, then install the packages listed in tests/requirements.txt.
 5 | COPY tests/requirements.txt .
 6 | RUN pip install --upgrade pip && \
 7 |     pip install -r requirements.txt
 8 | 
 9 | # Install the top-level requirements.txt; this COPY replaces the file copied above,
10 | # because both are written to requirements.txt in the current directory.
11 | COPY requirements.txt .
12 | RUN pip install -r requirements.txt
13 | 
14 | 
15 | 
16 | WORKDIR /code
17 | 
18 | # Default command: start a bash shell.
19 | CMD bash
20 | 


--------------------------------------------------------------------------------
/retired_benchmarks/rnnt/pytorch/tests/requirements.txt:
--------------------------------------------------------------------------------
1 | pytest==6.1.2
2 | 


--------------------------------------------------------------------------------
/retired_benchmarks/rnnt/pytorch/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/training/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/retired_benchmarks/rnnt/pytorch/utils/__init__.py


--------------------------------------------------------------------------------
/retired_benchmarks/rnnt/pytorch/utils/inference_librispeech.csv:
--------------------------------------------------------------------------------
1 | url,md5
2 | http://www.openslr.org/resources/12/dev-clean.tar.gz,42e2234ba48799c1f50f24a7926300a1
3 | http://www.openslr.org/resources/12/dev-other.tar.gz,c8d0bcc9cca99d4f8b62fcc847357931
4 | http://www.openslr.org/resources/12/test-clean.tar.gz,32fa31d27d2e1cad72775fee3f4849a9
5 | http://www.openslr.org/resources/12/test-other.tar.gz,fb5a50374b501bb3bac4815ee91d3135
6 | 


--------------------------------------------------------------------------------
/retired_benchmarks/rnnt/pytorch/utils/librispeech.csv:
--------------------------------------------------------------------------------
1 | url,md5
2 | http://www.openslr.org/resources/12/dev-clean.tar.gz,42e2234ba48799c1f50f24a7926300a1
3 | http://www.openslr.org/resources/12/dev-other.tar.gz,c8d0bcc9cca99d4f8b62fcc847357931
4 | http://www.openslr.org/resources/12/test-clean.tar.gz,32fa31d27d2e1cad72775fee3f4849a9
5 | http://www.openslr.org/resources/12/test-other.tar.gz,fb5a50374b501bb3bac4815ee91d3135
6 | http://www.openslr.org/resources/12/train-clean-100.tar.gz,2a93770f6d5c6c964bc36631d331a522
7 | http://www.openslr.org/resources/12/train-clean-360.tar.gz,c0e676e450a7ff2f54aeade5171606fa
8 | http://www.openslr.org/resources/12/train-other-500.tar.gz,d1a0fd59409feb2c614ce4d30c387708
9 | 


--------------------------------------------------------------------------------
/retired_benchmarks/ssd-v1/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM pytorch/pytorch:1.6.0-cuda10.1-cudnn7-runtime
 2 | 
 3 | # Working directory for the remaining instructions and for the copied SSD code
 4 | WORKDIR /mlperf/ssd
 5 | 
 6 | # Zone info needed by tzdata: the TZ variable plus the /etc/localtime symlink
 7 | ENV TZ=America/New_York
 8 | RUN ln -fs /usr/share/zoneinfo/America/New_York /etc/localtime
 9 | 
10 | # Install system dependencies (index update and install share one layer)
11 | RUN apt-get update && \
12 |     apt-get install -y python3-tk python-pip numactl git
13 | 
14 | RUN pip install --upgrade pip
15 | 
16 | # Python packages: cython first, then the pinned logging archive, then /requirements.txt
17 | COPY requirements.txt /requirements.txt
18 | 
19 | RUN pip install --no-cache-dir cython \
20 |  && pip install --no-cache-dir https://github.com/mlperf/logging/archive/9ea0afa.zip \
21 |  && pip install --no-cache-dir -r /requirements.txt
22 | 
23 | # Copy the contents of the ssd/ directory into the working directory
24 | COPY ssd .
25 | 


--------------------------------------------------------------------------------
/retired_benchmarks/ssd-v1/download_dataset.sh:
--------------------------------------------------------------------------------
1 | # Get COCO 2017 data sets
2 | dir=$(pwd)
3 | mkdir /coco; cd /coco
4 | curl -O http://images.cocodataset.org/zips/train2017.zip; unzip train2017.zip
5 | curl -O http://images.cocodataset.org/zips/val2017.zip; unzip val2017.zip
6 | curl -O http://images.cocodataset.org/annotations/annotations_trainval2017.zip; unzip annotations_trainval2017.zip
7 | cd $dir
8 | 


--------------------------------------------------------------------------------
/retired_benchmarks/ssd-v1/download_resnet34_backbone.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | # Download resnet34-333f7ec4.pth into ssd/.
4 | # Abort when ssd/ cannot be entered, so the file never lands in another directory.
5 | cd ssd/ || exit 1
6 | curl -O https://download.pytorch.org/models/resnet34-333f7ec4.pth
7 | 
8 | 


--------------------------------------------------------------------------------
/retired_benchmarks/ssd-v1/requirements.txt:
--------------------------------------------------------------------------------
 1 | Cython==0.28.4
 2 | # Fetched over HTTPS: GitHub no longer serves the unauthenticated git:// protocol.
 3 | git+https://github.com/NVIDIA/apex.git@9041a868a1a253172d94b113a963375b9badd030#egg=apex
 4 | mlperf-compliance==0.0.10
 5 | cycler==0.10.0
 6 | kiwisolver==1.0.1
 7 | matplotlib==2.2.2
 8 | numpy==1.19.1
 9 | Pillow==5.2.0
10 | pyparsing==2.2.0
11 | python-dateutil==2.7.3
12 | pytz==2018.5
13 | six==1.11.0
14 | torchvision==0.2.1
15 | pycocotools==2.0.2
16 | 


--------------------------------------------------------------------------------
/retired_benchmarks/ssd-v1/ssd/config_DGX1_32.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | ## DL params
 4 | EXTRA_PARAMS=(
 5 |                --batch-size      "32"
 6 |              )
 7 | 
 8 | ## System run params (DGXSYSTEM is this file's resolved name without "config_" and ".sh"; quoted so paths with spaces work)
 9 | DGXNNODES=1
10 | DGXSYSTEM=$(basename "$(readlink -f "${BASH_SOURCE[0]}")" | sed -e 's/^config_//' -e 's/\.sh$//')
11 | WALLTIME=24:00:00
12 | 
13 | ## System config params
14 | DGXNGPU=1
15 | DGXSOCKETCORES=4
16 | DGXNSOCKET=1
17 | DGXHT=1 	# 2 when HT is on, 1 when HT is off
18 | DGXIBDEVICES=''
19 | 


--------------------------------------------------------------------------------
/retired_benchmarks/ssd-v1/ssd/config_DGX1_singlenode.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | ## DL params
 4 | EXTRA_PARAMS=(
 5 |                --batch-size      "128"
 6 |                --warmup          "2.619685" # 300 iterations * 8 GPUs * 1 nodes * 128 batch size / 117266 non-empty images
 7 |              )
 8 | 
 9 | ## System run params (DGXSYSTEM is this file's resolved name without "config_" and ".sh"; quoted so paths with spaces work)
10 | DGXNNODES=1
11 | DGXSYSTEM=$(basename "$(readlink -f "${BASH_SOURCE[0]}")" | sed -e 's/^config_//' -e 's/\.sh$//')
12 | WALLTIME=12:00:00
13 | 
14 | ## System config params
15 | DGXNGPU=8
16 | DGXSOCKETCORES=20
17 | DGXNSOCKET=1
18 | DGXHT=1 	# 2 when HT is on, 1 when HT is off
19 | DGXIBDEVICES=''
20 | 


--------------------------------------------------------------------------------
/retired_benchmarks/transformer/download_data.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | wget https://raw.githubusercontent.com/tensorflow/models/master/official/transformer/test_data/newstest2014.en -O tensorflow/newstest2014.en
4 | wget https://raw.githubusercontent.com/tensorflow/models/master/official/transformer/test_data/newstest2014.de -O tensorflow/newstest2014.de
5 | 
6 | python3 data_download.py --raw_dir raw_data
7 | 


--------------------------------------------------------------------------------
/retired_benchmarks/transformer/tensorflow/Dockerfile:
--------------------------------------------------------------------------------
 1 | # Image built on nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04 with Python, pip and tensorflow-gpu 1.9.0.
 2 | FROM nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04
 3 | WORKDIR /research
 4 | # Refresh the package index once, in the same layer as every install that relies on it,
 5 | # so a cached layer can never pair an old index with a later install step.
 6 | RUN apt-get update && \
 7 |     apt-get install -y --no-install-recommends \
 8 |         ca-certificates \
 9 |         build-essential \
10 |         git \
11 |         python \
12 |         python-pip && \
13 |     apt-get install -y python-setuptools && \
14 |     apt-get install -y python-pip python3-pip virtualenv htop
15 | ENV HOME /research
16 | ENV PYENV_ROOT $HOME/.pyenv
17 | ENV PATH $PYENV_ROOT/shims:$PYENV_ROOT/bin:$PATH
18 | RUN pip3 install --upgrade numpy scipy sklearn tensorflow-gpu==1.9.0
19 | 
20 | ENV LANG C.UTF-8
21 | ENV LC_ALL C.UTF-8
22 | 
23 | # Copy the build context into the image (ADD copies at build time; nothing is mounted)
24 | ADD . /research/transformer
25 | WORKDIR /research/transformer
26 | RUN pip3 install -r requirements.txt
27 | ENTRYPOINT ["/bin/bash"]
28 | 
29 | 


--------------------------------------------------------------------------------
/retired_benchmarks/transformer/tensorflow/bert/README.md:
--------------------------------------------------------------------------------
1 | README
2 | 


--------------------------------------------------------------------------------
/retired_benchmarks/transformer/tensorflow/bert/__init__.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | # Copyright 2018 The Google AI Language Team Authors.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | 


--------------------------------------------------------------------------------
/retired_benchmarks/transformer/tensorflow/requirements.txt:
--------------------------------------------------------------------------------
 1 | absl-py==0.2.0
 2 | astor==0.6.2
 3 | bleach==1.5.0
 4 | cachetools==2.0.1
 5 | certifi==2018.4.16
 6 | chardet==3.0.4
 7 | gast==0.2.0
 8 | google-api-core==1.1.1
 9 | google-auth==1.4.1
10 | google-cloud-bigquery==1.1.0
11 | google-cloud-core==0.28.1
12 | google-resumable-media==0.3.1
13 | googleapis-common-protos==1.5.3
14 | grpcio==1.11.0
15 | html5lib==0.9999999
16 | idna==2.6
17 | Markdown==2.6.11
18 | numpy==1.14.2
19 | protobuf==3.6.0
20 | psutil==5.4.5
21 | py-cpuinfo==4.0.0
22 | pyasn1==0.4.2
23 | pyasn1-modules==0.2.1
24 | pytz==2018.4
25 | requests==2.18.4
26 | rsa==3.4.2
27 | scikit-learn==0.19.1
28 | scipy==1.0.1
29 | six==1.11.0
30 | sklearn==0.0
31 | tb-nightly==1.8.0a20180420
32 | termcolor==1.1.0
33 | urllib3==1.22
34 | virtualenv==15.0.1
35 | Werkzeug==0.14.1
36 | 


--------------------------------------------------------------------------------
/retired_benchmarks/transformer/tensorflow/run_preprocessing.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -o errexit
 4 | 
 5 | SEED="$1"
 6 | 
 7 | cd /research/transformer
 8 | 
 9 | # TODO: pass SEED on to process_data.py, since it uses a random generator (future PR)
10 | export PYTHONPATH="/research/transformer/transformer:${PYTHONPATH}"
11 | # To put compliance on PYTHONPATH as well:
12 | # export PYTHONPATH=/mlperf/training/compliance:${PYTHONPATH}
13 | 
14 | python3 process_data.py --raw_dir /raw_data/ --data_dir processed_data
15 | 


--------------------------------------------------------------------------------
/retired_benchmarks/transformer/tensorflow/run_training.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -o errexit
 4 | 
 5 | # Positional arguments: the random seed, then the BLEU threshold.
 6 | SEED="$1"
 7 | QUALITY="$2"
 8 | 
 9 | cd /research/transformer
10 | 
11 | export PYTHONPATH="/research/transformer/transformer:${PYTHONPATH}"
12 | # To put compliance on PYTHONPATH as well:
13 | # export PYTHONPATH=/mlperf/training/compliance:${PYTHONPATH}
14 | 
15 | python3 transformer/transformer_main.py \
16 |     --random_seed=${SEED} \
17 |     --data_dir=processed_data/ \
18 |     --model_dir=model \
19 |     --params=big \
20 |     --bleu_threshold ${QUALITY} \
21 |     --bleu_source=newstest2014.en \
22 |     --bleu_ref=newstest2014.de
23 | 

--------------------------------------------------------------------------------
/retired_benchmarks/transformer/tensorflow/transformer/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/training/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/retired_benchmarks/transformer/tensorflow/transformer/__init__.py


--------------------------------------------------------------------------------
/retired_benchmarks/transformer/tensorflow/transformer/model/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/training/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/retired_benchmarks/transformer/tensorflow/transformer/model/__init__.py


--------------------------------------------------------------------------------
/retired_benchmarks/transformer/tensorflow/transformer/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/training/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/retired_benchmarks/transformer/tensorflow/transformer/utils/__init__.py


--------------------------------------------------------------------------------
/retired_benchmarks/unet3d/pytorch/Dockerfile:
--------------------------------------------------------------------------------
 1 | # Base image is selectable at build time through the FROM_IMAGE_NAME build argument.
 2 | ARG FROM_IMAGE_NAME=pytorch/pytorch:1.7.1-cuda11.0-cudnn8-runtime
 3 | #ARG FROM_IMAGE_NAME=nvcr.io/nvidia/pytorch:21.02-py3
 4 | FROM ${FROM_IMAGE_NAME}
 5 | 
 6 | # Copy the build context into the image and work from there.
 7 | ADD . /workspace/unet3d
 8 | WORKDIR /workspace/unet3d
 9 | 
10 | # Index refresh, upgrade and all package installs share one layer, so vim is never
11 | # installed against a package index left over in an older cached layer.
12 | RUN apt-get update && \
13 |     apt-get upgrade -y && \
14 |     apt-get install -y git vim
15 | 
16 | RUN pip install --upgrade pip
17 | RUN pip install --disable-pip-version-check -r requirements.txt
18 | 
19 | #RUN pip uninstall -y apex; pip uninstall -y apex; git clone --branch seryilmaz/fused_dropout_softmax  https://github.com/seryilmaz/apex.git; cd apex;  pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" --global-option="--xentropy" --global-option="--deprecated_fused_adam" --global-option="--deprecated_fused_lamb" --global-option="--fast_multihead_attn" .
20 | 


--------------------------------------------------------------------------------
/retired_benchmarks/unet3d/pytorch/evaluation_cases.txt:
--------------------------------------------------------------------------------
 1 | 00000
 2 | 00003
 3 | 00005
 4 | 00006
 5 | 00012
 6 | 00024
 7 | 00034
 8 | 00041
 9 | 00044
10 | 00049
11 | 00052
12 | 00056
13 | 00061
14 | 00065
15 | 00066
16 | 00070
17 | 00076
18 | 00078
19 | 00080
20 | 00084
21 | 00086
22 | 00087
23 | 00092
24 | 00111
25 | 00112
26 | 00125
27 | 00128
28 | 00138
29 | 00157
30 | 00160
31 | 00161
32 | 00162
33 | 00169
34 | 00171
35 | 00176
36 | 00185
37 | 00187
38 | 00189
39 | 00198
40 | 00203
41 | 00206
42 | 00207


--------------------------------------------------------------------------------
/retired_benchmarks/unet3d/pytorch/requirements.txt:
--------------------------------------------------------------------------------
1 | git+https://github.com/NVIDIA/dllogger
2 | https://github.com/mlcommons/logging/archive/refs/tags/1.1.0-rc4.zip
3 | nibabel==3.2.1
4 | scipy==1.5.2


--------------------------------------------------------------------------------
/single_stage_detector/.dockerignore:
--------------------------------------------------------------------------------
1 | mlcube/workspace/


--------------------------------------------------------------------------------
/single_stage_detector/requirements.txt:
--------------------------------------------------------------------------------
1 | scikit-image>=0.15.0
2 | ujson>=4.0.2
3 | matplotlib>=3.5.1
4 | pycocotools>=2.0.4
5 | git+https://github.com/mlcommons/logging.git@1.1.0-rc4
6 | fiftyone==0.15.1
7 | 


--------------------------------------------------------------------------------
/single_stage_detector/scripts/download_backbone.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | # Download resnext50_32x4d-7cdf4587.pth and verify its SHA-512 checksum.
 4 | # Usage: download_backbone.sh [--model_dir=DIR]    (DIR defaults to ./)
 5 | 
 6 | DOWNLOAD_LINK='https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth'
 7 | SHA512='15c9f0bc1c8d64750712f86ffaded3b0bc6a87e77a395dcda3013d8af65b7ebf3ca1c24dd3aae60c0d83e510b4d27731f0526b6f9392c0a85ffc18e5fecd8a13'
 8 | FILENAME='resnext50_32x4d-7cdf4587.pth'
 9 | FOLDER_PATH="./"
10 | 
11 | # Handle MLCube parameters; arguments that are not recognised are ignored.
12 | while [ $# -gt 0 ]; do
13 |   case "$1" in
14 |     --model_dir=*)
15 |       FOLDER_PATH="${1#*=}"
16 |       ;;
17 |     *)
18 |       ;;
19 |   esac
20 |   shift
21 | done
22 | 
23 | # Quoted so that a target directory containing spaces stays a single argument.
24 | wget -c "$DOWNLOAD_LINK" -P "$FOLDER_PATH"
25 | echo "${SHA512}  ${FOLDER_PATH}/${FILENAME}" | sha512sum -c
26 | 


--------------------------------------------------------------------------------
/single_stage_detector/scripts/download_openimages_demo.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Run fiftyone_openimages.py for the classes listed in MLPERF_CLASSES.
 4 | # The dataset directory comes from $DATASET_PATH, -d/--dataset-path, or --data_dir=.
 5 | 
 6 | : "${DATASET_PATH:=/datasets/open-images-v6-mlperf}"
 7 | 
 8 | while [ "$1" != "" ]; do
 9 |   case $1 in
10 |   -d | --dataset-path)
11 |     shift
12 |     DATASET_PATH=$1
13 |     ;;
14 |   --data_dir=*)
15 |     # Honoured only when started from /workspace/single_stage_detector/ssd;
16 |     # in that case the script also moves to ../scripts before running.
17 |     if [[ "$PWD" = /workspace/single_stage_detector/ssd ]]; then
18 |       cd ../scripts || exit 1
19 |       DATASET_PATH="${1#*=}"
20 |     fi
21 |     ;;
22 |   esac
23 |   shift
24 | done
25 | 
26 | echo "saving to"
27 | # Quoted so that a path containing spaces or glob characters is used verbatim.
28 | echo "$DATASET_PATH"
29 | ls "$DATASET_PATH"
30 | 
31 | MLPERF_CLASSES=('Apple' 'Banana')
32 | 
33 | python fiftyone_openimages.py \
34 |   --dataset-dir="${DATASET_PATH}" \
35 |   --output-labels="openimages-mlperf.json" \
36 |   --classes "${MLPERF_CLASSES[@]}"
37 | 


--------------------------------------------------------------------------------
/single_stage_detector/scripts/download_openimages_full.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Run fiftyone_openimages.py against the dataset directory.
 4 | # The directory comes from $DATASET_PATH or from -d/--dataset-path.
 5 | 
 6 | : "${DATASET_PATH:=/datasets/open-images-v6}"
 7 | 
 8 | while [ "$1" != "" ]; do
 9 |     case $1 in
10 |         -d | --dataset-path  )        shift
11 |                                       DATASET_PATH=$1
12 |                                       ;;
13 |     esac
14 |     shift
15 | done
16 | 
17 | # Quoted so that a path containing spaces is passed as one argument.
18 | python fiftyone_openimages.py \
19 |     --dataset-dir="${DATASET_PATH}"
20 | 


--------------------------------------------------------------------------------
/single_stage_detector/ssd/.gitignore:
--------------------------------------------------------------------------------
 1 | # Byte-compiled / optimized / DLL files
 2 | __pycache__/
 3 | *.py[cod]
 4 | *$py.class
 5 | 
 6 | # Installer logs
 7 | pip-log.txt
 8 | pip-delete-this-directory.txt
 9 | 
10 | # Unit test / coverage reports
11 | htmlcov/
12 | .tox/
13 | .nox/
14 | .coverage
15 | .coverage.*
16 | .cache
17 | nosetests.xml
18 | coverage.xml
19 | *.cover
20 | .hypothesis/
21 | .pytest_cache/
22 | 
23 | # IPython
24 | profile_default/
25 | ipython_config.py
26 | 
27 | # Environments
28 | .env
29 | .venv
30 | env/
31 | venv/
32 | ENV/
33 | env.bak/
34 | venv.bak/
35 | 
36 | # IDEs
37 | .idea/
38 | .vscode/
39 | 
40 | # binary files
41 | *.pth
42 | *.pickle
43 | *.onnx
44 | 
45 | # Misc
46 | torch-model-cache/
47 | nogit/
48 | TODO
49 | *.log
50 | 
51 | 


--------------------------------------------------------------------------------
/single_stage_detector/ssd/config_DGXA100_001x08x032.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | ## DL params
 4 | export BATCHSIZE=32
 5 | export NUMEPOCHS=${NUMEPOCHS:-8}
 6 | export DATASET_DIR="/datasets/open-images-v6-mlperf"
 7 | export EXTRA_PARAMS='--lr 0.0001 --output-dir=/results'
 8 | 
 9 | ## System run parms
10 | export DGXNNODES=1
11 | export DGXSYSTEM=$(basename $(readlink -f ${BASH_SOURCE[0]}) | sed 's/^config_//' | sed 's/\.sh$//' )
12 | export WALLTIME=08:00:00
13 | 
14 | ## System config params
15 | export DGXNGPU=8
16 | export DGXSOCKETCORES=64
17 | export DGXNSOCKET=2
18 | export DGXHT=2  # HT is on is 2, HT off is 1
19 | 


--------------------------------------------------------------------------------
/single_stage_detector/ssd/config_DGXA100_002x08x016.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | ## DL params
 4 | export BATCHSIZE=16
 5 | export NUMEPOCHS=${NUMEPOCHS:-8}
 6 | export DATASET_DIR="/datasets/open-images-v6-mlperf"
 7 | export EXTRA_PARAMS='--lr 0.0001 --output-dir=/results'
 8 | 
 9 | ## System run parms
10 | export DGXNNODES=2
11 | export DGXSYSTEM=$(basename $(readlink -f ${BASH_SOURCE[0]}) | sed 's/^config_//' | sed 's/\.sh$//' )
12 | export WALLTIME=04:00:00
13 | 
14 | ## System config params
15 | export DGXNGPU=8
16 | export DGXSOCKETCORES=64
17 | export DGXNSOCKET=2
18 | export DGXHT=2  # HT is on is 2, HT off is 1
19 | 


--------------------------------------------------------------------------------
/single_stage_detector/ssd/config_DGXA100_008x08x004_inference_benchmark.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | ## DL params
 4 | export BATCHSIZE=4
 5 | export NUMEPOCHS=${NUMEPOCHS:-15}
 6 | export DATASET_DIR="/datasets/open-images-v6-mlperf"
 7 | export EXTRA_PARAMS='--lr 0.0001 --output-dir=/results --target-map 0.99'
 8 | 
 9 | ## System run parms
10 | export DGXNNODES=8
11 | export DGXSYSTEM=$(basename $(readlink -f ${BASH_SOURCE[0]}) | sed 's/^config_//' | sed 's/\.sh$//' )
12 | export WALLTIME=04:00:00
13 | 
14 | ## System config params
15 | export DGXNGPU=8
16 | export DGXSOCKETCORES=64
17 | export DGXNSOCKET=2
18 | export DGXHT=2  # HT is on is 2, HT off is 1
19 | 


--------------------------------------------------------------------------------
/single_stage_detector/ssd/config_DGXA100_008x08x008.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | ## DL params
 4 | export BATCHSIZE=8
 5 | export NUMEPOCHS=${NUMEPOCHS:-8}
 6 | export DATASET_DIR="/datasets/open-images-v6-mlperf"
 7 | export EXTRA_PARAMS='--lr 0.0001 --output-dir=/results'
 8 | 
 9 | ## System run parms
10 | export DGXNNODES=8
11 | export DGXSYSTEM=$(basename $(readlink -f ${BASH_SOURCE[0]}) | sed 's/^config_//' | sed 's/\.sh$//' )
12 | export WALLTIME=04:00:00
13 | 
14 | ## System config params
15 | export DGXNGPU=8
16 | export DGXSOCKETCORES=64
17 | export DGXNSOCKET=2
18 | export DGXHT=2  # HT is on is 2, HT off is 1
19 | 


--------------------------------------------------------------------------------
/single_stage_detector/ssd/config_DGXA100_032x08x032.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | ## DL params
 4 | export BATCHSIZE=32
 5 | export NUMEPOCHS=${NUMEPOCHS:-35}
 6 | export DATASET_DIR="/datasets/open-images-v6-mlperf"
 7 | export EXTRA_PARAMS='--lr 0.0001 --output-dir=/results'
 8 | 
 9 | ## System run parms
10 | export DGXNNODES=32
11 | export DGXSYSTEM=$(basename $(readlink -f ${BASH_SOURCE[0]}) | sed 's/^config_//' | sed 's/\.sh$//' )
12 | export WALLTIME=04:00:00
13 | 
14 | ## System config params
15 | export DGXNGPU=8
16 | export DGXSOCKETCORES=64
17 | export DGXNSOCKET=2
18 | export DGXHT=2  # HT is on is 2, HT off is 1
19 | 


--------------------------------------------------------------------------------
/single_stage_detector/ssd/model/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/training/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/single_stage_detector/ssd/model/__init__.py


--------------------------------------------------------------------------------
/single_stage_detector/ssd/model/image_list.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from torch import Tensor
 3 | from typing import List, Tuple
 4 | 
 5 | 
 6 | class ImageList(object):
 7 |     """
 8 |     Structure that holds a list of images (of possibly
 9 |     varying sizes) as a single tensor.
10 |     This works by padding the images to the same size,
11 |     and storing in a field the original sizes of each image
12 |     """
13 | 
14 |     def __init__(self, tensors: Tensor, image_sizes: List[Tuple[int, int]]):
15 |         """
16 |         Args:
17 |             tensors (tensor)
18 |             image_sizes (list[tuple[int, int]])
19 |         """
20 |         self.tensors = tensors
21 |         self.image_sizes = image_sizes
22 | 
23 |     def to(self, device: torch.device) -> 'ImageList':
24 |         cast_tensor = self.tensors.to(device)
25 |         return ImageList(cast_tensor, self.image_sizes)
26 | 


--------------------------------------------------------------------------------
/stable_diffusion/.dockerignore:
--------------------------------------------------------------------------------
1 | nogit/
2 | mlperf_compliance.log
3 | 


--------------------------------------------------------------------------------
/stable_diffusion/.gitignore:
--------------------------------------------------------------------------------
1 | nogit/
2 | mlperf_compliance.log
3 | 


--------------------------------------------------------------------------------
/stable_diffusion/Dockerfile:
--------------------------------------------------------------------------------
 1 | ARG FROM_IMAGE_NAME=nvcr.io/nvidia/pytorch:24.01-py3
 2 | FROM ${FROM_IMAGE_NAME}
 3 | 
 4 | ENV DEBIAN_FRONTEND=noninteractive
 5 | ENV RCLONE_VER=v1.67.0
 6 | 
 7 | # apt dependencies (index refresh and install share one layer, so a cached index is never reused)
 8 | RUN apt-get update && \
 9 |     apt-get install -y ffmpeg libsm6 libxext6
10 | 
11 | # Install rclone from upstream, see https://github.com/mlcommons/training/issues/751
12 | RUN wget https://github.com/rclone/rclone/releases/download/${RCLONE_VER}/rclone-${RCLONE_VER}-linux-amd64.zip \
13 |     && unzip rclone-${RCLONE_VER}-linux-amd64.zip \
14 |     && cd rclone-${RCLONE_VER}-linux-amd64 \
15 |     && cp rclone /usr/bin/ \
16 |     && chmod 755 /usr/bin/rclone \
17 |     && rm -rf /rclone-${RCLONE_VER}-linux-amd64* \
18 |     && rclone --version
19 | 
20 | # Remove the opencv version shipped with the base image
21 | # https://github.com/opencv/opencv-python/issues/884
22 | RUN pip uninstall -y opencv
23 | RUN rm -rf /usr/local/lib/python3.10/dist-packages/cv2/
24 | 
25 | # install LDM: copy the build context to /diffusion and install its requirements.txt
26 | COPY . /diffusion
27 | RUN cd /diffusion && \
28 |     pip install --no-cache-dir -r requirements.txt
29 | 


--------------------------------------------------------------------------------
/stable_diffusion/imgs/overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/training/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/stable_diffusion/imgs/overview.png


--------------------------------------------------------------------------------
/stable_diffusion/ldm/data/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/training/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/stable_diffusion/ldm/data/__init__.py


--------------------------------------------------------------------------------
/stable_diffusion/ldm/data/tsv.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | from torch.utils.data import Dataset, DataLoader
 3 | 
 4 | 
 5 | class TsvDataset(Dataset):
 6 |     def __init__(self, annotations_file, keys):
 7 |         self.df = pd.read_csv(annotations_file, sep='\t', header=0)
 8 |         self.keys = keys
 9 | 
10 |     def __len__(self):
11 |         return len(self.df)
12 | 
13 |     def __getitem__(self, idx):
14 |         sample = {}
15 |         for key in self.keys:
16 |             sample[key] = self.df[key].iloc[idx]
17 |         return sample
18 | 
19 | 
def build_dataloader(annotations_file,
                     keys,
                     batch_size,
                     shuffle=False,
                     num_workers=1,
                     pin_memory=True):
    """Create a DataLoader over a TsvDataset built from ``annotations_file``.

    Args:
        annotations_file: path handed to TsvDataset.
        keys: column names each sample should contain.
        batch_size, shuffle, num_workers, pin_memory: forwarded unchanged to
            the DataLoader constructor.

    Returns:
        The configured DataLoader.
    """
    loader_options = dict(
        batch_size=batch_size,
        shuffle=shuffle,
        num_workers=num_workers,
        pin_memory=pin_memory,
    )
    return DataLoader(TsvDataset(annotations_file, keys=keys), **loader_options)
28 | 


--------------------------------------------------------------------------------
/stable_diffusion/ldm/models/diffusion/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/training/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/stable_diffusion/ldm/models/diffusion/__init__.py


--------------------------------------------------------------------------------
/stable_diffusion/ldm/models/diffusion/dpm_solver/__init__.py:
--------------------------------------------------------------------------------
1 | from .sampler import DPMSolverSampler


--------------------------------------------------------------------------------
/stable_diffusion/ldm/models/diffusion/sampling_util.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import numpy as np
 3 | 
 4 | 
 5 | def append_dims(x, target_dims):
 6 |     """Appends dimensions to the end of a tensor until it has target_dims dimensions.
 7 |     From https://github.com/crowsonkb/k-diffusion/blob/master/k_diffusion/utils.py"""
 8 |     dims_to_append = target_dims - x.ndim
 9 |     if dims_to_append < 0:
10 |         raise ValueError(f'input has {x.ndim} dims but target_dims is {target_dims}, which is less')
11 |     return x[(...,) + (None,) * dims_to_append]
12 | 
13 | 
def norm_thresholding(x0, value):
    """Scale ``x0`` by ``value / max(rms, value)`` computed per leading-axis entry.

    The RMS is taken over everything from axis 1 onward, so entries whose RMS
    is at most ``value`` are multiplied by 1 and the rest are scaled down.
    """
    per_entry_rms = x0.pow(2).flatten(1).mean(1).sqrt()
    floored = per_entry_rms.clamp(min=value)
    # Reshape to x0's rank so the division result broadcasts against x0.
    s = append_dims(floored, x0.ndim)
    return x0 * (value / s)
17 | 
18 | 
def spatial_norm_thresholding(x0, value):
    """Scale ``x0`` by ``value / max(rms, value)`` where the RMS is taken over axis 1.

    The original comment labels the layout as ``b c h w``; the reduction keeps
    axis 1 as a singleton so the factor broadcasts back over it.
    """
    # b c h w
    rms_over_axis1 = (x0 ** 2).mean(1, keepdim=True).sqrt()
    s = rms_over_axis1.clamp(min=value)
    scale = value / s
    return x0 * scale


--------------------------------------------------------------------------------
/stable_diffusion/ldm/modules/diffusionmodules/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/training/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/stable_diffusion/ldm/modules/diffusionmodules/__init__.py


--------------------------------------------------------------------------------
/stable_diffusion/ldm/modules/distributions/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/training/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/stable_diffusion/ldm/modules/distributions/__init__.py


--------------------------------------------------------------------------------
/stable_diffusion/ldm/modules/encoders/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/training/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/stable_diffusion/ldm/modules/encoders/__init__.py


--------------------------------------------------------------------------------
/stable_diffusion/ldm/modules/fid/README.md:
--------------------------------------------------------------------------------
1 | Copied from https://github.com/mseitzer/pytorch-fid@0a754fb
2 | 


--------------------------------------------------------------------------------
/stable_diffusion/requirements.txt:
--------------------------------------------------------------------------------
 1 | albumentations==1.3.0
 2 | opencv-python-headless==4.9.0.80
 3 | pudb==2019.2
 4 | prefetch_generator==1.0.3
 5 | imageio==2.9.0
 6 | imageio-ffmpeg==0.4.2
 7 | torchmetrics==0.11.4
 8 | omegaconf==2.1.1
 9 | test-tube>=0.7.5
10 | streamlit>=0.73.1
11 | einops==0.3.0
12 | transformers==4.19.2
13 | webdataset==0.2.5
14 | open-clip-torch==2.7.0
15 | gradio==3.11
16 | lightning==1.9.4
17 | titans==0.0.7
18 | datasets==2.10.1
19 | colossalai==0.2.7
20 | invisible-watermark==0.1.5
21 | diffusers==0.14.0
22 | cloudpathlib==0.13.0
23 | xformers==0.0.24
24 | bitsandbytes==0.37.2
25 | git+https://github.com/mlcommons/logging.git@4.0.0-rc2
26 | 


--------------------------------------------------------------------------------
/stable_diffusion/scripts/checkpoints/download_clip.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | : "${OUTPUT_DIR:=/checkpoints/clip}"
 4 | 
 5 | while [ "$1" != "" ]; do
 6 |     case $1 in
 7 |         -o | --output-dir )       shift
 8 |                                   OUTPUT_DIR=$1
 9 |                                   ;;
10 |     esac
11 |     shift
12 | done
13 | 
14 | CLIP_WEIGHTS_URL="https://huggingface.co/laion/CLIP-ViT-H-14-laion2B-s32B-b79K/resolve/main/open_clip_pytorch_model.bin"
15 | CLIP_WEIGHTS_SHA256="9a78ef8e8c73fd0df621682e7a8e8eb36c6916cb3c16b291a082ecd52ab79cc4"
16 | 
17 | CLIP_CONFIG_URL="https://huggingface.co/laion/CLIP-ViT-H-14-laion2B-s32B-b79K/raw/main/open_clip_config.json"
18 | 
19 | wget -N -P ${OUTPUT_DIR} ${CLIP_WEIGHTS_URL}
20 | wget -N -P ${OUTPUT_DIR} ${CLIP_CONFIG_URL}
21 | echo "${CLIP_WEIGHTS_SHA256}  ${OUTPUT_DIR}/open_clip_pytorch_model.bin"                    | sha256sum -c
22 | 


--------------------------------------------------------------------------------
/stable_diffusion/scripts/checkpoints/download_inception.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | : "${OUTPUT_DIR:=/checkpoints/inception}"
 4 | 
 5 | while [ "$1" != "" ]; do
 6 |     case $1 in
 7 |         -o | --output-dir )       shift
 8 |                                   OUTPUT_DIR=$1
 9 |                                   ;;
10 |     esac
11 |     shift
12 | done
13 | 
14 | FID_WEIGHTS_URL='https://github.com/mseitzer/pytorch-fid/releases/download/fid_weights/pt_inception-2015-12-05-6726825d.pth'
15 | FID_WEIGHTS_SHA1="bd836944fd6db519dfd8d924aa457f5b3c8357ff"
16 | 
17 | wget -N -P ${OUTPUT_DIR} ${FID_WEIGHTS_URL}
18 | echo "${FID_WEIGHTS_SHA1}  ${OUTPUT_DIR}/pt_inception-2015-12-05-6726825d.pth"                    | sha1sum -c
19 | 


--------------------------------------------------------------------------------
/stable_diffusion/scripts/checkpoints/download_sd.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | : "${OUTPUT_DIR:=/checkpoints/sd}"
 4 | 
 5 | while [ "$1" != "" ]; do
 6 |     case $1 in
 7 |         -o | --output-dir )       shift
 8 |                                   OUTPUT_DIR=$1
 9 |                                   ;;
10 |     esac
11 |     shift
12 | done
13 | 
14 | SD_WEIGHTS_URL='https://huggingface.co/stabilityai/stable-diffusion-2-base/resolve/main/512-base-ema.ckpt'
15 | SD_WEIGHTS_SHA256="d635794c1fedfdfa261e065370bea59c651fc9bfa65dc6d67ad29e11869a1824"
16 | 
17 | wget -N -P ${OUTPUT_DIR} ${SD_WEIGHTS_URL}
18 | echo "${SD_WEIGHTS_SHA256}  ${OUTPUT_DIR}/512-base-ema.ckpt"                    | sha256sum -c
19 | 


--------------------------------------------------------------------------------
/stable_diffusion/scripts/datasets/coco-2014-validation-download.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | : "${DOWNLOAD_PATH:=/datasets/coco2014}"
 4 | 
 5 | while [ "$1" != "" ]; do
 6 |     case $1 in
 7 |         -d | --download-path )       shift
 8 |                                      DOWNLOAD_PATH=$1
 9 |                                      ;;
10 |     esac
11 |     shift
12 | done
13 | 
14 | mkdir -p ${DOWNLOAD_PATH}
15 | cd ${DOWNLOAD_PATH}
16 | 
17 | wget -c http://images.cocodataset.org/zips/val2014.zip
18 | wget -c http://images.cocodataset.org/annotations/annotations_trainval2014.zip
19 | 
20 | echo "fbedd73593f242db65cce6bcefde193fcedcc5c0  ./val2014.zip"                    | sha1sum -c
21 | echo "8e0b9df54c175f1688400e98d1a97f292e726870  ./annotations_trainval2014.zip"   | sha1sum -c
22 | 
23 | unzip val2014.zip
24 | unzip annotations_trainval2014.zip
25 | 


--------------------------------------------------------------------------------
/stable_diffusion/scripts/datasets/coco2014-validation-download-prompts.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | : "${OUTPUT_DIR:=/datasets/coco2014}"
 4 | 
 5 | while [ "$1" != "" ]; do
 6 |     case $1 in
 7 |         -o | --output-dir )     shift
 8 |                                 OUTPUT_DIR=$1
 9 |                                 ;;
10 |     esac
11 |     shift
12 | done
13 | 
14 | mkdir -p ${OUTPUT_DIR}
15 | 
16 | rclone config create mlc-training s3 provider=Cloudflare access_key_id=76ea42eadb867e854061a1806220ee1e secret_access_key=a53625c4d45e3ca8ac0df8a353ea3a41ffc3292aa25259addd8b7dc5a6ce2936 endpoint=c2686074cb2caf5cbaf6d134bdba8b47.r2.cloudflarestorage.com
17 | 
18 | rclone copy mlc-training:mlcommons-training-wg-public/stable_diffusion/datasets/coco2014/val2014_30k.tsv ${OUTPUT_DIR} -P
19 | 
20 | 


--------------------------------------------------------------------------------
/stable_diffusion/scripts/datasets/coco2014-validation-download-stats.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | : "${OUTPUT_DIR:=/datasets/coco2014}"
 4 | 
 5 | while [ "$1" != "" ]; do
 6 |     case $1 in
 7 |         -o | --output-dir )     shift
 8 |                                 OUTPUT_DIR=$1
 9 |                                 ;;
10 |     esac
11 |     shift
12 | done
13 | 
14 | mkdir -p ${OUTPUT_DIR}
15 | 
16 | rclone config create mlc-training s3 provider=Cloudflare access_key_id=76ea42eadb867e854061a1806220ee1e secret_access_key=a53625c4d45e3ca8ac0df8a353ea3a41ffc3292aa25259addd8b7dc5a6ce2936 endpoint=c2686074cb2caf5cbaf6d134bdba8b47.r2.cloudflarestorage.com
17 | 
18 | rclone copy mlc-training:mlcommons-training-wg-public/stable_diffusion/datasets/coco2014/val2014_30k_stats.npz ${OUTPUT_DIR} -P
19 | 
20 | 


--------------------------------------------------------------------------------
/stable_diffusion/scripts/datasets/generate-fid-statistics.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | : "${DATASET_DIR:=/datasets/coco2014/val2014_30k}"
 4 | : "${OUTPUT_FILE:=/datasets/coco2014/val2014_30k_stats.npz}"
 5 | 
 6 | while [ "$1" != "" ]; do
 7 |     case $1 in
 8 |         -d | --dataset-dir )      shift
 9 |                                   DATASET_DIR=$1
10 |                                   ;;
11 |         -o | --output-file  )     shift
12 |                                   OUTPUT_FILE=$1
13 |                                   ;;
14 |     esac
15 |     shift
16 | done
17 | 
18 | python ldm/modules/fid/fid_score.py --save-stats ${DATASET_DIR} ${OUTPUT_FILE}
19 | 


--------------------------------------------------------------------------------
/stable_diffusion/scripts/datasets/laion400m-convert-images-to-moments.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | : "${INPUT_FOLDER:=/datasets/laion-400m/webdataset-filtered}"
 4 | : "${OUTPUT_FOLDER:=/datasets/laion-400m/webdataset-latents-filtered}"
 5 | 
 6 | while [ "$1" != "" ]; do
 7 |     case $1 in
 8 |         -i | --input-folder )       shift
 9 |                                     INPUT_FOLDER=$1
10 |                                     ;;
11 |         -o | --output-folder )      shift
12 |                                     OUTPUT_FOLDER=$1
13 |                                     ;;
14 |     esac
15 |     shift
16 | done
17 | 
18 | mkdir -p ${OUTPUT_FOLDER}
19 | 
20 | # Loop over each tar file in the input directory
21 | for tar_file in ${INPUT_FOLDER}/*.tar; do
22 |     file_name=$(basename "$tar_file")
23 |     base_name="${file_name%.*}"
24 |     python webdataset_images2latents.py \
25 |         --input-tar ${tar_file} \
26 |         --output-tar ${OUTPUT_FOLDER}/${base_name}.tar \
27 |         --config configs/train_512.yaml \
28 |         --ckpt /checkpoints/sd/512-base-ema.ckpt
29 | done
30 | 


--------------------------------------------------------------------------------
/stable_diffusion/scripts/datasets/laion400m-download-metadata.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | : "${OUTPUT_DIR:=/datasets/laion-400m/metadata}"
 4 | 
 5 | while [ "$1" != "" ]; do
 6 |     case $1 in
 7 |         -o | --output-dir )     shift
 8 |                                 OUTPUT_DIR=$1
 9 |                                 ;;
10 |     esac
11 |     shift
12 | done
13 | 
14 | mkdir -p ${OUTPUT_DIR}
15 | 
16 | for i in {00000..00031}; do wget -N -P ${OUTPUT_DIR} https://the-eye.eu/public/AI/cah/laion400m-met-release/laion400m-meta/part-$i-5b54c5d5-bbcf-484d-a2ce-0d6f73df1a36-c000.snappy.parquet; done
17 | 


--------------------------------------------------------------------------------
/stable_diffusion/scripts/datasets/laion400m-filter-metadata.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | : "${INPUT_METADATA_DIR:=/datasets/laion-400m/metadata}"
 4 | : "${OUTPUT_METADATA_DIR:=/datasets/laion-400m/metadata-filtered}"
 5 | 
 6 | while [ "$1" != "" ]; do
 7 |     case $1 in
 8 |         -i | --input-metadata-dir )     shift
 9 |                                         INPUT_METADATA_DIR=$1
10 |                                         ;;
11 |         -o | --output-metadata-dir )    shift
12 |                                         OUTPUT_METADATA_DIR=$1
13 |                                         ;;
14 |     esac
15 |     shift
16 | done
17 | 
18 | mkdir -p ${OUTPUT_METADATA_DIR}
19 | 
20 | python scripts/datasets/filter-metadata.py \
21 |     --input-folder ${INPUT_METADATA_DIR} \
22 |     --output-folder ${OUTPUT_METADATA_DIR}
23 | 


--------------------------------------------------------------------------------
/stable_diffusion/scripts/datasets/laion400m-filtered-download-images.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | : "${OUTPUT_DIR:=/datasets/laion-400m/webdataset-filtered}"
 4 | 
 5 | while [ "$1" != "" ]; do
 6 |     case $1 in
 7 |         -o | --output-dir )     shift
 8 |                                 OUTPUT_DIR=$1
 9 |                                 ;;
10 |     esac
11 |     shift
12 | done
13 | 
14 | mkdir -p ${OUTPUT_DIR}
15 | cd ${OUTPUT_DIR}
16 | 
17 | 
18 | rclone config create mlc-training s3 provider=Cloudflare access_key_id=76ea42eadb867e854061a1806220ee1e secret_access_key=a53625c4d45e3ca8ac0df8a353ea3a41ffc3292aa25259addd8b7dc5a6ce2936 endpoint=c2686074cb2caf5cbaf6d134bdba8b47.r2.cloudflarestorage.com
19 | 
20 | rclone copy mlc-training:mlcommons-training-wg-public/stable_diffusion/datasets/laion-400m/images-webdataset-filtered/ ${OUTPUT_DIR} --include="*.tar" -P
21 | 
22 | rclone copy mlc-training:mlcommons-training-wg-public/stable_diffusion/datasets/laion-400m/images-webdataset-filtered/sha512sums.txt ${OUTPUT_DIR} -P
23 | 
24 | sha512sum --quiet -c sha512sums.txt
25 | 


--------------------------------------------------------------------------------
/stable_diffusion/scripts/datasets/laion400m-filtered-download-moments.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | : "${OUTPUT_DIR:=/datasets/laion-400m/webdataset-moments-filtered}"
 4 | 
 5 | while [ "$1" != "" ]; do
 6 |     case $1 in
 7 |         -o | --output-dir )     shift
 8 |                                 OUTPUT_DIR=$1
 9 |                                 ;;
10 |     esac
11 |     shift
12 | done
13 | 
14 | mkdir -p ${OUTPUT_DIR}
15 | cd ${OUTPUT_DIR}
16 | 
17 | 
18 | rclone config create mlc-training s3 provider=Cloudflare access_key_id=76ea42eadb867e854061a1806220ee1e secret_access_key=a53625c4d45e3ca8ac0df8a353ea3a41ffc3292aa25259addd8b7dc5a6ce2936 endpoint=c2686074cb2caf5cbaf6d134bdba8b47.r2.cloudflarestorage.com
19 | 
20 | rclone copy mlc-training:mlcommons-training-wg-public/stable_diffusion/datasets/laion-400m/moments-webdataset-filtered/ ${OUTPUT_DIR} --include="*.tar" -P
21 | 
22 | rclone copy mlc-training:mlcommons-training-wg-public/stable_diffusion/datasets/laion-400m/moments-webdataset-filtered/sha512sums.txt ${OUTPUT_DIR} -P
23 | 
24 | sha512sum --quiet -c sha512sums.txt
25 | 


--------------------------------------------------------------------------------
/stable_diffusion/scripts/docker/build.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | : "${SRC_IMG:=nvcr.io/nvidia/pytorch:24.01-py3}"
 4 | : "${DST_IMG:=mlperf_sd:24.01-py3}"
 5 | 
 6 | while [ "$1" != "" ]; do
 7 |     case $1 in
 8 |         -s | --src-img )        shift
 9 |                                 SRC_IMG=$1
10 |                                 ;;
11 |         -d | --dst-img  )       shift
12 |                                 DST_IMG=$1
13 |                                 ;;
14 |     esac
15 |     shift
16 | done
17 | 
18 | docker build -f Dockerfile . --rm -t ${DST_IMG} --build-arg FROM_IMAGE_NAME=${SRC_IMG}
19 | 


--------------------------------------------------------------------------------
/stable_diffusion/scripts/docker/launch.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | : "${DST_IMG:=mlperf_sd:22.12-py3}"
 4 | 
 5 | while [ "$1" != "" ]; do
 6 |     case $1 in
 7 |         -d | --dst-img  )       shift
 8 |                                 DST_IMG=$1
 9 |                                 ;;
10 |     esac
11 |     shift
12 | done
13 | 
14 | docker run --rm -it --gpus=all --ipc=host \
15 |     -e PYTHONPYCACHEPREFIX=/tmp/.pycache \
16 |     --workdir /pwd \
17 |     -v ${PWD}:/pwd \
18 |     -v /datasets/laion-400m:/datasets/laion-400m \
19 |     -v /datasets/coco2014:/datasets/coco2014 \
20 |     -v /checkpoints:/checkpoints \
21 |     -v /results:/results \
22 |     ${DST_IMG} bash
23 | 


--------------------------------------------------------------------------------