├── .ci └── torchbench │ ├── check-ssh.sh │ ├── install-conda.sh │ ├── install.sh │ └── test.sh ├── .clang-format ├── .flake8 ├── .github ├── scripts │ ├── abtest.py │ ├── bisection-config.sample.yaml │ ├── bmutils │ │ ├── __init__.py │ │ ├── analyze-bisection-result.py │ │ └── summarize.py │ ├── generate-abtest-config.py │ ├── run-bisection.sh │ ├── run-config.py │ ├── run.sh │ ├── test-repeated-runs.py │ └── userbenchmark │ │ ├── __init__.py │ │ ├── aicluster.py │ │ └── schedule-benchmarks.py └── workflows │ ├── _linux-benchmark-cuda.yml │ ├── _linux-test-cpu.yml │ ├── _linux-test-cuda.yml │ ├── bisection.md │ ├── build-gcp-docker.yml │ ├── build-nightly-docker.yml │ ├── clean-nightly-docker.yml │ ├── linux-test-a10g.yml │ ├── pr-test.yml │ ├── torchao.yml │ ├── userbenchmark-a100-bisection.yml │ ├── userbenchmark-a100-release.yml │ ├── userbenchmark-a100.yml │ ├── userbenchmark-ai-cluster.yml │ ├── userbenchmark-c5-24xlarge.yml │ ├── userbenchmark-ibmcloud-testrunner.yml │ ├── userbenchmark-regression-detector.yml │ ├── userbenchmark-t4-metal.yml │ ├── v2-bisection.yml │ ├── v2-nightly.yml │ ├── v3-bisection.yml │ └── v3-nightly.yml ├── .gitignore ├── .gitmodules ├── CITATION.cff ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── bisection.py ├── conftest.py ├── docker ├── build-gcp-a100-docker.sh ├── build-torchbench-nightly-docker.sh ├── gcp-a100-runner-dind.dockerfile ├── infra │ ├── README.md │ ├── daemonset.yaml │ └── values.yaml └── torchbench-nightly.dockerfile ├── gen_summary_metadata.py ├── install.py ├── pyproject.toml ├── regression_detector.py ├── requirements.txt ├── run.py ├── run_benchmark.py ├── run_e2e.py ├── scripts ├── activate_conda.sh ├── install_conda.sh ├── proper_bs.py ├── update_device_batch_size.py ├── upload_scribe.py ├── upload_scribe_v2.py └── userbenchmark │ ├── upload_s3.py │ ├── upload_s3_csv.py │ └── upload_scribe.py ├── setup.py ├── test.py ├── test_bench.py ├── test_imports.py ├── torchbenchmark ├── __init__.py ├── _components │ ├── __init__.py │ ├── _impl │ │ ├── __init__.py │ │ ├── tasks │ │ │ ├── __init__.py │ │ │ └── base.py │ │ └── workers │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── in_process_worker.py │ │ │ ├── subprocess_rpc.py │ │ │ └── subprocess_worker.py │ ├── kineto │ │ ├── __init__.py │ │ └── trace.py │ ├── model_analyzer │ │ ├── TorchBenchAnalyzer.py │ │ ├── __init__.py │ │ ├── dcgm │ │ │ ├── __init__.py │ │ │ ├── cpu_monitor.py │ │ │ ├── dcgm_agent.py │ │ │ ├── dcgm_field_helpers.py │ │ │ ├── dcgm_fields.py │ │ │ ├── dcgm_fields_internal.py │ │ │ ├── dcgm_monitor.py │ │ │ ├── dcgm_structs.py │ │ │ ├── dcgm_value.py │ │ │ ├── monitor.py │ │ │ └── nvml_monitor.py │ │ ├── readme.md │ │ ├── requirements.txt │ │ ├── sync_upstream.md │ │ ├── tb_dcgm_types │ │ │ ├── __init__.py │ │ │ ├── config.py │ │ │ ├── cpu_peak_memory.py │ │ │ ├── cpu_record.py │ │ │ ├── da_exceptions.py │ │ │ ├── gpu_device.py │ │ │ ├── gpu_device_factory.py │ │ │ ├── gpu_dram_active.py │ │ │ ├── gpu_fp32active.py │ │ │ ├── gpu_free_memory.py │ │ │ ├── gpu_pcie_rx.py │ │ │ ├── gpu_pcie_tx.py │ │ │ ├── gpu_peak_memory.py │ │ │ ├── gpu_power_usage.py │ │ │ ├── gpu_record.py │ │ │ ├── gpu_tensoractive.py │ │ │ ├── gpu_utilization.py │ │ │ ├── record.py │ │ │ ├── record_aggregator.py │ │ │ └── tb_logger.py │ │ └── test.py │ ├── ncu │ │ ├── __init__.py │ │ └── analyzer.py │ └── test │ │ ├── __init__.py │ │ ├── test_subprocess.py │ │ └── test_worker.py ├── canary_models │ ├── DALLE2_pytorch │ │ ├── __init__.py │ │ ├── dalle2_pytorch.patch │ │ ├── 
install.py │ │ ├── metadata.yaml │ │ ├── origin │ │ └── requirements.txt │ ├── __init__.py │ ├── codellama │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── diffuser_instruct_pix2pix │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── fambench_dlrm │ │ ├── __init__.py │ │ ├── args.py │ │ ├── config.py │ │ ├── data.py │ │ ├── dlrmnet.py │ │ ├── fbgemm_embedding.py │ │ ├── install.py │ │ ├── lrscheduler.py │ │ ├── metadata.yaml │ │ ├── origin │ │ ├── requirements.txt │ │ └── utils.py │ ├── fambench_xlmr │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── gat │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── gcn │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── hf_GPT2_generate │ │ ├── __init__.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── hf_MPT_7b_instruct │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── hf_Yi │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── hf_mixtral │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── lit_llama │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── lit_llama_generate │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── lit_llama_lora │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── llama_v2_13b │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── llama_v2_70b │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── llama_v2_7b │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── llama_v31_8b │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── mistral_7b_instruct │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── orca_2 │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── phi_1_5 │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── phi_2 │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── sage │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── stable_diffusion_xl │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ └── torchrec_dlrm │ │ ├── __init__.py │ │ ├── args.py │ │ ├── data │ │ └── dlrm_dataloader.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ ├── origin │ │ └── requirements.txt ├── data │ ├── README.md │ └── index.yaml ├── e2e.py ├── e2e_models │ ├── __init__.py │ ├── fambench_xlmr │ │ ├── __init__.py │ │ ├── install.py │ │ └── requirements.txt │ ├── hf_bert │ │ ├── __init__.py │ │ ├── install.py │ │ ├── requirements.txt │ │ └── trainer.py │ ├── hf_t5 │ │ ├── __init__.py │ │ ├── install.py │ │ └── requirements.txt │ └── vision_resnet50 │ │ ├── __init__.py │ │ └── resnet.py ├── models │ ├── ADDING_MODELS.md │ ├── BERT_pytorch │ │ ├── .circleci │ │ │ └── config.yml │ │ ├── .gitignore │ │ ├── LICENSE │ │ ├── Makefile │ │ ├── README.md │ │ ├── __init__.py │ │ ├── bert_pytorch │ │ │ ├── __init__.py │ │ │ ├── __main__.py │ │ │ ├── dataset │ │ │ │ ├── __init__.py │ │ │ │ ├── dataset.py │ │ │ │ └── vocab.py │ │ │ ├── model │ │ │ │ ├── __init__.py │ │ │ │ ├── attention │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── multi_head.py │ │ │ │ │ └── single.py │ │ │ │ ├── bert.py │ │ │ │ ├── embedding │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── bert.py │ │ │ │ │ ├── position.py │ │ │ │ │ ├── segment.py │ │ │ │ │ └── token.py │ │ │ │ ├── 
language_model.py │ │ │ │ ├── transformer.py │ │ │ │ └── utils │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── feed_forward.py │ │ │ │ │ ├── layer_norm.py │ │ │ │ │ ├── sublayer.py │ │ │ │ │ └── tensor2tensor.py │ │ │ └── trainer │ │ │ │ ├── __init__.py │ │ │ │ ├── optim_schedule.py │ │ │ │ └── pretrain.py │ │ ├── install.py │ │ ├── install.sh │ │ ├── metadata.yaml │ │ ├── origin │ │ ├── requirements.txt │ │ ├── run.sh │ │ ├── setup.py │ │ └── test.py │ ├── Background_Matting │ │ ├── .gitignore │ │ ├── Data_adobe │ │ │ ├── compose.py │ │ │ ├── prepare.sh │ │ │ ├── test_data_list.txt │ │ │ └── train_data_list.txt │ │ ├── README.md │ │ ├── __init__.py │ │ ├── data_loader.py │ │ ├── functions.py │ │ ├── install.py │ │ ├── install.sh │ │ ├── loss_functions.py │ │ ├── metadata.yaml │ │ ├── networks.py │ │ ├── origin │ │ ├── prepare_real.py │ │ ├── requirements.txt │ │ ├── run.sh │ │ ├── test_background-matting_image.py │ │ ├── test_pre_process.m │ │ ├── test_pre_process.py │ │ ├── test_pre_process_video.m │ │ ├── test_pre_process_video.py │ │ ├── test_segmentation_deeplab.py │ │ ├── train_adobe.py │ │ └── train_real_fixed.py │ ├── LearningToPaint │ │ ├── .gitignore │ │ ├── LICENSE │ │ ├── LearningToPaint.ipynb │ │ ├── README.md │ │ ├── __init__.py │ │ ├── baseline │ │ │ ├── DRL │ │ │ │ ├── actor.py │ │ │ │ ├── critic.py │ │ │ │ ├── ddpg.py │ │ │ │ ├── evaluator.py │ │ │ │ ├── multi.py │ │ │ │ ├── rpm.py │ │ │ │ └── wgan.py │ │ │ ├── Renderer │ │ │ │ ├── __init__.py │ │ │ │ ├── model.py │ │ │ │ └── stroke_gen.py │ │ │ ├── env.py │ │ │ ├── test.py │ │ │ ├── train.py │ │ │ ├── train_renderer.py │ │ │ └── utils │ │ │ │ ├── tensorboard.py │ │ │ │ └── util.py │ │ ├── baseline_modelfree │ │ │ ├── DRL │ │ │ │ ├── actor.py │ │ │ │ ├── critic.py │ │ │ │ ├── ddpg.py │ │ │ │ ├── evaluator.py │ │ │ │ ├── multi.py │ │ │ │ ├── rpm.py │ │ │ │ └── wgan.py │ │ │ ├── Renderer │ │ │ │ ├── __init__.py │ │ │ │ ├── model.py │ │ │ │ └── stroke_gen.py │ │ │ ├── env.py │ │ │ ├── test.py │ │ │ ├── train.py │ │ │ ├── train_renderer.py │ │ │ └── utils │ │ │ │ ├── tensorboard.py │ │ │ │ └── util.py │ │ ├── install.py │ │ ├── install.sh │ │ ├── metadata.yaml │ │ ├── origin │ │ ├── requirements.txt │ │ └── run.sh │ ├── Super_SloMo │ │ ├── .gitignore │ │ ├── LICENSE │ │ ├── README.md │ │ ├── __init__.py │ │ ├── dataloader.py │ │ ├── eval.py │ │ ├── install.py │ │ ├── install.sh │ │ ├── metadata.yaml │ │ ├── model_wrapper.py │ │ ├── origin │ │ ├── requirements.txt │ │ ├── run.sh │ │ ├── slomo_model.py │ │ ├── train.ipynb │ │ ├── train.py │ │ └── video_to_slomo.py │ ├── __init__.py │ ├── alexnet │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── basic_gnn_edgecnn │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── basic_gnn_gcn │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── basic_gnn_gin │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── basic_gnn_sage │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── cm3leon_generate │ │ ├── __init__.py │ │ ├── metadata.yaml │ │ └── model.py │ ├── dcgan │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── demucs │ │ ├── .gitignore │ │ ├── CODE_OF_CONDUCT.md │ │ ├── CONTRIBUTING.md │ │ ├── LICENSE │ │ ├── README.md │ │ ├── __init__.py │ │ ├── check.py │ │ ├── demucs │ │ │ ├── __init__.py │ │ │ ├── __main__.py │ │ │ ├── audio.py │ │ │ ├── augment.py │ │ │ ├── compressed.py │ │ │ ├── model.py │ │ │ ├── parser.py │ │ │ ├── raw.py │ │ │ ├── separate.py │ │ │ ├── tasnet.py │ │ │ ├── 
test.py │ │ │ ├── train.py │ │ │ └── utils.py │ │ ├── dora.py │ │ ├── environment-cpu.yml │ │ ├── environment-cuda.yml │ │ ├── install.py │ │ ├── metadata.yaml │ │ ├── origin │ │ ├── quantize.py │ │ ├── requirements.txt │ │ ├── result_table.py │ │ ├── run.py │ │ ├── run.sh │ │ ├── run_overall.sh │ │ ├── run_slurm.py │ │ ├── setup.cfg │ │ └── valid_table.py │ ├── densenet121 │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── detectron2_fasterrcnn_r_101_c4 │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── detectron2_fasterrcnn_r_101_dc5 │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── detectron2_fasterrcnn_r_101_fpn │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── detectron2_fasterrcnn_r_50_c4 │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── detectron2_fasterrcnn_r_50_dc5 │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── detectron2_fasterrcnn_r_50_fpn │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── detectron2_fcos_r_50_fpn │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── detectron2_maskrcnn │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── origin │ ├── detectron2_maskrcnn_r_101_c4 │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── detectron2_maskrcnn_r_101_fpn │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── detectron2_maskrcnn_r_50_c4 │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── detectron2_maskrcnn_r_50_fpn │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── dlrm │ │ ├── CODE_OF_CONDUCT.md │ │ ├── CONTRIBUTING.md │ │ ├── Dockerfile │ │ ├── LICENSE │ │ ├── README.md │ │ ├── __init__.py │ │ ├── bench │ │ │ ├── dlrm_s_benchmark.sh │ │ │ ├── dlrm_s_criteo_kaggle.sh │ │ │ ├── dlrm_s_criteo_terabyte.sh │ │ │ └── run_and_time.sh │ │ ├── cython │ │ │ ├── cython_compile.py │ │ │ └── cython_criteo.py │ │ ├── data_loader_terabyte.py │ │ ├── data_utils.py │ │ ├── dlrm_data_caffe2.py │ │ ├── dlrm_data_pytorch.py │ │ ├── dlrm_s_caffe2.py │ │ ├── dlrm_s_pytorch.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ ├── origin │ │ ├── requirements.txt │ │ ├── test │ │ │ └── dlrm_s_test.sh │ │ ├── tools │ │ │ └── visualize.py │ │ └── tricks │ │ │ ├── md_embedding_bag.py │ │ │ └── qr_embedding_bag.py │ ├── doctr_det_predictor │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── doctr_reco_predictor │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── drq │ │ ├── __init__.py │ │ ├── config.py │ │ ├── drq.py │ │ ├── drqutils.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ ├── origin │ │ ├── replay_buffer.py │ │ └── requirements.txt │ ├── fastNLP_Bert │ │ ├── __init__.py │ │ ├── bert_config.json │ │ ├── cmrc2018_simulator.py │ │ ├── fastnlp.patch │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── functorch_dp_cifar10 │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── functorch_maml_omniglot │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── hf_Albert │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── hf_Bart │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── hf_Bert │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── hf_Bert_large │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ 
└── requirements.txt │ ├── hf_BigBird │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── hf_DistilBert │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── hf_GPT2 │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── hf_GPT2_large │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── hf_Longformer │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── hf_Reformer │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── hf_Roberta_base │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── hf_T5 │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── hf_T5_base │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── hf_T5_generate │ │ ├── __init__.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── hf_T5_large │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── hf_Whisper │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── hf_clip │ │ ├── __init__.py │ │ └── metadata.yaml │ ├── hf_distil_whisper │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── lennard_jones │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── llama │ │ ├── __init__.py │ │ ├── generation.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ ├── model.py │ │ ├── origin │ │ ├── requirements.txt │ │ └── tokenizer.py │ ├── llama_v2_7b_16h │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── llava │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── maml │ │ ├── LICENSE │ │ ├── README.md │ │ ├── __init__.py │ │ ├── install.py │ │ ├── learner.py │ │ ├── meta.py │ │ ├── metadata.yaml │ │ └── origin │ ├── maml_omniglot │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── microbench_unbacked_tolist_sum │ │ ├── __init__.py │ │ └── metadata.yaml │ ├── mnasnet1_0 │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── mobilenet_v2 │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── mobilenet_v2_quantized_qat │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── mobilenet_v3_large │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── moco │ │ ├── LICENSE │ │ ├── README.md │ │ ├── __init__.py │ │ ├── detection │ │ │ ├── README.md │ │ │ ├── configs │ │ │ │ ├── Base-RCNN-C4-BN.yaml │ │ │ │ ├── coco_R_50_C4_2x.yaml │ │ │ │ ├── coco_R_50_C4_2x_moco.yaml │ │ │ │ ├── pascal_voc_R_50_C4_24k.yaml │ │ │ │ └── pascal_voc_R_50_C4_24k_moco.yaml │ │ │ ├── convert-pretrain-to-detectron2.py │ │ │ └── train_net.py │ │ ├── install.py │ │ ├── main_lincls.py │ │ ├── main_moco.py │ │ ├── metadata.yaml │ │ ├── moco │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ └── loader.py │ │ ├── origin │ │ ├── requirements.txt │ │ └── run.sh │ ├── moondream │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── nanogpt │ │ ├── __init__.py │ │ ├── metadata.yaml │ │ ├── model.py │ │ └── origin │ ├── nvidia_deeprecommender │ │ ├── LICENSE │ │ ├── README.md │ │ ├── __init__.py │ │ ├── compute_RMSE.py │ │ ├── data_utils │ │ │ ├── movie_lense_data_converter.py │ │ │ └── netflix_data_convert.py │ │ ├── install.py │ │ ├── metadata.yaml 
│ │ ├── nvinfer.py │ │ ├── nvtrain.py │ │ ├── origin │ │ ├── reco_encoder │ │ │ ├── __init__.py │ │ │ ├── data │ │ │ │ ├── __init__.py │ │ │ │ └── input_layer.py │ │ │ └── model │ │ │ │ ├── __init__.py │ │ │ │ └── model.py │ │ └── requirements.txt │ ├── opacus_cifar10 │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── phlippe_densenet │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── phlippe_resnet │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── pyhpc_equation_of_state │ │ ├── __init__.py │ │ ├── eos_pytorch.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── origin │ ├── pyhpc_isoneutral_mixing │ │ ├── __init__.py │ │ ├── install.py │ │ ├── isoneutral_pytorch.py │ │ ├── metadata.yaml │ │ └── origin │ ├── pyhpc_turbulent_kinetic_energy │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ ├── origin │ │ └── tke_pytorch.py │ ├── pytorch_CycleGAN_and_pix2pix │ │ ├── .gitignore │ │ ├── CycleGAN.ipynb │ │ ├── LICENSE │ │ ├── README.md │ │ ├── __init__.py │ │ ├── data │ │ │ ├── __init__.py │ │ │ ├── aligned_dataset.py │ │ │ ├── base_dataset.py │ │ │ ├── colorization_dataset.py │ │ │ ├── image_folder.py │ │ │ ├── single_dataset.py │ │ │ ├── template_dataset.py │ │ │ └── unaligned_dataset.py │ │ ├── docs │ │ │ ├── Dockerfile │ │ │ ├── README_es.md │ │ │ ├── datasets.md │ │ │ ├── docker.md │ │ │ ├── overview.md │ │ │ ├── qa.md │ │ │ └── tips.md │ │ ├── environment.yml │ │ ├── example_input.pt │ │ ├── install.py │ │ ├── metadata.yaml │ │ ├── models │ │ │ ├── __init__.py │ │ │ ├── base_model.py │ │ │ ├── colorization_model.py │ │ │ ├── cycle_gan_model.py │ │ │ ├── networks.py │ │ │ ├── pix2pix_model.py │ │ │ ├── template_model.py │ │ │ └── test_model.py │ │ ├── options │ │ │ ├── __init__.py │ │ │ ├── base_options.py │ │ │ ├── test_options.py │ │ │ └── train_options.py │ │ ├── origin │ │ ├── pix2pix.ipynb │ │ ├── requirements.txt │ │ ├── run.sh │ │ ├── scripts │ │ │ ├── conda_deps.sh │ │ │ ├── download_cyclegan_model.sh │ │ │ ├── download_pix2pix_model.sh │ │ │ ├── edges │ │ │ │ ├── PostprocessHED.m │ │ │ │ └── batch_hed.py │ │ │ ├── eval_cityscapes │ │ │ │ ├── caffemodel │ │ │ │ │ └── deploy.prototxt │ │ │ │ ├── cityscapes.py │ │ │ │ ├── download_fcn8s.sh │ │ │ │ ├── evaluate.py │ │ │ │ └── util.py │ │ │ ├── install_deps.sh │ │ │ ├── test_before_push.py │ │ │ ├── test_colorization.sh │ │ │ ├── test_cyclegan.sh │ │ │ ├── test_pix2pix.sh │ │ │ ├── test_single.sh │ │ │ ├── train_colorization.sh │ │ │ ├── train_cyclegan.sh │ │ │ └── train_pix2pix.sh │ │ ├── test_cyclegan.py │ │ ├── train_cyclegan.py │ │ └── util │ │ │ ├── __init__.py │ │ │ ├── get_data.py │ │ │ ├── html.py │ │ │ ├── image_pool.py │ │ │ ├── util.py │ │ │ └── visualizer.py │ ├── pytorch_stargan │ │ ├── LICENSE │ │ ├── README.md │ │ ├── __init__.py │ │ ├── data_loader.py │ │ ├── download.sh │ │ ├── install.py │ │ ├── logger.py │ │ ├── main.py │ │ ├── metadata.yaml │ │ ├── model.py │ │ ├── requirements.txt │ │ ├── run.sh │ │ └── solver.py │ ├── pytorch_unet │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ ├── origin │ │ └── pytorch_unet │ │ │ ├── Dockerfile │ │ │ ├── LICENSE │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── evaluate.py │ │ │ ├── hubconf.py │ │ │ ├── predict.py │ │ │ ├── requirements.txt │ │ │ ├── scripts │ │ │ └── download_data.sh │ │ │ ├── train.py │ │ │ ├── unet │ │ │ ├── __init__.py │ │ │ ├── unet_model.py │ │ │ └── unet_parts.py │ │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── data_loading.py │ │ │ ├── dice_score.py 
│ │ │ └── utils.py │ ├── resnet152 │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── resnet18 │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── resnet50 │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── resnet50_quantized_qat │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── resnext50_32x4d │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── sam │ │ ├── __init__.py │ │ ├── build_sam.py │ │ ├── common.py │ │ ├── image_encoder.py │ │ ├── install.py │ │ ├── mask_decoder.py │ │ ├── metadata.yaml │ │ ├── origin │ │ ├── predictor.py │ │ ├── prompt_encoder.py │ │ ├── requirements.txt │ │ ├── sam.py │ │ ├── transformer.py │ │ └── transforms.py │ ├── sam_fast │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── shufflenet_v2_x1_0 │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── simple_gpt │ │ ├── __init__.py │ │ ├── metadata.yaml │ │ ├── model.py │ │ └── origin │ ├── simple_gpt_tp_manual │ │ ├── __init__.py │ │ ├── metadata.yaml │ │ ├── model.py │ │ ├── origin │ │ └── tp.py │ ├── soft_actor_critic │ │ ├── __init__.py │ │ ├── config.py │ │ ├── envs.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ ├── nets.py │ │ ├── replay.py │ │ ├── requirements.txt │ │ ├── sac.py │ │ └── sac_utils.py │ ├── speech_transformer │ │ ├── __init__.py │ │ ├── config.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ ├── origin │ │ ├── requirements.txt │ │ └── speech_transformer │ │ │ ├── data │ │ │ ├── __init__.py │ │ │ └── data.py │ │ │ ├── transformer │ │ │ ├── __init__.py │ │ │ ├── attention.py │ │ │ ├── decoder.py │ │ │ ├── encoder.py │ │ │ ├── loss.py │ │ │ ├── module.py │ │ │ ├── optimizer.py │ │ │ └── transformer.py │ │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── data2json.sh │ │ │ ├── dump.sh │ │ │ ├── filt.py │ │ │ ├── json2trn.py │ │ │ ├── mergejson.py │ │ │ ├── scp2json.py │ │ │ └── utils.py │ ├── squeezenet1_1 │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── stable_diffusion_text_encoder │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── stable_diffusion_unet │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── tacotron2 │ │ ├── .gitmodules │ │ ├── Dockerfile │ │ ├── LICENSE │ │ ├── README.md │ │ ├── __init__.py │ │ ├── audio_processing.py │ │ ├── data_utils.py │ │ ├── distributed.py │ │ ├── hparams.py │ │ ├── inference.ipynb │ │ ├── install.py │ │ ├── install.sh │ │ ├── layers.py │ │ ├── logger.py │ │ ├── loss_function.py │ │ ├── loss_scaler.py │ │ ├── metadata.yaml │ │ ├── model.py │ │ ├── multiproc.py │ │ ├── origin │ │ ├── plotting_utils.py │ │ ├── requirements.txt │ │ ├── stft.py │ │ ├── tacotron2_utils.py │ │ ├── text │ │ │ ├── LICENSE │ │ │ ├── __init__.py │ │ │ ├── cleaners.py │ │ │ ├── cmudict.py │ │ │ ├── numbers.py │ │ │ └── symbols.py │ │ ├── train_tacotron2.py │ │ └── waveglow │ │ │ ├── .gitmodules │ │ │ ├── LICENSE │ │ │ ├── README.md │ │ │ ├── config.json │ │ │ ├── convert_model.py │ │ │ ├── denoiser.py │ │ │ ├── distributed.py │ │ │ ├── glow.py │ │ │ ├── glow_old.py │ │ │ ├── inference.py │ │ │ ├── mel2samp.py │ │ │ ├── tacotron2 │ │ │ ├── Dockerfile │ │ │ ├── LICENSE │ │ │ ├── README.md │ │ │ ├── audio_processing.py │ │ │ ├── data_utils.py │ │ │ ├── distributed.py │ │ │ ├── filelists │ │ │ │ ├── ljs_audio_text_test_filelist.txt │ │ │ │ └── ljs_audio_text_val_filelist.txt │ │ │ ├── fp16_optimizer.py │ │ │ ├── hparams.py │ │ │ ├── layers.py │ │ │ ├── logger.py │ │ │ ├── loss_function.py │ │ │ ├── loss_scaler.py │ 
│ │ ├── model.py │ │ │ ├── multiproc.py │ │ │ ├── plotting_utils.py │ │ │ ├── stft.py │ │ │ ├── text │ │ │ │ ├── LICENSE │ │ │ │ ├── __init__.py │ │ │ │ ├── cleaners.py │ │ │ │ ├── cmudict.py │ │ │ │ ├── numbers.py │ │ │ │ └── symbols.py │ │ │ ├── train.py │ │ │ └── utils.py │ │ │ └── train.py │ ├── timm_efficientdet │ │ ├── __init__.py │ │ ├── args.py │ │ ├── effdet.patch │ │ ├── install.py │ │ ├── loader.py │ │ ├── metadata.yaml │ │ ├── pycocotools.patch │ │ ├── requirements.txt │ │ └── train.py │ ├── timm_efficientnet │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── timm_nfnet │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── timm_regnet │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── timm_resnest │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── timm_vision_transformer │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── timm_vision_transformer_large │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── timm_vovnet │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── torch_multimodal_clip │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── tts_angular │ │ ├── __init__.py │ │ ├── angular_tts_main.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ ├── model.py │ │ └── requirements.txt │ ├── vgg16 │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── vision_maskrcnn │ │ ├── __init__.py │ │ ├── coco_utils.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ ├── origin │ │ └── requirements.txt │ └── yolov3 │ │ ├── .dockerignore │ │ ├── Dockerfile │ │ ├── LICENSE │ │ ├── README.md │ │ ├── __init__.py │ │ ├── cfg │ │ ├── cd53s-yolov3.cfg │ │ ├── cd53s.cfg │ │ ├── csresnext50-panet-spp.cfg │ │ ├── yolov3-1cls.cfg │ │ ├── yolov3-asff.cfg │ │ ├── yolov3-spp-1cls.cfg │ │ ├── yolov3-spp-3cls.cfg │ │ ├── yolov3-spp-matrix.cfg │ │ ├── yolov3-spp-pan-scale.cfg │ │ ├── yolov3-spp.cfg │ │ ├── yolov3-spp3.cfg │ │ ├── yolov3-tiny-1cls.cfg │ │ ├── yolov3-tiny-3cls.cfg │ │ ├── yolov3-tiny.cfg │ │ ├── yolov3-tiny3-1cls.cfg │ │ ├── yolov3-tiny3.cfg │ │ ├── yolov3.cfg │ │ ├── yolov4-relu.cfg │ │ ├── yolov4-tiny.cfg │ │ └── yolov4.cfg │ │ ├── check.py │ │ ├── detect.py │ │ ├── install.py │ │ ├── install.sh │ │ ├── metadata.yaml │ │ ├── requirements.txt │ │ ├── run.sh │ │ ├── test.py │ │ ├── train_batch0.jpg │ │ ├── weights │ │ └── download_yolov3_weights.sh │ │ ├── yolo_models.py │ │ ├── yolo_train.py │ │ └── yolo_utils │ │ ├── __init__.py │ │ ├── adabound.py │ │ ├── datasets.py │ │ ├── evolve.sh │ │ ├── gcp.sh │ │ ├── google_utils.py │ │ ├── layers.py │ │ ├── parse_config.py │ │ ├── torch_utils.py │ │ └── utils.py ├── tasks.py └── util │ ├── __init__.py │ ├── backends │ ├── __init__.py │ ├── ait.py │ ├── cudagraph.py │ ├── jit.py │ ├── torchdynamo.py │ └── trt.py │ ├── classify_graphs.py │ ├── distributed │ ├── README.md │ ├── core_model │ │ ├── apply_trainer.py │ │ └── trainer.py │ ├── requirements.txt │ ├── submit.py │ └── trainer.py │ ├── distribution.py │ ├── e2emodel.py │ ├── env_check.py │ ├── experiment │ ├── instantiator.py │ └── metrics.py │ ├── extra_args.py │ ├── framework │ ├── detectron2 │ │ ├── __init__.py │ │ ├── config.py │ │ ├── model_factory.py │ │ └── requirements.txt │ ├── diffusers │ │ ├── __init__.py │ │ ├── model_factory.py │ │ └── requirements.txt │ ├── gnn │ │ ├── __init__.py │ │ ├── args.py │ │ ├── config.py │ │ ├── model_factory.py │ │ └── requirements.txt │ ├── huggingface │ │ ├── args.py │ │ ├── basic_configs.py │ │ ├── 
extended_configs.py │ │ ├── list_extended_configs.py │ │ ├── model_factory.py │ │ └── patch_hf.py │ ├── lit_llama.py │ ├── timm │ │ ├── args.py │ │ ├── extended_configs.py │ │ ├── instantiate.py │ │ ├── loader.py │ │ ├── model_factory.py │ │ ├── timm_config.py │ │ └── train.py │ ├── transformers │ │ ├── text_classification │ │ │ ├── args.py │ │ │ └── dataset.py │ │ └── translation │ │ │ ├── args.py │ │ │ └── dataset.py │ └── vision │ │ ├── args.py │ │ └── model_factory.py │ ├── fx_int8.py │ ├── gemm_shapes.csv │ ├── hardware │ ├── __init__.py │ └── roofline.py │ ├── input.py │ ├── machine_config.py │ ├── metadata_utils.py │ └── model.py ├── userbenchmark ├── ADDING_USERBENCHMARKS.md ├── __init__.py ├── api-coverage │ ├── __init__.py │ └── run.py ├── cpu │ ├── README.md │ ├── __init__.py │ ├── cpu_test.yaml │ ├── cpu_utils.py │ ├── run.py │ └── run_config.py ├── cuda-compare │ ├── __init__.py │ ├── result_analyzer.py │ └── run.py ├── ddp_experiments │ ├── README.md │ ├── __init__.py │ ├── parse_ddp.py │ └── run.py ├── distributed │ ├── README.md │ ├── __init__.py │ ├── ci.yaml │ ├── install.py │ ├── run.py │ └── run_ci.sh ├── dynamo │ ├── __init__.py │ ├── dynamobench │ │ ├── _dynamo │ │ │ ├── testing.py │ │ │ └── utils.py │ │ ├── common.py │ │ ├── huggingface.py │ │ ├── huggingface.yaml │ │ ├── huggingface_models_list.txt │ │ ├── huggingface_models_list_cpu.txt │ │ ├── timm_models.py │ │ ├── timm_models_list.txt │ │ ├── timm_models_list_cpu.txt │ │ ├── torchao_backend.py │ │ ├── torchbench.py │ │ ├── torchbench.yaml │ │ ├── torchbench_models_list.txt │ │ └── torchbench_models_list_cpu.txt │ └── run.py ├── functorch │ ├── __init__.py │ ├── cases.py │ ├── ci.yaml │ ├── run.py │ ├── simple_models.py │ ├── util.py │ └── vmap_hessian_fc.py ├── group_bench │ ├── __init__.py │ ├── configs │ │ ├── bmm.yaml │ │ └── torch_ao.yaml │ └── run.py ├── instruction-count │ ├── __init__.py │ └── run.py ├── lazy-tensor │ └── run.py ├── mast-sample │ └── main.py ├── model-stableness │ ├── __init__.py │ └── run.py ├── nvfuser │ ├── __init__.py │ ├── ir.py │ └── run.py ├── optim │ ├── __init__.py │ ├── regression_detector.py │ ├── run.py │ └── run_optim_benchmarks.py ├── release-test │ ├── __init__.py │ ├── configs │ │ ├── 1.12.1.yaml │ │ ├── 1.13.0.yaml │ │ ├── 2.0.1.yaml │ │ ├── 2.1.0.yaml │ │ ├── 2.1.1.yaml │ │ ├── 2.1.2.yaml │ │ ├── 2.5.0.yaml │ │ ├── 2.5.1.yaml │ │ └── 2.6.0.yaml │ ├── monitor_proc.sh │ ├── result_analyzer.py │ ├── run.py │ ├── run_release_test.sh │ ├── setup_env.sh │ └── version.txt ├── rocm-test │ ├── __init__.py │ └── run.py ├── test-user-invoke │ ├── __init__.py │ └── run.py ├── test_bench │ ├── __init__.py │ ├── install.py │ ├── regression_detector.py │ └── run.py ├── torch-nightly │ ├── __init__.py │ ├── regression_detector.py │ ├── run.py │ └── v3-cuda-tests.yaml ├── torch_trt │ ├── __init__.py │ ├── ci.yaml │ ├── install.py │ └── run.py ├── torchao │ ├── __init__.py │ ├── install.py │ ├── run.py │ └── upload.py └── utils.py └── utils ├── __init__.py ├── build_requirements.txt ├── build_utils.py ├── cuda_utils.py ├── github.py ├── gitutils.py ├── python_utils.py ├── s3_utils.py └── torch_nightly_utils.py /.ci/torchbench/check-ssh.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eou pipefail 3 | 4 | echo "Holding runner for 2 hours until all ssh sessions have logged out" 5 | for _ in $(seq 1440); do 6 | # Break if no ssh session exists anymore 7 | if [ "$(who)" = "" ]; then 8 | break 9 | fi 10 | echo "." 
11 | sleep 5 12 | done 13 | -------------------------------------------------------------------------------- /.ci/torchbench/install.sh: -------------------------------------------------------------------------------- 1 | . ${HOME}/miniconda3/etc/profile.d/conda.sh 2 | 3 | if [ -z "${CONDA_ENV}" ]; then 4 | echo "ERROR: CONDA_ENV is not set" 5 | exit 1 6 | fi 7 | 8 | if [[ -n "${SETUP_SCRIPT}" && -e "${SETUP_SCRIPT}" ]]; then 9 | . "${SETUP_SCRIPT}" 10 | fi 11 | 12 | . "${HOME}"/miniconda3/etc/profile.d/conda.sh 13 | 14 | conda activate "${CONDA_ENV}" 15 | 16 | parent_dir=$(dirname "$(readlink -f "$0")")/../.. 17 | cd ${parent_dir} 18 | 19 | python -c "import torch; print(torch.__version__); print(torch.version.git_version)" 20 | 21 | python install.py $@ 22 | -------------------------------------------------------------------------------- /.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | Language: ObjC 3 | DisableFormat: true 4 | SortIncludes: false 5 | ... 6 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 120 3 | ignore = E203,E305,E402,E721,E741,F401,F403,F405,F821,F841,F999,W503,W504 4 | exclude = third_party 5 | -------------------------------------------------------------------------------- /.github/scripts/bisection-config.sample.yaml: -------------------------------------------------------------------------------- 1 | # The sample bisection config that solves GH issue #51380 2 | 3 | # Start and end commits 4 | start: a87a1c1 5 | end: 0ead9d5 6 | # 10 percent regression 7 | threshold: 10 8 | # Support increase, decrease, or both 9 | # increase means performance regression, decrease means performance optimization 10 | direction: increase 11 | # Test timeout in minutes 12 | timeout: 60 13 | # Only the tests specified are executed. 
If not specified, use the tests in the TorchBench v0 config 14 | tests: 15 | - test_eval[yolov3-cpu-eager] 16 | -------------------------------------------------------------------------------- /.github/scripts/bmutils/__init__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from pathlib import Path 3 | 4 | CURRENT_DIR = Path(__file__).parent 5 | REPO_ROOT = str(CURRENT_DIR.parent.parent.parent) 6 | 7 | 8 | class add_path: 9 | def __init__(self, path): 10 | self.path = path 11 | 12 | def __enter__(self): 13 | sys.path.insert(0, self.path) 14 | 15 | def __exit__(self, exc_type, exc_value, traceback): 16 | try: 17 | sys.path.remove(self.path) 18 | except ValueError: 19 | pass 20 | -------------------------------------------------------------------------------- /.github/scripts/userbenchmark/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/.github/scripts/userbenchmark/__init__.py -------------------------------------------------------------------------------- /.github/workflows/pr-test.yml: -------------------------------------------------------------------------------- 1 | name: TorchBench PR Test 2 | on: 3 | pull_request: 4 | workflow_dispatch: 5 | push: 6 | branches: 7 | - main 8 | 9 | jobs: 10 | cpu-test: 11 | uses: ./.github/workflows/_linux-test-cpu.yml 12 | secrets: 13 | HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} 14 | cuda-test: 15 | uses: ./.github/workflows/_linux-test-cuda.yml 16 | secrets: 17 | HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} 18 | 19 | concurrency: 20 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }} 21 | cancel-in-progress: true 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .benchmarks 2 | .vscode/ 3 | .data 4 | ._* 5 | */**/__pycache__ 6 | */**/*.pkl 7 | */**/*.pt 8 | */**/*.pyc 9 | */**/*.tar.gz 10 | *.out* 11 | *.swp 12 | *.swo 13 | *~HEAD 14 | .DS_Store 15 | build/ 16 | .ipynb_checkpoints/ 17 | .idea 18 | old.json 19 | te.json 20 | logs/ 21 | scripts/scribe.py 22 | .userbenchmark/ 23 | torchbench.egg-info/ -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "submodules/FAMBench"] 2 | path = submodules/FAMBench 3 | url = https://github.com/facebookresearch/FAMBench.git 4 | [submodule "submodules/lit-llama"] 5 | path = submodules/lit-llama 6 | url = https://github.com/Lightning-AI/lit-llama.git 7 | -------------------------------------------------------------------------------- /docker/build-gcp-a100-docker.sh: -------------------------------------------------------------------------------- 1 | docker build . -f gcp-a100-runner-dind.dockerfile -t xzhao9/gcp-a100-runner-dind:latest 2 | -------------------------------------------------------------------------------- /docker/build-torchbench-nightly-docker.sh: -------------------------------------------------------------------------------- 1 | TORCHBENCH_BRANCH=${TORCHBENCH_BRANCH:-main} 2 | 3 | DOCKER_BUILDKIT=0 docker build . 
--no-cache -f torchbench-nightly.dockerfile -t ghcr.io/pytorch/torchbench:latest \ 4 | --build-arg TORCHBENCH_BRANCH=${TORCHBENCH_BRANCH} 5 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools", "wheel"] 3 | # Use legacy backend to import local packages in setup.py 4 | build-backend = "setuptools.build_meta:__legacy__" 5 | 6 | 7 | [tool.black] 8 | line-length = 88 9 | target-version = ["py38"] 10 | exclude = '''/submodules/.*''' 11 | 12 | [tool.usort] 13 | excludes = ["**/submodules/**"] 14 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate 2 | boto3 3 | bs4 4 | patch 5 | py-cpuinfo 6 | distro 7 | iopath 8 | pytest 9 | pytest-benchmark 10 | requests 11 | tabulate 12 | git+https://github.com/huggingface/pytorch-image-models.git@730b907 13 | # this version of transformers is required by linger-kernel 14 | # https://github.com/linkedin/Liger-Kernel/blob/main/pyproject.toml#L23 15 | transformers==4.44.2 16 | MonkeyType 17 | psutil 18 | pyyaml 19 | numpy 20 | opencv-python 21 | submitit 22 | pynvml>=12.0.0 23 | pandas 24 | scipy 25 | numba 26 | -------------------------------------------------------------------------------- /scripts/activate_conda.sh: -------------------------------------------------------------------------------- 1 | 2 | . ${HOME}/miniconda3/etc/profile.d/conda.sh 3 | conda activate 4 | -------------------------------------------------------------------------------- /scripts/install_conda.sh: -------------------------------------------------------------------------------- 1 | DEFAULT_PYTHON_VERSION=3.10 2 | CONDA=https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh 3 | filename=$(basename "$CONDA") 4 | wget "$CONDA" 5 | chmod +x "$filename" 6 | ./"$filename" -b -u 7 | 8 | . 
${HOME}/miniconda3/etc/profile.d/conda.sh 9 | conda activate 10 | conda install -y python=${DEFAULT_PYTHON_VERSION} 11 | pip install boto3 pyyaml -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_packages, setup 2 | 3 | setup( 4 | name="torchbench", 5 | version="0.1", 6 | description="Benchmarking library for PyTorch", 7 | author="PyTorch Team", 8 | url="https://github.com/pytorch/benchmark", 9 | packages=find_packages(include=["torchbenchmark*", "userbenchmark*"]), 10 | classifiers=[ 11 | "Intended Audience :: Developers", 12 | "Topic :: Software Development :: Build Tools", 13 | "License :: OSI Approved :: BSD 3 License", 14 | "Programming Language :: Python", 15 | ], 16 | ) 17 | -------------------------------------------------------------------------------- /test_imports.py: -------------------------------------------------------------------------------- 1 | import torchbenchmark.models 2 | 3 | model, example_inputs = torchbenchmark.models.densenet121.Model( 4 | test="eval", device="cuda", batch_size=1 5 | ).get_module() 6 | model(*example_inputs) 7 | -------------------------------------------------------------------------------- /torchbenchmark/_components/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/_components/__init__.py -------------------------------------------------------------------------------- /torchbenchmark/_components/_impl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/_components/_impl/__init__.py -------------------------------------------------------------------------------- /torchbenchmark/_components/_impl/tasks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/_components/_impl/tasks/__init__.py -------------------------------------------------------------------------------- /torchbenchmark/_components/_impl/workers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/_components/_impl/workers/__init__.py -------------------------------------------------------------------------------- /torchbenchmark/_components/kineto/__init__.py: -------------------------------------------------------------------------------- 1 | from .trace import do_bench_kineto 2 | -------------------------------------------------------------------------------- /torchbenchmark/_components/model_analyzer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/_components/model_analyzer/__init__.py -------------------------------------------------------------------------------- /torchbenchmark/_components/model_analyzer/dcgm/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/_components/model_analyzer/dcgm/__init__.py -------------------------------------------------------------------------------- /torchbenchmark/_components/model_analyzer/requirements.txt: -------------------------------------------------------------------------------- 1 | numba 2 | pynvml -------------------------------------------------------------------------------- /torchbenchmark/_components/model_analyzer/tb_dcgm_types/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/_components/model_analyzer/tb_dcgm_types/__init__.py -------------------------------------------------------------------------------- /torchbenchmark/_components/model_analyzer/tb_dcgm_types/config.py: -------------------------------------------------------------------------------- 1 | # default is 0.01 second 2 | DEFAULT_MONITORING_INTERVAL = 0.01 3 | 4 | 5 | class AnalayzerConfig: 6 | def __init__(self): 7 | self.monitoring_interval = DEFAULT_MONITORING_INTERVAL 8 | -------------------------------------------------------------------------------- /torchbenchmark/_components/model_analyzer/tb_dcgm_types/cpu_record.py: -------------------------------------------------------------------------------- 1 | from .record import Record 2 | 3 | 4 | class CPURecord(Record): 5 | """ 6 | This is a base class for any 7 | CPU based record 8 | """ 9 | 10 | def __init__(self, value, timestamp=0): 11 | """ 12 | Parameters 13 | ---------- 14 | value : float 15 | The value of the CPU metrtic 16 | timestamp : int 17 | The timestamp for the record in nanoseconds 18 | """ 19 | 20 | super().__init__(value, timestamp) 21 | self._device_uuid = 0x1 22 | 23 | def device_uuid(self): 24 | return self._device_uuid 25 | -------------------------------------------------------------------------------- /torchbenchmark/_components/model_analyzer/tb_dcgm_types/da_exceptions.py: -------------------------------------------------------------------------------- 1 | class TorchBenchAnalyzerException(Exception): 2 | """ 3 | A custom exception specific to the TorchBench Model Analyzer 4 | """ 5 | 6 | pass 7 | -------------------------------------------------------------------------------- /torchbenchmark/_components/model_analyzer/tb_dcgm_types/tb_logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | LOGGER_NAME = "TorchBenchLogger" 4 | 5 | 6 | def set_logger(logger_level=logging.WARNING): 7 | formatter = logging.Formatter( 8 | fmt="%(asctime)s - %(levelname)s - %(module)s - %(message)s" 9 | ) 10 | handler = logging.StreamHandler() 11 | handler.setFormatter(formatter) 12 | logger = logging.getLogger(LOGGER_NAME) 13 | logger.setLevel(logger_level) 14 | logger.addHandler(handler) 15 | return logger 16 | -------------------------------------------------------------------------------- /torchbenchmark/_components/test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/_components/test/__init__.py -------------------------------------------------------------------------------- /torchbenchmark/canary_models/DALLE2_pytorch/metadata.yaml: 
-------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 1 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false -------------------------------------------------------------------------------- /torchbenchmark/canary_models/DALLE2_pytorch/origin: -------------------------------------------------------------------------------- 1 | origin https://github.com/lucidrains/DALLE2-pytorch.git -------------------------------------------------------------------------------- /torchbenchmark/canary_models/DALLE2_pytorch/requirements.txt: -------------------------------------------------------------------------------- 1 | git+https://github.com/lucidrains/DALLE2-pytorch@00e07b7d61e21447d55e6d06d5c928cf8b67601d 2 | beartype==0.15.0 3 | rotary-embedding-torch==0.3.3 4 | tensorboard 5 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/codellama/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.huggingface.patch_hf import ( 4 | cache_model, 5 | patch_transformers, 6 | ) 7 | 8 | if __name__ == "__main__": 9 | model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 10 | cache_model(model_name) 11 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/codellama/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 1 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | not_implemented: 8 | - device: cpu 9 | - device: cuda 10 | test: train 11 | train_benchmark: false 12 | train_deterministic: false -------------------------------------------------------------------------------- /torchbenchmark/canary_models/diffuser_instruct_pix2pix/__init__.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.tasks import COMPUTER_VISION 2 | from torchbenchmark.util.framework.diffusers.model_factory import DiffuserModel 3 | 4 | 5 | class Model(DiffuserModel): 6 | task = COMPUTER_VISION.GENERATION 7 | DEFAULT_TRAIN_BSIZE = 4 8 | DEFAULT_EVAL_BSIZE = 1 9 | # Default eval precision on CUDA device is fp16 10 | DEFAULT_EVAL_CUDA_PRECISION = "fp16" 11 | 12 | def __init__(self, test, device, batch_size=None, extra_args=[]): 13 | super().__init__( 14 | name="timbrooks/instruct-pix2pix", 15 | test=test, 16 | device=device, 17 | batch_size=batch_size, 18 | extra_args=extra_args, 19 | ) 20 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/diffuser_instruct_pix2pix/metadata.yaml: -------------------------------------------------------------------------------- 1 | eval_benchmark: false 2 | eval_deterministic: false 3 | eval_nograd: true 4 | not_implemented: 5 | - device: cpu 6 | train_benchmark: false 7 | train_deterministic: false 8 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/fambench_dlrm/install.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import sys 4 | 5 | from torchbenchmark import REPO_PATH 6 | from utils.python_utils import pip_install_requirements 7 | 8 | 9 | def update_fambench_submodule(): 10 | "Update FAMBench submodule of the benchmark repo" 11 | update_command = [ 12 | "git", 13 | "submodule", 14 | "update", 15 | "--init", 16 | "--recursive", 17 | os.path.join("submodules", "FAMBench"), 18 | ] 19 | subprocess.check_call(update_command, cwd=REPO_PATH) 20 | 21 | 22 | if __name__ == "__main__": 23 | update_fambench_submodule() 24 | pip_install_requirements() 25 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/fambench_dlrm/metadata.yaml: -------------------------------------------------------------------------------- 1 | eval_benchmark: false 2 | eval_deterministic: true 3 | eval_nograd: true 4 | train_benchmark: false 5 | train_deterministic: true 6 | not_implemented: 7 | # CUDA test disabled because it doesn't fit on the CI machine (T4) 8 | - device: cuda 9 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/fambench_dlrm/origin: -------------------------------------------------------------------------------- 1 | https://github.com/facebookresearch/FAMBench -------------------------------------------------------------------------------- /torchbenchmark/canary_models/fambench_dlrm/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/canary_models/fambench_dlrm/requirements.txt -------------------------------------------------------------------------------- /torchbenchmark/canary_models/fambench_xlmr/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 64 4 | cpu: 5 | eval_batch_size: 4 6 | eval_benchmark: false 7 | eval_deterministic: false 8 | eval_nograd: true 9 | not_implemented: 10 | - device: cuda 11 | test: train 12 | train_benchmark: false 13 | train_deterministic: false 14 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/fambench_xlmr/requirements.txt: -------------------------------------------------------------------------------- 1 | sacrebleu 2 | bitarray 3 | cffi 4 | omegaconf 5 | hydra-core 6 | sentencepiece 7 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/gat/__init__.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.tasks import GNN 2 | from torchbenchmark.util.framework.gnn.model_factory import GNNModel 3 | 4 | 5 | class Model(GNNModel): 6 | task = GNN.CLASSIFICATION 7 | DEFAULT_TRAIN_BSIZE = 64 8 | DEFAULT_EVAL_BSIZE = 64 9 | 10 | def __init__(self, test, device, batch_size=None, extra_args=[]): 11 | super().__init__( 12 | model_name="gat", 13 | test=test, 14 | device=device, 15 | batch_size=batch_size, 16 | extra_args=extra_args, 17 | ) 18 | if device == "cuda": 19 | # TODO - Add CUDA support 20 | raise NotImplementedError("GAT doesn't support CUDA") 21 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/gat/install.py: 
-------------------------------------------------------------------------------- 1 | from utils import s3_utils 2 | from utils.python_utils import pip_install_requirements 3 | 4 | if __name__ == "__main__": 5 | s3_utils.checkout_s3_data( 6 | "INPUT_TARBALLS", "Reddit_minimal.tar.gz", decompress=True 7 | ) 8 | pip_install_requirements( 9 | extra_args=["-f", "https://data.pyg.org/whl/torch-2.1.0+cpu.html"] 10 | ) 11 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/gat/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | cpu: 3 | eval_batch_size: 64 4 | eval_benchmark: true 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: true 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/gat/requirements.txt: -------------------------------------------------------------------------------- 1 | pyg_lib 2 | torch_scatter 3 | torch_sparse 4 | pyg-nightly 5 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/gcn/__init__.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.tasks import GNN 2 | from torchbenchmark.util.framework.gnn.model_factory import GNNModel 3 | 4 | 5 | class Model(GNNModel): 6 | task = GNN.CLASSIFICATION 7 | DEFAULT_TRAIN_BSIZE = 64 8 | DEFAULT_EVAL_BSIZE = 64 9 | 10 | def __init__(self, test, device, batch_size=None, extra_args=[]): 11 | super().__init__( 12 | model_name="gcn", 13 | test=test, 14 | device=device, 15 | batch_size=batch_size, 16 | extra_args=extra_args, 17 | ) 18 | if device == "cuda": 19 | # TODO - Add CUDA support 20 | raise NotImplementedError("GCN doesn't support CUDA") 21 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/gcn/install.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import sys 3 | 4 | from utils import s3_utils 5 | 6 | 7 | def pip_install_requirements(): 8 | subprocess.check_call( 9 | [ 10 | sys.executable, 11 | "-m", 12 | "pip", 13 | "install", 14 | "-q", 15 | "-r", 16 | "requirements.txt", 17 | "-f", 18 | "https://data.pyg.org/whl/torch-2.1.0+cpu.html", 19 | ] 20 | ) 21 | 22 | 23 | if __name__ == "__main__": 24 | s3_utils.checkout_s3_data( 25 | "INPUT_TARBALLS", "Reddit_minimal.tar.gz", decompress=True 26 | ) 27 | pip_install_requirements() 28 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/gcn/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | cpu: 3 | eval_batch_size: 64 4 | eval_benchmark: true 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: true 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/gcn/requirements.txt: -------------------------------------------------------------------------------- 1 | pyg_lib 2 | torch_scatter 3 | torch_sparse 4 | pyg-nightly 5 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/hf_GPT2_generate/__init__.py: -------------------------------------------------------------------------------- 1 | from 
torchbenchmark.util.framework.huggingface.model_factory import ( 2 | HuggingFaceGenerationModel, 3 | ) 4 | 5 | 6 | class Model(HuggingFaceGenerationModel): 7 | def __init__(self, test, device, batch_size=None, extra_args=[]): 8 | super().__init__( 9 | name="hf_GPT2_generate", 10 | test=test, 11 | device=device, 12 | batch_size=batch_size, 13 | extra_args=extra_args, 14 | ) 15 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/hf_GPT2_generate/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 1 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/hf_GPT2_generate/requirements.txt: -------------------------------------------------------------------------------- 1 | sentencepiece 2 | datasets 3 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/hf_MPT_7b_instruct/__init__.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.tasks import NLP 2 | from torchbenchmark.util.framework.huggingface.model_factory import HuggingFaceModel 3 | 4 | 5 | class Model(HuggingFaceModel): 6 | task = NLP.LANGUAGE_MODELING 7 | # https://huggingface.co/mosaicml/mpt-7b 8 | DEFAULT_TRAIN_BSIZE = 4 9 | DEFAULT_EVAL_BSIZE = 1 10 | 11 | def __init__(self, test, device, batch_size=None, extra_args=[]): 12 | super().__init__( 13 | name="hf_MPT_7b_instruct", 14 | test=test, 15 | device=device, 16 | batch_size=batch_size, 17 | extra_args=extra_args, 18 | ) 19 | 20 | def eval(self): 21 | super().eval() 22 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/hf_MPT_7b_instruct/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.huggingface.patch_hf import ( 4 | cache_model, 5 | patch_transformers, 6 | ) 7 | from utils.python_utils import pip_install_requirements 8 | 9 | if __name__ == "__main__": 10 | pip_install_requirements() 11 | patch_transformers() 12 | model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 13 | cache_model(model_name) 14 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/hf_MPT_7b_instruct/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 1 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false -------------------------------------------------------------------------------- /torchbenchmark/canary_models/hf_MPT_7b_instruct/requirements.txt: -------------------------------------------------------------------------------- 1 | einops 2 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/hf_Yi/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import sys 4 | 5 | from torchbenchmark.util.framework.huggingface.patch_hf import ( 6 | cache_model, 7 | patch_transformers, 8 | ) 
9 | from utils.python_utils import pip_install_requirements 10 | 11 | if __name__ == "__main__": 12 | pip_install_requirements() 13 | patch_transformers() 14 | model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 15 | cache_model(model_name) 16 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/hf_Yi/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 1 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | not_implemented: 10 | - device: NVIDIA A10G 11 | # - device: cpu -------------------------------------------------------------------------------- /torchbenchmark/canary_models/hf_Yi/requirements.txt: -------------------------------------------------------------------------------- 1 | numba -------------------------------------------------------------------------------- /torchbenchmark/canary_models/hf_mixtral/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.huggingface.patch_hf import ( 4 | cache_model, 5 | patch_transformers, 6 | ) 7 | from utils.python_utils import pip_install_requirements 8 | 9 | 10 | if __name__ == "__main__": 11 | pip_install_requirements() 12 | patch_transformers() 13 | model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 14 | cache_model(model_name) 15 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/hf_mixtral/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 1 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | not_implemented: 10 | - device: NVIDIA A10G 11 | # - device: cpu -------------------------------------------------------------------------------- /torchbenchmark/canary_models/hf_mixtral/requirements.txt: -------------------------------------------------------------------------------- 1 | bitsandbytes 2 | transformers>=4.36.2 3 | numba -------------------------------------------------------------------------------- /torchbenchmark/canary_models/lit_llama/install.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.util.framework.lit_llama import install_lit_llama 2 | 3 | if __name__ == "__main__": 4 | install_lit_llama() 5 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/lit_llama/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 32 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | not_implemented: 10 | - test: eval 11 | - test: example 12 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/lit_llama_generate/install.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.util.framework.lit_llama import install_lit_llama 2 | 3 | if __name__ == "__main__": 4 | install_lit_llama() 5 | 
-------------------------------------------------------------------------------- /torchbenchmark/canary_models/lit_llama_generate/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 32 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | not_implemented: 10 | - test: eval 11 | - test: example 12 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/lit_llama_lora/install.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.util.framework.lit_llama import install_lit_llama 2 | 3 | if __name__ == "__main__": 4 | install_lit_llama() 5 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/lit_llama_lora/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 32 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | not_implemented: 10 | - test: train 11 | - test: example 12 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/llama_v2_13b/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.huggingface.patch_hf import cache_model 4 | 5 | if __name__ == "__main__": 6 | model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 7 | cache_model(model_name) 8 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/llama_v2_13b/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 1 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | not_implemented: 8 | - device: cpu 9 | - device: cuda 10 | test: train 11 | train_benchmark: false 12 | train_deterministic: false -------------------------------------------------------------------------------- /torchbenchmark/canary_models/llama_v2_70b/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.huggingface.patch_hf import cache_model 4 | 5 | if __name__ == "__main__": 6 | model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 7 | cache_model(model_name) 8 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/llama_v2_70b/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 1 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | not_implemented: 8 | - device: cpu 9 | - device: cuda 10 | test: train 11 | train_benchmark: false 12 | train_deterministic: false -------------------------------------------------------------------------------- /torchbenchmark/canary_models/llama_v2_7b/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.huggingface.patch_hf import 
cache_model 4 | 5 | if __name__ == "__main__": 6 | model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 7 | cache_model(model_name) 8 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/llama_v2_7b/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 1 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | not_implemented: 8 | - device: cpu 9 | - device: cuda 10 | test: train 11 | train_benchmark: false 12 | train_deterministic: false -------------------------------------------------------------------------------- /torchbenchmark/canary_models/llama_v31_8b/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.huggingface.patch_hf import cache_model 4 | 5 | if __name__ == "__main__": 6 | model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 7 | cache_model(model_name) 8 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/llama_v31_8b/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 1 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | not_implemented: 8 | - device: cpu 9 | - device: cuda 10 | test: train 11 | train_benchmark: false 12 | train_deterministic: false -------------------------------------------------------------------------------- /torchbenchmark/canary_models/mistral_7b_instruct/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.huggingface.patch_hf import ( 4 | cache_model, 5 | patch_transformers, 6 | ) 7 | 8 | if __name__ == "__main__": 9 | patch_transformers() 10 | model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 11 | cache_model(model_name) 12 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/mistral_7b_instruct/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | - device: NVIDIA A10G 3 | - device: NVIDIA A100-SXM4-40GB 4 | eval_batch_size: 1 5 | eval_benchmark: false 6 | eval_deterministic: false 7 | eval_nograd: true 8 | train_benchmark: false 9 | train_deterministic: false 10 | not_implemented: 11 | - device: cpu -------------------------------------------------------------------------------- /torchbenchmark/canary_models/orca_2/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.huggingface.patch_hf import ( 4 | cache_model, 5 | patch_transformers, 6 | ) 7 | 8 | if __name__ == "__main__": 9 | patch_transformers() 10 | model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 11 | cache_model(model_name) 12 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/orca_2/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | - device: NVIDIA A10G 3 | - device: NVIDIA A100-SXM4-40GB 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 
| train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/phi_1_5/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import sys 4 | 5 | from torchbenchmark.util.framework.huggingface.patch_hf import ( 6 | cache_model, 7 | patch_transformers, 8 | ) 9 | from utils.python_utils import pip_install_requirements 10 | 11 | if __name__ == "__main__": 12 | pip_install_requirements() 13 | patch_transformers() 14 | model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 15 | cache_model(model_name) 16 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/phi_1_5/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 1 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | not_implemented: 10 | - device: NVIDIA A10G 11 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/phi_1_5/requirements.txt: -------------------------------------------------------------------------------- 1 | einops 2 | flash_attn 3 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/phi_2/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.huggingface.patch_hf import ( 4 | cache_model, 5 | patch_transformers, 6 | ) 7 | from utils.python_utils import pip_install_requirements 8 | 9 | if __name__ == "__main__": 10 | pip_install_requirements() 11 | patch_transformers() 12 | model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 13 | cache_model(model_name) 14 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/phi_2/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 1 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | not_implemented: 10 | - device: NVIDIA A10G 11 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/phi_2/requirements.txt: -------------------------------------------------------------------------------- 1 | einops 2 | flash_attn 3 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/sage/__init__.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.tasks import GNN 2 | from torchbenchmark.util.framework.gnn.model_factory import GNNModel 3 | 4 | 5 | class Model(GNNModel): 6 | task = GNN.CLASSIFICATION 7 | DEFAULT_TRAIN_BSIZE = 64 8 | DEFAULT_EVAL_BSIZE = 64 9 | 10 | def __init__(self, test, device, batch_size=None, extra_args=[]): 11 | super().__init__( 12 | model_name="sage", 13 | test=test, 14 | device=device, 15 | batch_size=batch_size, 16 | extra_args=extra_args, 17 | ) 18 | if device == "cuda": 19 | # TODO - Add CUDA support 20 | raise NotImplementedError("Sage doesn't support CUDA") 21 | 
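The CUDA guard at the end of the Model constructor above means a cuda request ends in NotImplementedError. A minimal sketch of that behaviour (not a file from the repository), assuming the sage canary's install.py has already been run so the PyTorch Geometric dependencies and the Reddit_minimal data are in place:

from torchbenchmark.canary_models.sage import Model

# Supported path: the metadata below lists a cpu eval batch size of 64.
cpu_model = Model(test="eval", device="cpu")

# A cuda request reaches the guard shown above and raises.
try:
    Model(test="eval", device="cuda")
except NotImplementedError as err:
    print(err)  # Sage doesn't support CUDA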
-------------------------------------------------------------------------------- /torchbenchmark/canary_models/sage/install.py: -------------------------------------------------------------------------------- 1 | from utils import s3_utils 2 | from utils.python_utils import pip_install_requirements 3 | 4 | if __name__ == "__main__": 5 | s3_utils.checkout_s3_data( 6 | "INPUT_TARBALLS", "Reddit_minimal.tar.gz", decompress=True 7 | ) 8 | pip_install_requirements() 9 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/sage/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | cpu: 3 | eval_batch_size: 64 4 | eval_benchmark: true 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: true 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/sage/requirements.txt: -------------------------------------------------------------------------------- 1 | pyg_lib 2 | torch_scatter 3 | torch_sparse 4 | pyg-nightly 5 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/stable_diffusion_xl/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | import warnings 3 | 4 | import torch 5 | from torchbenchmark.util.framework.diffusers import install_diffusers 6 | 7 | MODEL_NAME = "stabilityai/stable-diffusion-2" 8 | 9 | 10 | def load_model_checkpoint(): 11 | from diffusers import StableDiffusionPipeline 12 | 13 | StableDiffusionPipeline.from_pretrained( 14 | MODEL_NAME, torch_dtype=torch.float16, safety_checker=None 15 | ) 16 | 17 | 18 | if __name__ == "__main__": 19 | if "HUGGING_FACE_HUB_TOKEN" not in os.environ: 20 | warnings.warn( 21 | "Make sure to set `HUGGING_FACE_HUB_TOKEN` so you can download weights" 22 | ) 23 | else: 24 | install_diffusers() 25 | load_model_checkpoint() 26 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/stable_diffusion_xl/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 32 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | not_implemented: 10 | - device: cpu 11 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/torchrec_dlrm/install.py: -------------------------------------------------------------------------------- 1 | from utils.python_utils import pip_install_requirements 2 | 3 | if __name__ == "__main__": 4 | pip_install_requirements() 5 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/torchrec_dlrm/metadata.yaml: -------------------------------------------------------------------------------- 1 | eval_benchmark: false 2 | eval_deterministic: false 3 | eval_nograd: true 4 | train_benchmark: false 5 | train_deterministic: false 6 | skip_cuda_memory_leak: true 7 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/torchrec_dlrm/origin: -------------------------------------------------------------------------------- 1 | https://github.com/facebookresearch/dlrm
-------------------------------------------------------------------------------- /torchbenchmark/canary_models/torchrec_dlrm/requirements.txt: -------------------------------------------------------------------------------- 1 | torchrec-nightly 2 | fbgemm-gpu-nightly 3 | pyre-extensions 4 | -------------------------------------------------------------------------------- /torchbenchmark/data/index.yaml: -------------------------------------------------------------------------------- 1 | INPUT_TARBALLS: 2 | # index file for S3 storage of the input data 3 | - Background_Matting_inputs.tar.gz 4 | - coco128.tar.gz 5 | - multi30k.tar.gz 6 | - tacotron2-minimal.tar.gz 7 | - coco2017-minimal.tar.gz 8 | - pytorch_stargan_inputs.tar.gz 9 | - LearningToPaint_inputs.tar.gz 10 | - pytorch_CycleGAN_and_pix2pix_inputs.tar.gz 11 | - Super_SloMo_inputs.tar.gz 12 | - speech_transformer_inputs.tar.gz 13 | - Reddit_minimal.tar.gz 14 | - sam_inputs.tar.gz 15 | MODEL_PKLS: 16 | - drq/obs.pkl 17 | - maml_omniglot/batch.pt 18 | -------------------------------------------------------------------------------- /torchbenchmark/e2e_models/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /torchbenchmark/e2e_models/fambench_xlmr/install.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import sys 3 | 4 | 5 | def pip_install_requirements(): 6 | subprocess.check_call( 7 | [sys.executable, "-m", "pip", "install", "-q", "-r", "requirements.txt"] 8 | ) 9 | 10 | 11 | if __name__ == "__main__": 12 | pip_install_requirements() 13 | -------------------------------------------------------------------------------- /torchbenchmark/e2e_models/fambench_xlmr/requirements.txt: -------------------------------------------------------------------------------- 1 | bitarray 2 | sacrebleu>=1.4.12 3 | omegaconf 4 | hydra-core 5 | -------------------------------------------------------------------------------- /torchbenchmark/e2e_models/hf_bert/install.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import sys 3 | 4 | 5 | def pip_install_requirements(): 6 | subprocess.check_call( 7 | [sys.executable, "-m", "pip", "install", "-q", "-r", "requirements.txt"] 8 | ) 9 | 10 | 11 | if __name__ == "__main__": 12 | pip_install_requirements() 13 | -------------------------------------------------------------------------------- /torchbenchmark/e2e_models/hf_bert/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate 2 | datasets >= 1.8.0 3 | sentencepiece != 0.1.92 4 | scipy 5 | scikit-learn 6 | protobuf 7 | torch 8 | evaluate 9 | sacrebleu -------------------------------------------------------------------------------- /torchbenchmark/e2e_models/hf_t5/install.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import sys 3 | 4 | 5 | def pip_install_requirements(): 6 | subprocess.check_call( 7 | [sys.executable, "-m", "pip", "install", "-q", "-r", "requirements.txt"] 8 | ) 9 | 10 | 11 | if __name__ == "__main__": 12 | pip_install_requirements() 13 | -------------------------------------------------------------------------------- /torchbenchmark/e2e_models/hf_t5/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate 2 | datasets 
>= 1.8.0 3 | torch 4 | evaluate 5 | transformers 6 | numpy -------------------------------------------------------------------------------- /torchbenchmark/models/BERT_pytorch/Makefile: -------------------------------------------------------------------------------- 1 | package: 2 | python setup.py sdist 3 | python setup.py bdist_wheel 4 | -------------------------------------------------------------------------------- /torchbenchmark/models/BERT_pytorch/bert_pytorch/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | from .dataset import BERTDataset 2 | from .vocab import WordVocab 3 | -------------------------------------------------------------------------------- /torchbenchmark/models/BERT_pytorch/bert_pytorch/model/__init__.py: -------------------------------------------------------------------------------- 1 | from .bert import BERT 2 | from .language_model import BERTLM 3 | -------------------------------------------------------------------------------- /torchbenchmark/models/BERT_pytorch/bert_pytorch/model/attention/__init__.py: -------------------------------------------------------------------------------- 1 | from .multi_head import MultiHeadedAttention 2 | from .single import Attention 3 | -------------------------------------------------------------------------------- /torchbenchmark/models/BERT_pytorch/bert_pytorch/model/embedding/__init__.py: -------------------------------------------------------------------------------- 1 | from .bert import BERTEmbedding 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/BERT_pytorch/bert_pytorch/model/embedding/segment.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | class SegmentEmbedding(nn.Embedding): 5 | def __init__(self, embed_size=512): 6 | super().__init__(3, embed_size, padding_idx=0) 7 | -------------------------------------------------------------------------------- /torchbenchmark/models/BERT_pytorch/bert_pytorch/model/embedding/token.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | class TokenEmbedding(nn.Embedding): 5 | def __init__(self, vocab_size, embed_size=512): 6 | super().__init__(vocab_size, embed_size, padding_idx=0) 7 | -------------------------------------------------------------------------------- /torchbenchmark/models/BERT_pytorch/bert_pytorch/model/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .feed_forward import PositionwiseFeedForward 2 | from .layer_norm import LayerNorm 3 | from .sublayer import SublayerConnection 4 | -------------------------------------------------------------------------------- /torchbenchmark/models/BERT_pytorch/bert_pytorch/model/utils/feed_forward.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | class PositionwiseFeedForward(nn.Module): 5 | "Implements FFN equation." 
6 | 7 | def __init__(self, d_model, d_ff, dropout=0.1): 8 | super(PositionwiseFeedForward, self).__init__() 9 | self.w_1 = nn.Linear(d_model, d_ff) 10 | self.w_2 = nn.Linear(d_ff, d_model) 11 | self.dropout = nn.Dropout(dropout) 12 | self.activation = nn.GELU() 13 | 14 | def forward(self, x): 15 | return self.w_2(self.dropout(self.activation(self.w_1(x)))) 16 | -------------------------------------------------------------------------------- /torchbenchmark/models/BERT_pytorch/bert_pytorch/model/utils/layer_norm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class LayerNorm(nn.Module): 6 | "Construct a layernorm module (See citation for details)." 7 | 8 | def __init__(self, features, eps=1e-6): 9 | super(LayerNorm, self).__init__() 10 | self.a_2 = nn.Parameter(torch.ones(features)) 11 | self.b_2 = nn.Parameter(torch.zeros(features)) 12 | self.eps = eps 13 | 14 | def forward(self, x): 15 | mean = x.mean(-1, keepdim=True) 16 | std = x.std(-1, keepdim=True) 17 | return self.a_2 * (x - mean) / (std + self.eps) + self.b_2 18 | -------------------------------------------------------------------------------- /torchbenchmark/models/BERT_pytorch/bert_pytorch/model/utils/sublayer.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | from .layer_norm import LayerNorm 4 | from .tensor2tensor import TensorToTensor 5 | 6 | 7 | class SublayerConnection(nn.Module): 8 | """ 9 | A residual connection followed by a layer norm. 10 | Note for code simplicity the norm is first as opposed to last. 11 | """ 12 | 13 | def __init__(self, size, dropout): 14 | super(SublayerConnection, self).__init__() 15 | self.norm = LayerNorm(size) 16 | self.dropout = nn.Dropout(dropout) 17 | 18 | def forward(self, x, sublayer: TensorToTensor): 19 | "Apply residual connection to any sublayer with the same size." 
20 | return x + self.dropout(sublayer.forward(self.norm(x))) 21 | -------------------------------------------------------------------------------- /torchbenchmark/models/BERT_pytorch/bert_pytorch/model/utils/tensor2tensor.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | @torch.jit.interface 5 | class TensorToTensor(torch.nn.Module): 6 | def forward(self, x: torch.Tensor) -> torch.Tensor: 7 | pass 8 | -------------------------------------------------------------------------------- /torchbenchmark/models/BERT_pytorch/bert_pytorch/trainer/__init__.py: -------------------------------------------------------------------------------- 1 | from .pretrain import BERTTrainer 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/BERT_pytorch/install.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import sys 3 | 4 | 5 | def setup_install(): 6 | subprocess.check_call([sys.executable, "-m", "pip", "install", "-e", "."]) 7 | 8 | 9 | if __name__ == "__main__": 10 | setup_install() 11 | -------------------------------------------------------------------------------- /torchbenchmark/models/BERT_pytorch/install.sh: -------------------------------------------------------------------------------- 1 | python setup.py install 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/BERT_pytorch/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 32 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/BERT_pytorch/origin: -------------------------------------------------------------------------------- 1 | origin https://github.com/wconstab/BERT-pytorch 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/BERT_pytorch/requirements.txt: -------------------------------------------------------------------------------- 1 | tqdm 2 | numpy 3 | -------------------------------------------------------------------------------- /torchbenchmark/models/BERT_pytorch/run.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=0,1 bert -c data/corpus.small -v data/vocab.small -o bert.model $@ 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/BERT_pytorch/test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | 4 | class BERTVocabTestCase(unittest.TestCase): 5 | pass 6 | -------------------------------------------------------------------------------- /torchbenchmark/models/Background_Matting/.gitignore: -------------------------------------------------------------------------------- 1 | *.csv 2 | *.pth 3 | ak/ 4 | ak.png 5 | -------------------------------------------------------------------------------- /torchbenchmark/models/Background_Matting/Data_adobe/prepare.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # ./prepare.sh /path/to/Adobe/Combined_Dataset 3 | 4 | function copy_adobe() { 5 | while read p; do 6 | if [ -f 
"$1/Other/fg/$p" ]; then 7 | cp $1/Other/fg/$p fg_$2 8 | cp $1/Other/alpha/"$p" mask_$2 9 | else 10 | cp $1/Adobe-licensed\ images/fg/"$p" fg_$2 11 | cp $1/Adobe-licensed\ images/alpha/"$p" mask_$2 12 | fi 13 | done <$2_data_list.txt 14 | } 15 | mkdir -p fg_train fg_test mask_train mask_test merged_train merged_test 16 | copy_adobe "$1/Test_set" test 17 | copy_adobe "$1/Training_set" train 18 | -------------------------------------------------------------------------------- /torchbenchmark/models/Background_Matting/Data_adobe/test_data_list.txt: -------------------------------------------------------------------------------- 1 | woman-morning-bathrobe-bathroom.png 2 | woman-952506_1920 (1).png 3 | girl-1219339_1920.png 4 | wedding-dresses-1486260_1280.png 5 | long-1245787_1920.png 6 | pexels-photo-58463.png 7 | girl-beautiful-young-face-53000.png 8 | boy-1518482_1920.png 9 | girl-1467820_1280.png 10 | model-600238_1920.png 11 | sea-sunny-person-beach.png 12 | -------------------------------------------------------------------------------- /torchbenchmark/models/Background_Matting/install.py: -------------------------------------------------------------------------------- 1 | from utils import python_utils, s3_utils 2 | 3 | 4 | def pip_install_requirements(): 5 | python_utils.pip_install_requirements("requirements.txt") 6 | 7 | 8 | if __name__ == "__main__": 9 | pip_install_requirements() 10 | s3_utils.checkout_s3_data( 11 | "INPUT_TARBALLS", "Background_Matting_inputs.tar.gz", decompress=True 12 | ) 13 | -------------------------------------------------------------------------------- /torchbenchmark/models/Background_Matting/install.sh: -------------------------------------------------------------------------------- 1 | python -m pip install -r requirements.txt 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/Background_Matting/metadata.yaml: -------------------------------------------------------------------------------- 1 | train_benchmark: true 2 | train_deterministic: false 3 | not_implemented: 4 | # Disabled due to excessively slow runtime - see GH Issue #100 5 | - test: train 6 | device: cpu 7 | - test: example 8 | device: cpu 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/Background_Matting/origin: -------------------------------------------------------------------------------- 1 | origin https://github.com/bertmaher/Background-Matting.git 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/Background_Matting/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | opencv-python 3 | pandas 4 | Pillow 5 | scikit-image 6 | scipy 7 | tqdm 8 | tensorboardX 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/Background_Matting/run.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=0,1 python train_real_fixed.py -n Real_fixed -bs 4 -res 512 $@ 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/LearningToPaint/.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.py~ 3 | 4 | data/ 5 | *.pkl 6 | 7 | output/* 8 | -------------------------------------------------------------------------------- 
/torchbenchmark/models/LearningToPaint/baseline/Renderer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/models/LearningToPaint/baseline/Renderer/__init__.py -------------------------------------------------------------------------------- /torchbenchmark/models/LearningToPaint/baseline_modelfree/Renderer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/models/LearningToPaint/baseline_modelfree/Renderer/__init__.py -------------------------------------------------------------------------------- /torchbenchmark/models/LearningToPaint/install.py: -------------------------------------------------------------------------------- 1 | from utils import s3_utils 2 | from utils.python_utils import pip_install_requirements 3 | 4 | 5 | if __name__ == "__main__": 6 | s3_utils.checkout_s3_data( 7 | "INPUT_TARBALLS", "Super_SloMo_inputs.tar.gz", decompress=True 8 | ) 9 | pip_install_requirements() 10 | -------------------------------------------------------------------------------- /torchbenchmark/models/LearningToPaint/install.sh: -------------------------------------------------------------------------------- 1 | python -m pip install -r requirements.txt 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/LearningToPaint/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 256 4 | eval_benchmark: true 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: true 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/LearningToPaint/origin: -------------------------------------------------------------------------------- 1 | origin https://github.com/nikithamalgifb/LearningToPaint.git 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/LearningToPaint/requirements.txt: -------------------------------------------------------------------------------- 1 | tensorboardX 2 | opencv-python 3 | Pillow 4 | scipy 5 | -------------------------------------------------------------------------------- /torchbenchmark/models/LearningToPaint/run.sh: -------------------------------------------------------------------------------- 1 | python3 baseline/train_renderer.py $@ 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/Super_SloMo/install.py: -------------------------------------------------------------------------------- 1 | from utils import s3_utils 2 | from utils.python_utils import pip_install_requirements 3 | 4 | if __name__ == "__main__": 5 | s3_utils.checkout_s3_data( 6 | "INPUT_TARBALLS", "Super_SloMo_inputs.tar.gz", decompress=True 7 | ) 8 | pip_install_requirements() 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/Super_SloMo/install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | pip install -r requirements.txt 3 | -------------------------------------------------------------------------------- 
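Each model directory in this listing ships its own install.py, and those scripts refer to a bare requirements.txt, which only resolves if they run with the model directory as the working directory. A rough sketch of driving them that way, offered as an assumption for illustration rather than the actual TorchBench installer:

import subprocess
import sys
from pathlib import Path

MODELS_DIR = Path("torchbenchmark/models")  # assumed repo-relative location

for install_script in sorted(MODELS_DIR.glob("*/install.py")):
    print(f"installing dependencies for {install_script.parent.name}")
    # cwd is the model directory, so relative paths such as requirements.txt resolve
    subprocess.check_call([sys.executable, "install.py"], cwd=install_script.parent)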
/torchbenchmark/models/Super_SloMo/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 8 4 | eval_benchmark: false 5 | eval_deterministic: true 6 | eval_nograd: true 7 | not_implemented: 8 | - device: cpu 9 | - device: cuda 10 | test: eval 11 | train_benchmark: false 12 | train_deterministic: true 13 | -------------------------------------------------------------------------------- /torchbenchmark/models/Super_SloMo/origin: -------------------------------------------------------------------------------- 1 | origin https://github.com/wconstab/Super-SloMo.git 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/Super_SloMo/requirements.txt: -------------------------------------------------------------------------------- 1 | tensorboardX 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/Super_SloMo/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | python train.py --dataset_root dataset --checkpoint_dir checkpoints --epochs 1 "$@" 3 | -------------------------------------------------------------------------------- /torchbenchmark/models/alexnet/install.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/models/alexnet/install.py -------------------------------------------------------------------------------- /torchbenchmark/models/alexnet/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 1024 4 | eval_benchmark: true 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: true 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/basic_gnn_edgecnn/__init__.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.util.framework.gnn.model_factory import BasicGNNModel 2 | 3 | 4 | class Model(BasicGNNModel): 5 | def __init__(self, test, device, batch_size=None, extra_args=[]): 6 | super().__init__( 7 | model_name="edgecnn", 8 | test=test, 9 | device=device, 10 | batch_size=batch_size, 11 | extra_args=extra_args, 12 | ) 13 | -------------------------------------------------------------------------------- /torchbenchmark/models/basic_gnn_edgecnn/install.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.util.framework.gnn import install_pytorch_geometric 2 | 3 | if __name__ == "__main__": 4 | install_pytorch_geometric() 5 | -------------------------------------------------------------------------------- /torchbenchmark/models/basic_gnn_edgecnn/metadata.yaml: -------------------------------------------------------------------------------- 1 | eval_benchmark: false 2 | eval_deterministic: false 3 | eval_nograd: true 4 | train_benchmark: false 5 | train_deterministic: false 6 | 7 | -------------------------------------------------------------------------------- /torchbenchmark/models/basic_gnn_gcn/__init__.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.util.framework.gnn.model_factory import BasicGNNModel 2 | 3 | 4 | 
class Model(BasicGNNModel): 5 | def __init__(self, test, device, batch_size=None, extra_args=[]): 6 | super().__init__( 7 | model_name="gcn", 8 | test=test, 9 | device=device, 10 | batch_size=batch_size, 11 | extra_args=extra_args, 12 | ) 13 | -------------------------------------------------------------------------------- /torchbenchmark/models/basic_gnn_gcn/install.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.util.framework.gnn import install_pytorch_geometric 2 | 3 | if __name__ == "__main__": 4 | install_pytorch_geometric() 5 | -------------------------------------------------------------------------------- /torchbenchmark/models/basic_gnn_gcn/metadata.yaml: -------------------------------------------------------------------------------- 1 | eval_benchmark: false 2 | eval_deterministic: false 3 | eval_nograd: true 4 | train_benchmark: false 5 | train_deterministic: false 6 | -------------------------------------------------------------------------------- /torchbenchmark/models/basic_gnn_gin/__init__.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.util.framework.gnn.model_factory import BasicGNNModel 2 | 3 | 4 | class Model(BasicGNNModel): 5 | def __init__(self, test, device, batch_size=None, extra_args=[]): 6 | super().__init__( 7 | model_name="gin", 8 | test=test, 9 | device=device, 10 | batch_size=batch_size, 11 | extra_args=extra_args, 12 | ) 13 | -------------------------------------------------------------------------------- /torchbenchmark/models/basic_gnn_gin/install.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.util.framework.gnn import install_pytorch_geometric 2 | 3 | if __name__ == "__main__": 4 | install_pytorch_geometric() 5 | -------------------------------------------------------------------------------- /torchbenchmark/models/basic_gnn_gin/metadata.yaml: -------------------------------------------------------------------------------- 1 | eval_benchmark: false 2 | eval_deterministic: false 3 | eval_nograd: true 4 | train_benchmark: false 5 | train_deterministic: false -------------------------------------------------------------------------------- /torchbenchmark/models/basic_gnn_sage/__init__.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.util.framework.gnn.model_factory import BasicGNNModel 2 | 3 | 4 | class Model(BasicGNNModel): 5 | def __init__(self, test, device, batch_size=None, extra_args=[]): 6 | super().__init__( 7 | model_name="sage", 8 | test=test, 9 | device=device, 10 | batch_size=batch_size, 11 | extra_args=extra_args, 12 | ) 13 | -------------------------------------------------------------------------------- /torchbenchmark/models/basic_gnn_sage/install.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.util.framework.gnn import install_pytorch_geometric 2 | 3 | if __name__ == "__main__": 4 | install_pytorch_geometric() 5 | -------------------------------------------------------------------------------- /torchbenchmark/models/basic_gnn_sage/metadata.yaml: -------------------------------------------------------------------------------- 1 | eval_benchmark: false 2 | eval_deterministic: false 3 | eval_nograd: true 4 | train_benchmark: false 5 | train_deterministic: false 6 | -------------------------------------------------------------------------------- 
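The four basic_gnn_* wrappers above differ only in the model_name they forward to BasicGNNModel, so a quick smoke check can loop over the directory names. This is a sketch under the assumption that each model's install.py (which calls install_pytorch_geometric) has already been run; it is not part of the benchmark suite:

import importlib

for name in ("basic_gnn_edgecnn", "basic_gnn_gcn", "basic_gnn_gin", "basic_gnn_sage"):
    module = importlib.import_module(f"torchbenchmark.models.{name}")
    # Constructor signature as shown above; batch_size=None falls back to the default.
    model = module.Model(test="eval", device="cpu")
    print(name, type(model).__name__)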
/torchbenchmark/models/cm3leon_generate/metadata.yaml: -------------------------------------------------------------------------------- 1 | eval_benchmark: false 2 | eval_deterministic: false 3 | eval_nograd: true 4 | train_benchmark: false 5 | train_deterministic: false 6 | -------------------------------------------------------------------------------- /torchbenchmark/models/dcgan/install.py: -------------------------------------------------------------------------------- 1 | from utils.python_utils import pip_install_requirements 2 | 3 | if __name__ == "__main__": 4 | pip_install_requirements() 5 | -------------------------------------------------------------------------------- /torchbenchmark/models/dcgan/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 1024 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/dcgan/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/demucs/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | *.egg-info 3 | build 4 | dist 5 | Session.vim 6 | *.log 7 | trash 8 | tex 9 | demucs_release 10 | -------------------------------------------------------------------------------- /torchbenchmark/models/demucs/check.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import torch 4 | 5 | a = torch.load(sys.argv[1]) 6 | b = torch.load(sys.argv[2]) 7 | torch.testing.assert_allclose(a, b, rtol=0.01, atol=0.01) 8 | -------------------------------------------------------------------------------- /torchbenchmark/models/demucs/demucs/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | -------------------------------------------------------------------------------- /torchbenchmark/models/demucs/environment-cpu.yml: -------------------------------------------------------------------------------- 1 | name: demucs 2 | 3 | channels: 4 | - conda-forge 5 | - pytorch 6 | 7 | dependencies: 8 | - python=3.7 9 | - ffmpeg==4.2 10 | - pytorch=1.4.0 11 | - scipy==1.3.1 12 | - tqdm>=4.36.1 13 | - pip 14 | - pip: 15 | - lameenc==1.2.2 16 | - musdb==0.3.1 17 | - museval==0.3.0 18 | - requests==2.22 19 | - treetable==0.2.3 20 | -------------------------------------------------------------------------------- /torchbenchmark/models/demucs/environment-cuda.yml: -------------------------------------------------------------------------------- 1 | name: demucs 2 | 3 | channels: 4 | - conda-forge 5 | - pytorch 6 | 7 | dependencies: 8 | - python=3.7 9 | - cudatoolkit=10 10 | - ffmpeg==4.2 11 | - pytorch=1.4.0 12 | - scipy==1.3.1 13 | - tqdm>=4.36.1 14 | - pip 15 | - pip: 16 | - lameenc==1.2.2 17 | - musdb==0.3.1 18 | - museval==0.3.0 19 | - requests==2.22 20 | - treetable==0.2.3 21 | -------------------------------------------------------------------------------- /torchbenchmark/models/demucs/install.py: -------------------------------------------------------------------------------- 1 | from utils.python_utils import pip_install_requirements 2 | 3 | if __name__ == "__main__": 4 | pip_install_requirements() 5 | -------------------------------------------------------------------------------- /torchbenchmark/models/demucs/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 32 4 | eval_benchmark: true 5 | eval_deterministic: false 6 | eval_nograd: true 7 | not_implemented: 8 | - device: cpu 9 | test: train 10 | - device: cuda 11 | test: train 12 | - device: cuda 13 | test: eval 14 | train_benchmark: true 15 | train_deterministic: false 16 | -------------------------------------------------------------------------------- /torchbenchmark/models/demucs/origin: -------------------------------------------------------------------------------- 1 | origin https://github.com/zheng-xq/demucs.git 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/demucs/requirements.txt: -------------------------------------------------------------------------------- 1 | ffmpeg-python 2 | scipy 3 | tqdm 4 | lameenc 5 | musdb 6 | museval 7 | requests 8 | treetable 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/demucs/run.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | set -e 3 | 4 | if [ -f metadata/musdb.json ]; then 5 | rm metadata/musdb.json 6 | fi 7 | 8 | for f in checkpoints evals logs models; do 9 | if [ -d $f ]; then 10 | rm -r $f 11 | fi 12 | done 13 | 14 | python3 -m demucs --musdb "$(pwd)/sample_data/" \ 15 | --batch_size 1 \ 16 | --device cuda \ 17 | --workers 1 \ 18 | --eval_workers 1 \ 19 | --restart \ 20 | --remix_group_size 1 \ 21 | --samples 100000 \ 22 | --repeat 1 \ 23 | --epochs 1 \ 24 | "$@" 25 | -------------------------------------------------------------------------------- /torchbenchmark/models/demucs/run_overall.sh: -------------------------------------------------------------------------------- 1 | #! 
/bin/bash 2 | set -e 3 | 4 | time bash run.sh --debug reference_0.out 5 | time bash run.sh --debug reference_1.out 6 | python check.py reference_0.out reference_1.out 7 | 8 | time bash run.sh --script --debug jit.out 9 | python check.py reference_0.out jit.out 10 | -------------------------------------------------------------------------------- /torchbenchmark/models/demucs/setup.cfg: -------------------------------------------------------------------------------- 1 | [pep8] 2 | max-line-length = 100 3 | 4 | [flake8] 5 | max-line-length = 100 6 | 7 | [yapf] 8 | column_limit = 100 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/densenet121/install.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/models/densenet121/install.py -------------------------------------------------------------------------------- /torchbenchmark/models/densenet121/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 64 4 | eval_benchmark: true 5 | eval_deterministic: false 6 | eval_nograd: true 7 | not_implemented: 8 | - device: cuda 9 | - device: cpu 10 | train_benchmark: true 11 | train_deterministic: false 12 | -------------------------------------------------------------------------------- /torchbenchmark/models/detectron2_fasterrcnn_r_101_c4/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.detectron2 import install_detectron2 4 | 5 | MODEL_NAME = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 6 | MODEL_DIR = os.path.abspath(os.path.dirname(__file__)) 7 | 8 | if __name__ == "__main__": 9 | install_detectron2(MODEL_NAME, MODEL_DIR) 10 | -------------------------------------------------------------------------------- /torchbenchmark/models/detectron2_fasterrcnn_r_101_c4/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 1 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/detectron2_fasterrcnn_r_101_dc5/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.detectron2 import install_detectron2 4 | 5 | MODEL_NAME = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 6 | MODEL_DIR = os.path.abspath(os.path.dirname(__file__)) 7 | 8 | if __name__ == "__main__": 9 | install_detectron2(MODEL_NAME, MODEL_DIR) 10 | -------------------------------------------------------------------------------- /torchbenchmark/models/detectron2_fasterrcnn_r_101_dc5/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 4 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | not_implemented: 8 | - device: cpu 9 | train_benchmark: false 10 | train_deterministic: false 11 | -------------------------------------------------------------------------------- 
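The metadata.yaml files in this listing share a small schema: optional per-device settings under devices, flat eval_*/train_* flags, and an optional not_implemented list of device/test combinations. The helpers below are a minimal illustrative reader (not TorchBench APIs; PyYAML assumed), checked against the detectron2_fasterrcnn_r_101_dc5/metadata.yaml shown directly above:

import yaml


def load_metadata(path):
    with open(path) as f:
        return yaml.safe_load(f) or {}


def eval_batch_size(meta, device_name):
    # Per-device overrides live under the "devices" mapping.
    return meta.get("devices", {}).get(device_name, {}).get("eval_batch_size")


def is_not_implemented(meta, device=None, test=None):
    # An entry matches when every key it specifies agrees with the query.
    for entry in meta.get("not_implemented", []) or []:
        device_ok = "device" not in entry or entry["device"] == device
        test_ok = "test" not in entry or entry["test"] == test
        if device_ok and test_ok:
            return True
    return False


meta = load_metadata("torchbenchmark/models/detectron2_fasterrcnn_r_101_dc5/metadata.yaml")
print(eval_batch_size(meta, "NVIDIA A100-SXM4-40GB"))  # 4
print(is_not_implemented(meta, device="cpu", test="train"))  # True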
/torchbenchmark/models/detectron2_fasterrcnn_r_101_fpn/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.detectron2 import install_detectron2 4 | 5 | MODEL_NAME = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 6 | MODEL_DIR = os.path.abspath(os.path.dirname(__file__)) 7 | 8 | if __name__ == "__main__": 9 | install_detectron2(MODEL_NAME, MODEL_DIR) 10 | -------------------------------------------------------------------------------- /torchbenchmark/models/detectron2_fasterrcnn_r_101_fpn/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 4 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | not_implemented: 8 | - device: cpu 9 | train_benchmark: false 10 | train_deterministic: false 11 | -------------------------------------------------------------------------------- /torchbenchmark/models/detectron2_fasterrcnn_r_50_c4/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.detectron2 import install_detectron2 4 | 5 | MODEL_NAME = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 6 | MODEL_DIR = os.path.abspath(os.path.dirname(__file__)) 7 | 8 | if __name__ == "__main__": 9 | install_detectron2(MODEL_NAME, MODEL_DIR) 10 | -------------------------------------------------------------------------------- /torchbenchmark/models/detectron2_fasterrcnn_r_50_c4/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 1 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | not_implemented: 8 | - device: cpu 9 | train_benchmark: false 10 | train_deterministic: false 11 | -------------------------------------------------------------------------------- /torchbenchmark/models/detectron2_fasterrcnn_r_50_dc5/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.detectron2 import install_detectron2 4 | 5 | MODEL_NAME = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 6 | MODEL_DIR = os.path.abspath(os.path.dirname(__file__)) 7 | 8 | if __name__ == "__main__": 9 | install_detectron2(MODEL_NAME, MODEL_DIR) 10 | -------------------------------------------------------------------------------- /torchbenchmark/models/detectron2_fasterrcnn_r_50_dc5/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 1 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | not_implemented: 8 | - device: cpu 9 | train_benchmark: false 10 | train_deterministic: false 11 | -------------------------------------------------------------------------------- /torchbenchmark/models/detectron2_fasterrcnn_r_50_fpn/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.detectron2 import install_detectron2 4 | 5 | MODEL_NAME = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 6 | MODEL_DIR = os.path.abspath(os.path.dirname(__file__)) 7 | 8 | if __name__ == "__main__": 9 | install_detectron2(MODEL_NAME, MODEL_DIR) 10 | 
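The detectron2 install scripts above, like several of the HuggingFace canary installers earlier in this listing, derive the model name from the directory containing install.py with the same os.path idiom. A worked example using an illustrative absolute path in place of __file__:

import os

# Stand-in for __file__ inside one of the install.py scripts above (path is hypothetical).
file_path = "/repo/torchbenchmark/models/detectron2_fasterrcnn_r_50_fpn/install.py"

MODEL_NAME = os.path.basename(os.path.dirname(os.path.abspath(file_path)))
MODEL_DIR = os.path.abspath(os.path.dirname(file_path))

print(MODEL_NAME)  # detectron2_fasterrcnn_r_50_fpn
print(MODEL_DIR)   # /repo/torchbenchmark/models/detectron2_fasterrcnn_r_50_fpn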
-------------------------------------------------------------------------------- /torchbenchmark/models/detectron2_fasterrcnn_r_50_fpn/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 4 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | not_implemented: 8 | - device: cpu 9 | train_benchmark: false 10 | train_deterministic: false 11 | -------------------------------------------------------------------------------- /torchbenchmark/models/detectron2_fcos_r_50_fpn/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.detectron2 import install_detectron2 4 | 5 | MODEL_NAME = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 6 | MODEL_DIR = os.path.abspath(os.path.dirname(__file__)) 7 | 8 | if __name__ == "__main__": 9 | install_detectron2(MODEL_NAME, MODEL_DIR) 10 | -------------------------------------------------------------------------------- /torchbenchmark/models/detectron2_fcos_r_50_fpn/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 4 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | not_implemented: 8 | - device: cpu 9 | train_benchmark: false 10 | train_deterministic: false 11 | -------------------------------------------------------------------------------- /torchbenchmark/models/detectron2_maskrcnn/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.detectron2 import install_detectron2 4 | 5 | MODEL_NAME = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 6 | MODEL_DIR = os.path.abspath(os.path.dirname(__file__)) 7 | 8 | if __name__ == "__main__": 9 | install_detectron2(MODEL_NAME, MODEL_DIR) 10 | -------------------------------------------------------------------------------- /torchbenchmark/models/detectron2_maskrcnn/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 4 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | not_implemented: 8 | - device: cpu 9 | train_benchmark: false 10 | train_deterministic: false 11 | -------------------------------------------------------------------------------- /torchbenchmark/models/detectron2_maskrcnn/origin: -------------------------------------------------------------------------------- 1 | origin https://github.com/facebookresearch/detectron2 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/detectron2_maskrcnn_r_101_c4/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.detectron2 import install_detectron2 4 | 5 | MODEL_NAME = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 6 | MODEL_DIR = os.path.abspath(os.path.dirname(__file__)) 7 | 8 | if __name__ == "__main__": 9 | install_detectron2(MODEL_NAME, MODEL_DIR) 10 | -------------------------------------------------------------------------------- /torchbenchmark/models/detectron2_maskrcnn_r_101_c4/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 
| NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 2 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | not_implemented: 8 | - device: cpu 9 | train_benchmark: false 10 | train_deterministic: false 11 | -------------------------------------------------------------------------------- /torchbenchmark/models/detectron2_maskrcnn_r_101_fpn/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.detectron2 import install_detectron2 4 | 5 | MODEL_NAME = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 6 | MODEL_DIR = os.path.abspath(os.path.dirname(__file__)) 7 | 8 | if __name__ == "__main__": 9 | install_detectron2(MODEL_NAME, MODEL_DIR) 10 | -------------------------------------------------------------------------------- /torchbenchmark/models/detectron2_maskrcnn_r_101_fpn/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 4 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | not_implemented: 8 | - device: cpu 9 | train_benchmark: false 10 | train_deterministic: false 11 | -------------------------------------------------------------------------------- /torchbenchmark/models/detectron2_maskrcnn_r_50_c4/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.detectron2 import install_detectron2 4 | 5 | MODEL_NAME = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 6 | MODEL_DIR = os.path.abspath(os.path.dirname(__file__)) 7 | 8 | if __name__ == "__main__": 9 | install_detectron2(MODEL_NAME, MODEL_DIR) 10 | -------------------------------------------------------------------------------- /torchbenchmark/models/detectron2_maskrcnn_r_50_c4/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 1 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | not_implemented: 8 | - device: cpu 9 | train_benchmark: false 10 | train_deterministic: false 11 | -------------------------------------------------------------------------------- /torchbenchmark/models/detectron2_maskrcnn_r_50_fpn/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.detectron2 import install_detectron2 4 | 5 | MODEL_NAME = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 6 | MODEL_DIR = os.path.abspath(os.path.dirname(__file__)) 7 | 8 | if __name__ == "__main__": 9 | install_detectron2(MODEL_NAME, MODEL_DIR) 10 | -------------------------------------------------------------------------------- /torchbenchmark/models/detectron2_maskrcnn_r_50_fpn/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 4 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | not_implemented: 8 | - device: cpu 9 | train_benchmark: false 10 | train_deterministic: false 11 | -------------------------------------------------------------------------------- /torchbenchmark/models/dlrm/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | Facebook has adopted a 
Code of Conduct that we expect project participants to adhere to. 4 | Please read the [full text](https://code.fb.com/codeofconduct/) 5 | so that you can understand what actions will and will not be tolerated. 6 | -------------------------------------------------------------------------------- /torchbenchmark/models/dlrm/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | ARG FROM_IMAGE_NAME=pytorch/pytorch:1.3-cuda10.1-cudnn7-runtime 7 | FROM ${FROM_IMAGE_NAME} 8 | 9 | ADD requirements.txt . 10 | RUN pip install -r requirements.txt 11 | 12 | RUN pip install torch==1.3.1 13 | 14 | WORKDIR /code 15 | ADD . . 16 | -------------------------------------------------------------------------------- /torchbenchmark/models/dlrm/install.py: -------------------------------------------------------------------------------- 1 | from utils.python_utils import pip_install_requirements 2 | 3 | if __name__ == "__main__": 4 | pip_install_requirements() 5 | -------------------------------------------------------------------------------- /torchbenchmark/models/dlrm/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 1 4 | eval_benchmark: false 5 | eval_deterministic: true 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: true 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/dlrm/origin: -------------------------------------------------------------------------------- 1 | https://github.com/facebookresearch/dlrm.git 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/dlrm/requirements.txt: -------------------------------------------------------------------------------- 1 | future 2 | numpy 3 | onnx 4 | pydot 5 | scikit-learn 6 | tqdm 7 | -------------------------------------------------------------------------------- /torchbenchmark/models/doctr_det_predictor/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 4 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | not_implemented: 8 | - device: cpu 9 | train_benchmark: false 10 | train_deterministic: false 11 | -------------------------------------------------------------------------------- /torchbenchmark/models/doctr_det_predictor/requirements.txt: -------------------------------------------------------------------------------- 1 | python-doctr 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/doctr_reco_predictor/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 64 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | not_implemented: 8 | - device: cpu 9 | train_benchmark: false 10 | train_deterministic: false 11 | -------------------------------------------------------------------------------- /torchbenchmark/models/doctr_reco_predictor/requirements.txt: -------------------------------------------------------------------------------- 1 | 
python-doctr 2 | rapidfuzz 3 | -------------------------------------------------------------------------------- /torchbenchmark/models/drq/install.py: -------------------------------------------------------------------------------- 1 | from utils import s3_utils 2 | from utils.python_utils import pip_install_requirements 3 | 4 | if __name__ == "__main__": 5 | pip_install_requirements() 6 | s3_utils.checkout_s3_data("MODEL_PKLS", "drq/obs.pkl", decompress=False) 7 | -------------------------------------------------------------------------------- /torchbenchmark/models/drq/metadata.yaml: -------------------------------------------------------------------------------- 1 | eval_benchmark: false 2 | eval_deterministic: false 3 | eval_nograd: true 4 | train_benchmark: false 5 | train_deterministic: false -------------------------------------------------------------------------------- /torchbenchmark/models/drq/origin: -------------------------------------------------------------------------------- 1 | https://github.com/denisyarats/drq/tree/365e5969f9a748ad63d854ce8c8cc8f90c1de4e0 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/drq/requirements.txt: -------------------------------------------------------------------------------- 1 | kornia 2 | scikit-image 3 | gym 4 | -------------------------------------------------------------------------------- /torchbenchmark/models/fastNLP_Bert/bert_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "attention_probs_dropout_prob": 0.1, 3 | "directionality": "bidi", 4 | "hidden_act": "gelu", 5 | "hidden_dropout_prob": 0.1, 6 | "hidden_size": 768, 7 | "initializer_range": 0.02, 8 | "intermediate_size": 3072, 9 | "max_position_embeddings": 512, 10 | "num_attention_heads": 12, 11 | "num_hidden_layers": 12, 12 | "pooler_fc_size": 768, 13 | "pooler_num_attention_heads": 12, 14 | "pooler_num_fc_layers": 3, 15 | "pooler_size_per_head": 128, 16 | "pooler_type": "first_token_transform", 17 | "type_vocab_size": 2, 18 | "vocab_size": 21128 19 | } 20 | -------------------------------------------------------------------------------- /torchbenchmark/models/fastNLP_Bert/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import patch 4 | from utils.python_utils import pip_install_requirements 5 | 6 | 7 | def patch_fastnlp(): 8 | import fastNLP 9 | 10 | current_dir = os.path.dirname(os.path.abspath(__file__)) 11 | patch_file = os.path.join(current_dir, "fastnlp.patch") 12 | fastNLP_dir = os.path.dirname(fastNLP.__file__) 13 | fastNLP_target_file = os.path.join(fastNLP_dir, "embeddings", "bert_embedding.py") 14 | p = patch.fromfile(patch_file) 15 | if not p.apply(strip=1, root=fastNLP_dir): 16 | print("Failed to patch fastNLP. 
Exit.") 17 | exit(1) 18 | 19 | 20 | if __name__ == "__main__": 21 | pip_install_requirements() 22 | patch_fastnlp() 23 | -------------------------------------------------------------------------------- /torchbenchmark/models/fastNLP_Bert/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 16 4 | eval_benchmark: false 5 | eval_deterministic: true 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: true 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/fastNLP_Bert/requirements.txt: -------------------------------------------------------------------------------- 1 | fastNLP==0.6.0 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/functorch_dp_cifar10/install.py: -------------------------------------------------------------------------------- 1 | from utils.python_utils import pip_install_requirements 2 | 3 | if __name__ == "__main__": 4 | pip_install_requirements() 5 | -------------------------------------------------------------------------------- /torchbenchmark/models/functorch_dp_cifar10/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 512 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/functorch_dp_cifar10/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/models/functorch_dp_cifar10/requirements.txt -------------------------------------------------------------------------------- /torchbenchmark/models/functorch_maml_omniglot/install.py: -------------------------------------------------------------------------------- 1 | from utils import s3_utils 2 | from utils.python_utils import pip_install_requirements 3 | 4 | if __name__ == "__main__": 5 | pip_install_requirements() 6 | s3_utils.checkout_s3_data("MODEL_PKLS", "maml_omniglot/batch.pt", decompress=False) 7 | -------------------------------------------------------------------------------- /torchbenchmark/models/functorch_maml_omniglot/metadata.yaml: -------------------------------------------------------------------------------- 1 | eval_benchmark: false 2 | eval_deterministic: false 3 | eval_nograd: true 4 | train_benchmark: false 5 | train_deterministic: false 6 | -------------------------------------------------------------------------------- /torchbenchmark/models/functorch_maml_omniglot/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/models/functorch_maml_omniglot/requirements.txt -------------------------------------------------------------------------------- /torchbenchmark/models/hf_Albert/__init__.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.tasks import NLP 2 | from torchbenchmark.util.framework.huggingface.model_factory import HuggingFaceModel 3 | 4 | 5 | class Model(HuggingFaceModel): 6 | task = 
NLP.LANGUAGE_MODELING 7 | DEFAULT_TRAIN_BSIZE = 8 8 | DEFAULT_EVAL_BSIZE = 1 9 | 10 | def __init__(self, test, device, batch_size=None, extra_args=[]): 11 | super().__init__( 12 | name="hf_Albert", 13 | test=test, 14 | device=device, 15 | batch_size=batch_size, 16 | extra_args=extra_args, 17 | ) 18 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_Albert/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.huggingface.patch_hf import ( 4 | cache_model, 5 | patch_transformers, 6 | ) 7 | from utils.python_utils import pip_install_requirements 8 | 9 | if __name__ == "__main__": 10 | pip_install_requirements() 11 | patch_transformers() 12 | model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 13 | cache_model(model_name) 14 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_Albert/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 16 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_Albert/requirements.txt: -------------------------------------------------------------------------------- 1 | sentencepiece 2 | datasets 3 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_Bart/__init__.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.tasks import NLP 2 | from torchbenchmark.util.framework.huggingface.model_factory import HuggingFaceModel 3 | 4 | 5 | class Model(HuggingFaceModel): 6 | task = NLP.LANGUAGE_MODELING 7 | DEFAULT_TRAIN_BSIZE = 4 8 | DEFAULT_EVAL_BSIZE = 1 9 | 10 | def __init__(self, test, device, batch_size=None, extra_args=[]): 11 | super().__init__( 12 | name="hf_Bart", 13 | test=test, 14 | device=device, 15 | batch_size=batch_size, 16 | extra_args=extra_args, 17 | ) 18 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_Bart/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.huggingface.patch_hf import ( 4 | cache_model, 5 | patch_transformers, 6 | ) 7 | from utils.python_utils import pip_install_requirements 8 | 9 | if __name__ == "__main__": 10 | pip_install_requirements() 11 | patch_transformers() 12 | model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 13 | cache_model(model_name) 14 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_Bart/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 8 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_Bart/requirements.txt: -------------------------------------------------------------------------------- 1 | sentencepiece 2 | datasets 3 | 
-------------------------------------------------------------------------------- /torchbenchmark/models/hf_Bert/__init__.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.tasks import NLP 2 | from torchbenchmark.util.framework.huggingface.model_factory import HuggingFaceModel 3 | 4 | 5 | class Model(HuggingFaceModel): 6 | task = NLP.LANGUAGE_MODELING 7 | DEFAULT_TRAIN_BSIZE = 4 8 | DEFAULT_EVAL_BSIZE = 1 9 | 10 | def __init__(self, test, device, batch_size=None, extra_args=[]): 11 | super().__init__( 12 | name="hf_Bert", 13 | test=test, 14 | device=device, 15 | batch_size=batch_size, 16 | extra_args=extra_args, 17 | ) 18 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_Bert/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.huggingface.patch_hf import ( 4 | cache_model, 5 | patch_transformers, 6 | ) 7 | from utils.python_utils import pip_install_requirements 8 | 9 | if __name__ == "__main__": 10 | pip_install_requirements() 11 | patch_transformers() 12 | model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 13 | cache_model(model_name) 14 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_Bert/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 8 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_Bert/requirements.txt: -------------------------------------------------------------------------------- 1 | sentencepiece 2 | datasets 3 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_Bert_large/__init__.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.tasks import NLP 2 | from torchbenchmark.util.framework.huggingface.model_factory import HuggingFaceModel 3 | 4 | 5 | class Model(HuggingFaceModel): 6 | task = NLP.LANGUAGE_MODELING 7 | DEFAULT_TRAIN_BSIZE = 4 8 | DEFAULT_EVAL_BSIZE = 1 9 | 10 | def __init__(self, test, device, batch_size=None, extra_args=[]): 11 | super().__init__( 12 | name="hf_Bert_large", 13 | test=test, 14 | device=device, 15 | batch_size=batch_size, 16 | extra_args=extra_args, 17 | ) 18 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_Bert_large/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.huggingface.patch_hf import ( 4 | cache_model, 5 | patch_transformers, 6 | ) 7 | from utils.python_utils import pip_install_requirements 8 | 9 | if __name__ == "__main__": 10 | pip_install_requirements() 11 | patch_transformers() 12 | model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 13 | cache_model(model_name) 14 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_Bert_large/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 4 4 | 
eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_Bert_large/requirements.txt: -------------------------------------------------------------------------------- 1 | sentencepiece 2 | datasets 3 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_BigBird/__init__.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.tasks import NLP 2 | from torchbenchmark.util.framework.huggingface.model_factory import HuggingFaceModel 3 | 4 | 5 | class Model(HuggingFaceModel): 6 | task = NLP.LANGUAGE_MODELING 7 | DEFAULT_TRAIN_BSIZE = 2 8 | DEFAULT_EVAL_BSIZE = 1 9 | 10 | def __init__(self, test, device, batch_size=None, extra_args=[]): 11 | super().__init__( 12 | name="hf_BigBird", 13 | test=test, 14 | device=device, 15 | batch_size=batch_size, 16 | extra_args=extra_args, 17 | ) 18 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_BigBird/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.huggingface.patch_hf import ( 4 | cache_model, 5 | patch_transformers, 6 | ) 7 | from utils.python_utils import pip_install_requirements 8 | 9 | if __name__ == "__main__": 10 | pip_install_requirements() 11 | patch_transformers() 12 | model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 13 | cache_model(model_name) 14 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_BigBird/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 4 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_BigBird/requirements.txt: -------------------------------------------------------------------------------- 1 | sentencepiece 2 | datasets 3 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_DistilBert/__init__.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.tasks import NLP 2 | from torchbenchmark.util.framework.huggingface.model_factory import HuggingFaceModel 3 | 4 | 5 | class Model(HuggingFaceModel): 6 | task = NLP.LANGUAGE_MODELING 7 | DEFAULT_TRAIN_BSIZE = 8 8 | DEFAULT_EVAL_BSIZE = 1 9 | 10 | def __init__(self, test, device, batch_size=None, extra_args=[]): 11 | super().__init__( 12 | name="hf_DistilBert", 13 | test=test, 14 | device=device, 15 | batch_size=batch_size, 16 | extra_args=extra_args, 17 | ) 18 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_DistilBert/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.huggingface.patch_hf import ( 4 | cache_model, 5 | patch_transformers, 6 | ) 7 | from utils.python_utils import pip_install_requirements 8 | 9 | if __name__ == "__main__": 10 | pip_install_requirements() 11 | patch_transformers() 
12 | model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 13 | cache_model(model_name) 14 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_DistilBert/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 16 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_DistilBert/requirements.txt: -------------------------------------------------------------------------------- 1 | sentencepiece 2 | datasets 3 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_GPT2/__init__.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.tasks import NLP 2 | from torchbenchmark.util.framework.huggingface.model_factory import HuggingFaceModel 3 | 4 | 5 | class Model(HuggingFaceModel): 6 | task = NLP.LANGUAGE_MODELING 7 | DEFAULT_TRAIN_BSIZE = 4 8 | DEFAULT_EVAL_BSIZE = 1 9 | 10 | def __init__(self, test, device, batch_size=None, extra_args=[]): 11 | super().__init__( 12 | name="hf_GPT2", 13 | test=test, 14 | device=device, 15 | batch_size=batch_size, 16 | extra_args=extra_args, 17 | ) 18 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_GPT2/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.huggingface.patch_hf import ( 4 | cache_model, 5 | patch_transformers, 6 | ) 7 | from utils.python_utils import pip_install_requirements 8 | 9 | if __name__ == "__main__": 10 | pip_install_requirements() 11 | patch_transformers() 12 | model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 13 | cache_model(model_name) 14 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_GPT2/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 16 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_GPT2/requirements.txt: -------------------------------------------------------------------------------- 1 | sentencepiece 2 | datasets 3 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_GPT2_large/__init__.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.tasks import NLP 2 | from torchbenchmark.util.framework.huggingface.model_factory import HuggingFaceModel 3 | 4 | 5 | class Model(HuggingFaceModel): 6 | task = NLP.LANGUAGE_MODELING 7 | DEFAULT_TRAIN_BSIZE = 4 8 | DEFAULT_EVAL_BSIZE = 1 9 | 10 | def __init__(self, test, device, batch_size=None, extra_args=[]): 11 | super().__init__( 12 | name="hf_GPT2_large", 13 | test=test, 14 | device=device, 15 | batch_size=batch_size, 16 | extra_args=extra_args, 17 | ) 18 | -------------------------------------------------------------------------------- 
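Each hf_* __init__.py above is the same thin wrapper: a Model subclass of HuggingFaceModel that pins the model name and default train/eval batch sizes and forwards test, device, batch_size, and extra_args to the shared factory. Purely as an illustration of that constructor signature (how the harness drives the resulting object is not shown in these files, and the "eval" test name is an assumption here):

from torchbenchmark.models.hf_GPT2 import Model

# Uses only the arguments visible in the __init__.py files above.
model = Model(test="eval", device="cuda", batch_size=1)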
/torchbenchmark/models/hf_GPT2_large/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.huggingface.patch_hf import ( 4 | cache_model, 5 | patch_transformers, 6 | ) 7 | from utils.python_utils import pip_install_requirements 8 | 9 | if __name__ == "__main__": 10 | pip_install_requirements() 11 | patch_transformers() 12 | model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 13 | cache_model(model_name) 14 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_GPT2_large/metadata.yaml: -------------------------------------------------------------------------------- 1 | eval_benchmark: false 2 | eval_deterministic: false 3 | eval_nograd: true 4 | train_benchmark: false 5 | train_deterministic: false 6 | not_implemented: 7 | # OOMs on torchbench CI 8 | - device: cuda 9 | # CPU OOM on torchbench CI 10 | - device: cpu 11 | test: train 12 | # CPU OOM on torchbench CI accuracy 13 | - device: cpu 14 | test: example 15 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_GPT2_large/requirements.txt: -------------------------------------------------------------------------------- 1 | sentencepiece 2 | datasets 3 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_Longformer/__init__.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.tasks import NLP 2 | from torchbenchmark.util.framework.huggingface.model_factory import HuggingFaceModel 3 | 4 | 5 | class Model(HuggingFaceModel): 6 | task = NLP.LANGUAGE_MODELING 7 | DEFAULT_TRAIN_BSIZE = 2 8 | DEFAULT_EVAL_BSIZE = 1 9 | 10 | def __init__(self, test, device, batch_size=None, extra_args=[]): 11 | super().__init__( 12 | name="hf_Longformer", 13 | test=test, 14 | device=device, 15 | batch_size=batch_size, 16 | extra_args=extra_args, 17 | ) 18 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_Longformer/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.huggingface.patch_hf import ( 4 | cache_model, 5 | patch_transformers, 6 | ) 7 | from utils.python_utils import pip_install_requirements 8 | 9 | if __name__ == "__main__": 10 | pip_install_requirements() 11 | patch_transformers() 12 | model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 13 | cache_model(model_name) 14 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_Longformer/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 4 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_Longformer/requirements.txt: -------------------------------------------------------------------------------- 1 | sentencepiece 2 | datasets 3 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_Reformer/__init__.py: -------------------------------------------------------------------------------- 1 | from 
torchbenchmark.tasks import NLP 2 | from torchbenchmark.util.framework.huggingface.model_factory import HuggingFaceModel 3 | 4 | 5 | class Model(HuggingFaceModel): 6 | task = NLP.LANGUAGE_MODELING 7 | DEFAULT_TRAIN_BSIZE = 8 8 | DEFAULT_EVAL_BSIZE = 1 9 | 10 | def __init__(self, test, device, batch_size=None, extra_args=[]): 11 | super().__init__( 12 | name="hf_Reformer", 13 | test=test, 14 | device=device, 15 | batch_size=batch_size, 16 | extra_args=extra_args, 17 | ) 18 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_Reformer/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.huggingface.patch_hf import ( 4 | cache_model, 5 | patch_transformers, 6 | ) 7 | from utils.python_utils import pip_install_requirements 8 | 9 | if __name__ == "__main__": 10 | pip_install_requirements() 11 | patch_transformers() 12 | model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 13 | cache_model(model_name) 14 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_Reformer/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 8 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_Reformer/requirements.txt: -------------------------------------------------------------------------------- 1 | sentencepiece 2 | datasets 3 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_Roberta_base/__init__.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.tasks import NLP 2 | from torchbenchmark.util.framework.huggingface.model_factory import HuggingFaceModel 3 | 4 | 5 | class Model(HuggingFaceModel): 6 | task = NLP.LANGUAGE_MODELING 7 | DEFAULT_TRAIN_BSIZE = 4 8 | DEFAULT_EVAL_BSIZE = 1 9 | 10 | def __init__(self, test, device, batch_size=None, extra_args=[]): 11 | super().__init__( 12 | name="hf_Roberta_base", 13 | test=test, 14 | device=device, 15 | batch_size=batch_size, 16 | extra_args=extra_args, 17 | ) 18 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_Roberta_base/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import sys 4 | 5 | from torchbenchmark.util.framework.huggingface.patch_hf import ( 6 | cache_model, 7 | patch_transformers, 8 | ) 9 | 10 | 11 | def pip_install_requirements(): 12 | subprocess.check_call( 13 | [sys.executable, "-m", "pip", "install", "-q", "-r", "requirements.txt"] 14 | ) 15 | 16 | 17 | if __name__ == "__main__": 18 | pip_install_requirements() 19 | patch_transformers() 20 | model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 21 | cache_model(model_name) 22 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_Roberta_base/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 8 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | 
eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_Roberta_base/requirements.txt: -------------------------------------------------------------------------------- 1 | sentencepiece 2 | datasets 3 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_T5/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.huggingface.patch_hf import ( 4 | cache_model, 5 | patch_transformers, 6 | ) 7 | from utils.python_utils import pip_install_requirements 8 | 9 | if __name__ == "__main__": 10 | pip_install_requirements() 11 | patch_transformers() 12 | model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 13 | cache_model(model_name) 14 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_T5/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 4 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | not_implemented: 8 | - device: cpu 9 | test: train 10 | - device: cuda 11 | test: train 12 | train_benchmark: false 13 | train_deterministic: false 14 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_T5/requirements.txt: -------------------------------------------------------------------------------- 1 | sentencepiece 2 | datasets 3 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_T5_base/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.huggingface.patch_hf import ( 4 | cache_model, 5 | patch_transformers, 6 | ) 7 | from utils.python_utils import pip_install_requirements 8 | 9 | if __name__ == "__main__": 10 | pip_install_requirements() 11 | patch_transformers() 12 | model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 13 | cache_model(model_name) 14 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_T5_base/metadata.yaml: -------------------------------------------------------------------------------- 1 | eval_benchmark: false 2 | eval_deterministic: false 3 | eval_nograd: true 4 | train_benchmark: false 5 | train_deterministic: false 6 | not_implemented: 7 | # disable train test because of CI infra capacity issue 8 | - device: cpu 9 | test: train 10 | - device: cuda 11 | test: train 12 | # CPU OOM on torchbench CI accuracy 13 | - device: cpu 14 | test: example 15 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_T5_base/requirements.txt: -------------------------------------------------------------------------------- 1 | sentencepiece 2 | datasets 3 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_T5_generate/__init__.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.util.framework.huggingface.model_factory import ( 2 | HuggingFaceGenerationModel, 3 | ) 4 | 5 | 6 | class Model(HuggingFaceGenerationModel): 7 | def __init__(self, test, device, 
batch_size=None, extra_args=[]): 8 | super().__init__( 9 | name="hf_T5_generate", 10 | test=test, 11 | device=device, 12 | batch_size=batch_size, 13 | extra_args=extra_args, 14 | ) 15 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_T5_generate/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 1 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_T5_generate/requirements.txt: -------------------------------------------------------------------------------- 1 | sentencepiece 2 | datasets 3 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_T5_large/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.huggingface.patch_hf import ( 4 | cache_model, 5 | patch_transformers, 6 | ) 7 | from utils.python_utils import pip_install_requirements 8 | 9 | if __name__ == "__main__": 10 | pip_install_requirements() 11 | patch_transformers() 12 | model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 13 | cache_model(model_name) 14 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_T5_large/metadata.yaml: -------------------------------------------------------------------------------- 1 | eval_benchmark: false 2 | eval_deterministic: false 3 | eval_nograd: true 4 | train_benchmark: false 5 | train_deterministic: false 6 | not_implemented: 7 | # disable train test because of CI infra capacity issue 8 | - device: cpu 9 | test: train 10 | - device: cuda 11 | test: train 12 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_T5_large/requirements.txt: -------------------------------------------------------------------------------- 1 | sentencepiece 2 | datasets 3 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_Whisper/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.huggingface.patch_hf import ( 4 | cache_model, 5 | patch_transformers, 6 | ) 7 | 8 | if __name__ == "__main__": 9 | patch_transformers() 10 | model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 11 | cache_model(model_name) 12 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_Whisper/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 8 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | not_implemented: 8 | - device: cpu 9 | - test: train 10 | train_benchmark: false 11 | train_deterministic: false -------------------------------------------------------------------------------- /torchbenchmark/models/hf_clip/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 32 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 
7 | train_benchmark: false 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_distil_whisper/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.huggingface.patch_hf import ( 4 | cache_model, 5 | patch_transformers, 6 | ) 7 | from utils.python_utils import pip_install_requirements 8 | 9 | if __name__ == "__main__": 10 | pip_install_requirements() 11 | patch_transformers() 12 | model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 13 | cache_model(model_name) 14 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_distil_whisper/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 16 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_distil_whisper/requirements.txt: -------------------------------------------------------------------------------- 1 | sentencepiece 2 | datasets 3 | -------------------------------------------------------------------------------- /torchbenchmark/models/lennard_jones/install.py: -------------------------------------------------------------------------------- 1 | from utils.python_utils import pip_install_requirements 2 | 3 | if __name__ == "__main__": 4 | pip_install_requirements() 5 | -------------------------------------------------------------------------------- /torchbenchmark/models/lennard_jones/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 1000 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/lennard_jones/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/models/lennard_jones/requirements.txt -------------------------------------------------------------------------------- /torchbenchmark/models/llama/install.py: -------------------------------------------------------------------------------- 1 | from utils.python_utils import pip_install_requirements 2 | 3 | if __name__ == "__main__": 4 | pip_install_requirements() 5 | -------------------------------------------------------------------------------- /torchbenchmark/models/llama/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 32 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false -------------------------------------------------------------------------------- /torchbenchmark/models/llama/origin: -------------------------------------------------------------------------------- 1 | origin https://github.com/facebookresearch/llama -------------------------------------------------------------------------------- 
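The not_implemented entries seen throughout these metadata files combine an optional device with an optional test: an entry carrying only device: cpu skips every test on CPU, while an entry with both device: cpu and test: train skips only CPU training. A hedged sketch of that matching rule as the YAML reads (an assumption about the semantics, not the repository's actual check):

def is_skipped(not_implemented, device, test):
    """True if any entry matches; keys missing from an entry act as wildcards."""
    for entry in not_implemented or []:
        if entry.get("device", device) == device and entry.get("test", test) == test:
            return True
    return False

# is_skipped([{"device": "cpu", "test": "train"}], "cpu", "eval")  -> False
# is_skipped([{"device": "cpu"}], "cpu", "eval")                   -> True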
/torchbenchmark/models/llama/requirements.txt: -------------------------------------------------------------------------------- 1 | sentencepiece -------------------------------------------------------------------------------- /torchbenchmark/models/llama_v2_7b_16h/__init__.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.tasks import NLP 2 | from torchbenchmark.util.framework.huggingface.model_factory import ( 3 | HuggingFaceAuthMixin, 4 | HuggingFaceModel, 5 | ) 6 | 7 | 8 | class Model(HuggingFaceModel, HuggingFaceAuthMixin): 9 | task = NLP.LANGUAGE_MODELING 10 | DEFAULT_TRAIN_BSIZE = 1 11 | DEFAULT_EVAL_BSIZE = 1 12 | DEEPCOPY = False 13 | 14 | def __init__(self, test, device, batch_size=None, extra_args=[]): 15 | HuggingFaceAuthMixin.__init__(self) 16 | super().__init__( 17 | name="llama_v2_7b_16h", 18 | test=test, 19 | device=device, 20 | batch_size=batch_size, 21 | extra_args=extra_args, 22 | ) 23 | -------------------------------------------------------------------------------- /torchbenchmark/models/llama_v2_7b_16h/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.huggingface.patch_hf import cache_model 4 | 5 | if __name__ == "__main__": 6 | model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 7 | cache_model(model_name) 8 | -------------------------------------------------------------------------------- /torchbenchmark/models/llama_v2_7b_16h/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 1 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | not_implemented: 8 | - device: cpu 9 | - device: NVIDIA A10G 10 | train_benchmark: false 11 | train_deterministic: false 12 | -------------------------------------------------------------------------------- /torchbenchmark/models/llava/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.huggingface.patch_hf import ( 4 | cache_model, 5 | patch_transformers, 6 | ) 7 | from utils.python_utils import pip_install_requirements 8 | 9 | if __name__ == "__main__": 10 | pip_install_requirements() 11 | patch_transformers() 12 | model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 13 | cache_model(model_name) 14 | -------------------------------------------------------------------------------- /torchbenchmark/models/llava/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 1 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | not_implemented: 10 | - device: NVIDIA A10G 11 | # CPU OOM on the CI runner 12 | - device: cpu 13 | # accuracy test OOM on CUDA 14 | - device: cuda 15 | test: example 16 | -------------------------------------------------------------------------------- /torchbenchmark/models/llava/requirements.txt: -------------------------------------------------------------------------------- 1 | einops -------------------------------------------------------------------------------- /torchbenchmark/models/maml/install.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/models/maml/install.py -------------------------------------------------------------------------------- /torchbenchmark/models/maml/metadata.yaml: -------------------------------------------------------------------------------- 1 | eval_benchmark: false 2 | eval_deterministic: true 3 | eval_nograd: true 4 | train_benchmark: false 5 | train_deterministic: true -------------------------------------------------------------------------------- /torchbenchmark/models/maml/origin: -------------------------------------------------------------------------------- 1 | https://github.com/dragen1860/MAML-Pytorch 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/maml_omniglot/install.py: -------------------------------------------------------------------------------- 1 | from utils import s3_utils 2 | from utils.python_utils import pip_install_requirements 3 | 4 | if __name__ == "__main__": 5 | pip_install_requirements() 6 | s3_utils.checkout_s3_data("MODEL_PKLS", "maml_omniglot/batch.pt", decompress=False) 7 | -------------------------------------------------------------------------------- /torchbenchmark/models/maml_omniglot/metadata.yaml: -------------------------------------------------------------------------------- 1 | eval_benchmark: false 2 | eval_deterministic: false 3 | eval_nograd: true 4 | train_benchmark: false 5 | train_deterministic: false -------------------------------------------------------------------------------- /torchbenchmark/models/maml_omniglot/requirements.txt: -------------------------------------------------------------------------------- 1 | higher 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/microbench_unbacked_tolist_sum/metadata.yaml: -------------------------------------------------------------------------------- 1 | eval_benchmark: false 2 | eval_deterministic: false 3 | eval_nograd: true 4 | train_benchmark: false 5 | train_deterministic: false 6 | -------------------------------------------------------------------------------- /torchbenchmark/models/mnasnet1_0/__init__.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.tasks import COMPUTER_VISION 2 | from torchbenchmark.util.framework.vision.model_factory import TorchVisionModel 3 | from torchvision import models 4 | 5 | 6 | class Model(TorchVisionModel): 7 | task = COMPUTER_VISION.CLASSIFICATION 8 | DEFAULT_TRAIN_BSIZE = 32 9 | DEFAULT_EVAL_BSIZE = 32 10 | 11 | def __init__(self, test, device, batch_size=None, extra_args=[]): 12 | super().__init__( 13 | model_name="mnasnet1_0", 14 | test=test, 15 | device=device, 16 | batch_size=batch_size, 17 | weights=models.MNASNet1_0_Weights.IMAGENET1K_V1, 18 | extra_args=extra_args, 19 | ) 20 | -------------------------------------------------------------------------------- /torchbenchmark/models/mnasnet1_0/install.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/models/mnasnet1_0/install.py -------------------------------------------------------------------------------- /torchbenchmark/models/mnasnet1_0/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | 
eval_batch_size: 128 4 | eval_benchmark: true 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: true 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/mobilenet_v2/__init__.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.tasks import COMPUTER_VISION 2 | from torchbenchmark.util.framework.vision.model_factory import TorchVisionModel 3 | from torchvision import models 4 | 5 | 6 | class Model(TorchVisionModel): 7 | task = COMPUTER_VISION.CLASSIFICATION 8 | DEFAULT_TRAIN_BSIZE = 96 9 | DEFAULT_EVAL_BSIZE = 16 10 | 11 | def __init__(self, test, device, batch_size=None, extra_args=[]): 12 | super().__init__( 13 | model_name="mobilenet_v2", 14 | test=test, 15 | device=device, 16 | batch_size=batch_size, 17 | weights=models.MobileNet_V2_Weights.IMAGENET1K_V1, 18 | extra_args=extra_args, 19 | ) 20 | -------------------------------------------------------------------------------- /torchbenchmark/models/mobilenet_v2/install.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/models/mobilenet_v2/install.py -------------------------------------------------------------------------------- /torchbenchmark/models/mobilenet_v2/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 128 4 | eval_benchmark: true 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: true 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/mobilenet_v2_quantized_qat/install.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/models/mobilenet_v2_quantized_qat/install.py -------------------------------------------------------------------------------- /torchbenchmark/models/mobilenet_v2_quantized_qat/metadata.yaml: -------------------------------------------------------------------------------- 1 | eval_benchmark: true 2 | eval_deterministic: false 3 | eval_nograd: true 4 | train_benchmark: false 5 | train_deterministic: false 6 | not_implemented: 7 | # Disable CUDA train test because of insufficient GPU memory on CI machine 8 | - test: train 9 | device: cuda -------------------------------------------------------------------------------- /torchbenchmark/models/mobilenet_v3_large/__init__.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.tasks import COMPUTER_VISION 2 | from torchbenchmark.util.framework.vision.model_factory import TorchVisionModel 3 | from torchvision import models 4 | 5 | 6 | class Model(TorchVisionModel): 7 | task = COMPUTER_VISION.CLASSIFICATION 8 | DEFAULT_TRAIN_BSIZE = 32 9 | DEFAULT_EVAL_BSIZE = 32 10 | 11 | def __init__(self, test, device, batch_size=None, extra_args=[]): 12 | super().__init__( 13 | model_name="mobilenet_v3_large", 14 | test=test, 15 | device=device, 16 | batch_size=batch_size, 17 | weights=models.MobileNet_V3_Large_Weights.IMAGENET1K_V1, 18 | extra_args=extra_args, 19 | ) 20 | -------------------------------------------------------------------------------- 
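The TorchVision wrappers above (mnasnet1_0, mobilenet_v2, mobilenet_v3_large) differ only in the model_name, the default batch sizes, and the pinned pretrained-weights enum they pass to TorchVisionModel. Outside the harness, the same network those arguments select can be built directly with torchvision's public API; the snippet below is only an illustration of what the wrapper configures, and mirrors the eval_nograd: true setting in the metadata:

import torch
from torchvision import models

# The same backbone and pretrained weights the mobilenet_v3_large wrapper selects.
net = models.mobilenet_v3_large(weights=models.MobileNet_V3_Large_Weights.IMAGENET1K_V1)
net.eval()
with torch.no_grad():  # consistent with eval_nograd: true in the metadata above
    out = net(torch.randn(1, 3, 224, 224))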
/torchbenchmark/models/mobilenet_v3_large/install.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/models/mobilenet_v3_large/install.py -------------------------------------------------------------------------------- /torchbenchmark/models/mobilenet_v3_large/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 128 4 | eval_benchmark: true 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: true 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/moco/detection/configs/Base-RCNN-C4-BN.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | RPN: 4 | PRE_NMS_TOPK_TEST: 6000 5 | POST_NMS_TOPK_TEST: 1000 6 | ROI_HEADS: 7 | NAME: "Res5ROIHeadsExtraNorm" 8 | BACKBONE: 9 | FREEZE_AT: 0 10 | RESNETS: 11 | NORM: "SyncBN" 12 | TEST: 13 | PRECISE_BN: 14 | ENABLED: True 15 | SOLVER: 16 | IMS_PER_BATCH: 16 17 | BASE_LR: 0.02 18 | -------------------------------------------------------------------------------- /torchbenchmark/models/moco/detection/configs/coco_R_50_C4_2x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-RCNN-C4-BN.yaml" 2 | MODEL: 3 | MASK_ON: True 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | INPUT: 6 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 7 | MIN_SIZE_TEST: 800 8 | DATASETS: 9 | TRAIN: ("coco_2017_train",) 10 | TEST: ("coco_2017_val",) 11 | SOLVER: 12 | STEPS: (120000, 160000) 13 | MAX_ITER: 180000 14 | -------------------------------------------------------------------------------- /torchbenchmark/models/moco/detection/configs/coco_R_50_C4_2x_moco.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "coco_R_50_C4_2x.yaml" 2 | MODEL: 3 | PIXEL_MEAN: [123.675, 116.280, 103.530] 4 | PIXEL_STD: [58.395, 57.120, 57.375] 5 | WEIGHTS: "See Instructions" 6 | RESNETS: 7 | STRIDE_IN_1X1: False 8 | INPUT: 9 | FORMAT: "RGB" 10 | -------------------------------------------------------------------------------- /torchbenchmark/models/moco/detection/configs/pascal_voc_R_50_C4_24k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-RCNN-C4-BN.yaml" 2 | MODEL: 3 | MASK_ON: False 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | ROI_HEADS: 6 | NUM_CLASSES: 20 7 | INPUT: 8 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 9 | MIN_SIZE_TEST: 800 10 | DATASETS: 11 | TRAIN: ('voc_2007_trainval', 'voc_2012_trainval') 12 | TEST: ('voc_2007_test',) 13 | SOLVER: 14 | STEPS: (18000, 22000) 15 | MAX_ITER: 24000 16 | WARMUP_ITERS: 100 17 | -------------------------------------------------------------------------------- /torchbenchmark/models/moco/detection/configs/pascal_voc_R_50_C4_24k_moco.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "pascal_voc_R_50_C4_24k.yaml" 2 | MODEL: 3 | PIXEL_MEAN: [123.675, 116.280, 103.530] 4 | PIXEL_STD: [58.395, 57.120, 57.375] 5 | WEIGHTS: "See Instructions" 6 | RESNETS: 7 | STRIDE_IN_1X1: False 8 | INPUT: 9 | FORMAT: "RGB" 10 | 
-------------------------------------------------------------------------------- /torchbenchmark/models/moco/install.py: -------------------------------------------------------------------------------- 1 | # only needs torch and torchvision 2 | 3 | if __name__ == "__main__": 4 | pass 5 | -------------------------------------------------------------------------------- /torchbenchmark/models/moco/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 64 4 | eval_benchmark: true 5 | eval_deterministic: false 6 | eval_nograd: true 7 | not_implemented: 8 | - device: cpu 9 | train_benchmark: true 10 | train_deterministic: false 11 | -------------------------------------------------------------------------------- /torchbenchmark/models/moco/moco/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/moco/origin: -------------------------------------------------------------------------------- 1 | origin https://github.com/nickgg/moco.git 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/moco/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/models/moco/requirements.txt -------------------------------------------------------------------------------- /torchbenchmark/models/moco/run.sh: -------------------------------------------------------------------------------- 1 | debug_arg="" 2 | if [ $# -gt 1 ]; then 3 | if [ "$1" == "--debug" ]; then 4 | debug_arg="-d $2" 5 | fi 6 | fi 7 | CUDA_VISIBLE_DEVICES=0 python main_moco.py -a resnet50 --lr 0.03 --batch-size 32 --dist-url 'tcp://localhost:10001' --multiprocessing-distributed --world-size 1 --rank 0 --fake_data --epochs 2 --seed 1058467 $debug_arg dummy 8 | -------------------------------------------------------------------------------- /torchbenchmark/models/moondream/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.huggingface.patch_hf import ( 4 | cache_model, 5 | patch_transformers, 6 | ) 7 | from utils.python_utils import pip_install_requirements 8 | 9 | 10 | if __name__ == "__main__": 11 | pip_install_requirements() 12 | patch_transformers() 13 | model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 14 | cache_model(model_name) 15 | -------------------------------------------------------------------------------- /torchbenchmark/models/moondream/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 1 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | not_implemented: 10 | - device: NVIDIA A10G 11 | -------------------------------------------------------------------------------- /torchbenchmark/models/moondream/requirements.txt: -------------------------------------------------------------------------------- 1 | einops -------------------------------------------------------------------------------- 
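moondream's install.py above shows the recurring HuggingFace install pattern in this tree: install the per-model requirements, patch transformers, derive the model name from the directory name, and pre-download the weights into the cache. A hedged sketch of the same pattern for a hypothetical new HF-backed model directory; the helper imports are taken verbatim from the file above, while "my_hf_model" is a made-up directory name used only for illustration:

# Sketch of an install.py for a hypothetical torchbenchmark/models/my_hf_model/ directory,
# mirroring moondream's install.py shown above.
import os

from torchbenchmark.util.framework.huggingface.patch_hf import (
    cache_model,
    patch_transformers,
)
from utils.python_utils import pip_install_requirements

if __name__ == "__main__":
    pip_install_requirements()  # install requirements.txt next to this file
    patch_transformers()        # apply TorchBench patches to transformers
    # By convention the model name is the directory name, e.g. "my_hf_model".
    model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__)))
    cache_model(model_name)     # pre-download weights so benchmark runs stay offline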
/torchbenchmark/models/nanogpt/metadata.yaml: -------------------------------------------------------------------------------- 1 | eval_benchmark: false 2 | eval_deterministic: false 3 | eval_nograd: true 4 | train_benchmark: false 5 | train_deterministic: false 6 | -------------------------------------------------------------------------------- /torchbenchmark/models/nanogpt/origin: -------------------------------------------------------------------------------- 1 | https://github.com/karpathy/nanoGPT -------------------------------------------------------------------------------- /torchbenchmark/models/nvidia_deeprecommender/install.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import sys 3 | 4 | from utils.python_utils import pip_install_requirements 5 | 6 | if __name__ == "__main__": 7 | pip_install_requirements() 8 | -------------------------------------------------------------------------------- /torchbenchmark/models/nvidia_deeprecommender/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 512 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/nvidia_deeprecommender/origin: -------------------------------------------------------------------------------- 1 | origin https://github.com/nvidia/deeprecommender 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/nvidia_deeprecommender/reco_encoder/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017 NVIDIA Corporation 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/nvidia_deeprecommender/reco_encoder/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017 NVIDIA Corporation 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/nvidia_deeprecommender/reco_encoder/model/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017 NVIDIA Corporation 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/nvidia_deeprecommender/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/opacus_cifar10/install.py: -------------------------------------------------------------------------------- 1 | from utils.python_utils import pip_install_requirements 2 | 3 | if __name__ == "__main__": 4 | pip_install_requirements() 5 | -------------------------------------------------------------------------------- /torchbenchmark/models/opacus_cifar10/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 512 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- 
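The metadata.yaml files in this directory share a small schema: top-level boolean flags (eval_benchmark, eval_deterministic, eval_nograd, train_benchmark, train_deterministic), an optional devices section carrying a per-GPU eval_batch_size, and an optional not_implemented list of device/test pairs. A minimal sketch of reading one of these files with PyYAML, assuming only that schema; the loader below is illustrative and is not a TorchBench API:

# Illustrative only: inspect a model's metadata.yaml using the schema visible above.
import yaml

with open("torchbenchmark/models/opacus_cifar10/metadata.yaml") as f:
    meta = yaml.safe_load(f)

# Per-device overrides, e.g. the A100 eval batch size, if present.
a100 = meta.get("devices", {}).get("NVIDIA A100-SXM4-40GB", {})
print("A100 eval batch size:", a100.get("eval_batch_size"))

# Device/test combinations listed as not implemented should be skipped.
for entry in meta.get("not_implemented", []):
    print("skip:", entry.get("device"), entry.get("test", "all tests"))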
/torchbenchmark/models/opacus_cifar10/requirements.txt: -------------------------------------------------------------------------------- 1 | git+https://github.com/pytorch/functorch.git 2 | # must include the fix https://github.com/pytorch/opacus/pull/426 3 | # Pinning to 1.5.3. Remove once is resolved https://github.com/pytorch/pytorch/issues/154446 4 | opacus>=1.1.2 5 | -------------------------------------------------------------------------------- /torchbenchmark/models/phlippe_densenet/install.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/models/phlippe_densenet/install.py -------------------------------------------------------------------------------- /torchbenchmark/models/phlippe_densenet/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 128 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | not_implemented: 8 | - device: cuda 9 | - device: cpu 10 | train_benchmark: false 11 | train_deterministic: false 12 | -------------------------------------------------------------------------------- /torchbenchmark/models/phlippe_resnet/install.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/models/phlippe_resnet/install.py -------------------------------------------------------------------------------- /torchbenchmark/models/phlippe_resnet/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 256 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/pyhpc_equation_of_state/install.py: -------------------------------------------------------------------------------- 1 | if __name__ == "__main__": 2 | pass 3 | -------------------------------------------------------------------------------- /torchbenchmark/models/pyhpc_equation_of_state/metadata.yaml: -------------------------------------------------------------------------------- 1 | eval_benchmark: false 2 | eval_deterministic: false 3 | eval_nograd: true 4 | train_benchmark: true 5 | train_deterministic: false 6 | -------------------------------------------------------------------------------- /torchbenchmark/models/pyhpc_equation_of_state/origin: -------------------------------------------------------------------------------- 1 | https://github.com/dionhaefner/pyhpc-benchmarks+650ecc650e394df829944ffcf09e9d646ec69691 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/pyhpc_isoneutral_mixing/install.py: -------------------------------------------------------------------------------- 1 | if __name__ == "__main__": 2 | pass 3 | -------------------------------------------------------------------------------- /torchbenchmark/models/pyhpc_isoneutral_mixing/metadata.yaml: -------------------------------------------------------------------------------- 1 | eval_benchmark: false 2 | eval_deterministic: false 3 | eval_nograd: true 4 | train_benchmark: true 5 | train_deterministic: false 
6 | -------------------------------------------------------------------------------- /torchbenchmark/models/pyhpc_isoneutral_mixing/origin: -------------------------------------------------------------------------------- 1 | https://github.com/dionhaefner/pyhpc-benchmarks+650ecc650e394df829944ffcf09e9d646ec69691 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/pyhpc_turbulent_kinetic_energy/install.py: -------------------------------------------------------------------------------- 1 | if __name__ == "__main__": 2 | pass 3 | -------------------------------------------------------------------------------- /torchbenchmark/models/pyhpc_turbulent_kinetic_energy/metadata.yaml: -------------------------------------------------------------------------------- 1 | eval_benchmark: false 2 | eval_deterministic: false 3 | eval_nograd: true 4 | train_benchmark: true 5 | train_deterministic: false 6 | -------------------------------------------------------------------------------- /torchbenchmark/models/pyhpc_turbulent_kinetic_energy/origin: -------------------------------------------------------------------------------- 1 | https://github.com/dionhaefner/pyhpc-benchmarks+650ecc650e394df829944ffcf09e9d646ec69691 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/docs/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:10.1-base 2 | 3 | RUN apt update && apt install -y wget unzip curl bzip2 git 4 | RUN curl -LO http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh 5 | RUN bash Miniconda3-latest-Linux-x86_64.sh -p /miniconda -b 6 | RUN rm Miniconda3-latest-Linux-x86_64.sh 7 | ENV PATH=/miniconda/bin:${PATH} 8 | RUN conda update -y conda 9 | 10 | RUN conda install -y pytorch torchvision -c pytorch 11 | RUN mkdir /workspace/ && cd /workspace/ && git clone https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix.git && cd pytorch-CycleGAN-and-pix2pix && pip install -r requirements.txt 12 | 13 | WORKDIR /workspace 14 | -------------------------------------------------------------------------------- /torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/environment.yml: -------------------------------------------------------------------------------- 1 | name: pytorch-CycleGAN-and-pix2pix 2 | channels: 3 | - peterjc123 4 | - defaults 5 | dependencies: 6 | - python=3.5.5 7 | - pytorch=0.4.1 8 | - scipy 9 | - pip: 10 | - dominate==2.3.1 11 | - git+https://github.com/pytorch/vision.git 12 | - Pillow==5.0.0 13 | - numpy==1.14.1 14 | - visdom==0.1.7 15 | -------------------------------------------------------------------------------- /torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/example_input.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/example_input.pt -------------------------------------------------------------------------------- /torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/install.py: -------------------------------------------------------------------------------- 1 | from utils import s3_utils 2 | from utils.python_utils import pip_install_requirements 3 | 4 | if __name__ == "__main__": 5 | s3_utils.checkout_s3_data( 6 | "INPUT_TARBALLS", "pytorch_CycleGAN_and_pix2pix_inputs.tar.gz", decompress=True 7 | 
) 8 | pip_install_requirements() 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/metadata.yaml: -------------------------------------------------------------------------------- 1 | eval_benchmark: true 2 | eval_deterministic: true 3 | eval_nograd: true 4 | train_benchmark: true 5 | train_deterministic: true 6 | not_implemented: 7 | # Disabled due to excessively slow runtime - see GH Issue #100 8 | - device: cpu -------------------------------------------------------------------------------- /torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/options/__init__.py: -------------------------------------------------------------------------------- 1 | """This package options includes option modules: training options, test options, and basic options (used in both training and test).""" 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/origin: -------------------------------------------------------------------------------- 1 | origin https://github.com/zdevito/pytorch-CycleGAN-and-pix2pix 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/requirements.txt: -------------------------------------------------------------------------------- 1 | dominate>=2.3.1 2 | visdom>=0.1.8.3 3 | -------------------------------------------------------------------------------- /torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/scripts/conda_deps.sh: -------------------------------------------------------------------------------- 1 | set -ex 2 | conda install numpy pyyaml mkl mkl-include setuptools cmake cffi typing 3 | conda install pytorch torchvision -c pytorch # add cuda90 if CUDA 9 4 | conda install visdom dominate -c conda-forge # install visdom and dominate 5 | -------------------------------------------------------------------------------- /torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/scripts/download_cyclegan_model.sh: -------------------------------------------------------------------------------- 1 | FILE=$1 2 | 3 | echo "Note: available models are apple2orange, orange2apple, summer2winter_yosemite, winter2summer_yosemite, horse2zebra, zebra2horse, monet2photo, style_monet, style_cezanne, style_ukiyoe, style_vangogh, sat2map, map2sat, cityscapes_photo2label, cityscapes_label2photo, facades_photo2label, facades_label2photo, iphone2dslr_flower" 4 | 5 | echo "Specified [$FILE]" 6 | 7 | mkdir -p ./checkpoints/${FILE}_pretrained 8 | MODEL_FILE=./checkpoints/${FILE}_pretrained/latest_net_G.pth 9 | URL=http://efrosgans.eecs.berkeley.edu/cyclegan/pretrained_models/$FILE.pth 10 | 11 | wget -N $URL -O $MODEL_FILE 12 | -------------------------------------------------------------------------------- /torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/scripts/download_pix2pix_model.sh: -------------------------------------------------------------------------------- 1 | FILE=$1 2 | 3 | echo "Note: available models are edges2shoes, sat2map, map2sat, facades_label2photo, and day2night" 4 | echo "Specified [$FILE]" 5 | 6 | mkdir -p ./checkpoints/${FILE}_pretrained 7 | MODEL_FILE=./checkpoints/${FILE}_pretrained/latest_net_G.pth 8 | URL=http://efrosgans.eecs.berkeley.edu/pix2pix/models-pytorch/$FILE.pth 9 | 10 | wget -N $URL -O $MODEL_FILE 11 | -------------------------------------------------------------------------------- 
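The two download helpers above share the same shape: create ./checkpoints/<name>_pretrained and fetch <name>.pth from the efrosgans server into latest_net_G.pth. A rough Python equivalent, purely for illustration under those assumptions; the repo itself uses the shell scripts:

# Rough Python counterpart of download_cyclegan_model.sh / download_pix2pix_model.sh.
import os
import urllib.request

def download_pretrained(name: str, kind: str = "cyclegan") -> str:
    base = {
        "cyclegan": "http://efrosgans.eecs.berkeley.edu/cyclegan/pretrained_models",
        "pix2pix": "http://efrosgans.eecs.berkeley.edu/pix2pix/models-pytorch",
    }[kind]
    ckpt_dir = f"./checkpoints/{name}_pretrained"
    os.makedirs(ckpt_dir, exist_ok=True)          # mkdir -p ./checkpoints/<name>_pretrained
    dest = os.path.join(ckpt_dir, "latest_net_G.pth")
    urllib.request.urlretrieve(f"{base}/{name}.pth", dest)  # wget $URL -O $MODEL_FILE
    return dest

# e.g. download_pretrained("horse2zebra", kind="cyclegan")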
/torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/scripts/eval_cityscapes/download_fcn8s.sh: -------------------------------------------------------------------------------- 1 | URL=http://people.eecs.berkeley.edu/~tinghuiz/projects/pix2pix/fcn-8s-cityscapes/fcn-8s-cityscapes.caffemodel 2 | OUTPUT_FILE=./scripts/eval_cityscapes/caffemodel/fcn-8s-cityscapes.caffemodel 3 | wget -N $URL -O $OUTPUT_FILE 4 | -------------------------------------------------------------------------------- /torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/scripts/install_deps.sh: -------------------------------------------------------------------------------- 1 | set -ex 2 | pip install visdom 3 | pip install dominate 4 | -------------------------------------------------------------------------------- /torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/scripts/test_colorization.sh: -------------------------------------------------------------------------------- 1 | set -ex 2 | python test.py --dataroot ./datasets/colorization --name color_pix2pix --model colorization 3 | -------------------------------------------------------------------------------- /torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/scripts/test_cyclegan.sh: -------------------------------------------------------------------------------- 1 | set -ex 2 | python test.py --dataroot ./datasets/maps --name maps_cyclegan --model cycle_gan --phase test --no_dropout 3 | -------------------------------------------------------------------------------- /torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/scripts/test_pix2pix.sh: -------------------------------------------------------------------------------- 1 | set -ex 2 | python test.py --dataroot ./datasets/facades --name facades_pix2pix --model pix2pix --netG unet_256 --direction BtoA --dataset_mode aligned --norm batch 3 | -------------------------------------------------------------------------------- /torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/scripts/test_single.sh: -------------------------------------------------------------------------------- 1 | set -ex 2 | python test.py --dataroot ./datasets/facades/testB/ --name facades_pix2pix --model test --netG unet_256 --direction BtoA --dataset_mode single --norm batch 3 | -------------------------------------------------------------------------------- /torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/scripts/train_colorization.sh: -------------------------------------------------------------------------------- 1 | set -ex 2 | python train.py --dataroot ./datasets/colorization --name color_pix2pix --model colorization 3 | -------------------------------------------------------------------------------- /torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/scripts/train_cyclegan.sh: -------------------------------------------------------------------------------- 1 | set -ex 2 | python train.py --dataroot ./datasets/maps --name maps_cyclegan --model cycle_gan --pool_size 50 --no_dropout 3 | -------------------------------------------------------------------------------- /torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/scripts/train_pix2pix.sh: -------------------------------------------------------------------------------- 1 | set -ex 2 | python train.py --dataroot ./datasets/facades --name facades_pix2pix --model pix2pix --netG unet_256 --direction BtoA --lambda_L1 100 --dataset_mode aligned --norm batch --pool_size 0 3 | -------------------------------------------------------------------------------- 
/torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/util/__init__.py: -------------------------------------------------------------------------------- 1 | """This package includes a miscellaneous collection of useful helper functions.""" 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/pytorch_stargan/install.py: -------------------------------------------------------------------------------- 1 | from utils import s3_utils 2 | from utils.python_utils import pip_install_requirements 3 | 4 | if __name__ == "__main__": 5 | s3_utils.checkout_s3_data( 6 | "INPUT_TARBALLS", "pytorch_stargan_inputs.tar.gz", decompress=True 7 | ) 8 | pip_install_requirements() 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/pytorch_stargan/logger.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | class Logger: 5 | """Tensorboard logger.""" 6 | 7 | def __init__(self, log_dir): 8 | """Initialize summary writer.""" 9 | self.writer = tf.summary.create_file_writer(log_dir) 10 | 11 | def scalar_summary(self, tag, value, step): 12 | """Add scalar summary.""" 13 | with self.writer.as_default(): 14 | tf.summary.scalar(tag, value, step=step) 15 | -------------------------------------------------------------------------------- /torchbenchmark/models/pytorch_stargan/metadata.yaml: -------------------------------------------------------------------------------- 1 | eval_benchmark: true 2 | eval_deterministic: false 3 | eval_nograd: true 4 | train_benchmark: true 5 | train_deterministic: false 6 | -------------------------------------------------------------------------------- /torchbenchmark/models/pytorch_stargan/requirements.txt: -------------------------------------------------------------------------------- 1 | # Nothing else aside from PyTorch! 
2 | -------------------------------------------------------------------------------- /torchbenchmark/models/pytorch_stargan/run.sh: -------------------------------------------------------------------------------- 1 | debug_arg="" 2 | if [ $# -gt 1 ]; then 3 | if [ "$1" == "--debug" ]; then 4 | debug_arg="--debug $2" 5 | fi 6 | fi 7 | python main.py --mode train --dataset CelebA --image_size 128 --c_dim 2 --sample_dir stargan_celeba/samples --log_dir stargan_celeba/logs --model_save_dir stargan_celeba/models --result_dir stargan_celeba/results --selected_attrs Male Young --use_tensorboard False --num_iters 30 --should_script True --deterministic True $debug_arg 8 | -------------------------------------------------------------------------------- /torchbenchmark/models/pytorch_unet/install.py: -------------------------------------------------------------------------------- 1 | from utils.python_utils import pip_install_requirements 2 | 3 | if __name__ == "__main__": 4 | pip_install_requirements(requirements_txt="pytorch_unet/requirements.txt") 5 | -------------------------------------------------------------------------------- /torchbenchmark/models/pytorch_unet/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 4 4 | eval_benchmark: false 5 | eval_deterministic: true 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/pytorch_unet/origin: -------------------------------------------------------------------------------- 1 | origin https://github.com/milesial/Pytorch-UNet.git -------------------------------------------------------------------------------- /torchbenchmark/models/pytorch_unet/pytorch_unet/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvcr.io/nvidia/pytorch:21.06-py3 2 | 3 | RUN rm -rf /workspace/* 4 | WORKDIR /workspace/unet 5 | 6 | ADD requirements.txt . 7 | RUN pip install --no-cache-dir --upgrade --pre pip 8 | RUN pip install --no-cache-dir -r requirements.txt 9 | ADD . . 10 | -------------------------------------------------------------------------------- /torchbenchmark/models/pytorch_unet/pytorch_unet/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/models/pytorch_unet/pytorch_unet/__init__.py -------------------------------------------------------------------------------- /torchbenchmark/models/pytorch_unet/pytorch_unet/hubconf.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from unet import UNet as _UNet 3 | 4 | 5 | def unet_carvana(pretrained=False): 6 | """ 7 | UNet model trained on the Carvana dataset ( https://www.kaggle.com/c/carvana-image-masking-challenge/data ). 8 | Set the scale to 0.5 (50%) when predicting. 
9 | """ 10 | net = _UNet(n_channels=3, n_classes=2, bilinear=True) 11 | if pretrained: 12 | checkpoint = "https://github.com/milesial/Pytorch-UNet/releases/download/v2.0/unet_carvana_scale0.5_epoch1.pth" 13 | net.load_state_dict( 14 | torch.hub.load_state_dict_from_url(checkpoint, progress=True) 15 | ) 16 | 17 | return net 18 | -------------------------------------------------------------------------------- /torchbenchmark/models/pytorch_unet/pytorch_unet/requirements.txt: -------------------------------------------------------------------------------- 1 | matplotlib 2 | numpy 3 | Pillow 4 | tqdm 5 | wandb 6 | -------------------------------------------------------------------------------- /torchbenchmark/models/pytorch_unet/pytorch_unet/scripts/download_data.sh: -------------------------------------------------------------------------------- 1 | echo -n "Kaggle username: " 2 | read USERNAME 3 | echo "" 4 | echo -n "Kaggle API key: " 5 | read APIKEY 6 | 7 | pip install kaggle --upgrade 8 | mkdir -p ~/.kaggle 9 | echo "{\"username\":\"$USERNAME\",\"key\":\"$APIKEY\"}" > ~/.kaggle/kaggle.json 10 | 11 | kaggle competitions download -c carvana-image-masking-challenge -f train_hq.zip 12 | unzip train_hq.zip 13 | mv train_hq/* data/imgs/ 14 | rm -d train_hq 15 | rm train_hq.zip 16 | 17 | kaggle competitions download -c carvana-image-masking-challenge -f train_masks.zip 18 | unzip train_masks.zip 19 | mv train_masks/* data/masks/ 20 | rm -d train_masks 21 | rm train_masks.zip -------------------------------------------------------------------------------- /torchbenchmark/models/pytorch_unet/pytorch_unet/unet/__init__.py: -------------------------------------------------------------------------------- 1 | from .unet_model import UNet 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/pytorch_unet/pytorch_unet/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/models/pytorch_unet/pytorch_unet/utils/__init__.py -------------------------------------------------------------------------------- /torchbenchmark/models/pytorch_unet/pytorch_unet/utils/utils.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | 3 | 4 | def plot_img_and_mask(img, mask): 5 | classes = mask.shape[0] if len(mask.shape) > 2 else 1 6 | fig, ax = plt.subplots(1, classes + 1) 7 | ax[0].set_title("Input image") 8 | ax[0].imshow(img) 9 | if classes > 1: 10 | for i in range(classes): 11 | ax[i + 1].set_title(f"Output mask (class {i + 1})") 12 | ax[i + 1].imshow(mask[:, :, i]) 13 | else: 14 | ax[1].set_title(f"Output mask") 15 | ax[1].imshow(mask) 16 | plt.xticks([]), plt.yticks([]) 17 | plt.show() 18 | -------------------------------------------------------------------------------- /torchbenchmark/models/resnet152/__init__.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.tasks import COMPUTER_VISION 2 | from torchbenchmark.util.framework.vision.model_factory import TorchVisionModel 3 | from torchvision import models 4 | 5 | 6 | class Model(TorchVisionModel): 7 | task = COMPUTER_VISION.CLASSIFICATION 8 | DEFAULT_TRAIN_BSIZE = 32 9 | DEFAULT_EVAL_BSIZE = 32 10 | 11 | def __init__(self, test, device, batch_size=None, extra_args=[]): 12 | super().__init__( 13 | model_name="resnet152", 14 | test=test, 
15 | device=device, 16 | batch_size=batch_size, 17 | weights=models.ResNet152_Weights.IMAGENET1K_V1, 18 | extra_args=extra_args, 19 | ) 20 | -------------------------------------------------------------------------------- /torchbenchmark/models/resnet152/install.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/models/resnet152/install.py -------------------------------------------------------------------------------- /torchbenchmark/models/resnet152/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 64 4 | eval_benchmark: true 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: true 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/resnet18/__init__.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.tasks import COMPUTER_VISION 2 | from torchbenchmark.util.framework.vision.model_factory import TorchVisionModel 3 | from torchvision import models 4 | 5 | 6 | class Model(TorchVisionModel): 7 | task = COMPUTER_VISION.CLASSIFICATION 8 | DEFAULT_TRAIN_BSIZE = 16 9 | DEFAULT_EVAL_BSIZE = 8 10 | 11 | def __init__(self, test, device, batch_size=None, extra_args=[]): 12 | super().__init__( 13 | model_name="resnet18", 14 | test=test, 15 | device=device, 16 | batch_size=batch_size, 17 | weights=models.ResNet18_Weights.IMAGENET1K_V1, 18 | extra_args=extra_args, 19 | ) 20 | -------------------------------------------------------------------------------- /torchbenchmark/models/resnet18/install.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/models/resnet18/install.py -------------------------------------------------------------------------------- /torchbenchmark/models/resnet18/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 256 4 | eval_benchmark: true 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: true 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/resnet50/__init__.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.tasks import COMPUTER_VISION 2 | from torchbenchmark.util.framework.vision.model_factory import TorchVisionModel 3 | from torchvision import models 4 | 5 | 6 | class Model(TorchVisionModel): 7 | task = COMPUTER_VISION.CLASSIFICATION 8 | DEFAULT_TRAIN_BSIZE = 32 9 | DEFAULT_EVAL_BSIZE = 32 10 | 11 | def __init__(self, test, device, batch_size=None, extra_args=[]): 12 | super().__init__( 13 | model_name="resnet50", 14 | test=test, 15 | device=device, 16 | batch_size=batch_size, 17 | weights=models.ResNet50_Weights.IMAGENET1K_V1, 18 | extra_args=extra_args, 19 | ) 20 | -------------------------------------------------------------------------------- /torchbenchmark/models/resnet50/install.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/models/resnet50/install.py -------------------------------------------------------------------------------- /torchbenchmark/models/resnet50/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 64 4 | eval_benchmark: true 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: true 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/resnet50_quantized_qat/install.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/models/resnet50_quantized_qat/install.py -------------------------------------------------------------------------------- /torchbenchmark/models/resnet50_quantized_qat/metadata.yaml: -------------------------------------------------------------------------------- 1 | eval_benchmark: true 2 | eval_deterministic: false 3 | eval_nograd: true 4 | train_benchmark: false 5 | train_deterministic: false 6 | -------------------------------------------------------------------------------- /torchbenchmark/models/resnext50_32x4d/__init__.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.tasks import COMPUTER_VISION 2 | from torchbenchmark.util.framework.vision.model_factory import TorchVisionModel 3 | from torchvision import models 4 | 5 | 6 | class Model(TorchVisionModel): 7 | task = COMPUTER_VISION.CLASSIFICATION 8 | DEFAULT_TRAIN_BSIZE = 8 9 | DEFAULT_EVAL_BSIZE = 8 10 | 11 | def __init__(self, test, device, batch_size=None, extra_args=[]): 12 | super().__init__( 13 | model_name="resnext50_32x4d", 14 | test=test, 15 | device=device, 16 | batch_size=batch_size, 17 | weights=models.ResNeXt50_32X4D_Weights.IMAGENET1K_V1, 18 | extra_args=extra_args, 19 | ) 20 | -------------------------------------------------------------------------------- /torchbenchmark/models/resnext50_32x4d/install.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/models/resnext50_32x4d/install.py -------------------------------------------------------------------------------- /torchbenchmark/models/resnext50_32x4d/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 64 4 | eval_benchmark: true 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: true 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/sam/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 32 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false -------------------------------------------------------------------------------- /torchbenchmark/models/sam/origin: -------------------------------------------------------------------------------- 1 | origin https://github.com/facebookresearch/segment-anything 
-------------------------------------------------------------------------------- /torchbenchmark/models/sam/requirements.txt: -------------------------------------------------------------------------------- 1 | # Actually needed 2 | opencv-python 3 | pycocotools 4 | -------------------------------------------------------------------------------- /torchbenchmark/models/sam_fast/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 32 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | not_implemented: 10 | - device: cpu 11 | - device: cuda 12 | test: example 13 | skip_cuda_memory_leak: true 14 | -------------------------------------------------------------------------------- /torchbenchmark/models/sam_fast/requirements.txt: -------------------------------------------------------------------------------- 1 | git+https://github.com/pytorch-labs/segment-anything-fast.git 2 | opencv-python 3 | pycocotools 4 | -------------------------------------------------------------------------------- /torchbenchmark/models/shufflenet_v2_x1_0/__init__.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.tasks import COMPUTER_VISION 2 | from torchbenchmark.util.framework.vision.model_factory import TorchVisionModel 3 | from torchvision import models 4 | 5 | 6 | class Model(TorchVisionModel): 7 | task = COMPUTER_VISION.CLASSIFICATION 8 | DEFAULT_TRAIN_BSIZE = 128 9 | DEFAULT_EVAL_BSIZE = 64 10 | 11 | def __init__(self, test, device, batch_size=None, extra_args=[]): 12 | super().__init__( 13 | model_name="shufflenet_v2_x1_0", 14 | test=test, 15 | device=device, 16 | batch_size=batch_size, 17 | weights=models.ShuffleNet_V2_X1_0_Weights.IMAGENET1K_V1, 18 | extra_args=extra_args, 19 | ) 20 | -------------------------------------------------------------------------------- /torchbenchmark/models/shufflenet_v2_x1_0/install.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/models/shufflenet_v2_x1_0/install.py -------------------------------------------------------------------------------- /torchbenchmark/models/shufflenet_v2_x1_0/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 128 4 | eval_benchmark: true 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: true 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/simple_gpt/metadata.yaml: -------------------------------------------------------------------------------- 1 | eval_benchmark: false 2 | eval_deterministic: false 3 | eval_nograd: true 4 | train_benchmark: false 5 | train_deterministic: false 6 | -------------------------------------------------------------------------------- /torchbenchmark/models/simple_gpt/origin: -------------------------------------------------------------------------------- 1 | https://github.com/pytorch-labs/simple_gpt/ 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/simple_gpt_tp_manual/metadata.yaml: -------------------------------------------------------------------------------- 
1 | eval_benchmark: false 2 | eval_deterministic: false 3 | eval_nograd: true 4 | train_benchmark: false 5 | train_deterministic: false 6 | -------------------------------------------------------------------------------- /torchbenchmark/models/simple_gpt_tp_manual/origin: -------------------------------------------------------------------------------- 1 | https://github.com/pytorch-labs/simple_gpt/ 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/soft_actor_critic/install.py: -------------------------------------------------------------------------------- 1 | from utils.python_utils import pip_install_requirements 2 | 3 | if __name__ == "__main__": 4 | pip_install_requirements() 5 | -------------------------------------------------------------------------------- /torchbenchmark/models/soft_actor_critic/metadata.yaml: -------------------------------------------------------------------------------- 1 | eval_benchmark: false 2 | eval_deterministic: false 3 | eval_nograd: true 4 | train_benchmark: false 5 | train_deterministic: false -------------------------------------------------------------------------------- /torchbenchmark/models/soft_actor_critic/requirements.txt: -------------------------------------------------------------------------------- 1 | gym 2 | pygame 3 | tensorboardX 4 | -------------------------------------------------------------------------------- /torchbenchmark/models/speech_transformer/install.py: -------------------------------------------------------------------------------- 1 | from utils import s3_utils 2 | from utils.python_utils import pip_install_requirements 3 | 4 | if __name__ == "__main__": 5 | s3_utils.checkout_s3_data( 6 | "INPUT_TARBALLS", "speech_transformer_inputs.tar.gz", decompress=True 7 | ) 8 | pip_install_requirements() 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/speech_transformer/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 1 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | not_implemented: 8 | - device: cpu 9 | train_benchmark: false 10 | train_deterministic: false 11 | -------------------------------------------------------------------------------- /torchbenchmark/models/speech_transformer/origin: -------------------------------------------------------------------------------- 1 | origin https://github.com/kaituoxu/Speech-Transformer 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/speech_transformer/requirements.txt: -------------------------------------------------------------------------------- 1 | kaldi_io 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/speech_transformer/speech_transformer/data/__init__.py: -------------------------------------------------------------------------------- 1 | from .data import * 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/speech_transformer/speech_transformer/transformer/__init__.py: -------------------------------------------------------------------------------- 1 | from .transformer import * 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/speech_transformer/speech_transformer/utils/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .utils import * 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/squeezenet1_1/install.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/models/squeezenet1_1/install.py -------------------------------------------------------------------------------- /torchbenchmark/models/squeezenet1_1/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 256 4 | eval_benchmark: true 5 | eval_deterministic: false 6 | eval_nograd: true 7 | not_implemented: 8 | - device: cpu 9 | test: train 10 | - device: cuda 11 | test: train 12 | train_benchmark: true 13 | train_deterministic: false 14 | -------------------------------------------------------------------------------- /torchbenchmark/models/stable_diffusion_text_encoder/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 32 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | not_implemented: 10 | - device: cpu 11 | -------------------------------------------------------------------------------- /torchbenchmark/models/stable_diffusion_unet/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | import warnings 3 | 4 | import torch 5 | from torchbenchmark.util.framework.diffusers import install_diffusers 6 | 7 | MODEL_NAME = "stabilityai/stable-diffusion-2" 8 | 9 | 10 | def load_model_checkpoint(): 11 | from diffusers import StableDiffusionPipeline 12 | 13 | StableDiffusionPipeline.from_pretrained( 14 | MODEL_NAME, torch_dtype=torch.float16, safety_checker=None 15 | ) 16 | 17 | 18 | if __name__ == "__main__": 19 | install_diffusers() 20 | if not "HUGGING_FACE_HUB_TOKEN" in os.environ: 21 | warnings.warn( 22 | "Make sure to set `HUGGINGFACE_HUB_TOKEN` so you can download weights" 23 | ) 24 | else: 25 | load_model_checkpoint() 26 | -------------------------------------------------------------------------------- /torchbenchmark/models/stable_diffusion_unet/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 32 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | not_implemented: 10 | - device: cpu 11 | -------------------------------------------------------------------------------- /torchbenchmark/models/tacotron2/.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "waveglow"] 2 | path = waveglow 3 | url = https://github.com/NVIDIA/waveglow 4 | branch = master 5 | -------------------------------------------------------------------------------- /torchbenchmark/models/tacotron2/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM pytorch/pytorch:nightly-devel-cuda10.0-cudnn7 2 | ENV PATH /usr/local/nvidia/bin:/usr/local/cuda/bin:${PATH} 3 | 4 | RUN apt-get update -y 5 | 6 | RUN pip install numpy scipy matplotlib librosa==0.6.0 tensorflow 
tensorboardX inflect==0.2.5 Unidecode==1.0.22 pillow jupyter 7 | 8 | ADD apex /apex/ 9 | WORKDIR /apex/ 10 | RUN pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" . 11 | -------------------------------------------------------------------------------- /torchbenchmark/models/tacotron2/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | 4 | from utils import s3_utils 5 | from utils.python_utils import pip_install_requirements 6 | 7 | 8 | def check_data_dir(): 9 | current_dir = Path(os.path.dirname(os.path.realpath(__file__))) 10 | tacotron2_data_dir = os.path.join( 11 | current_dir.parent.parent, "data", ".data", "tacotron2-minimal" 12 | ) 13 | assert os.path.exists( 14 | tacotron2_data_dir 15 | ), "Couldn't find tacotron2 minimal data dir, please run install.py again." 16 | 17 | 18 | if __name__ == "__main__": 19 | pip_install_requirements() 20 | s3_utils.checkout_s3_data( 21 | "INPUT_TARBALLS", "tacotron2-minimal.tar.gz", decompress=True 22 | ) 23 | -------------------------------------------------------------------------------- /torchbenchmark/models/tacotron2/install.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | git submodule init; git submodule update 3 | pip install -r requirements.txt 4 | -------------------------------------------------------------------------------- /torchbenchmark/models/tacotron2/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 128 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | not_implemented: 8 | - device: cuda 9 | - device: cpu 10 | train_benchmark: false 11 | train_deterministic: false 12 | -------------------------------------------------------------------------------- /torchbenchmark/models/tacotron2/multiproc.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import sys 3 | import time 4 | 5 | import torch 6 | 7 | argslist = list(sys.argv)[1:] 8 | num_gpus = torch.cuda.device_count() 9 | argslist.append("--n_gpus={}".format(num_gpus)) 10 | workers = [] 11 | job_id = time.strftime("%Y_%m_%d-%H%M%S") 12 | argslist.append("--group_name=group_{}".format(job_id)) 13 | 14 | for i in range(num_gpus): 15 | argslist.append("--rank={}".format(i)) 16 | stdout = None if i == 0 else open("logs/{}_GPU_{}.log".format(job_id, i), "w") 17 | print(argslist) 18 | p = subprocess.Popen([str(sys.executable)] + argslist, stdout=stdout) 19 | workers.append(p) 20 | argslist = argslist[:-1] 21 | 22 | for p in workers: 23 | p.wait() 24 | -------------------------------------------------------------------------------- /torchbenchmark/models/tacotron2/origin: -------------------------------------------------------------------------------- 1 | origin https://github.com/ailzhang/tacotron2.git 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/tacotron2/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | inflect 3 | scipy 4 | Unidecode 5 | pillow 6 | librosa==0.9.2 7 | -------------------------------------------------------------------------------- /torchbenchmark/models/tacotron2/waveglow/.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule 
"tacotron2"] 2 | path = tacotron2 3 | url = http://github.com/NVIDIA/tacotron2 4 | -------------------------------------------------------------------------------- /torchbenchmark/models/tacotron2/waveglow/tacotron2/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM pytorch/pytorch:0.4_cuda9_cudnn7 2 | RUN pip install numpy scipy matplotlib librosa==0.6.0 tensorflow tensorboardX inflect==0.2.5 Unidecode==1.0.22 jupyter 3 | -------------------------------------------------------------------------------- /torchbenchmark/models/tacotron2/waveglow/tacotron2/multiproc.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import sys 3 | import time 4 | 5 | import torch 6 | 7 | argslist = list(sys.argv)[1:] 8 | num_gpus = torch.cuda.device_count() 9 | argslist.append("--n_gpus={}".format(num_gpus)) 10 | workers = [] 11 | job_id = time.strftime("%Y_%m_%d-%H%M%S") 12 | argslist.append("--group_name=group_{}".format(job_id)) 13 | 14 | for i in range(num_gpus): 15 | argslist.append("--rank={}".format(i)) 16 | stdout = None if i == 0 else open("logs/{}_GPU_{}.log".format(job_id, i), "w") 17 | print(argslist) 18 | p = subprocess.Popen([str(sys.executable)] + argslist, stdout=stdout) 19 | workers.append(p) 20 | argslist = argslist[:-1] 21 | 22 | for p in workers: 23 | p.wait() 24 | -------------------------------------------------------------------------------- /torchbenchmark/models/timm_efficientdet/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 128 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | not_implemented: 8 | - device: cuda 9 | - device: cpu 10 | train_benchmark: true 11 | train_deterministic: false 12 | -------------------------------------------------------------------------------- /torchbenchmark/models/timm_efficientdet/requirements.txt: -------------------------------------------------------------------------------- 1 | pycocotools 2 | git+https://github.com/rwightman/efficientdet-pytorch.git@d43c9e34cd62d22b4205831bb735f6dd83b8e881 3 | -------------------------------------------------------------------------------- /torchbenchmark/models/timm_efficientnet/__init__.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.tasks import COMPUTER_VISION 2 | from torchbenchmark.util.framework.timm.model_factory import TimmModel 3 | 4 | 5 | class Model(TimmModel): 6 | task = COMPUTER_VISION.CLASSIFICATION 7 | 8 | DEFAULT_TRAIN_BSIZE = 32 9 | DEFAULT_EVAL_BSIZE = 64 10 | 11 | def __init__(self, test, device, batch_size=None, extra_args=[]): 12 | super().__init__( 13 | test=test, 14 | model_name="efficientnet_b0", 15 | device=device, 16 | batch_size=batch_size, 17 | extra_args=extra_args, 18 | ) 19 | -------------------------------------------------------------------------------- /torchbenchmark/models/timm_efficientnet/install.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/models/timm_efficientnet/install.py -------------------------------------------------------------------------------- /torchbenchmark/models/timm_efficientnet/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA 
A100-SXM4-40GB: 3 | eval_batch_size: 128 4 | eval_benchmark: true 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: true 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/timm_nfnet/install.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/models/timm_nfnet/install.py -------------------------------------------------------------------------------- /torchbenchmark/models/timm_nfnet/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 128 4 | eval_benchmark: true 5 | eval_deterministic: false 6 | eval_nograd: true 7 | not_implemented: 8 | - device: cuda 9 | test: train 10 | train_benchmark: true 11 | train_deterministic: false 12 | -------------------------------------------------------------------------------- /torchbenchmark/models/timm_regnet/__init__.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.tasks import COMPUTER_VISION 2 | from torchbenchmark.util.framework.timm.model_factory import TimmModel 3 | 4 | 5 | class Model(TimmModel): 6 | task = COMPUTER_VISION.CLASSIFICATION 7 | 8 | DEFAULT_TRAIN_BSIZE = 32 9 | DEFAULT_EVAL_BSIZE = 32 10 | 11 | def __init__(self, test, device, batch_size=None, extra_args=[]): 12 | super().__init__( 13 | test=test, 14 | model_name="regnety_120", 15 | device=device, 16 | batch_size=batch_size, 17 | extra_args=extra_args, 18 | ) 19 | -------------------------------------------------------------------------------- /torchbenchmark/models/timm_regnet/install.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/models/timm_regnet/install.py -------------------------------------------------------------------------------- /torchbenchmark/models/timm_regnet/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 32 4 | eval_benchmark: true 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: true 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/timm_resnest/__init__.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.tasks import COMPUTER_VISION 2 | from torchbenchmark.util.framework.timm.model_factory import TimmModel 3 | 4 | 5 | class Model(TimmModel): 6 | task = COMPUTER_VISION.CLASSIFICATION 7 | 8 | DEFAULT_TRAIN_BSIZE = 32 9 | DEFAULT_EVAL_BSIZE = 32 10 | 11 | def __init__(self, test, device, batch_size=None, extra_args=[]): 12 | super().__init__( 13 | test=test, 14 | model_name="resnest14d", 15 | device=device, 16 | batch_size=batch_size, 17 | extra_args=extra_args, 18 | ) 19 | -------------------------------------------------------------------------------- /torchbenchmark/models/timm_resnest/install.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/models/timm_resnest/install.py 
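The metadata.yaml files above all follow the same schema: a top-level devices mapping keyed by GPU name, per-device eval/train flags and batch sizes, and an optional not_implemented list. The snippet below is a minimal illustrative reader, not part of the repository; it assumes PyYAML is installed and that the file uses the devices layout shown above.

import yaml

def load_eval_batch_size(metadata_path, device_name, default=None):
    """Return the eval_batch_size recorded for device_name, or default if absent."""
    with open(metadata_path) as f:
        metadata = yaml.safe_load(f) or {}
    device_cfg = (metadata.get("devices") or {}).get(device_name, {})
    return device_cfg.get("eval_batch_size", default)

# Example (hypothetical invocation): for the timm_nfnet metadata.yaml shown above,
# load_eval_batch_size("metadata.yaml", "NVIDIA A100-SXM4-40GB") would return 128.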
-------------------------------------------------------------------------------- /torchbenchmark/models/timm_resnest/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 256 4 | eval_benchmark: true 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: true 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/timm_vision_transformer/__init__.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.tasks import COMPUTER_VISION 2 | from torchbenchmark.util.framework.timm.model_factory import TimmModel 3 | 4 | 5 | class Model(TimmModel): 6 | task = COMPUTER_VISION.GENERATION 7 | 8 | DEFAULT_TRAIN_BSIZE = 32 9 | DEFAULT_EVAL_BSIZE = 32 10 | 11 | def __init__(self, test, device, batch_size=None, extra_args=[]): 12 | super().__init__( 13 | test=test, 14 | model_name="vit_small_patch16_224", 15 | device=device, 16 | batch_size=batch_size, 17 | extra_args=extra_args, 18 | ) 19 | -------------------------------------------------------------------------------- /torchbenchmark/models/timm_vision_transformer/install.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/models/timm_vision_transformer/install.py -------------------------------------------------------------------------------- /torchbenchmark/models/timm_vision_transformer/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 128 4 | eval_benchmark: true 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: true 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/timm_vision_transformer_large/__init__.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.tasks import COMPUTER_VISION 2 | from torchbenchmark.util.framework.timm.model_factory import TimmModel 3 | 4 | 5 | class Model(TimmModel): 6 | task = COMPUTER_VISION.GENERATION 7 | 8 | DEFAULT_TRAIN_BSIZE = 32 9 | DEFAULT_EVAL_BSIZE = 32 10 | 11 | def __init__(self, test, device, batch_size=None, extra_args=[]): 12 | super().__init__( 13 | test=test, 14 | model_name="vit_giant_patch14_224", 15 | device=device, 16 | batch_size=batch_size, 17 | extra_args=extra_args, 18 | ) 19 | -------------------------------------------------------------------------------- /torchbenchmark/models/timm_vision_transformer_large/install.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/models/timm_vision_transformer_large/install.py -------------------------------------------------------------------------------- /torchbenchmark/models/timm_vision_transformer_large/metadata.yaml: -------------------------------------------------------------------------------- 1 | eval_benchmark: true 2 | eval_deterministic: false 3 | eval_nograd: true 4 | train_benchmark: true 5 | train_deterministic: false 6 | not_implemented: 7 | - device: cuda 8 | -------------------------------------------------------------------------------- 
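The timm_* wrappers above all follow one pattern: subclass TimmModel, set the task and default batch sizes, and forward a timm model name to the parent constructor. Below is a usage sketch, assuming the torchbenchmark package, timm, and a CUDA device are available, and that the harness's BenchmarkModel base class exposes get_module() and invoke() as in upstream TorchBench.

from torchbenchmark.models.timm_vision_transformer import Model

# Build the eval benchmark on GPU with the wrapper's default eval batch size.
benchmark = Model(test="eval", device="cuda")

# Inspect the underlying nn.Module and its generated example inputs...
model, example_inputs = benchmark.get_module()
print(type(model).__name__)

# ...or let the harness execute a single eval iteration itself.
benchmark.invoke()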
/torchbenchmark/models/timm_vovnet/__init__.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.tasks import COMPUTER_VISION 2 | from torchbenchmark.util.framework.timm.model_factory import TimmModel 3 | 4 | 5 | class Model(TimmModel): 6 | task = COMPUTER_VISION.DETECTION 7 | 8 | DEFAULT_TRAIN_BSIZE = 32 9 | DEFAULT_EVAL_BSIZE = 32 10 | 11 | def __init__(self, test, device, batch_size=None, extra_args=[]): 12 | super().__init__( 13 | test=test, 14 | model_name="vovnet39a", 15 | device=device, 16 | batch_size=batch_size, 17 | extra_args=extra_args, 18 | ) 19 | -------------------------------------------------------------------------------- /torchbenchmark/models/timm_vovnet/install.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/models/timm_vovnet/install.py -------------------------------------------------------------------------------- /torchbenchmark/models/timm_vovnet/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 128 4 | eval_benchmark: true 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: true 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/torch_multimodal_clip/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 1 4 | eval_train_size: 1 5 | eval_benchmark: false 6 | eval_deterministic: false 7 | eval_nograd: true 8 | train_benchmark: false 9 | train_deterministic: false 10 | -------------------------------------------------------------------------------- /torchbenchmark/models/torch_multimodal_clip/requirements.txt: -------------------------------------------------------------------------------- 1 | git+https://github.com/facebookresearch/multimodal.git 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/tts_angular/install.py: -------------------------------------------------------------------------------- 1 | from utils.python_utils import pip_install_requirements 2 | 3 | if __name__ == "__main__": 4 | pip_install_requirements() 5 | -------------------------------------------------------------------------------- /torchbenchmark/models/tts_angular/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 512 4 | eval_benchmark: true 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: true 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/tts_angular/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | scipy 3 | phonemizer 4 | unidecode 5 | Pillow 6 | flask 7 | tqdm 8 | inflect 9 | bokeh 10 | pysbd 11 | soundfile 12 | nose 13 | cardboardlint 14 | pylint 15 | gdown 16 | pyyaml 17 | librosa 18 | -------------------------------------------------------------------------------- /torchbenchmark/models/vgg16/install.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/models/vgg16/install.py -------------------------------------------------------------------------------- /torchbenchmark/models/vgg16/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 8 4 | eval_benchmark: true 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: true 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/vision_maskrcnn/install.py: -------------------------------------------------------------------------------- 1 | from utils import s3_utils 2 | from utils.python_utils import pip_install_requirements 3 | 4 | if __name__ == "__main__": 5 | s3_utils.checkout_s3_data( 6 | "INPUT_TARBALLS", "coco2017-minimal.tar.gz", decompress=True 7 | ) 8 | pip_install_requirements() 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/vision_maskrcnn/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 1 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/vision_maskrcnn/origin: -------------------------------------------------------------------------------- 1 | origin https://github.com/pytorch/vision/blob/main/torchvision/models/detection/mask_rcnn.py 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/vision_maskrcnn/requirements.txt: -------------------------------------------------------------------------------- 1 | pycocotools 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/yolov3/check.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import torch 4 | 5 | a = torch.load(sys.argv[1]) 6 | b = torch.load(sys.argv[2]) 7 | torch.testing.assert_allclose(a, b, rtol=0.01, atol=0.01) 8 | -------------------------------------------------------------------------------- /torchbenchmark/models/yolov3/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | 4 | from utils import s3_utils 5 | from utils.python_utils import pip_install_requirements 6 | 7 | 8 | def setup_data_dir(): 9 | current_dir = Path(os.path.dirname(os.path.realpath(__file__))) 10 | coco128_data_dir = os.path.join( 11 | current_dir.parent.parent, "data", ".data", "coco128" 12 | ) 13 | assert os.path.exists( 14 | coco128_data_dir 15 | ), "Couldn't find coco128 data dir, please run install.py again." 
16 | 17 | 18 | if __name__ == "__main__": 19 | s3_utils.checkout_s3_data("INPUT_TARBALLS", "coco128.tar.gz", decompress=True) 20 | pip_install_requirements() 21 | -------------------------------------------------------------------------------- /torchbenchmark/models/yolov3/install.sh: -------------------------------------------------------------------------------- 1 | python -m pip install -r requirements.txt 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/yolov3/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 8 4 | eval_benchmark: true 5 | eval_deterministic: false 6 | eval_nograd: true 7 | not_implemented: 8 | - device: cpu 9 | test: train 10 | train_benchmark: true 11 | train_deterministic: false 12 | -------------------------------------------------------------------------------- /torchbenchmark/models/yolov3/run.sh: -------------------------------------------------------------------------------- 1 | debug_arg="" 2 | if [ $# -gt 1 ]; then 3 | if [ "$1" == "--debug" ]; then 4 | debug_arg="--debug $2" 5 | fi 6 | fi 7 | python3.8 train.py --data coco128.data --img 416 --batch 8 --nosave --notest --epochs 10 --weights '' $debug_arg 8 | -------------------------------------------------------------------------------- /torchbenchmark/models/yolov3/train_batch0.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/models/yolov3/train_batch0.jpg -------------------------------------------------------------------------------- /torchbenchmark/models/yolov3/yolo_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/models/yolov3/yolo_utils/__init__.py -------------------------------------------------------------------------------- /torchbenchmark/util/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/util/__init__.py -------------------------------------------------------------------------------- /torchbenchmark/util/distributed/requirements.txt: -------------------------------------------------------------------------------- 1 | deepspeed 2 | evaluate 3 | datasets 4 | scikit-learn 5 | tensorboard 6 | -------------------------------------------------------------------------------- /torchbenchmark/util/framework/detectron2/config.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | 4 | def parse_tb_args(args): 5 | parser = argparse.ArgumentParser() 6 | # default resolution: 800x1333 7 | parser.add_argument( 8 | "--resize", 9 | choices=["default", "448x608"], 10 | default="default", 11 | help="Resize the image to specified size", 12 | ) 13 | args, unknown_args = parser.parse_known_args(args) 14 | return args, unknown_args 15 | -------------------------------------------------------------------------------- /torchbenchmark/util/framework/detectron2/requirements.txt: -------------------------------------------------------------------------------- 1 | 
git+https://github.com/facebookresearch/detectron2.git@0df2d73d0013db7de629602c23cc120219b4f2b8 2 | omegaconf==2.3.0 3 | numpy 4 | -------------------------------------------------------------------------------- /torchbenchmark/util/framework/diffusers/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | 4 | from utils.python_utils import pip_install_requirements 5 | 6 | CURRENT_DIR = Path(os.path.dirname(os.path.realpath(__file__))) 7 | 8 | 9 | def install_diffusers(): 10 | requirements_file = os.path.join(CURRENT_DIR, "requirements.txt") 11 | pip_install_requirements(requirements_txt=requirements_file) 12 | -------------------------------------------------------------------------------- /torchbenchmark/util/framework/diffusers/requirements.txt: -------------------------------------------------------------------------------- 1 | diffusers==0.30.3 2 | -------------------------------------------------------------------------------- /torchbenchmark/util/framework/gnn/__init__.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | 3 | from utils.python_utils import pip_install_requirements 4 | 5 | CURRENT_DIR = os.path.dirname(os.path.realpath(__file__)) 6 | 7 | 8 | def install_pytorch_geometric(): 9 | pip_install_requirements(os.path.join(CURRENT_DIR, "requirements.txt")) 10 | -------------------------------------------------------------------------------- /torchbenchmark/util/framework/gnn/args.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/util/framework/gnn/args.py -------------------------------------------------------------------------------- /torchbenchmark/util/framework/gnn/config.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | 4 | def parse_tb_args(args): 5 | parser = argparse.ArgumentParser() 6 | parser.add_argument( 7 | "--graph_type", 8 | choices=["dense", "sparse"], 9 | default="dense", 10 | help="Determine dense graph or sparse graph", 11 | ) 12 | args, unknown_args = parser.parse_known_args(args) 13 | return args, unknown_args 14 | -------------------------------------------------------------------------------- /torchbenchmark/util/framework/gnn/requirements.txt: -------------------------------------------------------------------------------- 1 | torch_geometric @ git+https://github.com/pyg-team/pytorch_geometric.git@cabcd4097442ba60aa1efa11e1619dd9bb8fb527 2 | -------------------------------------------------------------------------------- /torchbenchmark/util/hardware/__init__.py: -------------------------------------------------------------------------------- 1 | from .roofline import HW_ROOFLINE_SPECS 2 | -------------------------------------------------------------------------------- /torchbenchmark/util/hardware/roofline.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | 3 | # NVIDIA A100 GPU Spec: 4 | # https://www.nvidia.com/content/dam/en-zz/Solutions/Data-Center/a100/pdf/nvidia-a100-datasheet-us-nvidia-1758950-r4-web.pdf 5 | NV_A100 = { 6 | "fp32": 19.5, 7 | "tf32": 156, 8 | "bf16": 312, 9 | "fp16": 312, 10 | } 11 | 12 | # NVIDIA H100 GPU Datasheet: 13 | # https://nvdam.widen.net/content/vuzumiozpb/original/h100-datasheet-2287922.pdf 14 | NV_H100 = { 15 | 
"fp32": 51, 16 | "tf32": 756, 17 | "bf16": 1513, 18 | "fp16": 1513, 19 | "fp8": 3026, 20 | } 21 | 22 | 23 | HW_ROOFLINE_SPECS: Dict[str, Dict[str, float]] = { 24 | "NVIDIA A100-SXM4-40GB": NV_A100, 25 | "NVIDIA A100-PG509-200": NV_A100, 26 | "NVIDIA H100": NV_H100, 27 | } 28 | -------------------------------------------------------------------------------- /userbenchmark/api-coverage/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /userbenchmark/cpu/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/userbenchmark/cpu/__init__.py -------------------------------------------------------------------------------- /userbenchmark/cpu/cpu_test.yaml: -------------------------------------------------------------------------------- 1 | test: eval 2 | model: resnet50,mobilenet_v2 3 | extra_args: --backend torchscript --fuser fuser3 4 | -------------------------------------------------------------------------------- /userbenchmark/cuda-compare/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/userbenchmark/cuda-compare/__init__.py -------------------------------------------------------------------------------- /userbenchmark/ddp_experiments/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/userbenchmark/ddp_experiments/__init__.py -------------------------------------------------------------------------------- /userbenchmark/distributed/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/userbenchmark/distributed/__init__.py -------------------------------------------------------------------------------- /userbenchmark/distributed/ci.yaml: -------------------------------------------------------------------------------- 1 | platform: "ai_cluster" 2 | schedule: "nightly" 3 | -------------------------------------------------------------------------------- /userbenchmark/distributed/install.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import sys 3 | 4 | 5 | def pip_install_requirements(): 6 | subprocess.check_call( 7 | [sys.executable, "-m", "pip", "install", "-q", "-r", "requirements.txt"] 8 | ) 9 | 10 | 11 | if __name__ == "__main__": 12 | pip_install_requirements() 13 | -------------------------------------------------------------------------------- /userbenchmark/dynamo/__init__.py: -------------------------------------------------------------------------------- 1 | BM_NAME = "dynamo" 2 | -------------------------------------------------------------------------------- /userbenchmark/dynamo/dynamobench/torchbench_models_list.txt: -------------------------------------------------------------------------------- 1 | BERT_pytorch,128 2 | Background_Matting, 16 3 | LearningToPaint,1024 4 | alexnet,1024 5 | dcgan,1024 6 | densenet121,64 7 | hf_Albert,32 8 | hf_Bart,16 9 | hf_Bert,16 10 | hf_GPT2,16 11 | hf_T5,4 12 | mnasnet1_0,256 13 | mobilenet_v2,128 14 | 
mobilenet_v3_large,256 15 | nvidia_deeprecommender,1024 16 | pytorch_unet,8 17 | resnet18,512 18 | resnet50,128 19 | resnext50_32x4d,128 20 | shufflenet_v2_x1_0,512 21 | squeezenet1_1,512 22 | timm_nfnet,256 23 | timm_efficientnet,128 24 | timm_regnet,128 25 | timm_resnest,256 26 | timm_vision_transformer,256 27 | timm_vovnet,128 28 | vgg16,128 29 | -------------------------------------------------------------------------------- /userbenchmark/functorch/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/userbenchmark/functorch/__init__.py -------------------------------------------------------------------------------- /userbenchmark/functorch/ci.yaml: -------------------------------------------------------------------------------- 1 | platform: "aws_t4_metal" 2 | schedule: "nightly" 3 | -------------------------------------------------------------------------------- /userbenchmark/group_bench/__init__.py: -------------------------------------------------------------------------------- 1 | BM_NAME = "group_bench" 2 | -------------------------------------------------------------------------------- /userbenchmark/group_bench/configs/torch_ao.yaml: -------------------------------------------------------------------------------- 1 | model: "*" 2 | extended_models: 3 | - huggingface 4 | - timm 5 | test: eval 6 | device: cuda 7 | extra_args: --precision bf16 --torchdynamo inductor --inductor-compile-mode max-autotune 8 | metrics: 9 | - latencies 10 | test_group: 11 | test_batch_size_default: 12 | subgroup: 13 | - extra_args: 14 | - extra_args: --quantization int8dynamic 15 | - extra_args: --quantization int8weightonly 16 | - extra_args: --quantization int4weightonly 17 | -------------------------------------------------------------------------------- /userbenchmark/instruction-count/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/userbenchmark/instruction-count/__init__.py -------------------------------------------------------------------------------- /userbenchmark/model-stableness/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/userbenchmark/model-stableness/__init__.py -------------------------------------------------------------------------------- /userbenchmark/nvfuser/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/userbenchmark/nvfuser/__init__.py -------------------------------------------------------------------------------- /userbenchmark/optim/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/userbenchmark/optim/__init__.py -------------------------------------------------------------------------------- /userbenchmark/release-test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/userbenchmark/release-test/__init__.py 
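torchbench_models_list.txt above pairs each model name with a default batch size, one comma-separated entry per line (some entries carry a stray space after the comma, e.g. "Background_Matting, 16"). A minimal parsing sketch follows; the helper name is hypothetical and the function is illustrative only, not part of the repository.

def parse_models_list(path):
    """Yield (model_name, batch_size) pairs from a torchbench_models_list.txt-style file."""
    with open(path) as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            name, batch = line.split(",", 1)
            yield name.strip(), int(batch.strip())

# Example: dict(parse_models_list("torchbench_models_list.txt"))["BERT_pytorch"] == 128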
-------------------------------------------------------------------------------- /userbenchmark/release-test/configs/1.12.1.yaml: -------------------------------------------------------------------------------- 1 | cuda: 2 | - version: 11.3 3 | magma_version: magma-cuda113 4 | - version: 11.6 5 | magma_version: magma-cuda116 6 | pytorch: 7 | - version: 1.12.1 8 | conda_channel: pytorch-test 9 | - version: 1.12.0 10 | conda_channel: pytorch 11 | -------------------------------------------------------------------------------- /userbenchmark/release-test/configs/1.13.0.yaml: -------------------------------------------------------------------------------- 1 | cuda: 2 | - version: 11.6 3 | magma_version: magma-cuda116 4 | pytorch: 5 | - version: 1.13.0 6 | conda_channel: pytorch-test 7 | - version: 1.12.1 8 | conda_channel: pytorch 9 | -------------------------------------------------------------------------------- /userbenchmark/release-test/configs/2.0.1.yaml: -------------------------------------------------------------------------------- 1 | cuda: 2 | - version: 11.7 3 | magma_version: magma-cuda117 4 | pytorch: 5 | - version: 2.1.0 6 | conda_channel: pytorch-test 7 | - version: 2.0.1 8 | conda_channel: pytorch 9 | -------------------------------------------------------------------------------- /userbenchmark/release-test/configs/2.1.0.yaml: -------------------------------------------------------------------------------- 1 | cuda: 2 | - version: 11.8 3 | magma_version: magma-cuda118 4 | pytorch: 5 | - version: 2.1.0 6 | conda_channel: pytorch-test 7 | - version: 2.0.1 8 | conda_channel: pytorch 9 | -------------------------------------------------------------------------------- /userbenchmark/release-test/configs/2.1.1.yaml: -------------------------------------------------------------------------------- 1 | cuda: 2 | - version: 12.1 3 | magma_version: magma-cuda121 4 | pytorch: 5 | - version: 2.1.1 6 | conda_channel: pytorch 7 | - version: 2.1.0 8 | conda_channel: pytorch 9 | -------------------------------------------------------------------------------- /userbenchmark/release-test/configs/2.1.2.yaml: -------------------------------------------------------------------------------- 1 | cuda: 2 | - version: 12.1 3 | magma_version: magma-cuda121 4 | pytorch: 5 | - version: 2.1.2 6 | conda_channel: pytorch-test 7 | - version: 2.1.1 8 | conda_channel: pytorch 9 | -------------------------------------------------------------------------------- /userbenchmark/release-test/configs/2.5.0.yaml: -------------------------------------------------------------------------------- 1 | cuda: 2 | - version: 12.4 3 | magma_version: magma-cuda124 4 | pytorch: 5 | - version: 2.4.1 6 | conda_channel: pytorch 7 | - version: 2.5.0 8 | conda_channel: pytorch-test 9 | -------------------------------------------------------------------------------- /userbenchmark/release-test/configs/2.5.1.yaml: -------------------------------------------------------------------------------- 1 | cuda: 2 | - version: 12.4 3 | magma_version: magma-cuda124 4 | pytorch: 5 | - version: 2.5.0 6 | conda_channel: pytorch 7 | - version: 2.5.1 8 | conda_channel: pytorch-test 9 | -------------------------------------------------------------------------------- /userbenchmark/release-test/configs/2.6.0.yaml: -------------------------------------------------------------------------------- 1 | cuda: 2 | - version: 12.4 3 | magma_version: magma-cuda124 4 | pytorch: 5 | - version: 2.5.1 6 | conda_channel: pytorch 7 | - version: 2.6.0 8 | 
conda_channel: pytorch-test 9 | -------------------------------------------------------------------------------- /userbenchmark/release-test/version.txt: -------------------------------------------------------------------------------- 1 | 2.6.0 2 | -------------------------------------------------------------------------------- /userbenchmark/rocm-test/__init__.py: -------------------------------------------------------------------------------- 1 | BM_NAME = "rocm-test" 2 | -------------------------------------------------------------------------------- /userbenchmark/test-user-invoke/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/userbenchmark/test-user-invoke/__init__.py -------------------------------------------------------------------------------- /userbenchmark/test_bench/__init__.py: -------------------------------------------------------------------------------- 1 | BM_NAME = "test_bench" 2 | -------------------------------------------------------------------------------- /userbenchmark/torch-nightly/__init__.py: -------------------------------------------------------------------------------- 1 | BM_NAME = "torch-nightly" 2 | -------------------------------------------------------------------------------- /userbenchmark/torch_trt/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/userbenchmark/torch_trt/__init__.py -------------------------------------------------------------------------------- /userbenchmark/torch_trt/ci.yaml: -------------------------------------------------------------------------------- 1 | platform: "gcp_a100" 2 | schedule: "nightly" 3 | -------------------------------------------------------------------------------- /userbenchmark/torchao/__init__.py: -------------------------------------------------------------------------------- 1 | BM_NAME = "torchao" 2 | -------------------------------------------------------------------------------- /utils/build_requirements.txt: -------------------------------------------------------------------------------- 1 | # We need to pin numpy version to the same as the torch testing environment 2 | # which still supports python 3.8 3 | numpy==1.21.2; python_version < '3.11' 4 | numpy==1.26.0; python_version >= '3.11' 5 | psutil 6 | pyyaml --------------------------------------------------------------------------------
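The numpy pins in build_requirements.txt rely on PEP 508 environment markers, so pip installs exactly one of the two lines depending on the interpreter version. The sketch below evaluates the same markers outside of pip; it assumes the packaging library is installed, and the requirement strings are copied from the file above.

from packaging.requirements import Requirement

requirements = [
    "numpy==1.21.2; python_version < '3.11'",
    "numpy==1.26.0; python_version >= '3.11'",
]

# Keep only the pins whose marker matches the running interpreter.
active = [
    str(Requirement(r).specifier)
    for r in requirements
    if Requirement(r).marker is None or Requirement(r).marker.evaluate()
]
print("numpy pin selected for this Python:", active)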