├── .ci └── torchbench │ ├── check-ssh.sh │ ├── install-conda.sh │ ├── install.sh │ └── test.sh ├── .clang-format ├── .flake8 ├── .github ├── scripts │ ├── abtest.py │ ├── bisection-config.sample.yaml │ ├── bmutils │ │ ├── __init__.py │ │ ├── analyze-bisection-result.py │ │ └── summarize.py │ ├── generate-abtest-config.py │ ├── run-bisection.sh │ ├── run-config.py │ ├── run.sh │ ├── test-repeated-runs.py │ └── userbenchmark │ │ ├── __init__.py │ │ ├── aicluster.py │ │ └── schedule-benchmarks.py └── workflows │ ├── _linux-benchmark-cuda.yml │ ├── _linux-test-cpu.yml │ ├── _linux-test-cuda.yml │ ├── bisection.md │ ├── build-gcp-docker.yml │ ├── build-nightly-docker.yml │ ├── clean-nightly-docker.yml │ ├── linux-test-a10g.yml │ ├── pr-test.yml │ ├── torchao.yml │ ├── userbenchmark-a100-bisection.yml │ ├── userbenchmark-a100-release.yml │ ├── userbenchmark-a100.yml │ ├── userbenchmark-ai-cluster.yml │ ├── userbenchmark-c5-24xlarge.yml │ ├── userbenchmark-ibmcloud-testrunner.yml │ ├── userbenchmark-regression-detector.yml │ ├── userbenchmark-t4-metal.yml │ ├── v2-bisection.yml │ ├── v2-nightly.yml │ ├── v3-bisection.yml │ └── v3-nightly.yml ├── .gitignore ├── .gitmodules ├── CITATION.cff ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── bisection.py ├── conftest.py ├── docker ├── build-gcp-a100-docker.sh ├── build-torchbench-nightly-docker.sh ├── gcp-a100-runner-dind.dockerfile ├── infra │ ├── README.md │ ├── daemonset.yaml │ └── values.yaml └── torchbench-nightly.dockerfile ├── gen_summary_metadata.py ├── install.py ├── pyproject.toml ├── regression_detector.py ├── requirements.txt ├── run.py ├── run_benchmark.py ├── run_e2e.py ├── scripts ├── activate_conda.sh ├── install_conda.sh ├── proper_bs.py ├── update_device_batch_size.py ├── upload_scribe.py ├── upload_scribe_v2.py └── userbenchmark │ ├── upload_s3.py │ ├── upload_s3_csv.py │ └── upload_scribe.py ├── setup.py ├── test.py ├── test_bench.py ├── test_imports.py ├── torchbenchmark ├── __init__.py ├── _components │ ├── __init__.py │ ├── _impl │ │ ├── __init__.py │ │ ├── tasks │ │ │ ├── __init__.py │ │ │ └── base.py │ │ └── workers │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── in_process_worker.py │ │ │ ├── subprocess_rpc.py │ │ │ └── subprocess_worker.py │ ├── kineto │ │ ├── __init__.py │ │ └── trace.py │ ├── model_analyzer │ │ ├── TorchBenchAnalyzer.py │ │ ├── __init__.py │ │ ├── dcgm │ │ │ ├── __init__.py │ │ │ ├── cpu_monitor.py │ │ │ ├── dcgm_agent.py │ │ │ ├── dcgm_field_helpers.py │ │ │ ├── dcgm_fields.py │ │ │ ├── dcgm_fields_internal.py │ │ │ ├── dcgm_monitor.py │ │ │ ├── dcgm_structs.py │ │ │ ├── dcgm_value.py │ │ │ ├── monitor.py │ │ │ └── nvml_monitor.py │ │ ├── readme.md │ │ ├── requirements.txt │ │ ├── sync_upstream.md │ │ ├── tb_dcgm_types │ │ │ ├── __init__.py │ │ │ ├── config.py │ │ │ ├── cpu_peak_memory.py │ │ │ ├── cpu_record.py │ │ │ ├── da_exceptions.py │ │ │ ├── gpu_device.py │ │ │ ├── gpu_device_factory.py │ │ │ ├── gpu_dram_active.py │ │ │ ├── gpu_fp32active.py │ │ │ ├── gpu_free_memory.py │ │ │ ├── gpu_pcie_rx.py │ │ │ ├── gpu_pcie_tx.py │ │ │ ├── gpu_peak_memory.py │ │ │ ├── gpu_power_usage.py │ │ │ ├── gpu_record.py │ │ │ ├── gpu_tensoractive.py │ │ │ ├── gpu_utilization.py │ │ │ ├── record.py │ │ │ ├── record_aggregator.py │ │ │ └── tb_logger.py │ │ └── test.py │ ├── ncu │ │ ├── __init__.py │ │ └── analyzer.py │ └── test │ │ ├── __init__.py │ │ ├── test_subprocess.py │ │ └── test_worker.py ├── canary_models │ ├── DALLE2_pytorch │ │ ├── __init__.py │ │ ├── dalle2_pytorch.patch │ │ ├── 
install.py │ │ ├── metadata.yaml │ │ ├── origin │ │ └── requirements.txt │ ├── __init__.py │ ├── codellama │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── diffuser_instruct_pix2pix │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── fambench_dlrm │ │ ├── __init__.py │ │ ├── args.py │ │ ├── config.py │ │ ├── data.py │ │ ├── dlrmnet.py │ │ ├── fbgemm_embedding.py │ │ ├── install.py │ │ ├── lrscheduler.py │ │ ├── metadata.yaml │ │ ├── origin │ │ ├── requirements.txt │ │ └── utils.py │ ├── fambench_xlmr │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── gat │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── gcn │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── hf_GPT2_generate │ │ ├── __init__.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── hf_MPT_7b_instruct │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── hf_Yi │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── hf_mixtral │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── lit_llama │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── lit_llama_generate │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── lit_llama_lora │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── llama_v2_13b │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── llama_v2_70b │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── llama_v2_7b │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── llama_v31_8b │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── mistral_7b_instruct │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── orca_2 │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── phi_1_5 │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── phi_2 │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── sage │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── stable_diffusion_xl │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ └── torchrec_dlrm │ │ ├── __init__.py │ │ ├── args.py │ │ ├── data │ │ └── dlrm_dataloader.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ ├── origin │ │ └── requirements.txt ├── data │ ├── README.md │ └── index.yaml ├── e2e.py ├── e2e_models │ ├── __init__.py │ ├── fambench_xlmr │ │ ├── __init__.py │ │ ├── install.py │ │ └── requirements.txt │ ├── hf_bert │ │ ├── __init__.py │ │ ├── install.py │ │ ├── requirements.txt │ │ └── trainer.py │ ├── hf_t5 │ │ ├── __init__.py │ │ ├── install.py │ │ └── requirements.txt │ └── vision_resnet50 │ │ ├── __init__.py │ │ └── resnet.py ├── models │ ├── ADDING_MODELS.md │ ├── BERT_pytorch │ │ ├── .circleci │ │ │ └── config.yml │ │ ├── .gitignore │ │ ├── LICENSE │ │ ├── Makefile │ │ ├── README.md │ │ ├── __init__.py │ │ ├── bert_pytorch │ │ │ ├── __init__.py │ │ │ ├── __main__.py │ │ │ ├── dataset │ │ │ │ ├── __init__.py │ │ │ │ ├── dataset.py │ │ │ │ └── vocab.py │ │ │ ├── model │ │ │ │ ├── __init__.py │ │ │ │ ├── attention │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── multi_head.py │ │ │ │ │ └── single.py │ │ │ │ ├── bert.py │ │ │ │ ├── embedding │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── bert.py │ │ │ │ │ ├── position.py │ │ │ │ │ ├── segment.py │ │ │ │ │ └── token.py │ │ │ │ ├── 
language_model.py │ │ │ │ ├── transformer.py │ │ │ │ └── utils │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── feed_forward.py │ │ │ │ │ ├── layer_norm.py │ │ │ │ │ ├── sublayer.py │ │ │ │ │ └── tensor2tensor.py │ │ │ └── trainer │ │ │ │ ├── __init__.py │ │ │ │ ├── optim_schedule.py │ │ │ │ └── pretrain.py │ │ ├── install.py │ │ ├── install.sh │ │ ├── metadata.yaml │ │ ├── origin │ │ ├── requirements.txt │ │ ├── run.sh │ │ ├── setup.py │ │ └── test.py │ ├── Background_Matting │ │ ├── .gitignore │ │ ├── Data_adobe │ │ │ ├── compose.py │ │ │ ├── prepare.sh │ │ │ ├── test_data_list.txt │ │ │ └── train_data_list.txt │ │ ├── README.md │ │ ├── __init__.py │ │ ├── data_loader.py │ │ ├── functions.py │ │ ├── install.py │ │ ├── install.sh │ │ ├── loss_functions.py │ │ ├── metadata.yaml │ │ ├── networks.py │ │ ├── origin │ │ ├── prepare_real.py │ │ ├── requirements.txt │ │ ├── run.sh │ │ ├── test_background-matting_image.py │ │ ├── test_pre_process.m │ │ ├── test_pre_process.py │ │ ├── test_pre_process_video.m │ │ ├── test_pre_process_video.py │ │ ├── test_segmentation_deeplab.py │ │ ├── train_adobe.py │ │ └── train_real_fixed.py │ ├── LearningToPaint │ │ ├── .gitignore │ │ ├── LICENSE │ │ ├── LearningToPaint.ipynb │ │ ├── README.md │ │ ├── __init__.py │ │ ├── baseline │ │ │ ├── DRL │ │ │ │ ├── actor.py │ │ │ │ ├── critic.py │ │ │ │ ├── ddpg.py │ │ │ │ ├── evaluator.py │ │ │ │ ├── multi.py │ │ │ │ ├── rpm.py │ │ │ │ └── wgan.py │ │ │ ├── Renderer │ │ │ │ ├── __init__.py │ │ │ │ ├── model.py │ │ │ │ └── stroke_gen.py │ │ │ ├── env.py │ │ │ ├── test.py │ │ │ ├── train.py │ │ │ ├── train_renderer.py │ │ │ └── utils │ │ │ │ ├── tensorboard.py │ │ │ │ └── util.py │ │ ├── baseline_modelfree │ │ │ ├── DRL │ │ │ │ ├── actor.py │ │ │ │ ├── critic.py │ │ │ │ ├── ddpg.py │ │ │ │ ├── evaluator.py │ │ │ │ ├── multi.py │ │ │ │ ├── rpm.py │ │ │ │ └── wgan.py │ │ │ ├── Renderer │ │ │ │ ├── __init__.py │ │ │ │ ├── model.py │ │ │ │ └── stroke_gen.py │ │ │ ├── env.py │ │ │ ├── test.py │ │ │ ├── train.py │ │ │ ├── train_renderer.py │ │ │ └── utils │ │ │ │ ├── tensorboard.py │ │ │ │ └── util.py │ │ ├── install.py │ │ ├── install.sh │ │ ├── metadata.yaml │ │ ├── origin │ │ ├── requirements.txt │ │ └── run.sh │ ├── Super_SloMo │ │ ├── .gitignore │ │ ├── LICENSE │ │ ├── README.md │ │ ├── __init__.py │ │ ├── dataloader.py │ │ ├── eval.py │ │ ├── install.py │ │ ├── install.sh │ │ ├── metadata.yaml │ │ ├── model_wrapper.py │ │ ├── origin │ │ ├── requirements.txt │ │ ├── run.sh │ │ ├── slomo_model.py │ │ ├── train.ipynb │ │ ├── train.py │ │ └── video_to_slomo.py │ ├── __init__.py │ ├── alexnet │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── basic_gnn_edgecnn │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── basic_gnn_gcn │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── basic_gnn_gin │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── basic_gnn_sage │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── cm3leon_generate │ │ ├── __init__.py │ │ ├── metadata.yaml │ │ └── model.py │ ├── dcgan │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── demucs │ │ ├── .gitignore │ │ ├── CODE_OF_CONDUCT.md │ │ ├── CONTRIBUTING.md │ │ ├── LICENSE │ │ ├── README.md │ │ ├── __init__.py │ │ ├── check.py │ │ ├── demucs │ │ │ ├── __init__.py │ │ │ ├── __main__.py │ │ │ ├── audio.py │ │ │ ├── augment.py │ │ │ ├── compressed.py │ │ │ ├── model.py │ │ │ ├── parser.py │ │ │ ├── raw.py │ │ │ ├── separate.py │ │ │ ├── tasnet.py │ │ │ ├── 
test.py │ │ │ ├── train.py │ │ │ └── utils.py │ │ ├── dora.py │ │ ├── environment-cpu.yml │ │ ├── environment-cuda.yml │ │ ├── install.py │ │ ├── metadata.yaml │ │ ├── origin │ │ ├── quantize.py │ │ ├── requirements.txt │ │ ├── result_table.py │ │ ├── run.py │ │ ├── run.sh │ │ ├── run_overall.sh │ │ ├── run_slurm.py │ │ ├── setup.cfg │ │ └── valid_table.py │ ├── densenet121 │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── detectron2_fasterrcnn_r_101_c4 │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── detectron2_fasterrcnn_r_101_dc5 │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── detectron2_fasterrcnn_r_101_fpn │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── detectron2_fasterrcnn_r_50_c4 │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── detectron2_fasterrcnn_r_50_dc5 │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── detectron2_fasterrcnn_r_50_fpn │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── detectron2_fcos_r_50_fpn │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── detectron2_maskrcnn │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── origin │ ├── detectron2_maskrcnn_r_101_c4 │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── detectron2_maskrcnn_r_101_fpn │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── detectron2_maskrcnn_r_50_c4 │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── detectron2_maskrcnn_r_50_fpn │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── dlrm │ │ ├── CODE_OF_CONDUCT.md │ │ ├── CONTRIBUTING.md │ │ ├── Dockerfile │ │ ├── LICENSE │ │ ├── README.md │ │ ├── __init__.py │ │ ├── bench │ │ │ ├── dlrm_s_benchmark.sh │ │ │ ├── dlrm_s_criteo_kaggle.sh │ │ │ ├── dlrm_s_criteo_terabyte.sh │ │ │ └── run_and_time.sh │ │ ├── cython │ │ │ ├── cython_compile.py │ │ │ └── cython_criteo.py │ │ ├── data_loader_terabyte.py │ │ ├── data_utils.py │ │ ├── dlrm_data_caffe2.py │ │ ├── dlrm_data_pytorch.py │ │ ├── dlrm_s_caffe2.py │ │ ├── dlrm_s_pytorch.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ ├── origin │ │ ├── requirements.txt │ │ ├── test │ │ │ └── dlrm_s_test.sh │ │ ├── tools │ │ │ └── visualize.py │ │ └── tricks │ │ │ ├── md_embedding_bag.py │ │ │ └── qr_embedding_bag.py │ ├── doctr_det_predictor │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── doctr_reco_predictor │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── drq │ │ ├── __init__.py │ │ ├── config.py │ │ ├── drq.py │ │ ├── drqutils.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ ├── origin │ │ ├── replay_buffer.py │ │ └── requirements.txt │ ├── fastNLP_Bert │ │ ├── __init__.py │ │ ├── bert_config.json │ │ ├── cmrc2018_simulator.py │ │ ├── fastnlp.patch │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── functorch_dp_cifar10 │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── functorch_maml_omniglot │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── hf_Albert │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── hf_Bart │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── hf_Bert │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── hf_Bert_large │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ 
└── requirements.txt │ ├── hf_BigBird │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── hf_DistilBert │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── hf_GPT2 │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── hf_GPT2_large │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── hf_Longformer │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── hf_Reformer │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── hf_Roberta_base │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── hf_T5 │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── hf_T5_base │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── hf_T5_generate │ │ ├── __init__.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── hf_T5_large │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── hf_Whisper │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── hf_clip │ │ ├── __init__.py │ │ └── metadata.yaml │ ├── hf_distil_whisper │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── lennard_jones │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── llama │ │ ├── __init__.py │ │ ├── generation.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ ├── model.py │ │ ├── origin │ │ ├── requirements.txt │ │ └── tokenizer.py │ ├── llama_v2_7b_16h │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── llava │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── maml │ │ ├── LICENSE │ │ ├── README.md │ │ ├── __init__.py │ │ ├── install.py │ │ ├── learner.py │ │ ├── meta.py │ │ ├── metadata.yaml │ │ └── origin │ ├── maml_omniglot │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── microbench_unbacked_tolist_sum │ │ ├── __init__.py │ │ └── metadata.yaml │ ├── mnasnet1_0 │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── mobilenet_v2 │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── mobilenet_v2_quantized_qat │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── mobilenet_v3_large │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── moco │ │ ├── LICENSE │ │ ├── README.md │ │ ├── __init__.py │ │ ├── detection │ │ │ ├── README.md │ │ │ ├── configs │ │ │ │ ├── Base-RCNN-C4-BN.yaml │ │ │ │ ├── coco_R_50_C4_2x.yaml │ │ │ │ ├── coco_R_50_C4_2x_moco.yaml │ │ │ │ ├── pascal_voc_R_50_C4_24k.yaml │ │ │ │ └── pascal_voc_R_50_C4_24k_moco.yaml │ │ │ ├── convert-pretrain-to-detectron2.py │ │ │ └── train_net.py │ │ ├── install.py │ │ ├── main_lincls.py │ │ ├── main_moco.py │ │ ├── metadata.yaml │ │ ├── moco │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ └── loader.py │ │ ├── origin │ │ ├── requirements.txt │ │ └── run.sh │ ├── moondream │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── nanogpt │ │ ├── __init__.py │ │ ├── metadata.yaml │ │ ├── model.py │ │ └── origin │ ├── nvidia_deeprecommender │ │ ├── LICENSE │ │ ├── README.md │ │ ├── __init__.py │ │ ├── compute_RMSE.py │ │ ├── data_utils │ │ │ ├── movie_lense_data_converter.py │ │ │ └── netflix_data_convert.py │ │ ├── install.py │ │ ├── metadata.yaml 
│ │ ├── nvinfer.py │ │ ├── nvtrain.py │ │ ├── origin │ │ ├── reco_encoder │ │ │ ├── __init__.py │ │ │ ├── data │ │ │ │ ├── __init__.py │ │ │ │ └── input_layer.py │ │ │ └── model │ │ │ │ ├── __init__.py │ │ │ │ └── model.py │ │ └── requirements.txt │ ├── opacus_cifar10 │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── phlippe_densenet │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── phlippe_resnet │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── pyhpc_equation_of_state │ │ ├── __init__.py │ │ ├── eos_pytorch.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── origin │ ├── pyhpc_isoneutral_mixing │ │ ├── __init__.py │ │ ├── install.py │ │ ├── isoneutral_pytorch.py │ │ ├── metadata.yaml │ │ └── origin │ ├── pyhpc_turbulent_kinetic_energy │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ ├── origin │ │ └── tke_pytorch.py │ ├── pytorch_CycleGAN_and_pix2pix │ │ ├── .gitignore │ │ ├── CycleGAN.ipynb │ │ ├── LICENSE │ │ ├── README.md │ │ ├── __init__.py │ │ ├── data │ │ │ ├── __init__.py │ │ │ ├── aligned_dataset.py │ │ │ ├── base_dataset.py │ │ │ ├── colorization_dataset.py │ │ │ ├── image_folder.py │ │ │ ├── single_dataset.py │ │ │ ├── template_dataset.py │ │ │ └── unaligned_dataset.py │ │ ├── docs │ │ │ ├── Dockerfile │ │ │ ├── README_es.md │ │ │ ├── datasets.md │ │ │ ├── docker.md │ │ │ ├── overview.md │ │ │ ├── qa.md │ │ │ └── tips.md │ │ ├── environment.yml │ │ ├── example_input.pt │ │ ├── install.py │ │ ├── metadata.yaml │ │ ├── models │ │ │ ├── __init__.py │ │ │ ├── base_model.py │ │ │ ├── colorization_model.py │ │ │ ├── cycle_gan_model.py │ │ │ ├── networks.py │ │ │ ├── pix2pix_model.py │ │ │ ├── template_model.py │ │ │ └── test_model.py │ │ ├── options │ │ │ ├── __init__.py │ │ │ ├── base_options.py │ │ │ ├── test_options.py │ │ │ └── train_options.py │ │ ├── origin │ │ ├── pix2pix.ipynb │ │ ├── requirements.txt │ │ ├── run.sh │ │ ├── scripts │ │ │ ├── conda_deps.sh │ │ │ ├── download_cyclegan_model.sh │ │ │ ├── download_pix2pix_model.sh │ │ │ ├── edges │ │ │ │ ├── PostprocessHED.m │ │ │ │ └── batch_hed.py │ │ │ ├── eval_cityscapes │ │ │ │ ├── caffemodel │ │ │ │ │ └── deploy.prototxt │ │ │ │ ├── cityscapes.py │ │ │ │ ├── download_fcn8s.sh │ │ │ │ ├── evaluate.py │ │ │ │ └── util.py │ │ │ ├── install_deps.sh │ │ │ ├── test_before_push.py │ │ │ ├── test_colorization.sh │ │ │ ├── test_cyclegan.sh │ │ │ ├── test_pix2pix.sh │ │ │ ├── test_single.sh │ │ │ ├── train_colorization.sh │ │ │ ├── train_cyclegan.sh │ │ │ └── train_pix2pix.sh │ │ ├── test_cyclegan.py │ │ ├── train_cyclegan.py │ │ └── util │ │ │ ├── __init__.py │ │ │ ├── get_data.py │ │ │ ├── html.py │ │ │ ├── image_pool.py │ │ │ ├── util.py │ │ │ └── visualizer.py │ ├── pytorch_stargan │ │ ├── LICENSE │ │ ├── README.md │ │ ├── __init__.py │ │ ├── data_loader.py │ │ ├── download.sh │ │ ├── install.py │ │ ├── logger.py │ │ ├── main.py │ │ ├── metadata.yaml │ │ ├── model.py │ │ ├── requirements.txt │ │ ├── run.sh │ │ └── solver.py │ ├── pytorch_unet │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ ├── origin │ │ └── pytorch_unet │ │ │ ├── Dockerfile │ │ │ ├── LICENSE │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── evaluate.py │ │ │ ├── hubconf.py │ │ │ ├── predict.py │ │ │ ├── requirements.txt │ │ │ ├── scripts │ │ │ └── download_data.sh │ │ │ ├── train.py │ │ │ ├── unet │ │ │ ├── __init__.py │ │ │ ├── unet_model.py │ │ │ └── unet_parts.py │ │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── data_loading.py │ │ │ ├── dice_score.py 
│ │ │ └── utils.py │ ├── resnet152 │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── resnet18 │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── resnet50 │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── resnet50_quantized_qat │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── resnext50_32x4d │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── sam │ │ ├── __init__.py │ │ ├── build_sam.py │ │ ├── common.py │ │ ├── image_encoder.py │ │ ├── install.py │ │ ├── mask_decoder.py │ │ ├── metadata.yaml │ │ ├── origin │ │ ├── predictor.py │ │ ├── prompt_encoder.py │ │ ├── requirements.txt │ │ ├── sam.py │ │ ├── transformer.py │ │ └── transforms.py │ ├── sam_fast │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── shufflenet_v2_x1_0 │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── simple_gpt │ │ ├── __init__.py │ │ ├── metadata.yaml │ │ ├── model.py │ │ └── origin │ ├── simple_gpt_tp_manual │ │ ├── __init__.py │ │ ├── metadata.yaml │ │ ├── model.py │ │ ├── origin │ │ └── tp.py │ ├── soft_actor_critic │ │ ├── __init__.py │ │ ├── config.py │ │ ├── envs.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ ├── nets.py │ │ ├── replay.py │ │ ├── requirements.txt │ │ ├── sac.py │ │ └── sac_utils.py │ ├── speech_transformer │ │ ├── __init__.py │ │ ├── config.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ ├── origin │ │ ├── requirements.txt │ │ └── speech_transformer │ │ │ ├── data │ │ │ ├── __init__.py │ │ │ └── data.py │ │ │ ├── transformer │ │ │ ├── __init__.py │ │ │ ├── attention.py │ │ │ ├── decoder.py │ │ │ ├── encoder.py │ │ │ ├── loss.py │ │ │ ├── module.py │ │ │ ├── optimizer.py │ │ │ └── transformer.py │ │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── data2json.sh │ │ │ ├── dump.sh │ │ │ ├── filt.py │ │ │ ├── json2trn.py │ │ │ ├── mergejson.py │ │ │ ├── scp2json.py │ │ │ └── utils.py │ ├── squeezenet1_1 │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── stable_diffusion_text_encoder │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── stable_diffusion_unet │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── tacotron2 │ │ ├── .gitmodules │ │ ├── Dockerfile │ │ ├── LICENSE │ │ ├── README.md │ │ ├── __init__.py │ │ ├── audio_processing.py │ │ ├── data_utils.py │ │ ├── distributed.py │ │ ├── hparams.py │ │ ├── inference.ipynb │ │ ├── install.py │ │ ├── install.sh │ │ ├── layers.py │ │ ├── logger.py │ │ ├── loss_function.py │ │ ├── loss_scaler.py │ │ ├── metadata.yaml │ │ ├── model.py │ │ ├── multiproc.py │ │ ├── origin │ │ ├── plotting_utils.py │ │ ├── requirements.txt │ │ ├── stft.py │ │ ├── tacotron2_utils.py │ │ ├── text │ │ │ ├── LICENSE │ │ │ ├── __init__.py │ │ │ ├── cleaners.py │ │ │ ├── cmudict.py │ │ │ ├── numbers.py │ │ │ └── symbols.py │ │ ├── train_tacotron2.py │ │ └── waveglow │ │ │ ├── .gitmodules │ │ │ ├── LICENSE │ │ │ ├── README.md │ │ │ ├── config.json │ │ │ ├── convert_model.py │ │ │ ├── denoiser.py │ │ │ ├── distributed.py │ │ │ ├── glow.py │ │ │ ├── glow_old.py │ │ │ ├── inference.py │ │ │ ├── mel2samp.py │ │ │ ├── tacotron2 │ │ │ ├── Dockerfile │ │ │ ├── LICENSE │ │ │ ├── README.md │ │ │ ├── audio_processing.py │ │ │ ├── data_utils.py │ │ │ ├── distributed.py │ │ │ ├── filelists │ │ │ │ ├── ljs_audio_text_test_filelist.txt │ │ │ │ └── ljs_audio_text_val_filelist.txt │ │ │ ├── fp16_optimizer.py │ │ │ ├── hparams.py │ │ │ ├── layers.py │ │ │ ├── logger.py │ │ │ ├── loss_function.py │ │ │ ├── loss_scaler.py │ 
│ │ ├── model.py │ │ │ ├── multiproc.py │ │ │ ├── plotting_utils.py │ │ │ ├── stft.py │ │ │ ├── text │ │ │ │ ├── LICENSE │ │ │ │ ├── __init__.py │ │ │ │ ├── cleaners.py │ │ │ │ ├── cmudict.py │ │ │ │ ├── numbers.py │ │ │ │ └── symbols.py │ │ │ ├── train.py │ │ │ └── utils.py │ │ │ └── train.py │ ├── timm_efficientdet │ │ ├── __init__.py │ │ ├── args.py │ │ ├── effdet.patch │ │ ├── install.py │ │ ├── loader.py │ │ ├── metadata.yaml │ │ ├── pycocotools.patch │ │ ├── requirements.txt │ │ └── train.py │ ├── timm_efficientnet │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── timm_nfnet │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── timm_regnet │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── timm_resnest │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── timm_vision_transformer │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── timm_vision_transformer_large │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── timm_vovnet │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── torch_multimodal_clip │ │ ├── __init__.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ └── requirements.txt │ ├── tts_angular │ │ ├── __init__.py │ │ ├── angular_tts_main.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ ├── model.py │ │ └── requirements.txt │ ├── vgg16 │ │ ├── __init__.py │ │ ├── install.py │ │ └── metadata.yaml │ ├── vision_maskrcnn │ │ ├── __init__.py │ │ ├── coco_utils.py │ │ ├── install.py │ │ ├── metadata.yaml │ │ ├── origin │ │ └── requirements.txt │ └── yolov3 │ │ ├── .dockerignore │ │ ├── Dockerfile │ │ ├── LICENSE │ │ ├── README.md │ │ ├── __init__.py │ │ ├── cfg │ │ ├── cd53s-yolov3.cfg │ │ ├── cd53s.cfg │ │ ├── csresnext50-panet-spp.cfg │ │ ├── yolov3-1cls.cfg │ │ ├── yolov3-asff.cfg │ │ ├── yolov3-spp-1cls.cfg │ │ ├── yolov3-spp-3cls.cfg │ │ ├── yolov3-spp-matrix.cfg │ │ ├── yolov3-spp-pan-scale.cfg │ │ ├── yolov3-spp.cfg │ │ ├── yolov3-spp3.cfg │ │ ├── yolov3-tiny-1cls.cfg │ │ ├── yolov3-tiny-3cls.cfg │ │ ├── yolov3-tiny.cfg │ │ ├── yolov3-tiny3-1cls.cfg │ │ ├── yolov3-tiny3.cfg │ │ ├── yolov3.cfg │ │ ├── yolov4-relu.cfg │ │ ├── yolov4-tiny.cfg │ │ └── yolov4.cfg │ │ ├── check.py │ │ ├── detect.py │ │ ├── install.py │ │ ├── install.sh │ │ ├── metadata.yaml │ │ ├── requirements.txt │ │ ├── run.sh │ │ ├── test.py │ │ ├── train_batch0.jpg │ │ ├── weights │ │ └── download_yolov3_weights.sh │ │ ├── yolo_models.py │ │ ├── yolo_train.py │ │ └── yolo_utils │ │ ├── __init__.py │ │ ├── adabound.py │ │ ├── datasets.py │ │ ├── evolve.sh │ │ ├── gcp.sh │ │ ├── google_utils.py │ │ ├── layers.py │ │ ├── parse_config.py │ │ ├── torch_utils.py │ │ └── utils.py ├── tasks.py └── util │ ├── __init__.py │ ├── backends │ ├── __init__.py │ ├── ait.py │ ├── cudagraph.py │ ├── jit.py │ ├── torchdynamo.py │ └── trt.py │ ├── classify_graphs.py │ ├── distributed │ ├── README.md │ ├── core_model │ │ ├── apply_trainer.py │ │ └── trainer.py │ ├── requirements.txt │ ├── submit.py │ └── trainer.py │ ├── distribution.py │ ├── e2emodel.py │ ├── env_check.py │ ├── experiment │ ├── instantiator.py │ └── metrics.py │ ├── extra_args.py │ ├── framework │ ├── detectron2 │ │ ├── __init__.py │ │ ├── config.py │ │ ├── model_factory.py │ │ └── requirements.txt │ ├── diffusers │ │ ├── __init__.py │ │ ├── model_factory.py │ │ └── requirements.txt │ ├── gnn │ │ ├── __init__.py │ │ ├── args.py │ │ ├── config.py │ │ ├── model_factory.py │ │ └── requirements.txt │ ├── huggingface │ │ ├── args.py │ │ ├── basic_configs.py │ │ ├── 
extended_configs.py │ │ ├── list_extended_configs.py │ │ ├── model_factory.py │ │ └── patch_hf.py │ ├── lit_llama.py │ ├── timm │ │ ├── args.py │ │ ├── extended_configs.py │ │ ├── instantiate.py │ │ ├── loader.py │ │ ├── model_factory.py │ │ ├── timm_config.py │ │ └── train.py │ ├── transformers │ │ ├── text_classification │ │ │ ├── args.py │ │ │ └── dataset.py │ │ └── translation │ │ │ ├── args.py │ │ │ └── dataset.py │ └── vision │ │ ├── args.py │ │ └── model_factory.py │ ├── fx_int8.py │ ├── gemm_shapes.csv │ ├── hardware │ ├── __init__.py │ └── roofline.py │ ├── input.py │ ├── machine_config.py │ ├── metadata_utils.py │ └── model.py ├── userbenchmark ├── ADDING_USERBENCHMARKS.md ├── __init__.py ├── api-coverage │ ├── __init__.py │ └── run.py ├── cpu │ ├── README.md │ ├── __init__.py │ ├── cpu_test.yaml │ ├── cpu_utils.py │ ├── run.py │ └── run_config.py ├── cuda-compare │ ├── __init__.py │ ├── result_analyzer.py │ └── run.py ├── ddp_experiments │ ├── README.md │ ├── __init__.py │ ├── parse_ddp.py │ └── run.py ├── distributed │ ├── README.md │ ├── __init__.py │ ├── ci.yaml │ ├── install.py │ ├── run.py │ └── run_ci.sh ├── dynamo │ ├── __init__.py │ ├── dynamobench │ │ ├── _dynamo │ │ │ ├── testing.py │ │ │ └── utils.py │ │ ├── common.py │ │ ├── huggingface.py │ │ ├── huggingface.yaml │ │ ├── huggingface_models_list.txt │ │ ├── huggingface_models_list_cpu.txt │ │ ├── timm_models.py │ │ ├── timm_models_list.txt │ │ ├── timm_models_list_cpu.txt │ │ ├── torchao_backend.py │ │ ├── torchbench.py │ │ ├── torchbench.yaml │ │ ├── torchbench_models_list.txt │ │ └── torchbench_models_list_cpu.txt │ └── run.py ├── functorch │ ├── __init__.py │ ├── cases.py │ ├── ci.yaml │ ├── run.py │ ├── simple_models.py │ ├── util.py │ └── vmap_hessian_fc.py ├── group_bench │ ├── __init__.py │ ├── configs │ │ ├── bmm.yaml │ │ └── torch_ao.yaml │ └── run.py ├── instruction-count │ ├── __init__.py │ └── run.py ├── lazy-tensor │ └── run.py ├── mast-sample │ └── main.py ├── model-stableness │ ├── __init__.py │ └── run.py ├── nvfuser │ ├── __init__.py │ ├── ir.py │ └── run.py ├── optim │ ├── __init__.py │ ├── regression_detector.py │ ├── run.py │ └── run_optim_benchmarks.py ├── release-test │ ├── __init__.py │ ├── configs │ │ ├── 1.12.1.yaml │ │ ├── 1.13.0.yaml │ │ ├── 2.0.1.yaml │ │ ├── 2.1.0.yaml │ │ ├── 2.1.1.yaml │ │ ├── 2.1.2.yaml │ │ ├── 2.5.0.yaml │ │ ├── 2.5.1.yaml │ │ └── 2.6.0.yaml │ ├── monitor_proc.sh │ ├── result_analyzer.py │ ├── run.py │ ├── run_release_test.sh │ ├── setup_env.sh │ └── version.txt ├── rocm-test │ ├── __init__.py │ └── run.py ├── test-user-invoke │ ├── __init__.py │ └── run.py ├── test_bench │ ├── __init__.py │ ├── install.py │ ├── regression_detector.py │ └── run.py ├── torch-nightly │ ├── __init__.py │ ├── regression_detector.py │ ├── run.py │ └── v3-cuda-tests.yaml ├── torch_trt │ ├── __init__.py │ ├── ci.yaml │ ├── install.py │ └── run.py ├── torchao │ ├── __init__.py │ ├── install.py │ ├── run.py │ └── upload.py └── utils.py └── utils ├── __init__.py ├── build_requirements.txt ├── build_utils.py ├── cuda_utils.py ├── github.py ├── gitutils.py ├── python_utils.py ├── s3_utils.py └── torch_nightly_utils.py /.ci/torchbench/check-ssh.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eou pipefail 3 | 4 | echo "Holding runner for 2 hours until all ssh sessions have logged out" 5 | for _ in $(seq 1440); do 6 | # Break if no ssh session exists anymore 7 | if [ "$(who)" = "" ]; then 8 | break 9 | fi 10 | echo "." 
11 | sleep 5 12 | done 13 | -------------------------------------------------------------------------------- /.ci/torchbench/install.sh: -------------------------------------------------------------------------------- 1 | . ${HOME}/miniconda3/etc/profile.d/conda.sh 2 | 3 | if [ -z "${CONDA_ENV}" ]; then 4 | echo "ERROR: CONDA_ENV is not set" 5 | exit 1 6 | fi 7 | 8 | if [[ -n "${SETUP_SCRIPT}" && -e "${SETUP_SCRIPT}" ]]; then 9 | . "${SETUP_SCRIPT}" 10 | fi 11 | 12 | . "${HOME}"/miniconda3/etc/profile.d/conda.sh 13 | 14 | conda activate "${CONDA_ENV}" 15 | 16 | parent_dir=$(dirname "$(readlink -f "$0")")/../.. 17 | cd ${parent_dir} 18 | 19 | python -c "import torch; print(torch.__version__); print(torch.version.git_version)" 20 | 21 | python install.py $@ 22 | -------------------------------------------------------------------------------- /.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | Language: ObjC 3 | DisableFormat: true 4 | SortIncludes: false 5 | ... 6 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 120 3 | ignore = E203,E305,E402,E721,E741,F401,F403,F405,F821,F841,F999,W503,W504 4 | exclude = third_party 5 | -------------------------------------------------------------------------------- /.github/scripts/bisection-config.sample.yaml: -------------------------------------------------------------------------------- 1 | # The sample bisection config that solves GH issue #51380 2 | 3 | # Start and end commits 4 | start: a87a1c1 5 | end: 0ead9d5 6 | # 10 percent regression 7 | threshold: 10 8 | # Support increase, decrease, or both 9 | # increase means performance regression, decrease means performance optimization 10 | direction: increase 11 | # Test timeout in minutes 12 | timeout: 60 13 | # Only the tests specified are executed. 
If not specified, use the tests in the TorchBench v0 config 14 | tests: 15 | - test_eval[yolov3-cpu-eager] 16 | -------------------------------------------------------------------------------- /.github/scripts/bmutils/__init__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from pathlib import Path 3 | 4 | CURRENT_DIR = Path(__file__).parent 5 | REPO_ROOT = str(CURRENT_DIR.parent.parent.parent) 6 | 7 | 8 | class add_path: 9 | def __init__(self, path): 10 | self.path = path 11 | 12 | def __enter__(self): 13 | sys.path.insert(0, self.path) 14 | 15 | def __exit__(self, exc_type, exc_value, traceback): 16 | try: 17 | sys.path.remove(self.path) 18 | except ValueError: 19 | pass 20 | -------------------------------------------------------------------------------- /.github/scripts/userbenchmark/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/.github/scripts/userbenchmark/__init__.py -------------------------------------------------------------------------------- /.github/workflows/pr-test.yml: -------------------------------------------------------------------------------- 1 | name: TorchBench PR Test 2 | on: 3 | pull_request: 4 | workflow_dispatch: 5 | push: 6 | branches: 7 | - main 8 | 9 | jobs: 10 | cpu-test: 11 | uses: ./.github/workflows/_linux-test-cpu.yml 12 | secrets: 13 | HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} 14 | cuda-test: 15 | uses: ./.github/workflows/_linux-test-cuda.yml 16 | secrets: 17 | HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} 18 | 19 | concurrency: 20 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }} 21 | cancel-in-progress: true 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .benchmarks 2 | .vscode/ 3 | .data 4 | ._* 5 | */**/__pycache__ 6 | */**/*.pkl 7 | */**/*.pt 8 | */**/*.pyc 9 | */**/*.tar.gz 10 | *.out* 11 | *.swp 12 | *.swo 13 | *~HEAD 14 | .DS_Store 15 | build/ 16 | .ipynb_checkpoints/ 17 | .idea 18 | old.json 19 | te.json 20 | logs/ 21 | scripts/scribe.py 22 | .userbenchmark/ 23 | torchbench.egg-info/ -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "submodules/FAMBench"] 2 | path = submodules/FAMBench 3 | url = https://github.com/facebookresearch/FAMBench.git 4 | [submodule "submodules/lit-llama"] 5 | path = submodules/lit-llama 6 | url = https://github.com/Lightning-AI/lit-llama.git 7 | -------------------------------------------------------------------------------- /docker/build-gcp-a100-docker.sh: -------------------------------------------------------------------------------- 1 | docker build . -f gcp-a100-runner-dind.dockerfile -t xzhao9/gcp-a100-runner-dind:latest 2 | -------------------------------------------------------------------------------- /docker/build-torchbench-nightly-docker.sh: -------------------------------------------------------------------------------- 1 | TORCHBENCH_BRANCH=${TORCHBENCH_BRANCH:-main} 2 | 3 | DOCKER_BUILDKIT=0 docker build . 
--no-cache -f torchbench-nightly.dockerfile -t ghcr.io/pytorch/torchbench:latest \ 4 | --build-arg TORCHBENCH_BRANCH=${TORCHBENCH_BRANCH} 5 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools", "wheel"] 3 | # Use legacy backend to import local packages in setup.py 4 | build-backend = "setuptools.build_meta:__legacy__" 5 | 6 | 7 | [tool.black] 8 | line-length = 88 9 | target-version = ["py38"] 10 | exclude = '''/submodules/.*''' 11 | 12 | [tool.usort] 13 | excludes = ["**/submodules/**"] 14 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate 2 | boto3 3 | bs4 4 | patch 5 | py-cpuinfo 6 | distro 7 | iopath 8 | pytest 9 | pytest-benchmark 10 | requests 11 | tabulate 12 | git+https://github.com/huggingface/pytorch-image-models.git@730b907 13 | # this version of transformers is required by linger-kernel 14 | # https://github.com/linkedin/Liger-Kernel/blob/main/pyproject.toml#L23 15 | transformers==4.44.2 16 | MonkeyType 17 | psutil 18 | pyyaml 19 | numpy 20 | opencv-python 21 | submitit 22 | pynvml>=12.0.0 23 | pandas 24 | scipy 25 | numba 26 | -------------------------------------------------------------------------------- /scripts/activate_conda.sh: -------------------------------------------------------------------------------- 1 | 2 | . ${HOME}/miniconda3/etc/profile.d/conda.sh 3 | conda activate 4 | -------------------------------------------------------------------------------- /scripts/install_conda.sh: -------------------------------------------------------------------------------- 1 | DEFAULT_PYTHON_VERSION=3.10 2 | CONDA=https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh 3 | filename=$(basename "$CONDA") 4 | wget "$CONDA" 5 | chmod +x "$filename" 6 | ./"$filename" -b -u 7 | 8 | . 
${HOME}/miniconda3/etc/profile.d/conda.sh 9 | conda activate 10 | conda install -y python=${DEFAULT_PYTHON_VERSION} 11 | pip install boto3 pyyaml -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_packages, setup 2 | 3 | setup( 4 | name="torchbench", 5 | version="0.1", 6 | description="Benchmarking library for PyTorch", 7 | author="PyTorch Team", 8 | url="https://github.com/pytorch/benchmark", 9 | packages=find_packages(include=["torchbenchmark*", "userbenchmark*"]), 10 | classifiers=[ 11 | "Intended Audience :: Developers", 12 | "Topic :: Software Development :: Build Tools", 13 | "License :: OSI Approved :: BSD 3 License", 14 | "Programming Language :: Python", 15 | ], 16 | ) 17 | -------------------------------------------------------------------------------- /test_imports.py: -------------------------------------------------------------------------------- 1 | import torchbenchmark.models 2 | 3 | model, example_inputs = torchbenchmark.models.densenet121.Model( 4 | test="eval", device="cuda", batch_size=1 5 | ).get_module() 6 | model(*example_inputs) 7 | -------------------------------------------------------------------------------- /torchbenchmark/_components/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/_components/__init__.py -------------------------------------------------------------------------------- /torchbenchmark/_components/_impl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/_components/_impl/__init__.py -------------------------------------------------------------------------------- /torchbenchmark/_components/_impl/tasks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/_components/_impl/tasks/__init__.py -------------------------------------------------------------------------------- /torchbenchmark/_components/_impl/workers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/_components/_impl/workers/__init__.py -------------------------------------------------------------------------------- /torchbenchmark/_components/kineto/__init__.py: -------------------------------------------------------------------------------- 1 | from .trace import do_bench_kineto 2 | -------------------------------------------------------------------------------- /torchbenchmark/_components/model_analyzer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/_components/model_analyzer/__init__.py -------------------------------------------------------------------------------- /torchbenchmark/_components/model_analyzer/dcgm/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/_components/model_analyzer/dcgm/__init__.py -------------------------------------------------------------------------------- /torchbenchmark/_components/model_analyzer/requirements.txt: -------------------------------------------------------------------------------- 1 | numba 2 | pynvml -------------------------------------------------------------------------------- /torchbenchmark/_components/model_analyzer/tb_dcgm_types/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/_components/model_analyzer/tb_dcgm_types/__init__.py -------------------------------------------------------------------------------- /torchbenchmark/_components/model_analyzer/tb_dcgm_types/config.py: -------------------------------------------------------------------------------- 1 | # default is 0.01 second 2 | DEFAULT_MONITORING_INTERVAL = 0.01 3 | 4 | 5 | class AnalayzerConfig: 6 | def __init__(self): 7 | self.monitoring_interval = DEFAULT_MONITORING_INTERVAL 8 | -------------------------------------------------------------------------------- /torchbenchmark/_components/model_analyzer/tb_dcgm_types/cpu_record.py: -------------------------------------------------------------------------------- 1 | from .record import Record 2 | 3 | 4 | class CPURecord(Record): 5 | """ 6 | This is a base class for any 7 | CPU based record 8 | """ 9 | 10 | def __init__(self, value, timestamp=0): 11 | """ 12 | Parameters 13 | ---------- 14 | value : float 15 | The value of the CPU metrtic 16 | timestamp : int 17 | The timestamp for the record in nanoseconds 18 | """ 19 | 20 | super().__init__(value, timestamp) 21 | self._device_uuid = 0x1 22 | 23 | def device_uuid(self): 24 | return self._device_uuid 25 | -------------------------------------------------------------------------------- /torchbenchmark/_components/model_analyzer/tb_dcgm_types/da_exceptions.py: -------------------------------------------------------------------------------- 1 | class TorchBenchAnalyzerException(Exception): 2 | """ 3 | A custom exception specific to the TorchBench Model Analyzer 4 | """ 5 | 6 | pass 7 | -------------------------------------------------------------------------------- /torchbenchmark/_components/model_analyzer/tb_dcgm_types/tb_logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | LOGGER_NAME = "TorchBenchLogger" 4 | 5 | 6 | def set_logger(logger_level=logging.WARNING): 7 | formatter = logging.Formatter( 8 | fmt="%(asctime)s - %(levelname)s - %(module)s - %(message)s" 9 | ) 10 | handler = logging.StreamHandler() 11 | handler.setFormatter(formatter) 12 | logger = logging.getLogger(LOGGER_NAME) 13 | logger.setLevel(logger_level) 14 | logger.addHandler(handler) 15 | return logger 16 | -------------------------------------------------------------------------------- /torchbenchmark/_components/test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/_components/test/__init__.py -------------------------------------------------------------------------------- /torchbenchmark/canary_models/DALLE2_pytorch/metadata.yaml: 
-------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 1 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false -------------------------------------------------------------------------------- /torchbenchmark/canary_models/DALLE2_pytorch/origin: -------------------------------------------------------------------------------- 1 | origin https://github.com/lucidrains/DALLE2-pytorch.git -------------------------------------------------------------------------------- /torchbenchmark/canary_models/DALLE2_pytorch/requirements.txt: -------------------------------------------------------------------------------- 1 | git+https://github.com/lucidrains/DALLE2-pytorch@00e07b7d61e21447d55e6d06d5c928cf8b67601d 2 | beartype==0.15.0 3 | rotary-embedding-torch==0.3.3 4 | tensorboard 5 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/codellama/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.huggingface.patch_hf import ( 4 | cache_model, 5 | patch_transformers, 6 | ) 7 | 8 | if __name__ == "__main__": 9 | model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 10 | cache_model(model_name) 11 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/codellama/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 1 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | not_implemented: 8 | - device: cpu 9 | - device: cuda 10 | test: train 11 | train_benchmark: false 12 | train_deterministic: false -------------------------------------------------------------------------------- /torchbenchmark/canary_models/diffuser_instruct_pix2pix/__init__.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.tasks import COMPUTER_VISION 2 | from torchbenchmark.util.framework.diffusers.model_factory import DiffuserModel 3 | 4 | 5 | class Model(DiffuserModel): 6 | task = COMPUTER_VISION.GENERATION 7 | DEFAULT_TRAIN_BSIZE = 4 8 | DEFAULT_EVAL_BSIZE = 1 9 | # Default eval precision on CUDA device is fp16 10 | DEFAULT_EVAL_CUDA_PRECISION = "fp16" 11 | 12 | def __init__(self, test, device, batch_size=None, extra_args=[]): 13 | super().__init__( 14 | name="timbrooks/instruct-pix2pix", 15 | test=test, 16 | device=device, 17 | batch_size=batch_size, 18 | extra_args=extra_args, 19 | ) 20 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/diffuser_instruct_pix2pix/metadata.yaml: -------------------------------------------------------------------------------- 1 | eval_benchmark: false 2 | eval_deterministic: false 3 | eval_nograd: true 4 | not_implemented: 5 | - device: cpu 6 | train_benchmark: false 7 | train_deterministic: false 8 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/fambench_dlrm/install.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import sys 4 | 5 | from torchbenchmark import REPO_PATH 6 | from utils.python_utils import pip_install_requirements 7 | 8 | 9 | def update_fambench_submodule(): 10 | "Update FAMBench submodule of the benchmark repo" 11 | update_command = [ 12 | "git", 13 | "submodule", 14 | "update", 15 | "--init", 16 | "--recursive", 17 | os.path.join("submodules", "FAMBench"), 18 | ] 19 | subprocess.check_call(update_command, cwd=REPO_PATH) 20 | 21 | 22 | if __name__ == "__main__": 23 | update_fambench_submodule() 24 | pip_install_requirements() 25 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/fambench_dlrm/metadata.yaml: -------------------------------------------------------------------------------- 1 | eval_benchmark: false 2 | eval_deterministic: true 3 | eval_nograd: true 4 | train_benchmark: false 5 | train_deterministic: true 6 | not_implemented: 7 | # CUDA test disabled because it doesn't fit on the CI machine (T4) 8 | - device: cuda 9 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/fambench_dlrm/origin: -------------------------------------------------------------------------------- 1 | https://github.com/facebookresearch/FAMBench -------------------------------------------------------------------------------- /torchbenchmark/canary_models/fambench_dlrm/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/canary_models/fambench_dlrm/requirements.txt -------------------------------------------------------------------------------- /torchbenchmark/canary_models/fambench_xlmr/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 64 4 | cpu: 5 | eval_batch_size: 4 6 | eval_benchmark: false 7 | eval_deterministic: false 8 | eval_nograd: true 9 | not_implemented: 10 | - device: cuda 11 | test: train 12 | train_benchmark: false 13 | train_deterministic: false 14 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/fambench_xlmr/requirements.txt: -------------------------------------------------------------------------------- 1 | sacrebleu 2 | bitarray 3 | cffi 4 | omegaconf 5 | hydra-core 6 | sentencepiece 7 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/gat/__init__.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.tasks import GNN 2 | from torchbenchmark.util.framework.gnn.model_factory import GNNModel 3 | 4 | 5 | class Model(GNNModel): 6 | task = GNN.CLASSIFICATION 7 | DEFAULT_TRAIN_BSIZE = 64 8 | DEFAULT_EVAL_BSIZE = 64 9 | 10 | def __init__(self, test, device, batch_size=None, extra_args=[]): 11 | super().__init__( 12 | model_name="gat", 13 | test=test, 14 | device=device, 15 | batch_size=batch_size, 16 | extra_args=extra_args, 17 | ) 18 | if device == "cuda": 19 | # TODO - Add CUDA support 20 | raise NotImplementedError("GAT doesn't support CUDA") 21 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/gat/install.py: 
-------------------------------------------------------------------------------- 1 | from utils import s3_utils 2 | from utils.python_utils import pip_install_requirements 3 | 4 | if __name__ == "__main__": 5 | s3_utils.checkout_s3_data( 6 | "INPUT_TARBALLS", "Reddit_minimal.tar.gz", decompress=True 7 | ) 8 | pip_install_requirements( 9 | extra_args=["-f", "https://data.pyg.org/whl/torch-2.1.0+cpu.html"] 10 | ) 11 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/gat/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | cpu: 3 | eval_batch_size: 64 4 | eval_benchmark: true 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: true 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/gat/requirements.txt: -------------------------------------------------------------------------------- 1 | pyg_lib 2 | torch_scatter 3 | torch_sparse 4 | pyg-nightly 5 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/gcn/__init__.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.tasks import GNN 2 | from torchbenchmark.util.framework.gnn.model_factory import GNNModel 3 | 4 | 5 | class Model(GNNModel): 6 | task = GNN.CLASSIFICATION 7 | DEFAULT_TRAIN_BSIZE = 64 8 | DEFAULT_EVAL_BSIZE = 64 9 | 10 | def __init__(self, test, device, batch_size=None, extra_args=[]): 11 | super().__init__( 12 | model_name="gcn", 13 | test=test, 14 | device=device, 15 | batch_size=batch_size, 16 | extra_args=extra_args, 17 | ) 18 | if device == "cuda": 19 | # TODO - Add CUDA support 20 | raise NotImplementedError("GCN doesn't support CUDA") 21 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/gcn/install.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import sys 3 | 4 | from utils import s3_utils 5 | 6 | 7 | def pip_install_requirements(): 8 | subprocess.check_call( 9 | [ 10 | sys.executable, 11 | "-m", 12 | "pip", 13 | "install", 14 | "-q", 15 | "-r", 16 | "requirements.txt", 17 | "-f", 18 | "https://data.pyg.org/whl/torch-2.1.0+cpu.html", 19 | ] 20 | ) 21 | 22 | 23 | if __name__ == "__main__": 24 | s3_utils.checkout_s3_data( 25 | "INPUT_TARBALLS", "Reddit_minimal.tar.gz", decompress=True 26 | ) 27 | pip_install_requirements() 28 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/gcn/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | cpu: 3 | eval_batch_size: 64 4 | eval_benchmark: true 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: true 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/gcn/requirements.txt: -------------------------------------------------------------------------------- 1 | pyg_lib 2 | torch_scatter 3 | torch_sparse 4 | pyg-nightly 5 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/hf_GPT2_generate/__init__.py: -------------------------------------------------------------------------------- 1 | from 
torchbenchmark.util.framework.huggingface.model_factory import ( 2 | HuggingFaceGenerationModel, 3 | ) 4 | 5 | 6 | class Model(HuggingFaceGenerationModel): 7 | def __init__(self, test, device, batch_size=None, extra_args=[]): 8 | super().__init__( 9 | name="hf_GPT2_generate", 10 | test=test, 11 | device=device, 12 | batch_size=batch_size, 13 | extra_args=extra_args, 14 | ) 15 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/hf_GPT2_generate/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 1 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/hf_GPT2_generate/requirements.txt: -------------------------------------------------------------------------------- 1 | sentencepiece 2 | datasets 3 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/hf_MPT_7b_instruct/__init__.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.tasks import NLP 2 | from torchbenchmark.util.framework.huggingface.model_factory import HuggingFaceModel 3 | 4 | 5 | class Model(HuggingFaceModel): 6 | task = NLP.LANGUAGE_MODELING 7 | # https://huggingface.co/mosaicml/mpt-7b 8 | DEFAULT_TRAIN_BSIZE = 4 9 | DEFAULT_EVAL_BSIZE = 1 10 | 11 | def __init__(self, test, device, batch_size=None, extra_args=[]): 12 | super().__init__( 13 | name="hf_MPT_7b_instruct", 14 | test=test, 15 | device=device, 16 | batch_size=batch_size, 17 | extra_args=extra_args, 18 | ) 19 | 20 | def eval(self): 21 | super().eval() 22 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/hf_MPT_7b_instruct/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.huggingface.patch_hf import ( 4 | cache_model, 5 | patch_transformers, 6 | ) 7 | from utils.python_utils import pip_install_requirements 8 | 9 | if __name__ == "__main__": 10 | pip_install_requirements() 11 | patch_transformers() 12 | model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 13 | cache_model(model_name) 14 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/hf_MPT_7b_instruct/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 1 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false -------------------------------------------------------------------------------- /torchbenchmark/canary_models/hf_MPT_7b_instruct/requirements.txt: -------------------------------------------------------------------------------- 1 | einops 2 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/hf_Yi/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import sys 4 | 5 | from torchbenchmark.util.framework.huggingface.patch_hf import ( 6 | cache_model, 7 | patch_transformers, 8 | ) 
9 | from utils.python_utils import pip_install_requirements 10 | 11 | if __name__ == "__main__": 12 | pip_install_requirements() 13 | patch_transformers() 14 | model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 15 | cache_model(model_name) 16 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/hf_Yi/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 1 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | not_implemented: 10 | - device: NVIDIA A10G 11 | # - device: cpu -------------------------------------------------------------------------------- /torchbenchmark/canary_models/hf_Yi/requirements.txt: -------------------------------------------------------------------------------- 1 | numba -------------------------------------------------------------------------------- /torchbenchmark/canary_models/hf_mixtral/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.huggingface.patch_hf import ( 4 | cache_model, 5 | patch_transformers, 6 | ) 7 | from utils.python_utils import pip_install_requirements 8 | 9 | 10 | if __name__ == "__main__": 11 | pip_install_requirements() 12 | patch_transformers() 13 | model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 14 | cache_model(model_name) 15 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/hf_mixtral/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 1 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | not_implemented: 10 | - device: NVIDIA A10G 11 | # - device: cpu -------------------------------------------------------------------------------- /torchbenchmark/canary_models/hf_mixtral/requirements.txt: -------------------------------------------------------------------------------- 1 | bitsandbytes 2 | transformers>=4.36.2 3 | numba -------------------------------------------------------------------------------- /torchbenchmark/canary_models/lit_llama/install.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.util.framework.lit_llama import install_lit_llama 2 | 3 | if __name__ == "__main__": 4 | install_lit_llama() 5 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/lit_llama/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 32 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | not_implemented: 10 | - test: eval 11 | - test: example 12 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/lit_llama_generate/install.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.util.framework.lit_llama import install_lit_llama 2 | 3 | if __name__ == "__main__": 4 | install_lit_llama() 5 | 
-------------------------------------------------------------------------------- /torchbenchmark/canary_models/lit_llama_generate/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 32 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | not_implemented: 10 | - test: eval 11 | - test: example 12 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/lit_llama_lora/install.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.util.framework.lit_llama import install_lit_llama 2 | 3 | if __name__ == "__main__": 4 | install_lit_llama() 5 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/lit_llama_lora/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 32 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | not_implemented: 10 | - test: train 11 | - test: example 12 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/llama_v2_13b/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.huggingface.patch_hf import cache_model 4 | 5 | if __name__ == "__main__": 6 | model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 7 | cache_model(model_name) 8 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/llama_v2_13b/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 1 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | not_implemented: 8 | - device: cpu 9 | - device: cuda 10 | test: train 11 | train_benchmark: false 12 | train_deterministic: false -------------------------------------------------------------------------------- /torchbenchmark/canary_models/llama_v2_70b/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.huggingface.patch_hf import cache_model 4 | 5 | if __name__ == "__main__": 6 | model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 7 | cache_model(model_name) 8 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/llama_v2_70b/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 1 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | not_implemented: 8 | - device: cpu 9 | - device: cuda 10 | test: train 11 | train_benchmark: false 12 | train_deterministic: false -------------------------------------------------------------------------------- /torchbenchmark/canary_models/llama_v2_7b/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.huggingface.patch_hf import 
cache_model 4 | 5 | if __name__ == "__main__": 6 | model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 7 | cache_model(model_name) 8 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/llama_v2_7b/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 1 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | not_implemented: 8 | - device: cpu 9 | - device: cuda 10 | test: train 11 | train_benchmark: false 12 | train_deterministic: false -------------------------------------------------------------------------------- /torchbenchmark/canary_models/llama_v31_8b/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.huggingface.patch_hf import cache_model 4 | 5 | if __name__ == "__main__": 6 | model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 7 | cache_model(model_name) 8 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/llama_v31_8b/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 1 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | not_implemented: 8 | - device: cpu 9 | - device: cuda 10 | test: train 11 | train_benchmark: false 12 | train_deterministic: false -------------------------------------------------------------------------------- /torchbenchmark/canary_models/mistral_7b_instruct/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.huggingface.patch_hf import ( 4 | cache_model, 5 | patch_transformers, 6 | ) 7 | 8 | if __name__ == "__main__": 9 | patch_transformers() 10 | model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 11 | cache_model(model_name) 12 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/mistral_7b_instruct/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | - device: NVIDIA A10G 3 | - device: NVIDIA A100-SXM4-40GB 4 | eval_batch_size: 1 5 | eval_benchmark: false 6 | eval_deterministic: false 7 | eval_nograd: true 8 | train_benchmark: false 9 | train_deterministic: false 10 | not_implemented: 11 | - device: cpu -------------------------------------------------------------------------------- /torchbenchmark/canary_models/orca_2/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.huggingface.patch_hf import ( 4 | cache_model, 5 | patch_transformers, 6 | ) 7 | 8 | if __name__ == "__main__": 9 | patch_transformers() 10 | model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 11 | cache_model(model_name) 12 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/orca_2/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | - device: NVIDIA A10G 3 | - device: NVIDIA A100-SXM4-40GB 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 
| train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/phi_1_5/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import sys 4 | 5 | from torchbenchmark.util.framework.huggingface.patch_hf import ( 6 | cache_model, 7 | patch_transformers, 8 | ) 9 | from utils.python_utils import pip_install_requirements 10 | 11 | if __name__ == "__main__": 12 | pip_install_requirements() 13 | patch_transformers() 14 | model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 15 | cache_model(model_name) 16 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/phi_1_5/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 1 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | not_implemented: 10 | - device: NVIDIA A10G 11 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/phi_1_5/requirements.txt: -------------------------------------------------------------------------------- 1 | einops 2 | flash_attn 3 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/phi_2/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.huggingface.patch_hf import ( 4 | cache_model, 5 | patch_transformers, 6 | ) 7 | from utils.python_utils import pip_install_requirements 8 | 9 | if __name__ == "__main__": 10 | pip_install_requirements() 11 | patch_transformers() 12 | model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 13 | cache_model(model_name) 14 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/phi_2/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 1 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | not_implemented: 10 | - device: NVIDIA A10G 11 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/phi_2/requirements.txt: -------------------------------------------------------------------------------- 1 | einops 2 | flash_attn 3 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/sage/__init__.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.tasks import GNN 2 | from torchbenchmark.util.framework.gnn.model_factory import GNNModel 3 | 4 | 5 | class Model(GNNModel): 6 | task = GNN.CLASSIFICATION 7 | DEFAULT_TRAIN_BSIZE = 64 8 | DEFAULT_EVAL_BSIZE = 64 9 | 10 | def __init__(self, test, device, batch_size=None, extra_args=[]): 11 | super().__init__( 12 | model_name="sage", 13 | test=test, 14 | device=device, 15 | batch_size=batch_size, 16 | extra_args=extra_args, 17 | ) 18 | if device == "cuda": 19 | # TODO - Add CUDA support 20 | raise NotImplementedError("Sage doesn't support CUDA") 21 | 
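The CUDA guard at the end of the Model constructor above means a cuda request ends in NotImplementedError. A minimal sketch of that behaviour (not a file from the repository), assuming the sage canary's install.py has already been run so the PyTorch Geometric dependencies and the Reddit_minimal data are in place:

from torchbenchmark.canary_models.sage import Model

# Supported path: the metadata below lists a cpu eval batch size of 64.
cpu_model = Model(test="eval", device="cpu")

# A cuda request reaches the guard shown above and raises.
try:
    Model(test="eval", device="cuda")
except NotImplementedError as err:
    print(err)  # Sage doesn't support CUDA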
-------------------------------------------------------------------------------- /torchbenchmark/canary_models/sage/install.py: -------------------------------------------------------------------------------- 1 | from utils import s3_utils 2 | from utils.python_utils import pip_install_requirements 3 | 4 | if __name__ == "__main__": 5 | s3_utils.checkout_s3_data( 6 | "INPUT_TARBALLS", "Reddit_minimal.tar.gz", decompress=True 7 | ) 8 | pip_install_requirements() 9 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/sage/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | cpu: 3 | eval_batch_size: 64 4 | eval_benchmark: true 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: true 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/sage/requirements.txt: -------------------------------------------------------------------------------- 1 | pyg_lib 2 | torch_scatter 3 | torch_sparse 4 | pyg-nightly 5 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/stable_diffusion_xl/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | import warnings 3 | 4 | import torch 5 | from torchbenchmark.util.framework.diffusers import install_diffusers 6 | 7 | MODEL_NAME = "stabilityai/stable-diffusion-2" 8 | 9 | 10 | def load_model_checkpoint(): 11 | from diffusers import StableDiffusionPipeline 12 | 13 | StableDiffusionPipeline.from_pretrained( 14 | MODEL_NAME, torch_dtype=torch.float16, safety_checker=None 15 | ) 16 | 17 | 18 | if __name__ == "__main__": 19 | if "HUGGING_FACE_HUB_TOKEN" not in os.environ: 20 | warnings.warn( 21 | "Make sure to set `HUGGING_FACE_HUB_TOKEN` so you can download weights" 22 | ) 23 | else: 24 | install_diffusers() 25 | load_model_checkpoint() 26 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/stable_diffusion_xl/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 32 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | not_implemented: 10 | - device: cpu 11 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/torchrec_dlrm/install.py: -------------------------------------------------------------------------------- 1 | from utils.python_utils import pip_install_requirements 2 | 3 | if __name__ == "__main__": 4 | pip_install_requirements() 5 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/torchrec_dlrm/metadata.yaml: -------------------------------------------------------------------------------- 1 | eval_benchmark: false 2 | eval_deterministic: false 3 | eval_nograd: true 4 | train_benchmark: false 5 | train_deterministic: false 6 | skip_cuda_memory_leak: true 7 | -------------------------------------------------------------------------------- /torchbenchmark/canary_models/torchrec_dlrm/origin: -------------------------------------------------------------------------------- 1 | https://github.com/facebookresearch/dlrm
-------------------------------------------------------------------------------- /torchbenchmark/canary_models/torchrec_dlrm/requirements.txt: -------------------------------------------------------------------------------- 1 | torchrec-nightly 2 | fbgemm-gpu-nightly 3 | pyre-extensions 4 | -------------------------------------------------------------------------------- /torchbenchmark/data/index.yaml: -------------------------------------------------------------------------------- 1 | INPUT_TARBALLS: 2 | # index file for S3 storage of the input data 3 | - Background_Matting_inputs.tar.gz 4 | - coco128.tar.gz 5 | - multi30k.tar.gz 6 | - tacotron2-minimal.tar.gz 7 | - coco2017-minimal.tar.gz 8 | - pytorch_stargan_inputs.tar.gz 9 | - LearningToPaint_inputs.tar.gz 10 | - pytorch_CycleGAN_and_pix2pix_inputs.tar.gz 11 | - Super_SloMo_inputs.tar.gz 12 | - speech_transformer_inputs.tar.gz 13 | - Reddit_minimal.tar.gz 14 | - sam_inputs.tar.gz 15 | MODEL_PKLS: 16 | - drq/obs.pkl 17 | - maml_omniglot/batch.pt 18 | -------------------------------------------------------------------------------- /torchbenchmark/e2e_models/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /torchbenchmark/e2e_models/fambench_xlmr/install.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import sys 3 | 4 | 5 | def pip_install_requirements(): 6 | subprocess.check_call( 7 | [sys.executable, "-m", "pip", "install", "-q", "-r", "requirements.txt"] 8 | ) 9 | 10 | 11 | if __name__ == "__main__": 12 | pip_install_requirements() 13 | -------------------------------------------------------------------------------- /torchbenchmark/e2e_models/fambench_xlmr/requirements.txt: -------------------------------------------------------------------------------- 1 | bitarray 2 | sacrebleu>=1.4.12 3 | omegaconf 4 | hydra-core 5 | -------------------------------------------------------------------------------- /torchbenchmark/e2e_models/hf_bert/install.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import sys 3 | 4 | 5 | def pip_install_requirements(): 6 | subprocess.check_call( 7 | [sys.executable, "-m", "pip", "install", "-q", "-r", "requirements.txt"] 8 | ) 9 | 10 | 11 | if __name__ == "__main__": 12 | pip_install_requirements() 13 | -------------------------------------------------------------------------------- /torchbenchmark/e2e_models/hf_bert/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate 2 | datasets >= 1.8.0 3 | sentencepiece != 0.1.92 4 | scipy 5 | scikit-learn 6 | protobuf 7 | torch 8 | evaluate 9 | sacrebleu -------------------------------------------------------------------------------- /torchbenchmark/e2e_models/hf_t5/install.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import sys 3 | 4 | 5 | def pip_install_requirements(): 6 | subprocess.check_call( 7 | [sys.executable, "-m", "pip", "install", "-q", "-r", "requirements.txt"] 8 | ) 9 | 10 | 11 | if __name__ == "__main__": 12 | pip_install_requirements() 13 | -------------------------------------------------------------------------------- /torchbenchmark/e2e_models/hf_t5/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate 2 | datasets 
>= 1.8.0 3 | torch 4 | evaluate 5 | transformers 6 | numpy -------------------------------------------------------------------------------- /torchbenchmark/models/BERT_pytorch/Makefile: -------------------------------------------------------------------------------- 1 | package: 2 | python setup.py sdist 3 | python setup.py bdist_wheel 4 | -------------------------------------------------------------------------------- /torchbenchmark/models/BERT_pytorch/bert_pytorch/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | from .dataset import BERTDataset 2 | from .vocab import WordVocab 3 | -------------------------------------------------------------------------------- /torchbenchmark/models/BERT_pytorch/bert_pytorch/model/__init__.py: -------------------------------------------------------------------------------- 1 | from .bert import BERT 2 | from .language_model import BERTLM 3 | -------------------------------------------------------------------------------- /torchbenchmark/models/BERT_pytorch/bert_pytorch/model/attention/__init__.py: -------------------------------------------------------------------------------- 1 | from .multi_head import MultiHeadedAttention 2 | from .single import Attention 3 | -------------------------------------------------------------------------------- /torchbenchmark/models/BERT_pytorch/bert_pytorch/model/embedding/__init__.py: -------------------------------------------------------------------------------- 1 | from .bert import BERTEmbedding 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/BERT_pytorch/bert_pytorch/model/embedding/segment.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | class SegmentEmbedding(nn.Embedding): 5 | def __init__(self, embed_size=512): 6 | super().__init__(3, embed_size, padding_idx=0) 7 | -------------------------------------------------------------------------------- /torchbenchmark/models/BERT_pytorch/bert_pytorch/model/embedding/token.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | class TokenEmbedding(nn.Embedding): 5 | def __init__(self, vocab_size, embed_size=512): 6 | super().__init__(vocab_size, embed_size, padding_idx=0) 7 | -------------------------------------------------------------------------------- /torchbenchmark/models/BERT_pytorch/bert_pytorch/model/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .feed_forward import PositionwiseFeedForward 2 | from .layer_norm import LayerNorm 3 | from .sublayer import SublayerConnection 4 | -------------------------------------------------------------------------------- /torchbenchmark/models/BERT_pytorch/bert_pytorch/model/utils/feed_forward.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | class PositionwiseFeedForward(nn.Module): 5 | "Implements FFN equation." 
6 | 7 | def __init__(self, d_model, d_ff, dropout=0.1): 8 | super(PositionwiseFeedForward, self).__init__() 9 | self.w_1 = nn.Linear(d_model, d_ff) 10 | self.w_2 = nn.Linear(d_ff, d_model) 11 | self.dropout = nn.Dropout(dropout) 12 | self.activation = nn.GELU() 13 | 14 | def forward(self, x): 15 | return self.w_2(self.dropout(self.activation(self.w_1(x)))) 16 | -------------------------------------------------------------------------------- /torchbenchmark/models/BERT_pytorch/bert_pytorch/model/utils/layer_norm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class LayerNorm(nn.Module): 6 | "Construct a layernorm module (See citation for details)." 7 | 8 | def __init__(self, features, eps=1e-6): 9 | super(LayerNorm, self).__init__() 10 | self.a_2 = nn.Parameter(torch.ones(features)) 11 | self.b_2 = nn.Parameter(torch.zeros(features)) 12 | self.eps = eps 13 | 14 | def forward(self, x): 15 | mean = x.mean(-1, keepdim=True) 16 | std = x.std(-1, keepdim=True) 17 | return self.a_2 * (x - mean) / (std + self.eps) + self.b_2 18 | -------------------------------------------------------------------------------- /torchbenchmark/models/BERT_pytorch/bert_pytorch/model/utils/sublayer.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | from .layer_norm import LayerNorm 4 | from .tensor2tensor import TensorToTensor 5 | 6 | 7 | class SublayerConnection(nn.Module): 8 | """ 9 | A residual connection followed by a layer norm. 10 | Note for code simplicity the norm is first as opposed to last. 11 | """ 12 | 13 | def __init__(self, size, dropout): 14 | super(SublayerConnection, self).__init__() 15 | self.norm = LayerNorm(size) 16 | self.dropout = nn.Dropout(dropout) 17 | 18 | def forward(self, x, sublayer: TensorToTensor): 19 | "Apply residual connection to any sublayer with the same size." 
20 | return x + self.dropout(sublayer.forward(self.norm(x))) 21 | -------------------------------------------------------------------------------- /torchbenchmark/models/BERT_pytorch/bert_pytorch/model/utils/tensor2tensor.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | @torch.jit.interface 5 | class TensorToTensor(torch.nn.Module): 6 | def forward(self, x: torch.Tensor) -> torch.Tensor: 7 | pass 8 | -------------------------------------------------------------------------------- /torchbenchmark/models/BERT_pytorch/bert_pytorch/trainer/__init__.py: -------------------------------------------------------------------------------- 1 | from .pretrain import BERTTrainer 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/BERT_pytorch/install.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import sys 3 | 4 | 5 | def setup_install(): 6 | subprocess.check_call([sys.executable, "-m", "pip", "install", "-e", "."]) 7 | 8 | 9 | if __name__ == "__main__": 10 | setup_install() 11 | -------------------------------------------------------------------------------- /torchbenchmark/models/BERT_pytorch/install.sh: -------------------------------------------------------------------------------- 1 | python setup.py install 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/BERT_pytorch/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 32 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/BERT_pytorch/origin: -------------------------------------------------------------------------------- 1 | origin https://github.com/wconstab/BERT-pytorch 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/BERT_pytorch/requirements.txt: -------------------------------------------------------------------------------- 1 | tqdm 2 | numpy 3 | -------------------------------------------------------------------------------- /torchbenchmark/models/BERT_pytorch/run.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=0,1 bert -c data/corpus.small -v data/vocab.small -o bert.model $@ 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/BERT_pytorch/test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | 4 | class BERTVocabTestCase(unittest.TestCase): 5 | pass 6 | -------------------------------------------------------------------------------- /torchbenchmark/models/Background_Matting/.gitignore: -------------------------------------------------------------------------------- 1 | *.csv 2 | *.pth 3 | ak/ 4 | ak.png 5 | -------------------------------------------------------------------------------- /torchbenchmark/models/Background_Matting/Data_adobe/prepare.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # ./prepare.sh /path/to/Adobe/Combined_Dataset 3 | 4 | function copy_adobe() { 5 | while read p; do 6 | if [ -f 
"$1/Other/fg/$p" ]; then 7 | cp $1/Other/fg/$p fg_$2 8 | cp $1/Other/alpha/"$p" mask_$2 9 | else 10 | cp $1/Adobe-licensed\ images/fg/"$p" fg_$2 11 | cp $1/Adobe-licensed\ images/alpha/"$p" mask_$2 12 | fi 13 | done <$2_data_list.txt 14 | } 15 | mkdir -p fg_train fg_test mask_train mask_test merged_train merged_test 16 | copy_adobe "$1/Test_set" test 17 | copy_adobe "$1/Training_set" train 18 | -------------------------------------------------------------------------------- /torchbenchmark/models/Background_Matting/Data_adobe/test_data_list.txt: -------------------------------------------------------------------------------- 1 | woman-morning-bathrobe-bathroom.png 2 | woman-952506_1920 (1).png 3 | girl-1219339_1920.png 4 | wedding-dresses-1486260_1280.png 5 | long-1245787_1920.png 6 | pexels-photo-58463.png 7 | girl-beautiful-young-face-53000.png 8 | boy-1518482_1920.png 9 | girl-1467820_1280.png 10 | model-600238_1920.png 11 | sea-sunny-person-beach.png 12 | -------------------------------------------------------------------------------- /torchbenchmark/models/Background_Matting/install.py: -------------------------------------------------------------------------------- 1 | from utils import python_utils, s3_utils 2 | 3 | 4 | def pip_install_requirements(): 5 | python_utils.pip_install_requirements("requirements.txt") 6 | 7 | 8 | if __name__ == "__main__": 9 | pip_install_requirements() 10 | s3_utils.checkout_s3_data( 11 | "INPUT_TARBALLS", "Background_Matting_inputs.tar.gz", decompress=True 12 | ) 13 | -------------------------------------------------------------------------------- /torchbenchmark/models/Background_Matting/install.sh: -------------------------------------------------------------------------------- 1 | python -m pip install -r requirements.txt 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/Background_Matting/metadata.yaml: -------------------------------------------------------------------------------- 1 | train_benchmark: true 2 | train_deterministic: false 3 | not_implemented: 4 | # Disabled due to excessively slow runtime - see GH Issue #100 5 | - test: train 6 | device: cpu 7 | - test: example 8 | device: cpu 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/Background_Matting/origin: -------------------------------------------------------------------------------- 1 | origin https://github.com/bertmaher/Background-Matting.git 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/Background_Matting/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | opencv-python 3 | pandas 4 | Pillow 5 | scikit-image 6 | scipy 7 | tqdm 8 | tensorboardX 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/Background_Matting/run.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=0,1 python train_real_fixed.py -n Real_fixed -bs 4 -res 512 $@ 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/LearningToPaint/.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.py~ 3 | 4 | data/ 5 | *.pkl 6 | 7 | output/* 8 | -------------------------------------------------------------------------------- 
/torchbenchmark/models/LearningToPaint/baseline/Renderer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/models/LearningToPaint/baseline/Renderer/__init__.py -------------------------------------------------------------------------------- /torchbenchmark/models/LearningToPaint/baseline_modelfree/Renderer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/models/LearningToPaint/baseline_modelfree/Renderer/__init__.py -------------------------------------------------------------------------------- /torchbenchmark/models/LearningToPaint/install.py: -------------------------------------------------------------------------------- 1 | from utils import s3_utils 2 | from utils.python_utils import pip_install_requirements 3 | 4 | 5 | if __name__ == "__main__": 6 | s3_utils.checkout_s3_data( 7 | "INPUT_TARBALLS", "Super_SloMo_inputs.tar.gz", decompress=True 8 | ) 9 | pip_install_requirements() 10 | -------------------------------------------------------------------------------- /torchbenchmark/models/LearningToPaint/install.sh: -------------------------------------------------------------------------------- 1 | python -m pip install -r requirements.txt 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/LearningToPaint/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 256 4 | eval_benchmark: true 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: true 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/LearningToPaint/origin: -------------------------------------------------------------------------------- 1 | origin https://github.com/nikithamalgifb/LearningToPaint.git 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/LearningToPaint/requirements.txt: -------------------------------------------------------------------------------- 1 | tensorboardX 2 | opencv-python 3 | Pillow 4 | scipy 5 | -------------------------------------------------------------------------------- /torchbenchmark/models/LearningToPaint/run.sh: -------------------------------------------------------------------------------- 1 | python3 baseline/train_renderer.py $@ 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/Super_SloMo/install.py: -------------------------------------------------------------------------------- 1 | from utils import s3_utils 2 | from utils.python_utils import pip_install_requirements 3 | 4 | if __name__ == "__main__": 5 | s3_utils.checkout_s3_data( 6 | "INPUT_TARBALLS", "Super_SloMo_inputs.tar.gz", decompress=True 7 | ) 8 | pip_install_requirements() 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/Super_SloMo/install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | pip install -r requirements.txt 3 | -------------------------------------------------------------------------------- 
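Each model directory in this listing ships its own install.py, and those scripts refer to a bare requirements.txt, which only resolves if they run with the model directory as the working directory. A rough sketch of driving them that way, offered as an assumption for illustration rather than the actual TorchBench installer:

import subprocess
import sys
from pathlib import Path

MODELS_DIR = Path("torchbenchmark/models")  # assumed repo-relative location

for install_script in sorted(MODELS_DIR.glob("*/install.py")):
    print(f"installing dependencies for {install_script.parent.name}")
    # cwd is the model directory, so relative paths such as requirements.txt resolve
    subprocess.check_call([sys.executable, "install.py"], cwd=install_script.parent)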
/torchbenchmark/models/Super_SloMo/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 8 4 | eval_benchmark: false 5 | eval_deterministic: true 6 | eval_nograd: true 7 | not_implemented: 8 | - device: cpu 9 | - device: cuda 10 | test: eval 11 | train_benchmark: false 12 | train_deterministic: true 13 | -------------------------------------------------------------------------------- /torchbenchmark/models/Super_SloMo/origin: -------------------------------------------------------------------------------- 1 | origin https://github.com/wconstab/Super-SloMo.git 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/Super_SloMo/requirements.txt: -------------------------------------------------------------------------------- 1 | tensorboardX 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/Super_SloMo/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | python train.py --dataset_root dataset --checkpoint_dir checkpoints --epochs 1 "$@" 3 | -------------------------------------------------------------------------------- /torchbenchmark/models/alexnet/install.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/models/alexnet/install.py -------------------------------------------------------------------------------- /torchbenchmark/models/alexnet/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 1024 4 | eval_benchmark: true 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: true 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/basic_gnn_edgecnn/__init__.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.util.framework.gnn.model_factory import BasicGNNModel 2 | 3 | 4 | class Model(BasicGNNModel): 5 | def __init__(self, test, device, batch_size=None, extra_args=[]): 6 | super().__init__( 7 | model_name="edgecnn", 8 | test=test, 9 | device=device, 10 | batch_size=batch_size, 11 | extra_args=extra_args, 12 | ) 13 | -------------------------------------------------------------------------------- /torchbenchmark/models/basic_gnn_edgecnn/install.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.util.framework.gnn import install_pytorch_geometric 2 | 3 | if __name__ == "__main__": 4 | install_pytorch_geometric() 5 | -------------------------------------------------------------------------------- /torchbenchmark/models/basic_gnn_edgecnn/metadata.yaml: -------------------------------------------------------------------------------- 1 | eval_benchmark: false 2 | eval_deterministic: false 3 | eval_nograd: true 4 | train_benchmark: false 5 | train_deterministic: false 6 | 7 | -------------------------------------------------------------------------------- /torchbenchmark/models/basic_gnn_gcn/__init__.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.util.framework.gnn.model_factory import BasicGNNModel 2 | 3 | 4 | 
class Model(BasicGNNModel): 5 | def __init__(self, test, device, batch_size=None, extra_args=[]): 6 | super().__init__( 7 | model_name="gcn", 8 | test=test, 9 | device=device, 10 | batch_size=batch_size, 11 | extra_args=extra_args, 12 | ) 13 | -------------------------------------------------------------------------------- /torchbenchmark/models/basic_gnn_gcn/install.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.util.framework.gnn import install_pytorch_geometric 2 | 3 | if __name__ == "__main__": 4 | install_pytorch_geometric() 5 | -------------------------------------------------------------------------------- /torchbenchmark/models/basic_gnn_gcn/metadata.yaml: -------------------------------------------------------------------------------- 1 | eval_benchmark: false 2 | eval_deterministic: false 3 | eval_nograd: true 4 | train_benchmark: false 5 | train_deterministic: false 6 | -------------------------------------------------------------------------------- /torchbenchmark/models/basic_gnn_gin/__init__.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.util.framework.gnn.model_factory import BasicGNNModel 2 | 3 | 4 | class Model(BasicGNNModel): 5 | def __init__(self, test, device, batch_size=None, extra_args=[]): 6 | super().__init__( 7 | model_name="gin", 8 | test=test, 9 | device=device, 10 | batch_size=batch_size, 11 | extra_args=extra_args, 12 | ) 13 | -------------------------------------------------------------------------------- /torchbenchmark/models/basic_gnn_gin/install.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.util.framework.gnn import install_pytorch_geometric 2 | 3 | if __name__ == "__main__": 4 | install_pytorch_geometric() 5 | -------------------------------------------------------------------------------- /torchbenchmark/models/basic_gnn_gin/metadata.yaml: -------------------------------------------------------------------------------- 1 | eval_benchmark: false 2 | eval_deterministic: false 3 | eval_nograd: true 4 | train_benchmark: false 5 | train_deterministic: false -------------------------------------------------------------------------------- /torchbenchmark/models/basic_gnn_sage/__init__.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.util.framework.gnn.model_factory import BasicGNNModel 2 | 3 | 4 | class Model(BasicGNNModel): 5 | def __init__(self, test, device, batch_size=None, extra_args=[]): 6 | super().__init__( 7 | model_name="sage", 8 | test=test, 9 | device=device, 10 | batch_size=batch_size, 11 | extra_args=extra_args, 12 | ) 13 | -------------------------------------------------------------------------------- /torchbenchmark/models/basic_gnn_sage/install.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.util.framework.gnn import install_pytorch_geometric 2 | 3 | if __name__ == "__main__": 4 | install_pytorch_geometric() 5 | -------------------------------------------------------------------------------- /torchbenchmark/models/basic_gnn_sage/metadata.yaml: -------------------------------------------------------------------------------- 1 | eval_benchmark: false 2 | eval_deterministic: false 3 | eval_nograd: true 4 | train_benchmark: false 5 | train_deterministic: false 6 | -------------------------------------------------------------------------------- 
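The four basic_gnn_* wrappers above differ only in the model_name they forward to BasicGNNModel, so a quick smoke check can loop over the directory names. This is a sketch under the assumption that each model's install.py (which calls install_pytorch_geometric) has already been run; it is not part of the benchmark suite:

import importlib

for name in ("basic_gnn_edgecnn", "basic_gnn_gcn", "basic_gnn_gin", "basic_gnn_sage"):
    module = importlib.import_module(f"torchbenchmark.models.{name}")
    # Constructor signature as shown above; batch_size=None falls back to the default.
    model = module.Model(test="eval", device="cpu")
    print(name, type(model).__name__)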
/torchbenchmark/models/cm3leon_generate/metadata.yaml: -------------------------------------------------------------------------------- 1 | eval_benchmark: false 2 | eval_deterministic: false 3 | eval_nograd: true 4 | train_benchmark: false 5 | train_deterministic: false 6 | -------------------------------------------------------------------------------- /torchbenchmark/models/dcgan/install.py: -------------------------------------------------------------------------------- 1 | from utils.python_utils import pip_install_requirements 2 | 3 | if __name__ == "__main__": 4 | pip_install_requirements() 5 | -------------------------------------------------------------------------------- /torchbenchmark/models/dcgan/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 1024 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/dcgan/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/demucs/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | *.egg-info 3 | build 4 | dist 5 | Session.vim 6 | *.log 7 | trash 8 | tex 9 | demucs_release 10 | -------------------------------------------------------------------------------- /torchbenchmark/models/demucs/check.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import torch 4 | 5 | a = torch.load(sys.argv[1]) 6 | b = torch.load(sys.argv[2]) 7 | torch.testing.assert_allclose(a, b, rtol=0.01, atol=0.01) 8 | -------------------------------------------------------------------------------- /torchbenchmark/models/demucs/demucs/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | -------------------------------------------------------------------------------- /torchbenchmark/models/demucs/environment-cpu.yml: -------------------------------------------------------------------------------- 1 | name: demucs 2 | 3 | channels: 4 | - conda-forge 5 | - pytorch 6 | 7 | dependencies: 8 | - python=3.7 9 | - ffmpeg==4.2 10 | - pytorch=1.4.0 11 | - scipy==1.3.1 12 | - tqdm>=4.36.1 13 | - pip 14 | - pip: 15 | - lameenc==1.2.2 16 | - musdb==0.3.1 17 | - museval==0.3.0 18 | - requests==2.22 19 | - treetable==0.2.3 20 | -------------------------------------------------------------------------------- /torchbenchmark/models/demucs/environment-cuda.yml: -------------------------------------------------------------------------------- 1 | name: demucs 2 | 3 | channels: 4 | - conda-forge 5 | - pytorch 6 | 7 | dependencies: 8 | - python=3.7 9 | - cudatoolkit=10 10 | - ffmpeg==4.2 11 | - pytorch=1.4.0 12 | - scipy==1.3.1 13 | - tqdm>=4.36.1 14 | - pip 15 | - pip: 16 | - lameenc==1.2.2 17 | - musdb==0.3.1 18 | - museval==0.3.0 19 | - requests==2.22 20 | - treetable==0.2.3 21 | -------------------------------------------------------------------------------- /torchbenchmark/models/demucs/install.py: -------------------------------------------------------------------------------- 1 | from utils.python_utils import pip_install_requirements 2 | 3 | if __name__ == "__main__": 4 | pip_install_requirements() 5 | -------------------------------------------------------------------------------- /torchbenchmark/models/demucs/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 32 4 | eval_benchmark: true 5 | eval_deterministic: false 6 | eval_nograd: true 7 | not_implemented: 8 | - device: cpu 9 | test: train 10 | - device: cuda 11 | test: train 12 | - device: cuda 13 | test: eval 14 | train_benchmark: true 15 | train_deterministic: false 16 | -------------------------------------------------------------------------------- /torchbenchmark/models/demucs/origin: -------------------------------------------------------------------------------- 1 | origin https://github.com/zheng-xq/demucs.git 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/demucs/requirements.txt: -------------------------------------------------------------------------------- 1 | ffmpeg-python 2 | scipy 3 | tqdm 4 | lameenc 5 | musdb 6 | museval 7 | requests 8 | treetable 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/demucs/run.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | set -e 3 | 4 | if [ -f metadata/musdb.json ]; then 5 | rm metadata/musdb.json 6 | fi 7 | 8 | for f in checkpoints evals logs models; do 9 | if [ -d $f ]; then 10 | rm -r $f 11 | fi 12 | done 13 | 14 | python3 -m demucs --musdb "$(pwd)/sample_data/" \ 15 | --batch_size 1 \ 16 | --device cuda \ 17 | --workers 1 \ 18 | --eval_workers 1 \ 19 | --restart \ 20 | --remix_group_size 1 \ 21 | --samples 100000 \ 22 | --repeat 1 \ 23 | --epochs 1 \ 24 | "$@" 25 | -------------------------------------------------------------------------------- /torchbenchmark/models/demucs/run_overall.sh: -------------------------------------------------------------------------------- 1 | #! 
/bin/bash 2 | set -e 3 | 4 | time bash run.sh --debug reference_0.out 5 | time bash run.sh --debug reference_1.out 6 | python check.py reference_0.out reference_1.out 7 | 8 | time bash run.sh --script --debug jit.out 9 | python check.py reference_0.out jit.out 10 | -------------------------------------------------------------------------------- /torchbenchmark/models/demucs/setup.cfg: -------------------------------------------------------------------------------- 1 | [pep8] 2 | max-line-length = 100 3 | 4 | [flake8] 5 | max-line-length = 100 6 | 7 | [yapf] 8 | column_limit = 100 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/densenet121/install.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/models/densenet121/install.py -------------------------------------------------------------------------------- /torchbenchmark/models/densenet121/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 64 4 | eval_benchmark: true 5 | eval_deterministic: false 6 | eval_nograd: true 7 | not_implemented: 8 | - device: cuda 9 | - device: cpu 10 | train_benchmark: true 11 | train_deterministic: false 12 | -------------------------------------------------------------------------------- /torchbenchmark/models/detectron2_fasterrcnn_r_101_c4/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.detectron2 import install_detectron2 4 | 5 | MODEL_NAME = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 6 | MODEL_DIR = os.path.abspath(os.path.dirname(__file__)) 7 | 8 | if __name__ == "__main__": 9 | install_detectron2(MODEL_NAME, MODEL_DIR) 10 | -------------------------------------------------------------------------------- /torchbenchmark/models/detectron2_fasterrcnn_r_101_c4/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 1 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/detectron2_fasterrcnn_r_101_dc5/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.detectron2 import install_detectron2 4 | 5 | MODEL_NAME = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 6 | MODEL_DIR = os.path.abspath(os.path.dirname(__file__)) 7 | 8 | if __name__ == "__main__": 9 | install_detectron2(MODEL_NAME, MODEL_DIR) 10 | -------------------------------------------------------------------------------- /torchbenchmark/models/detectron2_fasterrcnn_r_101_dc5/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 4 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | not_implemented: 8 | - device: cpu 9 | train_benchmark: false 10 | train_deterministic: false 11 | -------------------------------------------------------------------------------- 
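The metadata.yaml files in this listing share a small schema: optional per-device settings under devices, flat eval_*/train_* flags, and an optional not_implemented list of device/test combinations. The helpers below are a minimal illustrative reader (not TorchBench APIs; PyYAML assumed), checked against the detectron2_fasterrcnn_r_101_dc5/metadata.yaml shown directly above:

import yaml


def load_metadata(path):
    with open(path) as f:
        return yaml.safe_load(f) or {}


def eval_batch_size(meta, device_name):
    # Per-device overrides live under the "devices" mapping.
    return meta.get("devices", {}).get(device_name, {}).get("eval_batch_size")


def is_not_implemented(meta, device=None, test=None):
    # An entry matches when every key it specifies agrees with the query.
    for entry in meta.get("not_implemented", []) or []:
        device_ok = "device" not in entry or entry["device"] == device
        test_ok = "test" not in entry or entry["test"] == test
        if device_ok and test_ok:
            return True
    return False


meta = load_metadata("torchbenchmark/models/detectron2_fasterrcnn_r_101_dc5/metadata.yaml")
print(eval_batch_size(meta, "NVIDIA A100-SXM4-40GB"))  # 4
print(is_not_implemented(meta, device="cpu", test="train"))  # True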
/torchbenchmark/models/detectron2_fasterrcnn_r_101_fpn/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.detectron2 import install_detectron2 4 | 5 | MODEL_NAME = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 6 | MODEL_DIR = os.path.abspath(os.path.dirname(__file__)) 7 | 8 | if __name__ == "__main__": 9 | install_detectron2(MODEL_NAME, MODEL_DIR) 10 | -------------------------------------------------------------------------------- /torchbenchmark/models/detectron2_fasterrcnn_r_101_fpn/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 4 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | not_implemented: 8 | - device: cpu 9 | train_benchmark: false 10 | train_deterministic: false 11 | -------------------------------------------------------------------------------- /torchbenchmark/models/detectron2_fasterrcnn_r_50_c4/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.detectron2 import install_detectron2 4 | 5 | MODEL_NAME = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 6 | MODEL_DIR = os.path.abspath(os.path.dirname(__file__)) 7 | 8 | if __name__ == "__main__": 9 | install_detectron2(MODEL_NAME, MODEL_DIR) 10 | -------------------------------------------------------------------------------- /torchbenchmark/models/detectron2_fasterrcnn_r_50_c4/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 1 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | not_implemented: 8 | - device: cpu 9 | train_benchmark: false 10 | train_deterministic: false 11 | -------------------------------------------------------------------------------- /torchbenchmark/models/detectron2_fasterrcnn_r_50_dc5/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.detectron2 import install_detectron2 4 | 5 | MODEL_NAME = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 6 | MODEL_DIR = os.path.abspath(os.path.dirname(__file__)) 7 | 8 | if __name__ == "__main__": 9 | install_detectron2(MODEL_NAME, MODEL_DIR) 10 | -------------------------------------------------------------------------------- /torchbenchmark/models/detectron2_fasterrcnn_r_50_dc5/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 1 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | not_implemented: 8 | - device: cpu 9 | train_benchmark: false 10 | train_deterministic: false 11 | -------------------------------------------------------------------------------- /torchbenchmark/models/detectron2_fasterrcnn_r_50_fpn/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.detectron2 import install_detectron2 4 | 5 | MODEL_NAME = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 6 | MODEL_DIR = os.path.abspath(os.path.dirname(__file__)) 7 | 8 | if __name__ == "__main__": 9 | install_detectron2(MODEL_NAME, MODEL_DIR) 10 | 
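The detectron2 install scripts above, like several of the HuggingFace canary installers earlier in this listing, derive the model name from the directory containing install.py with the same os.path idiom. A worked example using an illustrative absolute path in place of __file__:

import os

# Stand-in for __file__ inside one of the install.py scripts above (path is hypothetical).
file_path = "/repo/torchbenchmark/models/detectron2_fasterrcnn_r_50_fpn/install.py"

MODEL_NAME = os.path.basename(os.path.dirname(os.path.abspath(file_path)))
MODEL_DIR = os.path.abspath(os.path.dirname(file_path))

print(MODEL_NAME)  # detectron2_fasterrcnn_r_50_fpn
print(MODEL_DIR)   # /repo/torchbenchmark/models/detectron2_fasterrcnn_r_50_fpn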
-------------------------------------------------------------------------------- /torchbenchmark/models/detectron2_fasterrcnn_r_50_fpn/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 4 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | not_implemented: 8 | - device: cpu 9 | train_benchmark: false 10 | train_deterministic: false 11 | -------------------------------------------------------------------------------- /torchbenchmark/models/detectron2_fcos_r_50_fpn/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.detectron2 import install_detectron2 4 | 5 | MODEL_NAME = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 6 | MODEL_DIR = os.path.abspath(os.path.dirname(__file__)) 7 | 8 | if __name__ == "__main__": 9 | install_detectron2(MODEL_NAME, MODEL_DIR) 10 | -------------------------------------------------------------------------------- /torchbenchmark/models/detectron2_fcos_r_50_fpn/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 4 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | not_implemented: 8 | - device: cpu 9 | train_benchmark: false 10 | train_deterministic: false 11 | -------------------------------------------------------------------------------- /torchbenchmark/models/detectron2_maskrcnn/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.detectron2 import install_detectron2 4 | 5 | MODEL_NAME = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 6 | MODEL_DIR = os.path.abspath(os.path.dirname(__file__)) 7 | 8 | if __name__ == "__main__": 9 | install_detectron2(MODEL_NAME, MODEL_DIR) 10 | -------------------------------------------------------------------------------- /torchbenchmark/models/detectron2_maskrcnn/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 4 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | not_implemented: 8 | - device: cpu 9 | train_benchmark: false 10 | train_deterministic: false 11 | -------------------------------------------------------------------------------- /torchbenchmark/models/detectron2_maskrcnn/origin: -------------------------------------------------------------------------------- 1 | origin https://github.com/facebookresearch/detectron2 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/detectron2_maskrcnn_r_101_c4/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.detectron2 import install_detectron2 4 | 5 | MODEL_NAME = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 6 | MODEL_DIR = os.path.abspath(os.path.dirname(__file__)) 7 | 8 | if __name__ == "__main__": 9 | install_detectron2(MODEL_NAME, MODEL_DIR) 10 | -------------------------------------------------------------------------------- /torchbenchmark/models/detectron2_maskrcnn_r_101_c4/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 
| NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 2 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | not_implemented: 8 | - device: cpu 9 | train_benchmark: false 10 | train_deterministic: false 11 | -------------------------------------------------------------------------------- /torchbenchmark/models/detectron2_maskrcnn_r_101_fpn/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.detectron2 import install_detectron2 4 | 5 | MODEL_NAME = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 6 | MODEL_DIR = os.path.abspath(os.path.dirname(__file__)) 7 | 8 | if __name__ == "__main__": 9 | install_detectron2(MODEL_NAME, MODEL_DIR) 10 | -------------------------------------------------------------------------------- /torchbenchmark/models/detectron2_maskrcnn_r_101_fpn/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 4 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | not_implemented: 8 | - device: cpu 9 | train_benchmark: false 10 | train_deterministic: false 11 | -------------------------------------------------------------------------------- /torchbenchmark/models/detectron2_maskrcnn_r_50_c4/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.detectron2 import install_detectron2 4 | 5 | MODEL_NAME = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 6 | MODEL_DIR = os.path.abspath(os.path.dirname(__file__)) 7 | 8 | if __name__ == "__main__": 9 | install_detectron2(MODEL_NAME, MODEL_DIR) 10 | -------------------------------------------------------------------------------- /torchbenchmark/models/detectron2_maskrcnn_r_50_c4/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 1 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | not_implemented: 8 | - device: cpu 9 | train_benchmark: false 10 | train_deterministic: false 11 | -------------------------------------------------------------------------------- /torchbenchmark/models/detectron2_maskrcnn_r_50_fpn/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.detectron2 import install_detectron2 4 | 5 | MODEL_NAME = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 6 | MODEL_DIR = os.path.abspath(os.path.dirname(__file__)) 7 | 8 | if __name__ == "__main__": 9 | install_detectron2(MODEL_NAME, MODEL_DIR) 10 | -------------------------------------------------------------------------------- /torchbenchmark/models/detectron2_maskrcnn_r_50_fpn/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 4 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | not_implemented: 8 | - device: cpu 9 | train_benchmark: false 10 | train_deterministic: false 11 | -------------------------------------------------------------------------------- /torchbenchmark/models/dlrm/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | Facebook has adopted a 
Code of Conduct that we expect project participants to adhere to. 4 | Please read the [full text](https://code.fb.com/codeofconduct/) 5 | so that you can understand what actions will and will not be tolerated. 6 | -------------------------------------------------------------------------------- /torchbenchmark/models/dlrm/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | ARG FROM_IMAGE_NAME=pytorch/pytorch:1.3-cuda10.1-cudnn7-runtime 7 | FROM ${FROM_IMAGE_NAME} 8 | 9 | ADD requirements.txt . 10 | RUN pip install -r requirements.txt 11 | 12 | RUN pip install torch==1.3.1 13 | 14 | WORKDIR /code 15 | ADD . . 16 | -------------------------------------------------------------------------------- /torchbenchmark/models/dlrm/install.py: -------------------------------------------------------------------------------- 1 | from utils.python_utils import pip_install_requirements 2 | 3 | if __name__ == "__main__": 4 | pip_install_requirements() 5 | -------------------------------------------------------------------------------- /torchbenchmark/models/dlrm/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 1 4 | eval_benchmark: false 5 | eval_deterministic: true 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: true 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/dlrm/origin: -------------------------------------------------------------------------------- 1 | https://github.com/facebookresearch/dlrm.git 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/dlrm/requirements.txt: -------------------------------------------------------------------------------- 1 | future 2 | numpy 3 | onnx 4 | pydot 5 | scikit-learn 6 | tqdm 7 | -------------------------------------------------------------------------------- /torchbenchmark/models/doctr_det_predictor/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 4 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | not_implemented: 8 | - device: cpu 9 | train_benchmark: false 10 | train_deterministic: false 11 | -------------------------------------------------------------------------------- /torchbenchmark/models/doctr_det_predictor/requirements.txt: -------------------------------------------------------------------------------- 1 | python-doctr 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/doctr_reco_predictor/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 64 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | not_implemented: 8 | - device: cpu 9 | train_benchmark: false 10 | train_deterministic: false 11 | -------------------------------------------------------------------------------- /torchbenchmark/models/doctr_reco_predictor/requirements.txt: -------------------------------------------------------------------------------- 1 | 
python-doctr 2 | rapidfuzz 3 | -------------------------------------------------------------------------------- /torchbenchmark/models/drq/install.py: -------------------------------------------------------------------------------- 1 | from utils import s3_utils 2 | from utils.python_utils import pip_install_requirements 3 | 4 | if __name__ == "__main__": 5 | pip_install_requirements() 6 | s3_utils.checkout_s3_data("MODEL_PKLS", "drq/obs.pkl", decompress=False) 7 | -------------------------------------------------------------------------------- /torchbenchmark/models/drq/metadata.yaml: -------------------------------------------------------------------------------- 1 | eval_benchmark: false 2 | eval_deterministic: false 3 | eval_nograd: true 4 | train_benchmark: false 5 | train_deterministic: false -------------------------------------------------------------------------------- /torchbenchmark/models/drq/origin: -------------------------------------------------------------------------------- 1 | https://github.com/denisyarats/drq/tree/365e5969f9a748ad63d854ce8c8cc8f90c1de4e0 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/drq/requirements.txt: -------------------------------------------------------------------------------- 1 | kornia 2 | scikit-image 3 | gym 4 | -------------------------------------------------------------------------------- /torchbenchmark/models/fastNLP_Bert/bert_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "attention_probs_dropout_prob": 0.1, 3 | "directionality": "bidi", 4 | "hidden_act": "gelu", 5 | "hidden_dropout_prob": 0.1, 6 | "hidden_size": 768, 7 | "initializer_range": 0.02, 8 | "intermediate_size": 3072, 9 | "max_position_embeddings": 512, 10 | "num_attention_heads": 12, 11 | "num_hidden_layers": 12, 12 | "pooler_fc_size": 768, 13 | "pooler_num_attention_heads": 12, 14 | "pooler_num_fc_layers": 3, 15 | "pooler_size_per_head": 128, 16 | "pooler_type": "first_token_transform", 17 | "type_vocab_size": 2, 18 | "vocab_size": 21128 19 | } 20 | -------------------------------------------------------------------------------- /torchbenchmark/models/fastNLP_Bert/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import patch 4 | from utils.python_utils import pip_install_requirements 5 | 6 | 7 | def patch_fastnlp(): 8 | import fastNLP 9 | 10 | current_dir = os.path.dirname(os.path.abspath(__file__)) 11 | patch_file = os.path.join(current_dir, "fastnlp.patch") 12 | fastNLP_dir = os.path.dirname(fastNLP.__file__) 13 | fastNLP_target_file = os.path.join(fastNLP_dir, "embeddings", "bert_embedding.py") 14 | p = patch.fromfile(patch_file) 15 | if not p.apply(strip=1, root=fastNLP_dir): 16 | print("Failed to patch fastNLP. 
Exit.") 17 | exit(1) 18 | 19 | 20 | if __name__ == "__main__": 21 | pip_install_requirements() 22 | patch_fastnlp() 23 | -------------------------------------------------------------------------------- /torchbenchmark/models/fastNLP_Bert/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 16 4 | eval_benchmark: false 5 | eval_deterministic: true 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: true 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/fastNLP_Bert/requirements.txt: -------------------------------------------------------------------------------- 1 | fastNLP==0.6.0 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/functorch_dp_cifar10/install.py: -------------------------------------------------------------------------------- 1 | from utils.python_utils import pip_install_requirements 2 | 3 | if __name__ == "__main__": 4 | pip_install_requirements() 5 | -------------------------------------------------------------------------------- /torchbenchmark/models/functorch_dp_cifar10/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 512 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/functorch_dp_cifar10/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/models/functorch_dp_cifar10/requirements.txt -------------------------------------------------------------------------------- /torchbenchmark/models/functorch_maml_omniglot/install.py: -------------------------------------------------------------------------------- 1 | from utils import s3_utils 2 | from utils.python_utils import pip_install_requirements 3 | 4 | if __name__ == "__main__": 5 | pip_install_requirements() 6 | s3_utils.checkout_s3_data("MODEL_PKLS", "maml_omniglot/batch.pt", decompress=False) 7 | -------------------------------------------------------------------------------- /torchbenchmark/models/functorch_maml_omniglot/metadata.yaml: -------------------------------------------------------------------------------- 1 | eval_benchmark: false 2 | eval_deterministic: false 3 | eval_nograd: true 4 | train_benchmark: false 5 | train_deterministic: false 6 | -------------------------------------------------------------------------------- /torchbenchmark/models/functorch_maml_omniglot/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/models/functorch_maml_omniglot/requirements.txt -------------------------------------------------------------------------------- /torchbenchmark/models/hf_Albert/__init__.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.tasks import NLP 2 | from torchbenchmark.util.framework.huggingface.model_factory import HuggingFaceModel 3 | 4 | 5 | class Model(HuggingFaceModel): 6 | task = 
NLP.LANGUAGE_MODELING 7 | DEFAULT_TRAIN_BSIZE = 8 8 | DEFAULT_EVAL_BSIZE = 1 9 | 10 | def __init__(self, test, device, batch_size=None, extra_args=[]): 11 | super().__init__( 12 | name="hf_Albert", 13 | test=test, 14 | device=device, 15 | batch_size=batch_size, 16 | extra_args=extra_args, 17 | ) 18 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_Albert/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.huggingface.patch_hf import ( 4 | cache_model, 5 | patch_transformers, 6 | ) 7 | from utils.python_utils import pip_install_requirements 8 | 9 | if __name__ == "__main__": 10 | pip_install_requirements() 11 | patch_transformers() 12 | model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 13 | cache_model(model_name) 14 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_Albert/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 16 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_Albert/requirements.txt: -------------------------------------------------------------------------------- 1 | sentencepiece 2 | datasets 3 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_Bart/__init__.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.tasks import NLP 2 | from torchbenchmark.util.framework.huggingface.model_factory import HuggingFaceModel 3 | 4 | 5 | class Model(HuggingFaceModel): 6 | task = NLP.LANGUAGE_MODELING 7 | DEFAULT_TRAIN_BSIZE = 4 8 | DEFAULT_EVAL_BSIZE = 1 9 | 10 | def __init__(self, test, device, batch_size=None, extra_args=[]): 11 | super().__init__( 12 | name="hf_Bart", 13 | test=test, 14 | device=device, 15 | batch_size=batch_size, 16 | extra_args=extra_args, 17 | ) 18 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_Bart/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.huggingface.patch_hf import ( 4 | cache_model, 5 | patch_transformers, 6 | ) 7 | from utils.python_utils import pip_install_requirements 8 | 9 | if __name__ == "__main__": 10 | pip_install_requirements() 11 | patch_transformers() 12 | model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 13 | cache_model(model_name) 14 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_Bart/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 8 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_Bart/requirements.txt: -------------------------------------------------------------------------------- 1 | sentencepiece 2 | datasets 3 | 
-------------------------------------------------------------------------------- /torchbenchmark/models/hf_Bert/__init__.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.tasks import NLP 2 | from torchbenchmark.util.framework.huggingface.model_factory import HuggingFaceModel 3 | 4 | 5 | class Model(HuggingFaceModel): 6 | task = NLP.LANGUAGE_MODELING 7 | DEFAULT_TRAIN_BSIZE = 4 8 | DEFAULT_EVAL_BSIZE = 1 9 | 10 | def __init__(self, test, device, batch_size=None, extra_args=[]): 11 | super().__init__( 12 | name="hf_Bert", 13 | test=test, 14 | device=device, 15 | batch_size=batch_size, 16 | extra_args=extra_args, 17 | ) 18 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_Bert/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.huggingface.patch_hf import ( 4 | cache_model, 5 | patch_transformers, 6 | ) 7 | from utils.python_utils import pip_install_requirements 8 | 9 | if __name__ == "__main__": 10 | pip_install_requirements() 11 | patch_transformers() 12 | model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 13 | cache_model(model_name) 14 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_Bert/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 8 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_Bert/requirements.txt: -------------------------------------------------------------------------------- 1 | sentencepiece 2 | datasets 3 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_Bert_large/__init__.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.tasks import NLP 2 | from torchbenchmark.util.framework.huggingface.model_factory import HuggingFaceModel 3 | 4 | 5 | class Model(HuggingFaceModel): 6 | task = NLP.LANGUAGE_MODELING 7 | DEFAULT_TRAIN_BSIZE = 4 8 | DEFAULT_EVAL_BSIZE = 1 9 | 10 | def __init__(self, test, device, batch_size=None, extra_args=[]): 11 | super().__init__( 12 | name="hf_Bert_large", 13 | test=test, 14 | device=device, 15 | batch_size=batch_size, 16 | extra_args=extra_args, 17 | ) 18 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_Bert_large/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.huggingface.patch_hf import ( 4 | cache_model, 5 | patch_transformers, 6 | ) 7 | from utils.python_utils import pip_install_requirements 8 | 9 | if __name__ == "__main__": 10 | pip_install_requirements() 11 | patch_transformers() 12 | model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 13 | cache_model(model_name) 14 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_Bert_large/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 4 4 | 
eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_Bert_large/requirements.txt: -------------------------------------------------------------------------------- 1 | sentencepiece 2 | datasets 3 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_BigBird/__init__.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.tasks import NLP 2 | from torchbenchmark.util.framework.huggingface.model_factory import HuggingFaceModel 3 | 4 | 5 | class Model(HuggingFaceModel): 6 | task = NLP.LANGUAGE_MODELING 7 | DEFAULT_TRAIN_BSIZE = 2 8 | DEFAULT_EVAL_BSIZE = 1 9 | 10 | def __init__(self, test, device, batch_size=None, extra_args=[]): 11 | super().__init__( 12 | name="hf_BigBird", 13 | test=test, 14 | device=device, 15 | batch_size=batch_size, 16 | extra_args=extra_args, 17 | ) 18 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_BigBird/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.huggingface.patch_hf import ( 4 | cache_model, 5 | patch_transformers, 6 | ) 7 | from utils.python_utils import pip_install_requirements 8 | 9 | if __name__ == "__main__": 10 | pip_install_requirements() 11 | patch_transformers() 12 | model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 13 | cache_model(model_name) 14 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_BigBird/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 4 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_BigBird/requirements.txt: -------------------------------------------------------------------------------- 1 | sentencepiece 2 | datasets 3 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_DistilBert/__init__.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.tasks import NLP 2 | from torchbenchmark.util.framework.huggingface.model_factory import HuggingFaceModel 3 | 4 | 5 | class Model(HuggingFaceModel): 6 | task = NLP.LANGUAGE_MODELING 7 | DEFAULT_TRAIN_BSIZE = 8 8 | DEFAULT_EVAL_BSIZE = 1 9 | 10 | def __init__(self, test, device, batch_size=None, extra_args=[]): 11 | super().__init__( 12 | name="hf_DistilBert", 13 | test=test, 14 | device=device, 15 | batch_size=batch_size, 16 | extra_args=extra_args, 17 | ) 18 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_DistilBert/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.huggingface.patch_hf import ( 4 | cache_model, 5 | patch_transformers, 6 | ) 7 | from utils.python_utils import pip_install_requirements 8 | 9 | if __name__ == "__main__": 10 | pip_install_requirements() 11 | patch_transformers() 
12 | model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 13 | cache_model(model_name) 14 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_DistilBert/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 16 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_DistilBert/requirements.txt: -------------------------------------------------------------------------------- 1 | sentencepiece 2 | datasets 3 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_GPT2/__init__.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.tasks import NLP 2 | from torchbenchmark.util.framework.huggingface.model_factory import HuggingFaceModel 3 | 4 | 5 | class Model(HuggingFaceModel): 6 | task = NLP.LANGUAGE_MODELING 7 | DEFAULT_TRAIN_BSIZE = 4 8 | DEFAULT_EVAL_BSIZE = 1 9 | 10 | def __init__(self, test, device, batch_size=None, extra_args=[]): 11 | super().__init__( 12 | name="hf_GPT2", 13 | test=test, 14 | device=device, 15 | batch_size=batch_size, 16 | extra_args=extra_args, 17 | ) 18 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_GPT2/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.huggingface.patch_hf import ( 4 | cache_model, 5 | patch_transformers, 6 | ) 7 | from utils.python_utils import pip_install_requirements 8 | 9 | if __name__ == "__main__": 10 | pip_install_requirements() 11 | patch_transformers() 12 | model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 13 | cache_model(model_name) 14 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_GPT2/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 16 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_GPT2/requirements.txt: -------------------------------------------------------------------------------- 1 | sentencepiece 2 | datasets 3 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_GPT2_large/__init__.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.tasks import NLP 2 | from torchbenchmark.util.framework.huggingface.model_factory import HuggingFaceModel 3 | 4 | 5 | class Model(HuggingFaceModel): 6 | task = NLP.LANGUAGE_MODELING 7 | DEFAULT_TRAIN_BSIZE = 4 8 | DEFAULT_EVAL_BSIZE = 1 9 | 10 | def __init__(self, test, device, batch_size=None, extra_args=[]): 11 | super().__init__( 12 | name="hf_GPT2_large", 13 | test=test, 14 | device=device, 15 | batch_size=batch_size, 16 | extra_args=extra_args, 17 | ) 18 | -------------------------------------------------------------------------------- 
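Each hf_* __init__.py above is the same thin wrapper: a Model subclass of HuggingFaceModel that pins the model name and default train/eval batch sizes and forwards test, device, batch_size, and extra_args to the shared factory. Purely as an illustration of that constructor signature (how the harness drives the resulting object is not shown in these files, and the "eval" test name is an assumption here):

from torchbenchmark.models.hf_GPT2 import Model

# Uses only the arguments visible in the __init__.py files above.
model = Model(test="eval", device="cuda", batch_size=1)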
/torchbenchmark/models/hf_GPT2_large/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.huggingface.patch_hf import ( 4 | cache_model, 5 | patch_transformers, 6 | ) 7 | from utils.python_utils import pip_install_requirements 8 | 9 | if __name__ == "__main__": 10 | pip_install_requirements() 11 | patch_transformers() 12 | model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 13 | cache_model(model_name) 14 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_GPT2_large/metadata.yaml: -------------------------------------------------------------------------------- 1 | eval_benchmark: false 2 | eval_deterministic: false 3 | eval_nograd: true 4 | train_benchmark: false 5 | train_deterministic: false 6 | not_implemented: 7 | # OOMs on torchbench CI 8 | - device: cuda 9 | # CPU OOM on torchbench CI 10 | - device: cpu 11 | test: train 12 | # CPU OOM on torchbench CI accuracy 13 | - device: cpu 14 | test: example 15 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_GPT2_large/requirements.txt: -------------------------------------------------------------------------------- 1 | sentencepiece 2 | datasets 3 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_Longformer/__init__.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.tasks import NLP 2 | from torchbenchmark.util.framework.huggingface.model_factory import HuggingFaceModel 3 | 4 | 5 | class Model(HuggingFaceModel): 6 | task = NLP.LANGUAGE_MODELING 7 | DEFAULT_TRAIN_BSIZE = 2 8 | DEFAULT_EVAL_BSIZE = 1 9 | 10 | def __init__(self, test, device, batch_size=None, extra_args=[]): 11 | super().__init__( 12 | name="hf_Longformer", 13 | test=test, 14 | device=device, 15 | batch_size=batch_size, 16 | extra_args=extra_args, 17 | ) 18 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_Longformer/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.huggingface.patch_hf import ( 4 | cache_model, 5 | patch_transformers, 6 | ) 7 | from utils.python_utils import pip_install_requirements 8 | 9 | if __name__ == "__main__": 10 | pip_install_requirements() 11 | patch_transformers() 12 | model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 13 | cache_model(model_name) 14 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_Longformer/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 4 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_Longformer/requirements.txt: -------------------------------------------------------------------------------- 1 | sentencepiece 2 | datasets 3 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_Reformer/__init__.py: -------------------------------------------------------------------------------- 1 | from 
torchbenchmark.tasks import NLP 2 | from torchbenchmark.util.framework.huggingface.model_factory import HuggingFaceModel 3 | 4 | 5 | class Model(HuggingFaceModel): 6 | task = NLP.LANGUAGE_MODELING 7 | DEFAULT_TRAIN_BSIZE = 8 8 | DEFAULT_EVAL_BSIZE = 1 9 | 10 | def __init__(self, test, device, batch_size=None, extra_args=[]): 11 | super().__init__( 12 | name="hf_Reformer", 13 | test=test, 14 | device=device, 15 | batch_size=batch_size, 16 | extra_args=extra_args, 17 | ) 18 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_Reformer/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.huggingface.patch_hf import ( 4 | cache_model, 5 | patch_transformers, 6 | ) 7 | from utils.python_utils import pip_install_requirements 8 | 9 | if __name__ == "__main__": 10 | pip_install_requirements() 11 | patch_transformers() 12 | model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 13 | cache_model(model_name) 14 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_Reformer/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 8 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_Reformer/requirements.txt: -------------------------------------------------------------------------------- 1 | sentencepiece 2 | datasets 3 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_Roberta_base/__init__.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.tasks import NLP 2 | from torchbenchmark.util.framework.huggingface.model_factory import HuggingFaceModel 3 | 4 | 5 | class Model(HuggingFaceModel): 6 | task = NLP.LANGUAGE_MODELING 7 | DEFAULT_TRAIN_BSIZE = 4 8 | DEFAULT_EVAL_BSIZE = 1 9 | 10 | def __init__(self, test, device, batch_size=None, extra_args=[]): 11 | super().__init__( 12 | name="hf_Roberta_base", 13 | test=test, 14 | device=device, 15 | batch_size=batch_size, 16 | extra_args=extra_args, 17 | ) 18 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_Roberta_base/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import sys 4 | 5 | from torchbenchmark.util.framework.huggingface.patch_hf import ( 6 | cache_model, 7 | patch_transformers, 8 | ) 9 | 10 | 11 | def pip_install_requirements(): 12 | subprocess.check_call( 13 | [sys.executable, "-m", "pip", "install", "-q", "-r", "requirements.txt"] 14 | ) 15 | 16 | 17 | if __name__ == "__main__": 18 | pip_install_requirements() 19 | patch_transformers() 20 | model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 21 | cache_model(model_name) 22 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_Roberta_base/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 8 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | 
eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_Roberta_base/requirements.txt: -------------------------------------------------------------------------------- 1 | sentencepiece 2 | datasets 3 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_T5/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.huggingface.patch_hf import ( 4 | cache_model, 5 | patch_transformers, 6 | ) 7 | from utils.python_utils import pip_install_requirements 8 | 9 | if __name__ == "__main__": 10 | pip_install_requirements() 11 | patch_transformers() 12 | model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 13 | cache_model(model_name) 14 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_T5/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 4 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | not_implemented: 8 | - device: cpu 9 | test: train 10 | - device: cuda 11 | test: train 12 | train_benchmark: false 13 | train_deterministic: false 14 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_T5/requirements.txt: -------------------------------------------------------------------------------- 1 | sentencepiece 2 | datasets 3 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_T5_base/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.huggingface.patch_hf import ( 4 | cache_model, 5 | patch_transformers, 6 | ) 7 | from utils.python_utils import pip_install_requirements 8 | 9 | if __name__ == "__main__": 10 | pip_install_requirements() 11 | patch_transformers() 12 | model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 13 | cache_model(model_name) 14 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_T5_base/metadata.yaml: -------------------------------------------------------------------------------- 1 | eval_benchmark: false 2 | eval_deterministic: false 3 | eval_nograd: true 4 | train_benchmark: false 5 | train_deterministic: false 6 | not_implemented: 7 | # disable train test because of CI infra capacity issue 8 | - device: cpu 9 | test: train 10 | - device: cuda 11 | test: train 12 | # CPU OOM on torchbench CI accuracy 13 | - device: cpu 14 | test: example 15 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_T5_base/requirements.txt: -------------------------------------------------------------------------------- 1 | sentencepiece 2 | datasets 3 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_T5_generate/__init__.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.util.framework.huggingface.model_factory import ( 2 | HuggingFaceGenerationModel, 3 | ) 4 | 5 | 6 | class Model(HuggingFaceGenerationModel): 7 | def __init__(self, test, device, 
batch_size=None, extra_args=[]): 8 | super().__init__( 9 | name="hf_T5_generate", 10 | test=test, 11 | device=device, 12 | batch_size=batch_size, 13 | extra_args=extra_args, 14 | ) 15 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_T5_generate/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 1 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_T5_generate/requirements.txt: -------------------------------------------------------------------------------- 1 | sentencepiece 2 | datasets 3 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_T5_large/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.huggingface.patch_hf import ( 4 | cache_model, 5 | patch_transformers, 6 | ) 7 | from utils.python_utils import pip_install_requirements 8 | 9 | if __name__ == "__main__": 10 | pip_install_requirements() 11 | patch_transformers() 12 | model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 13 | cache_model(model_name) 14 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_T5_large/metadata.yaml: -------------------------------------------------------------------------------- 1 | eval_benchmark: false 2 | eval_deterministic: false 3 | eval_nograd: true 4 | train_benchmark: false 5 | train_deterministic: false 6 | not_implemented: 7 | # disable train test because of CI infra capacity issue 8 | - device: cpu 9 | test: train 10 | - device: cuda 11 | test: train 12 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_T5_large/requirements.txt: -------------------------------------------------------------------------------- 1 | sentencepiece 2 | datasets 3 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_Whisper/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.huggingface.patch_hf import ( 4 | cache_model, 5 | patch_transformers, 6 | ) 7 | 8 | if __name__ == "__main__": 9 | patch_transformers() 10 | model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 11 | cache_model(model_name) 12 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_Whisper/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 8 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | not_implemented: 8 | - device: cpu 9 | - test: train 10 | train_benchmark: false 11 | train_deterministic: false -------------------------------------------------------------------------------- /torchbenchmark/models/hf_clip/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 32 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 
7 | train_benchmark: false 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_distil_whisper/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.huggingface.patch_hf import ( 4 | cache_model, 5 | patch_transformers, 6 | ) 7 | from utils.python_utils import pip_install_requirements 8 | 9 | if __name__ == "__main__": 10 | pip_install_requirements() 11 | patch_transformers() 12 | model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 13 | cache_model(model_name) 14 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_distil_whisper/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 16 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/hf_distil_whisper/requirements.txt: -------------------------------------------------------------------------------- 1 | sentencepiece 2 | datasets 3 | -------------------------------------------------------------------------------- /torchbenchmark/models/lennard_jones/install.py: -------------------------------------------------------------------------------- 1 | from utils.python_utils import pip_install_requirements 2 | 3 | if __name__ == "__main__": 4 | pip_install_requirements() 5 | -------------------------------------------------------------------------------- /torchbenchmark/models/lennard_jones/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 1000 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/lennard_jones/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/models/lennard_jones/requirements.txt -------------------------------------------------------------------------------- /torchbenchmark/models/llama/install.py: -------------------------------------------------------------------------------- 1 | from utils.python_utils import pip_install_requirements 2 | 3 | if __name__ == "__main__": 4 | pip_install_requirements() 5 | -------------------------------------------------------------------------------- /torchbenchmark/models/llama/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 32 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false -------------------------------------------------------------------------------- /torchbenchmark/models/llama/origin: -------------------------------------------------------------------------------- 1 | origin https://github.com/facebookresearch/llama -------------------------------------------------------------------------------- 
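The not_implemented entries seen throughout these metadata files combine an optional device with an optional test: an entry carrying only device: cpu skips every test on CPU, while an entry with both device: cpu and test: train skips only CPU training. A hedged sketch of that matching rule as the YAML reads (an assumption about the semantics, not the repository's actual check):

def is_skipped(not_implemented, device, test):
    """True if any entry matches; keys missing from an entry act as wildcards."""
    for entry in not_implemented or []:
        if entry.get("device", device) == device and entry.get("test", test) == test:
            return True
    return False

# is_skipped([{"device": "cpu", "test": "train"}], "cpu", "eval")  -> False
# is_skipped([{"device": "cpu"}], "cpu", "eval")                   -> True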
/torchbenchmark/models/llama/requirements.txt: -------------------------------------------------------------------------------- 1 | sentencepiece -------------------------------------------------------------------------------- /torchbenchmark/models/llama_v2_7b_16h/__init__.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.tasks import NLP 2 | from torchbenchmark.util.framework.huggingface.model_factory import ( 3 | HuggingFaceAuthMixin, 4 | HuggingFaceModel, 5 | ) 6 | 7 | 8 | class Model(HuggingFaceModel, HuggingFaceAuthMixin): 9 | task = NLP.LANGUAGE_MODELING 10 | DEFAULT_TRAIN_BSIZE = 1 11 | DEFAULT_EVAL_BSIZE = 1 12 | DEEPCOPY = False 13 | 14 | def __init__(self, test, device, batch_size=None, extra_args=[]): 15 | HuggingFaceAuthMixin.__init__(self) 16 | super().__init__( 17 | name="llama_v2_7b_16h", 18 | test=test, 19 | device=device, 20 | batch_size=batch_size, 21 | extra_args=extra_args, 22 | ) 23 | -------------------------------------------------------------------------------- /torchbenchmark/models/llama_v2_7b_16h/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.huggingface.patch_hf import cache_model 4 | 5 | if __name__ == "__main__": 6 | model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 7 | cache_model(model_name) 8 | -------------------------------------------------------------------------------- /torchbenchmark/models/llama_v2_7b_16h/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 1 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | not_implemented: 8 | - device: cpu 9 | - device: NVIDIA A10G 10 | train_benchmark: false 11 | train_deterministic: false 12 | -------------------------------------------------------------------------------- /torchbenchmark/models/llava/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.huggingface.patch_hf import ( 4 | cache_model, 5 | patch_transformers, 6 | ) 7 | from utils.python_utils import pip_install_requirements 8 | 9 | if __name__ == "__main__": 10 | pip_install_requirements() 11 | patch_transformers() 12 | model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 13 | cache_model(model_name) 14 | -------------------------------------------------------------------------------- /torchbenchmark/models/llava/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 1 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | not_implemented: 10 | - device: NVIDIA A10G 11 | # CPU OOM on the CI runner 12 | - device: cpu 13 | # accuracy test OOM on CUDA 14 | - device: cuda 15 | test: example 16 | -------------------------------------------------------------------------------- /torchbenchmark/models/llava/requirements.txt: -------------------------------------------------------------------------------- 1 | einops -------------------------------------------------------------------------------- /torchbenchmark/models/maml/install.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/models/maml/install.py -------------------------------------------------------------------------------- /torchbenchmark/models/maml/metadata.yaml: -------------------------------------------------------------------------------- 1 | eval_benchmark: false 2 | eval_deterministic: true 3 | eval_nograd: true 4 | train_benchmark: false 5 | train_deterministic: true -------------------------------------------------------------------------------- /torchbenchmark/models/maml/origin: -------------------------------------------------------------------------------- 1 | https://github.com/dragen1860/MAML-Pytorch 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/maml_omniglot/install.py: -------------------------------------------------------------------------------- 1 | from utils import s3_utils 2 | from utils.python_utils import pip_install_requirements 3 | 4 | if __name__ == "__main__": 5 | pip_install_requirements() 6 | s3_utils.checkout_s3_data("MODEL_PKLS", "maml_omniglot/batch.pt", decompress=False) 7 | -------------------------------------------------------------------------------- /torchbenchmark/models/maml_omniglot/metadata.yaml: -------------------------------------------------------------------------------- 1 | eval_benchmark: false 2 | eval_deterministic: false 3 | eval_nograd: true 4 | train_benchmark: false 5 | train_deterministic: false -------------------------------------------------------------------------------- /torchbenchmark/models/maml_omniglot/requirements.txt: -------------------------------------------------------------------------------- 1 | higher 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/microbench_unbacked_tolist_sum/metadata.yaml: -------------------------------------------------------------------------------- 1 | eval_benchmark: false 2 | eval_deterministic: false 3 | eval_nograd: true 4 | train_benchmark: false 5 | train_deterministic: false 6 | -------------------------------------------------------------------------------- /torchbenchmark/models/mnasnet1_0/__init__.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.tasks import COMPUTER_VISION 2 | from torchbenchmark.util.framework.vision.model_factory import TorchVisionModel 3 | from torchvision import models 4 | 5 | 6 | class Model(TorchVisionModel): 7 | task = COMPUTER_VISION.CLASSIFICATION 8 | DEFAULT_TRAIN_BSIZE = 32 9 | DEFAULT_EVAL_BSIZE = 32 10 | 11 | def __init__(self, test, device, batch_size=None, extra_args=[]): 12 | super().__init__( 13 | model_name="mnasnet1_0", 14 | test=test, 15 | device=device, 16 | batch_size=batch_size, 17 | weights=models.MNASNet1_0_Weights.IMAGENET1K_V1, 18 | extra_args=extra_args, 19 | ) 20 | -------------------------------------------------------------------------------- /torchbenchmark/models/mnasnet1_0/install.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/models/mnasnet1_0/install.py -------------------------------------------------------------------------------- /torchbenchmark/models/mnasnet1_0/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | 
eval_batch_size: 128 4 | eval_benchmark: true 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: true 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/mobilenet_v2/__init__.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.tasks import COMPUTER_VISION 2 | from torchbenchmark.util.framework.vision.model_factory import TorchVisionModel 3 | from torchvision import models 4 | 5 | 6 | class Model(TorchVisionModel): 7 | task = COMPUTER_VISION.CLASSIFICATION 8 | DEFAULT_TRAIN_BSIZE = 96 9 | DEFAULT_EVAL_BSIZE = 16 10 | 11 | def __init__(self, test, device, batch_size=None, extra_args=[]): 12 | super().__init__( 13 | model_name="mobilenet_v2", 14 | test=test, 15 | device=device, 16 | batch_size=batch_size, 17 | weights=models.MobileNet_V2_Weights.IMAGENET1K_V1, 18 | extra_args=extra_args, 19 | ) 20 | -------------------------------------------------------------------------------- /torchbenchmark/models/mobilenet_v2/install.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/models/mobilenet_v2/install.py -------------------------------------------------------------------------------- /torchbenchmark/models/mobilenet_v2/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 128 4 | eval_benchmark: true 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: true 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/mobilenet_v2_quantized_qat/install.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/models/mobilenet_v2_quantized_qat/install.py -------------------------------------------------------------------------------- /torchbenchmark/models/mobilenet_v2_quantized_qat/metadata.yaml: -------------------------------------------------------------------------------- 1 | eval_benchmark: true 2 | eval_deterministic: false 3 | eval_nograd: true 4 | train_benchmark: false 5 | train_deterministic: false 6 | not_implemented: 7 | # Disable CUDA train test because of insufficient GPU memory on CI machine 8 | - test: train 9 | device: cuda -------------------------------------------------------------------------------- /torchbenchmark/models/mobilenet_v3_large/__init__.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.tasks import COMPUTER_VISION 2 | from torchbenchmark.util.framework.vision.model_factory import TorchVisionModel 3 | from torchvision import models 4 | 5 | 6 | class Model(TorchVisionModel): 7 | task = COMPUTER_VISION.CLASSIFICATION 8 | DEFAULT_TRAIN_BSIZE = 32 9 | DEFAULT_EVAL_BSIZE = 32 10 | 11 | def __init__(self, test, device, batch_size=None, extra_args=[]): 12 | super().__init__( 13 | model_name="mobilenet_v3_large", 14 | test=test, 15 | device=device, 16 | batch_size=batch_size, 17 | weights=models.MobileNet_V3_Large_Weights.IMAGENET1K_V1, 18 | extra_args=extra_args, 19 | ) 20 | -------------------------------------------------------------------------------- 
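The TorchVision wrappers above (mnasnet1_0, mobilenet_v2, mobilenet_v3_large) differ only in the model_name, the default batch sizes, and the pinned pretrained-weights enum they pass to TorchVisionModel. Outside the harness, the same network those arguments select can be built directly with torchvision's public API; the snippet below is only an illustration of what the wrapper configures, and mirrors the eval_nograd: true setting in the metadata:

import torch
from torchvision import models

# The same backbone and pretrained weights the mobilenet_v3_large wrapper selects.
net = models.mobilenet_v3_large(weights=models.MobileNet_V3_Large_Weights.IMAGENET1K_V1)
net.eval()
with torch.no_grad():  # consistent with eval_nograd: true in the metadata above
    out = net(torch.randn(1, 3, 224, 224))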
/torchbenchmark/models/mobilenet_v3_large/install.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/models/mobilenet_v3_large/install.py -------------------------------------------------------------------------------- /torchbenchmark/models/mobilenet_v3_large/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 128 4 | eval_benchmark: true 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: true 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/moco/detection/configs/Base-RCNN-C4-BN.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | RPN: 4 | PRE_NMS_TOPK_TEST: 6000 5 | POST_NMS_TOPK_TEST: 1000 6 | ROI_HEADS: 7 | NAME: "Res5ROIHeadsExtraNorm" 8 | BACKBONE: 9 | FREEZE_AT: 0 10 | RESNETS: 11 | NORM: "SyncBN" 12 | TEST: 13 | PRECISE_BN: 14 | ENABLED: True 15 | SOLVER: 16 | IMS_PER_BATCH: 16 17 | BASE_LR: 0.02 18 | -------------------------------------------------------------------------------- /torchbenchmark/models/moco/detection/configs/coco_R_50_C4_2x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-RCNN-C4-BN.yaml" 2 | MODEL: 3 | MASK_ON: True 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | INPUT: 6 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 7 | MIN_SIZE_TEST: 800 8 | DATASETS: 9 | TRAIN: ("coco_2017_train",) 10 | TEST: ("coco_2017_val",) 11 | SOLVER: 12 | STEPS: (120000, 160000) 13 | MAX_ITER: 180000 14 | -------------------------------------------------------------------------------- /torchbenchmark/models/moco/detection/configs/coco_R_50_C4_2x_moco.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "coco_R_50_C4_2x.yaml" 2 | MODEL: 3 | PIXEL_MEAN: [123.675, 116.280, 103.530] 4 | PIXEL_STD: [58.395, 57.120, 57.375] 5 | WEIGHTS: "See Instructions" 6 | RESNETS: 7 | STRIDE_IN_1X1: False 8 | INPUT: 9 | FORMAT: "RGB" 10 | -------------------------------------------------------------------------------- /torchbenchmark/models/moco/detection/configs/pascal_voc_R_50_C4_24k.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-RCNN-C4-BN.yaml" 2 | MODEL: 3 | MASK_ON: False 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | ROI_HEADS: 6 | NUM_CLASSES: 20 7 | INPUT: 8 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 9 | MIN_SIZE_TEST: 800 10 | DATASETS: 11 | TRAIN: ('voc_2007_trainval', 'voc_2012_trainval') 12 | TEST: ('voc_2007_test',) 13 | SOLVER: 14 | STEPS: (18000, 22000) 15 | MAX_ITER: 24000 16 | WARMUP_ITERS: 100 17 | -------------------------------------------------------------------------------- /torchbenchmark/models/moco/detection/configs/pascal_voc_R_50_C4_24k_moco.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "pascal_voc_R_50_C4_24k.yaml" 2 | MODEL: 3 | PIXEL_MEAN: [123.675, 116.280, 103.530] 4 | PIXEL_STD: [58.395, 57.120, 57.375] 5 | WEIGHTS: "See Instructions" 6 | RESNETS: 7 | STRIDE_IN_1X1: False 8 | INPUT: 9 | FORMAT: "RGB" 10 | 
-------------------------------------------------------------------------------- /torchbenchmark/models/moco/install.py: -------------------------------------------------------------------------------- 1 | # only needs torch and torchvision 2 | 3 | if __name__ == "__main__": 4 | pass 5 | -------------------------------------------------------------------------------- /torchbenchmark/models/moco/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 64 4 | eval_benchmark: true 5 | eval_deterministic: false 6 | eval_nograd: true 7 | not_implemented: 8 | - device: cpu 9 | train_benchmark: true 10 | train_deterministic: false 11 | -------------------------------------------------------------------------------- /torchbenchmark/models/moco/moco/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/moco/origin: -------------------------------------------------------------------------------- 1 | origin https://github.com/nickgg/moco.git 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/moco/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/models/moco/requirements.txt -------------------------------------------------------------------------------- /torchbenchmark/models/moco/run.sh: -------------------------------------------------------------------------------- 1 | debug_arg="" 2 | if [ $# -gt 1 ]; then 3 | if [ "$1" == "--debug" ]; then 4 | debug_arg="-d $2" 5 | fi 6 | fi 7 | CUDA_VISIBLE_DEVICES=0 python main_moco.py -a resnet50 --lr 0.03 --batch-size 32 --dist-url 'tcp://localhost:10001' --multiprocessing-distributed --world-size 1 --rank 0 --fake_data --epochs 2 --seed 1058467 $debug_arg dummy 8 | -------------------------------------------------------------------------------- /torchbenchmark/models/moondream/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torchbenchmark.util.framework.huggingface.patch_hf import ( 4 | cache_model, 5 | patch_transformers, 6 | ) 7 | from utils.python_utils import pip_install_requirements 8 | 9 | 10 | if __name__ == "__main__": 11 | pip_install_requirements() 12 | patch_transformers() 13 | model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__))) 14 | cache_model(model_name) 15 | -------------------------------------------------------------------------------- /torchbenchmark/models/moondream/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 1 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | not_implemented: 10 | - device: NVIDIA A10G 11 | -------------------------------------------------------------------------------- /torchbenchmark/models/moondream/requirements.txt: -------------------------------------------------------------------------------- 1 | einops -------------------------------------------------------------------------------- 
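moondream's install.py above shows the recurring HuggingFace install pattern in this tree: install the per-model requirements, patch transformers, derive the model name from the directory name, and pre-download the weights into the cache. A hedged sketch of the same pattern for a hypothetical new HF-backed model directory; the helper imports are taken verbatim from the file above, while "my_hf_model" is a made-up directory name used only for illustration:

# Sketch of an install.py for a hypothetical torchbenchmark/models/my_hf_model/ directory,
# mirroring moondream's install.py shown above.
import os

from torchbenchmark.util.framework.huggingface.patch_hf import (
    cache_model,
    patch_transformers,
)
from utils.python_utils import pip_install_requirements

if __name__ == "__main__":
    pip_install_requirements()  # install requirements.txt next to this file
    patch_transformers()        # apply TorchBench patches to transformers
    # By convention the model name is the directory name, e.g. "my_hf_model".
    model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__)))
    cache_model(model_name)     # pre-download weights so benchmark runs stay offline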
/torchbenchmark/models/nanogpt/metadata.yaml: -------------------------------------------------------------------------------- 1 | eval_benchmark: false 2 | eval_deterministic: false 3 | eval_nograd: true 4 | train_benchmark: false 5 | train_deterministic: false 6 | -------------------------------------------------------------------------------- /torchbenchmark/models/nanogpt/origin: -------------------------------------------------------------------------------- 1 | https://github.com/karpathy/nanoGPT -------------------------------------------------------------------------------- /torchbenchmark/models/nvidia_deeprecommender/install.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import sys 3 | 4 | from utils.python_utils import pip_install_requirements 5 | 6 | if __name__ == "__main__": 7 | pip_install_requirements() 8 | -------------------------------------------------------------------------------- /torchbenchmark/models/nvidia_deeprecommender/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 512 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/nvidia_deeprecommender/origin: -------------------------------------------------------------------------------- 1 | origin https://github.com/nvidia/deeprecommender 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/nvidia_deeprecommender/reco_encoder/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017 NVIDIA Corporation 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/nvidia_deeprecommender/reco_encoder/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017 NVIDIA Corporation 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/nvidia_deeprecommender/reco_encoder/model/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017 NVIDIA Corporation 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/nvidia_deeprecommender/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/opacus_cifar10/install.py: -------------------------------------------------------------------------------- 1 | from utils.python_utils import pip_install_requirements 2 | 3 | if __name__ == "__main__": 4 | pip_install_requirements() 5 | -------------------------------------------------------------------------------- /torchbenchmark/models/opacus_cifar10/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 512 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- 
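The metadata.yaml files in this directory share a small schema: top-level boolean flags (eval_benchmark, eval_deterministic, eval_nograd, train_benchmark, train_deterministic), an optional devices section carrying a per-GPU eval_batch_size, and an optional not_implemented list of device/test pairs. A minimal sketch of reading one of these files with PyYAML, assuming only that schema; the loader below is illustrative and is not a TorchBench API:

# Illustrative only: inspect a model's metadata.yaml using the schema visible above.
import yaml

with open("torchbenchmark/models/opacus_cifar10/metadata.yaml") as f:
    meta = yaml.safe_load(f)

# Per-device overrides, e.g. the A100 eval batch size, if present.
a100 = meta.get("devices", {}).get("NVIDIA A100-SXM4-40GB", {})
print("A100 eval batch size:", a100.get("eval_batch_size"))

# Device/test combinations listed as not implemented should be skipped.
for entry in meta.get("not_implemented", []):
    print("skip:", entry.get("device"), entry.get("test", "all tests"))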
/torchbenchmark/models/opacus_cifar10/requirements.txt: -------------------------------------------------------------------------------- 1 | git+https://github.com/pytorch/functorch.git 2 | # must include the fix https://github.com/pytorch/opacus/pull/426 3 | # Pinning to 1.5.3. Remove once is resolved https://github.com/pytorch/pytorch/issues/154446 4 | opacus>=1.1.2 5 | -------------------------------------------------------------------------------- /torchbenchmark/models/phlippe_densenet/install.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/models/phlippe_densenet/install.py -------------------------------------------------------------------------------- /torchbenchmark/models/phlippe_densenet/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 128 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | not_implemented: 8 | - device: cuda 9 | - device: cpu 10 | train_benchmark: false 11 | train_deterministic: false 12 | -------------------------------------------------------------------------------- /torchbenchmark/models/phlippe_resnet/install.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/models/phlippe_resnet/install.py -------------------------------------------------------------------------------- /torchbenchmark/models/phlippe_resnet/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 256 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/pyhpc_equation_of_state/install.py: -------------------------------------------------------------------------------- 1 | if __name__ == "__main__": 2 | pass 3 | -------------------------------------------------------------------------------- /torchbenchmark/models/pyhpc_equation_of_state/metadata.yaml: -------------------------------------------------------------------------------- 1 | eval_benchmark: false 2 | eval_deterministic: false 3 | eval_nograd: true 4 | train_benchmark: true 5 | train_deterministic: false 6 | -------------------------------------------------------------------------------- /torchbenchmark/models/pyhpc_equation_of_state/origin: -------------------------------------------------------------------------------- 1 | https://github.com/dionhaefner/pyhpc-benchmarks+650ecc650e394df829944ffcf09e9d646ec69691 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/pyhpc_isoneutral_mixing/install.py: -------------------------------------------------------------------------------- 1 | if __name__ == "__main__": 2 | pass 3 | -------------------------------------------------------------------------------- /torchbenchmark/models/pyhpc_isoneutral_mixing/metadata.yaml: -------------------------------------------------------------------------------- 1 | eval_benchmark: false 2 | eval_deterministic: false 3 | eval_nograd: true 4 | train_benchmark: true 5 | train_deterministic: false 
6 | -------------------------------------------------------------------------------- /torchbenchmark/models/pyhpc_isoneutral_mixing/origin: -------------------------------------------------------------------------------- 1 | https://github.com/dionhaefner/pyhpc-benchmarks+650ecc650e394df829944ffcf09e9d646ec69691 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/pyhpc_turbulent_kinetic_energy/install.py: -------------------------------------------------------------------------------- 1 | if __name__ == "__main__": 2 | pass 3 | -------------------------------------------------------------------------------- /torchbenchmark/models/pyhpc_turbulent_kinetic_energy/metadata.yaml: -------------------------------------------------------------------------------- 1 | eval_benchmark: false 2 | eval_deterministic: false 3 | eval_nograd: true 4 | train_benchmark: true 5 | train_deterministic: false 6 | -------------------------------------------------------------------------------- /torchbenchmark/models/pyhpc_turbulent_kinetic_energy/origin: -------------------------------------------------------------------------------- 1 | https://github.com/dionhaefner/pyhpc-benchmarks+650ecc650e394df829944ffcf09e9d646ec69691 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/docs/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:10.1-base 2 | 3 | RUN apt update && apt install -y wget unzip curl bzip2 git 4 | RUN curl -LO http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh 5 | RUN bash Miniconda3-latest-Linux-x86_64.sh -p /miniconda -b 6 | RUN rm Miniconda3-latest-Linux-x86_64.sh 7 | ENV PATH=/miniconda/bin:${PATH} 8 | RUN conda update -y conda 9 | 10 | RUN conda install -y pytorch torchvision -c pytorch 11 | RUN mkdir /workspace/ && cd /workspace/ && git clone https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix.git && cd pytorch-CycleGAN-and-pix2pix && pip install -r requirements.txt 12 | 13 | WORKDIR /workspace 14 | -------------------------------------------------------------------------------- /torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/environment.yml: -------------------------------------------------------------------------------- 1 | name: pytorch-CycleGAN-and-pix2pix 2 | channels: 3 | - peterjc123 4 | - defaults 5 | dependencies: 6 | - python=3.5.5 7 | - pytorch=0.4.1 8 | - scipy 9 | - pip: 10 | - dominate==2.3.1 11 | - git+https://github.com/pytorch/vision.git 12 | - Pillow==5.0.0 13 | - numpy==1.14.1 14 | - visdom==0.1.7 15 | -------------------------------------------------------------------------------- /torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/example_input.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/example_input.pt -------------------------------------------------------------------------------- /torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/install.py: -------------------------------------------------------------------------------- 1 | from utils import s3_utils 2 | from utils.python_utils import pip_install_requirements 3 | 4 | if __name__ == "__main__": 5 | s3_utils.checkout_s3_data( 6 | "INPUT_TARBALLS", "pytorch_CycleGAN_and_pix2pix_inputs.tar.gz", decompress=True 7 | 
) 8 | pip_install_requirements() 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/metadata.yaml: -------------------------------------------------------------------------------- 1 | eval_benchmark: true 2 | eval_deterministic: true 3 | eval_nograd: true 4 | train_benchmark: true 5 | train_deterministic: true 6 | not_implemented: 7 | # Disabled due to excessively slow runtime - see GH Issue #100 8 | - device: cpu -------------------------------------------------------------------------------- /torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/options/__init__.py: -------------------------------------------------------------------------------- 1 | """This package options includes option modules: training options, test options, and basic options (used in both training and test).""" 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/origin: -------------------------------------------------------------------------------- 1 | origin https://github.com/zdevito/pytorch-CycleGAN-and-pix2pix 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/requirements.txt: -------------------------------------------------------------------------------- 1 | dominate>=2.3.1 2 | visdom>=0.1.8.3 3 | -------------------------------------------------------------------------------- /torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/scripts/conda_deps.sh: -------------------------------------------------------------------------------- 1 | set -ex 2 | conda install numpy pyyaml mkl mkl-include setuptools cmake cffi typing 3 | conda install pytorch torchvision -c pytorch # add cuda90 if CUDA 9 4 | conda install visdom dominate -c conda-forge # install visdom and dominate 5 | -------------------------------------------------------------------------------- /torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/scripts/download_cyclegan_model.sh: -------------------------------------------------------------------------------- 1 | FILE=$1 2 | 3 | echo "Note: available models are apple2orange, orange2apple, summer2winter_yosemite, winter2summer_yosemite, horse2zebra, zebra2horse, monet2photo, style_monet, style_cezanne, style_ukiyoe, style_vangogh, sat2map, map2sat, cityscapes_photo2label, cityscapes_label2photo, facades_photo2label, facades_label2photo, iphone2dslr_flower" 4 | 5 | echo "Specified [$FILE]" 6 | 7 | mkdir -p ./checkpoints/${FILE}_pretrained 8 | MODEL_FILE=./checkpoints/${FILE}_pretrained/latest_net_G.pth 9 | URL=http://efrosgans.eecs.berkeley.edu/cyclegan/pretrained_models/$FILE.pth 10 | 11 | wget -N $URL -O $MODEL_FILE 12 | -------------------------------------------------------------------------------- /torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/scripts/download_pix2pix_model.sh: -------------------------------------------------------------------------------- 1 | FILE=$1 2 | 3 | echo "Note: available models are edges2shoes, sat2map, map2sat, facades_label2photo, and day2night" 4 | echo "Specified [$FILE]" 5 | 6 | mkdir -p ./checkpoints/${FILE}_pretrained 7 | MODEL_FILE=./checkpoints/${FILE}_pretrained/latest_net_G.pth 8 | URL=http://efrosgans.eecs.berkeley.edu/pix2pix/models-pytorch/$FILE.pth 9 | 10 | wget -N $URL -O $MODEL_FILE 11 | -------------------------------------------------------------------------------- 
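The two download helpers above share the same shape: create ./checkpoints/<name>_pretrained and fetch <name>.pth from the efrosgans server into latest_net_G.pth. A rough Python equivalent, purely for illustration under those assumptions; the repo itself uses the shell scripts:

# Rough Python counterpart of download_cyclegan_model.sh / download_pix2pix_model.sh.
import os
import urllib.request

def download_pretrained(name: str, kind: str = "cyclegan") -> str:
    base = {
        "cyclegan": "http://efrosgans.eecs.berkeley.edu/cyclegan/pretrained_models",
        "pix2pix": "http://efrosgans.eecs.berkeley.edu/pix2pix/models-pytorch",
    }[kind]
    ckpt_dir = f"./checkpoints/{name}_pretrained"
    os.makedirs(ckpt_dir, exist_ok=True)          # mkdir -p ./checkpoints/<name>_pretrained
    dest = os.path.join(ckpt_dir, "latest_net_G.pth")
    urllib.request.urlretrieve(f"{base}/{name}.pth", dest)  # wget $URL -O $MODEL_FILE
    return dest

# e.g. download_pretrained("horse2zebra", kind="cyclegan")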
/torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/scripts/eval_cityscapes/download_fcn8s.sh: -------------------------------------------------------------------------------- 1 | URL=http://people.eecs.berkeley.edu/~tinghuiz/projects/pix2pix/fcn-8s-cityscapes/fcn-8s-cityscapes.caffemodel 2 | OUTPUT_FILE=./scripts/eval_cityscapes/caffemodel/fcn-8s-cityscapes.caffemodel 3 | wget -N $URL -O $OUTPUT_FILE 4 | -------------------------------------------------------------------------------- /torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/scripts/install_deps.sh: -------------------------------------------------------------------------------- 1 | set -ex 2 | pip install visdom 3 | pip install dominate 4 | -------------------------------------------------------------------------------- /torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/scripts/test_colorization.sh: -------------------------------------------------------------------------------- 1 | set -ex 2 | python test.py --dataroot ./datasets/colorization --name color_pix2pix --model colorization 3 | -------------------------------------------------------------------------------- /torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/scripts/test_cyclegan.sh: -------------------------------------------------------------------------------- 1 | set -ex 2 | python test.py --dataroot ./datasets/maps --name maps_cyclegan --model cycle_gan --phase test --no_dropout 3 | -------------------------------------------------------------------------------- /torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/scripts/test_pix2pix.sh: -------------------------------------------------------------------------------- 1 | set -ex 2 | python test.py --dataroot ./datasets/facades --name facades_pix2pix --model pix2pix --netG unet_256 --direction BtoA --dataset_mode aligned --norm batch 3 | -------------------------------------------------------------------------------- /torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/scripts/test_single.sh: -------------------------------------------------------------------------------- 1 | set -ex 2 | python test.py --dataroot ./datasets/facades/testB/ --name facades_pix2pix --model test --netG unet_256 --direction BtoA --dataset_mode single --norm batch 3 | -------------------------------------------------------------------------------- /torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/scripts/train_colorization.sh: -------------------------------------------------------------------------------- 1 | set -ex 2 | python train.py --dataroot ./datasets/colorization --name color_pix2pix --model colorization 3 | -------------------------------------------------------------------------------- /torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/scripts/train_cyclegan.sh: -------------------------------------------------------------------------------- 1 | set -ex 2 | python train.py --dataroot ./datasets/maps --name maps_cyclegan --model cycle_gan --pool_size 50 --no_dropout 3 | -------------------------------------------------------------------------------- /torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/scripts/train_pix2pix.sh: -------------------------------------------------------------------------------- 1 | set -ex 2 | python train.py --dataroot ./datasets/facades --name facades_pix2pix --model pix2pix --netG unet_256 --direction BtoA --lambda_L1 100 --dataset_mode aligned --norm batch --pool_size 0 3 | -------------------------------------------------------------------------------- 
/torchbenchmark/models/pytorch_CycleGAN_and_pix2pix/util/__init__.py: -------------------------------------------------------------------------------- 1 | """This package includes a miscellaneous collection of useful helper functions.""" 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/pytorch_stargan/install.py: -------------------------------------------------------------------------------- 1 | from utils import s3_utils 2 | from utils.python_utils import pip_install_requirements 3 | 4 | if __name__ == "__main__": 5 | s3_utils.checkout_s3_data( 6 | "INPUT_TARBALLS", "pytorch_stargan_inputs.tar.gz", decompress=True 7 | ) 8 | pip_install_requirements() 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/pytorch_stargan/logger.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | class Logger: 5 | """Tensorboard logger.""" 6 | 7 | def __init__(self, log_dir): 8 | """Initialize summary writer.""" 9 | self.writer = tf.summary.create_file_writer(log_dir) 10 | 11 | def scalar_summary(self, tag, value, step): 12 | """Add scalar summary.""" 13 | with self.writer.as_default(): 14 | tf.summary.scalar(tag, value, step=step) 15 | -------------------------------------------------------------------------------- /torchbenchmark/models/pytorch_stargan/metadata.yaml: -------------------------------------------------------------------------------- 1 | eval_benchmark: true 2 | eval_deterministic: false 3 | eval_nograd: true 4 | train_benchmark: true 5 | train_deterministic: false 6 | -------------------------------------------------------------------------------- /torchbenchmark/models/pytorch_stargan/requirements.txt: -------------------------------------------------------------------------------- 1 | # Nothing else aside from PyTorch! 
2 | -------------------------------------------------------------------------------- /torchbenchmark/models/pytorch_stargan/run.sh: -------------------------------------------------------------------------------- 1 | debug_arg="" 2 | if [ $# -gt 1 ]; then 3 | if [ "$1" == "--debug" ]; then 4 | debug_arg="--debug $2" 5 | fi 6 | fi 7 | python main.py --mode train --dataset CelebA --image_size 128 --c_dim 2 --sample_dir stargan_celeba/samples --log_dir stargan_celeba/logs --model_save_dir stargan_celeba/models --result_dir stargan_celeba/results --selected_attrs Male Young --use_tensorboard False --num_iters 30 --should_script True --deterministic True $debug_arg 8 | -------------------------------------------------------------------------------- /torchbenchmark/models/pytorch_unet/install.py: -------------------------------------------------------------------------------- 1 | from utils.python_utils import pip_install_requirements 2 | 3 | if __name__ == "__main__": 4 | pip_install_requirements(requirements_txt="pytorch_unet/requirements.txt") 5 | -------------------------------------------------------------------------------- /torchbenchmark/models/pytorch_unet/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 4 4 | eval_benchmark: false 5 | eval_deterministic: true 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/pytorch_unet/origin: -------------------------------------------------------------------------------- 1 | origin https://github.com/milesial/Pytorch-UNet.git -------------------------------------------------------------------------------- /torchbenchmark/models/pytorch_unet/pytorch_unet/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvcr.io/nvidia/pytorch:21.06-py3 2 | 3 | RUN rm -rf /workspace/* 4 | WORKDIR /workspace/unet 5 | 6 | ADD requirements.txt . 7 | RUN pip install --no-cache-dir --upgrade --pre pip 8 | RUN pip install --no-cache-dir -r requirements.txt 9 | ADD . . 10 | -------------------------------------------------------------------------------- /torchbenchmark/models/pytorch_unet/pytorch_unet/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/models/pytorch_unet/pytorch_unet/__init__.py -------------------------------------------------------------------------------- /torchbenchmark/models/pytorch_unet/pytorch_unet/hubconf.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from unet import UNet as _UNet 3 | 4 | 5 | def unet_carvana(pretrained=False): 6 | """ 7 | UNet model trained on the Carvana dataset ( https://www.kaggle.com/c/carvana-image-masking-challenge/data ). 8 | Set the scale to 0.5 (50%) when predicting. 
9 | """ 10 | net = _UNet(n_channels=3, n_classes=2, bilinear=True) 11 | if pretrained: 12 | checkpoint = "https://github.com/milesial/Pytorch-UNet/releases/download/v2.0/unet_carvana_scale0.5_epoch1.pth" 13 | net.load_state_dict( 14 | torch.hub.load_state_dict_from_url(checkpoint, progress=True) 15 | ) 16 | 17 | return net 18 | -------------------------------------------------------------------------------- /torchbenchmark/models/pytorch_unet/pytorch_unet/requirements.txt: -------------------------------------------------------------------------------- 1 | matplotlib 2 | numpy 3 | Pillow 4 | tqdm 5 | wandb 6 | -------------------------------------------------------------------------------- /torchbenchmark/models/pytorch_unet/pytorch_unet/scripts/download_data.sh: -------------------------------------------------------------------------------- 1 | echo -n "Kaggle username: " 2 | read USERNAME 3 | echo "" 4 | echo -n "Kaggle API key: " 5 | read APIKEY 6 | 7 | pip install kaggle --upgrade 8 | mkdir -p ~/.kaggle 9 | echo "{\"username\":\"$USERNAME\",\"key\":\"$APIKEY\"}" > ~/.kaggle/kaggle.json 10 | 11 | kaggle competitions download -c carvana-image-masking-challenge -f train_hq.zip 12 | unzip train_hq.zip 13 | mv train_hq/* data/imgs/ 14 | rm -d train_hq 15 | rm train_hq.zip 16 | 17 | kaggle competitions download -c carvana-image-masking-challenge -f train_masks.zip 18 | unzip train_masks.zip 19 | mv train_masks/* data/masks/ 20 | rm -d train_masks 21 | rm train_masks.zip -------------------------------------------------------------------------------- /torchbenchmark/models/pytorch_unet/pytorch_unet/unet/__init__.py: -------------------------------------------------------------------------------- 1 | from .unet_model import UNet 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/pytorch_unet/pytorch_unet/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/models/pytorch_unet/pytorch_unet/utils/__init__.py -------------------------------------------------------------------------------- /torchbenchmark/models/pytorch_unet/pytorch_unet/utils/utils.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | 3 | 4 | def plot_img_and_mask(img, mask): 5 | classes = mask.shape[0] if len(mask.shape) > 2 else 1 6 | fig, ax = plt.subplots(1, classes + 1) 7 | ax[0].set_title("Input image") 8 | ax[0].imshow(img) 9 | if classes > 1: 10 | for i in range(classes): 11 | ax[i + 1].set_title(f"Output mask (class {i + 1})") 12 | ax[i + 1].imshow(mask[:, :, i]) 13 | else: 14 | ax[1].set_title(f"Output mask") 15 | ax[1].imshow(mask) 16 | plt.xticks([]), plt.yticks([]) 17 | plt.show() 18 | -------------------------------------------------------------------------------- /torchbenchmark/models/resnet152/__init__.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.tasks import COMPUTER_VISION 2 | from torchbenchmark.util.framework.vision.model_factory import TorchVisionModel 3 | from torchvision import models 4 | 5 | 6 | class Model(TorchVisionModel): 7 | task = COMPUTER_VISION.CLASSIFICATION 8 | DEFAULT_TRAIN_BSIZE = 32 9 | DEFAULT_EVAL_BSIZE = 32 10 | 11 | def __init__(self, test, device, batch_size=None, extra_args=[]): 12 | super().__init__( 13 | model_name="resnet152", 14 | test=test, 
15 | device=device, 16 | batch_size=batch_size, 17 | weights=models.ResNet152_Weights.IMAGENET1K_V1, 18 | extra_args=extra_args, 19 | ) 20 | -------------------------------------------------------------------------------- /torchbenchmark/models/resnet152/install.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/models/resnet152/install.py -------------------------------------------------------------------------------- /torchbenchmark/models/resnet152/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 64 4 | eval_benchmark: true 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: true 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/resnet18/__init__.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.tasks import COMPUTER_VISION 2 | from torchbenchmark.util.framework.vision.model_factory import TorchVisionModel 3 | from torchvision import models 4 | 5 | 6 | class Model(TorchVisionModel): 7 | task = COMPUTER_VISION.CLASSIFICATION 8 | DEFAULT_TRAIN_BSIZE = 16 9 | DEFAULT_EVAL_BSIZE = 8 10 | 11 | def __init__(self, test, device, batch_size=None, extra_args=[]): 12 | super().__init__( 13 | model_name="resnet18", 14 | test=test, 15 | device=device, 16 | batch_size=batch_size, 17 | weights=models.ResNet18_Weights.IMAGENET1K_V1, 18 | extra_args=extra_args, 19 | ) 20 | -------------------------------------------------------------------------------- /torchbenchmark/models/resnet18/install.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/models/resnet18/install.py -------------------------------------------------------------------------------- /torchbenchmark/models/resnet18/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 256 4 | eval_benchmark: true 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: true 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/resnet50/__init__.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.tasks import COMPUTER_VISION 2 | from torchbenchmark.util.framework.vision.model_factory import TorchVisionModel 3 | from torchvision import models 4 | 5 | 6 | class Model(TorchVisionModel): 7 | task = COMPUTER_VISION.CLASSIFICATION 8 | DEFAULT_TRAIN_BSIZE = 32 9 | DEFAULT_EVAL_BSIZE = 32 10 | 11 | def __init__(self, test, device, batch_size=None, extra_args=[]): 12 | super().__init__( 13 | model_name="resnet50", 14 | test=test, 15 | device=device, 16 | batch_size=batch_size, 17 | weights=models.ResNet50_Weights.IMAGENET1K_V1, 18 | extra_args=extra_args, 19 | ) 20 | -------------------------------------------------------------------------------- /torchbenchmark/models/resnet50/install.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/models/resnet50/install.py -------------------------------------------------------------------------------- /torchbenchmark/models/resnet50/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 64 4 | eval_benchmark: true 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: true 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/resnet50_quantized_qat/install.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/models/resnet50_quantized_qat/install.py -------------------------------------------------------------------------------- /torchbenchmark/models/resnet50_quantized_qat/metadata.yaml: -------------------------------------------------------------------------------- 1 | eval_benchmark: true 2 | eval_deterministic: false 3 | eval_nograd: true 4 | train_benchmark: false 5 | train_deterministic: false 6 | -------------------------------------------------------------------------------- /torchbenchmark/models/resnext50_32x4d/__init__.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.tasks import COMPUTER_VISION 2 | from torchbenchmark.util.framework.vision.model_factory import TorchVisionModel 3 | from torchvision import models 4 | 5 | 6 | class Model(TorchVisionModel): 7 | task = COMPUTER_VISION.CLASSIFICATION 8 | DEFAULT_TRAIN_BSIZE = 8 9 | DEFAULT_EVAL_BSIZE = 8 10 | 11 | def __init__(self, test, device, batch_size=None, extra_args=[]): 12 | super().__init__( 13 | model_name="resnext50_32x4d", 14 | test=test, 15 | device=device, 16 | batch_size=batch_size, 17 | weights=models.ResNeXt50_32X4D_Weights.IMAGENET1K_V1, 18 | extra_args=extra_args, 19 | ) 20 | -------------------------------------------------------------------------------- /torchbenchmark/models/resnext50_32x4d/install.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/models/resnext50_32x4d/install.py -------------------------------------------------------------------------------- /torchbenchmark/models/resnext50_32x4d/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 64 4 | eval_benchmark: true 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: true 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/sam/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 32 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false -------------------------------------------------------------------------------- /torchbenchmark/models/sam/origin: -------------------------------------------------------------------------------- 1 | origin https://github.com/facebookresearch/segment-anything 
-------------------------------------------------------------------------------- /torchbenchmark/models/sam/requirements.txt: -------------------------------------------------------------------------------- 1 | # Actually needed 2 | opencv-python 3 | pycocotools 4 | -------------------------------------------------------------------------------- /torchbenchmark/models/sam_fast/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 32 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | not_implemented: 10 | - device: cpu 11 | - device: cuda 12 | test: example 13 | skip_cuda_memory_leak: true 14 | -------------------------------------------------------------------------------- /torchbenchmark/models/sam_fast/requirements.txt: -------------------------------------------------------------------------------- 1 | git+https://github.com/pytorch-labs/segment-anything-fast.git 2 | opencv-python 3 | pycocotools 4 | -------------------------------------------------------------------------------- /torchbenchmark/models/shufflenet_v2_x1_0/__init__.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.tasks import COMPUTER_VISION 2 | from torchbenchmark.util.framework.vision.model_factory import TorchVisionModel 3 | from torchvision import models 4 | 5 | 6 | class Model(TorchVisionModel): 7 | task = COMPUTER_VISION.CLASSIFICATION 8 | DEFAULT_TRAIN_BSIZE = 128 9 | DEFAULT_EVAL_BSIZE = 64 10 | 11 | def __init__(self, test, device, batch_size=None, extra_args=[]): 12 | super().__init__( 13 | model_name="shufflenet_v2_x1_0", 14 | test=test, 15 | device=device, 16 | batch_size=batch_size, 17 | weights=models.ShuffleNet_V2_X1_0_Weights.IMAGENET1K_V1, 18 | extra_args=extra_args, 19 | ) 20 | -------------------------------------------------------------------------------- /torchbenchmark/models/shufflenet_v2_x1_0/install.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/models/shufflenet_v2_x1_0/install.py -------------------------------------------------------------------------------- /torchbenchmark/models/shufflenet_v2_x1_0/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 128 4 | eval_benchmark: true 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: true 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/simple_gpt/metadata.yaml: -------------------------------------------------------------------------------- 1 | eval_benchmark: false 2 | eval_deterministic: false 3 | eval_nograd: true 4 | train_benchmark: false 5 | train_deterministic: false 6 | -------------------------------------------------------------------------------- /torchbenchmark/models/simple_gpt/origin: -------------------------------------------------------------------------------- 1 | https://github.com/pytorch-labs/simple_gpt/ 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/simple_gpt_tp_manual/metadata.yaml: -------------------------------------------------------------------------------- 
1 | eval_benchmark: false 2 | eval_deterministic: false 3 | eval_nograd: true 4 | train_benchmark: false 5 | train_deterministic: false 6 | -------------------------------------------------------------------------------- /torchbenchmark/models/simple_gpt_tp_manual/origin: -------------------------------------------------------------------------------- 1 | https://github.com/pytorch-labs/simple_gpt/ 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/soft_actor_critic/install.py: -------------------------------------------------------------------------------- 1 | from utils.python_utils import pip_install_requirements 2 | 3 | if __name__ == "__main__": 4 | pip_install_requirements() 5 | -------------------------------------------------------------------------------- /torchbenchmark/models/soft_actor_critic/metadata.yaml: -------------------------------------------------------------------------------- 1 | eval_benchmark: false 2 | eval_deterministic: false 3 | eval_nograd: true 4 | train_benchmark: false 5 | train_deterministic: false -------------------------------------------------------------------------------- /torchbenchmark/models/soft_actor_critic/requirements.txt: -------------------------------------------------------------------------------- 1 | gym 2 | pygame 3 | tensorboardX 4 | -------------------------------------------------------------------------------- /torchbenchmark/models/speech_transformer/install.py: -------------------------------------------------------------------------------- 1 | from utils import s3_utils 2 | from utils.python_utils import pip_install_requirements 3 | 4 | if __name__ == "__main__": 5 | s3_utils.checkout_s3_data( 6 | "INPUT_TARBALLS", "speech_transformer_inputs.tar.gz", decompress=True 7 | ) 8 | pip_install_requirements() 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/speech_transformer/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 1 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | not_implemented: 8 | - device: cpu 9 | train_benchmark: false 10 | train_deterministic: false 11 | -------------------------------------------------------------------------------- /torchbenchmark/models/speech_transformer/origin: -------------------------------------------------------------------------------- 1 | origin https://github.com/kaituoxu/Speech-Transformer 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/speech_transformer/requirements.txt: -------------------------------------------------------------------------------- 1 | kaldi_io 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/speech_transformer/speech_transformer/data/__init__.py: -------------------------------------------------------------------------------- 1 | from .data import * 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/speech_transformer/speech_transformer/transformer/__init__.py: -------------------------------------------------------------------------------- 1 | from .transformer import * 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/speech_transformer/speech_transformer/utils/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .utils import * 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/squeezenet1_1/install.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/models/squeezenet1_1/install.py -------------------------------------------------------------------------------- /torchbenchmark/models/squeezenet1_1/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 256 4 | eval_benchmark: true 5 | eval_deterministic: false 6 | eval_nograd: true 7 | not_implemented: 8 | - device: cpu 9 | test: train 10 | - device: cuda 11 | test: train 12 | train_benchmark: true 13 | train_deterministic: false 14 | -------------------------------------------------------------------------------- /torchbenchmark/models/stable_diffusion_text_encoder/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 32 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | not_implemented: 10 | - device: cpu 11 | -------------------------------------------------------------------------------- /torchbenchmark/models/stable_diffusion_unet/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | import warnings 3 | 4 | import torch 5 | from torchbenchmark.util.framework.diffusers import install_diffusers 6 | 7 | MODEL_NAME = "stabilityai/stable-diffusion-2" 8 | 9 | 10 | def load_model_checkpoint(): 11 | from diffusers import StableDiffusionPipeline 12 | 13 | StableDiffusionPipeline.from_pretrained( 14 | MODEL_NAME, torch_dtype=torch.float16, safety_checker=None 15 | ) 16 | 17 | 18 | if __name__ == "__main__": 19 | install_diffusers() 20 | if not "HUGGING_FACE_HUB_TOKEN" in os.environ: 21 | warnings.warn( 22 | "Make sure to set `HUGGINGFACE_HUB_TOKEN` so you can download weights" 23 | ) 24 | else: 25 | load_model_checkpoint() 26 | -------------------------------------------------------------------------------- /torchbenchmark/models/stable_diffusion_unet/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 32 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | not_implemented: 10 | - device: cpu 11 | -------------------------------------------------------------------------------- /torchbenchmark/models/tacotron2/.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "waveglow"] 2 | path = waveglow 3 | url = https://github.com/NVIDIA/waveglow 4 | branch = master 5 | -------------------------------------------------------------------------------- /torchbenchmark/models/tacotron2/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM pytorch/pytorch:nightly-devel-cuda10.0-cudnn7 2 | ENV PATH /usr/local/nvidia/bin:/usr/local/cuda/bin:${PATH} 3 | 4 | RUN apt-get update -y 5 | 6 | RUN pip install numpy scipy matplotlib librosa==0.6.0 tensorflow 
tensorboardX inflect==0.2.5 Unidecode==1.0.22 pillow jupyter 7 | 8 | ADD apex /apex/ 9 | WORKDIR /apex/ 10 | RUN pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" . 11 | -------------------------------------------------------------------------------- /torchbenchmark/models/tacotron2/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | 4 | from utils import s3_utils 5 | from utils.python_utils import pip_install_requirements 6 | 7 | 8 | def check_data_dir(): 9 | current_dir = Path(os.path.dirname(os.path.realpath(__file__))) 10 | tacotron2_data_dir = os.path.join( 11 | current_dir.parent.parent, "data", ".data", "tacotron2-minimal" 12 | ) 13 | assert os.path.exists( 14 | tacotron2_data_dir 15 | ), "Couldn't find tacotron2 minimal data dir, please run install.py again." 16 | 17 | 18 | if __name__ == "__main__": 19 | pip_install_requirements() 20 | s3_utils.checkout_s3_data( 21 | "INPUT_TARBALLS", "tacotron2-minimal.tar.gz", decompress=True 22 | ) 23 | -------------------------------------------------------------------------------- /torchbenchmark/models/tacotron2/install.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | git submodule init; git submodule update 3 | pip install -r requirements.txt 4 | -------------------------------------------------------------------------------- /torchbenchmark/models/tacotron2/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 128 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | not_implemented: 8 | - device: cuda 9 | - device: cpu 10 | train_benchmark: false 11 | train_deterministic: false 12 | -------------------------------------------------------------------------------- /torchbenchmark/models/tacotron2/multiproc.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import sys 3 | import time 4 | 5 | import torch 6 | 7 | argslist = list(sys.argv)[1:] 8 | num_gpus = torch.cuda.device_count() 9 | argslist.append("--n_gpus={}".format(num_gpus)) 10 | workers = [] 11 | job_id = time.strftime("%Y_%m_%d-%H%M%S") 12 | argslist.append("--group_name=group_{}".format(job_id)) 13 | 14 | for i in range(num_gpus): 15 | argslist.append("--rank={}".format(i)) 16 | stdout = None if i == 0 else open("logs/{}_GPU_{}.log".format(job_id, i), "w") 17 | print(argslist) 18 | p = subprocess.Popen([str(sys.executable)] + argslist, stdout=stdout) 19 | workers.append(p) 20 | argslist = argslist[:-1] 21 | 22 | for p in workers: 23 | p.wait() 24 | -------------------------------------------------------------------------------- /torchbenchmark/models/tacotron2/origin: -------------------------------------------------------------------------------- 1 | origin https://github.com/ailzhang/tacotron2.git 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/tacotron2/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | inflect 3 | scipy 4 | Unidecode 5 | pillow 6 | librosa==0.9.2 7 | -------------------------------------------------------------------------------- /torchbenchmark/models/tacotron2/waveglow/.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule 
"tacotron2"] 2 | path = tacotron2 3 | url = http://github.com/NVIDIA/tacotron2 4 | -------------------------------------------------------------------------------- /torchbenchmark/models/tacotron2/waveglow/tacotron2/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM pytorch/pytorch:0.4_cuda9_cudnn7 2 | RUN pip install numpy scipy matplotlib librosa==0.6.0 tensorflow tensorboardX inflect==0.2.5 Unidecode==1.0.22 jupyter 3 | -------------------------------------------------------------------------------- /torchbenchmark/models/tacotron2/waveglow/tacotron2/multiproc.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import sys 3 | import time 4 | 5 | import torch 6 | 7 | argslist = list(sys.argv)[1:] 8 | num_gpus = torch.cuda.device_count() 9 | argslist.append("--n_gpus={}".format(num_gpus)) 10 | workers = [] 11 | job_id = time.strftime("%Y_%m_%d-%H%M%S") 12 | argslist.append("--group_name=group_{}".format(job_id)) 13 | 14 | for i in range(num_gpus): 15 | argslist.append("--rank={}".format(i)) 16 | stdout = None if i == 0 else open("logs/{}_GPU_{}.log".format(job_id, i), "w") 17 | print(argslist) 18 | p = subprocess.Popen([str(sys.executable)] + argslist, stdout=stdout) 19 | workers.append(p) 20 | argslist = argslist[:-1] 21 | 22 | for p in workers: 23 | p.wait() 24 | -------------------------------------------------------------------------------- /torchbenchmark/models/timm_efficientdet/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 128 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | not_implemented: 8 | - device: cuda 9 | - device: cpu 10 | train_benchmark: true 11 | train_deterministic: false 12 | -------------------------------------------------------------------------------- /torchbenchmark/models/timm_efficientdet/requirements.txt: -------------------------------------------------------------------------------- 1 | pycocotools 2 | git+https://github.com/rwightman/efficientdet-pytorch.git@d43c9e34cd62d22b4205831bb735f6dd83b8e881 3 | -------------------------------------------------------------------------------- /torchbenchmark/models/timm_efficientnet/__init__.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.tasks import COMPUTER_VISION 2 | from torchbenchmark.util.framework.timm.model_factory import TimmModel 3 | 4 | 5 | class Model(TimmModel): 6 | task = COMPUTER_VISION.CLASSIFICATION 7 | 8 | DEFAULT_TRAIN_BSIZE = 32 9 | DEFAULT_EVAL_BSIZE = 64 10 | 11 | def __init__(self, test, device, batch_size=None, extra_args=[]): 12 | super().__init__( 13 | test=test, 14 | model_name="efficientnet_b0", 15 | device=device, 16 | batch_size=batch_size, 17 | extra_args=extra_args, 18 | ) 19 | -------------------------------------------------------------------------------- /torchbenchmark/models/timm_efficientnet/install.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/models/timm_efficientnet/install.py -------------------------------------------------------------------------------- /torchbenchmark/models/timm_efficientnet/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA 
A100-SXM4-40GB: 3 | eval_batch_size: 128 4 | eval_benchmark: true 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: true 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/timm_nfnet/install.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/models/timm_nfnet/install.py -------------------------------------------------------------------------------- /torchbenchmark/models/timm_nfnet/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 128 4 | eval_benchmark: true 5 | eval_deterministic: false 6 | eval_nograd: true 7 | not_implemented: 8 | - device: cuda 9 | test: train 10 | train_benchmark: true 11 | train_deterministic: false 12 | -------------------------------------------------------------------------------- /torchbenchmark/models/timm_regnet/__init__.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.tasks import COMPUTER_VISION 2 | from torchbenchmark.util.framework.timm.model_factory import TimmModel 3 | 4 | 5 | class Model(TimmModel): 6 | task = COMPUTER_VISION.CLASSIFICATION 7 | 8 | DEFAULT_TRAIN_BSIZE = 32 9 | DEFAULT_EVAL_BSIZE = 32 10 | 11 | def __init__(self, test, device, batch_size=None, extra_args=[]): 12 | super().__init__( 13 | test=test, 14 | model_name="regnety_120", 15 | device=device, 16 | batch_size=batch_size, 17 | extra_args=extra_args, 18 | ) 19 | -------------------------------------------------------------------------------- /torchbenchmark/models/timm_regnet/install.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/models/timm_regnet/install.py -------------------------------------------------------------------------------- /torchbenchmark/models/timm_regnet/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 32 4 | eval_benchmark: true 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: true 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/timm_resnest/__init__.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.tasks import COMPUTER_VISION 2 | from torchbenchmark.util.framework.timm.model_factory import TimmModel 3 | 4 | 5 | class Model(TimmModel): 6 | task = COMPUTER_VISION.CLASSIFICATION 7 | 8 | DEFAULT_TRAIN_BSIZE = 32 9 | DEFAULT_EVAL_BSIZE = 32 10 | 11 | def __init__(self, test, device, batch_size=None, extra_args=[]): 12 | super().__init__( 13 | test=test, 14 | model_name="resnest14d", 15 | device=device, 16 | batch_size=batch_size, 17 | extra_args=extra_args, 18 | ) 19 | -------------------------------------------------------------------------------- /torchbenchmark/models/timm_resnest/install.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/models/timm_resnest/install.py 
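The metadata.yaml files above all follow the same schema: a top-level devices mapping keyed by GPU name, per-device eval/train flags and batch sizes, and an optional not_implemented list. The snippet below is a minimal illustrative reader, not part of the repository; it assumes PyYAML is installed and that the file uses the devices layout shown above.

import yaml

def load_eval_batch_size(metadata_path, device_name, default=None):
    """Return the eval_batch_size recorded for device_name, or default if absent."""
    with open(metadata_path) as f:
        metadata = yaml.safe_load(f) or {}
    device_cfg = (metadata.get("devices") or {}).get(device_name, {})
    return device_cfg.get("eval_batch_size", default)

# Example (hypothetical invocation): for the timm_nfnet metadata.yaml shown above,
# load_eval_batch_size("metadata.yaml", "NVIDIA A100-SXM4-40GB") would return 128.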
-------------------------------------------------------------------------------- /torchbenchmark/models/timm_resnest/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 256 4 | eval_benchmark: true 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: true 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/timm_vision_transformer/__init__.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.tasks import COMPUTER_VISION 2 | from torchbenchmark.util.framework.timm.model_factory import TimmModel 3 | 4 | 5 | class Model(TimmModel): 6 | task = COMPUTER_VISION.GENERATION 7 | 8 | DEFAULT_TRAIN_BSIZE = 32 9 | DEFAULT_EVAL_BSIZE = 32 10 | 11 | def __init__(self, test, device, batch_size=None, extra_args=[]): 12 | super().__init__( 13 | test=test, 14 | model_name="vit_small_patch16_224", 15 | device=device, 16 | batch_size=batch_size, 17 | extra_args=extra_args, 18 | ) 19 | -------------------------------------------------------------------------------- /torchbenchmark/models/timm_vision_transformer/install.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/models/timm_vision_transformer/install.py -------------------------------------------------------------------------------- /torchbenchmark/models/timm_vision_transformer/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 128 4 | eval_benchmark: true 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: true 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/timm_vision_transformer_large/__init__.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.tasks import COMPUTER_VISION 2 | from torchbenchmark.util.framework.timm.model_factory import TimmModel 3 | 4 | 5 | class Model(TimmModel): 6 | task = COMPUTER_VISION.GENERATION 7 | 8 | DEFAULT_TRAIN_BSIZE = 32 9 | DEFAULT_EVAL_BSIZE = 32 10 | 11 | def __init__(self, test, device, batch_size=None, extra_args=[]): 12 | super().__init__( 13 | test=test, 14 | model_name="vit_giant_patch14_224", 15 | device=device, 16 | batch_size=batch_size, 17 | extra_args=extra_args, 18 | ) 19 | -------------------------------------------------------------------------------- /torchbenchmark/models/timm_vision_transformer_large/install.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/models/timm_vision_transformer_large/install.py -------------------------------------------------------------------------------- /torchbenchmark/models/timm_vision_transformer_large/metadata.yaml: -------------------------------------------------------------------------------- 1 | eval_benchmark: true 2 | eval_deterministic: false 3 | eval_nograd: true 4 | train_benchmark: true 5 | train_deterministic: false 6 | not_implemented: 7 | - device: cuda 8 | -------------------------------------------------------------------------------- 
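The timm_* wrappers above all follow one pattern: subclass TimmModel, set the task and default batch sizes, and forward a timm model name to the parent constructor. Below is a usage sketch, assuming the torchbenchmark package, timm, and a CUDA device are available, and that the harness's BenchmarkModel base class exposes get_module() and invoke() as in upstream TorchBench.

from torchbenchmark.models.timm_vision_transformer import Model

# Build the eval benchmark on GPU with the wrapper's default eval batch size.
benchmark = Model(test="eval", device="cuda")

# Inspect the underlying nn.Module and its generated example inputs...
model, example_inputs = benchmark.get_module()
print(type(model).__name__)

# ...or let the harness execute a single eval iteration itself.
benchmark.invoke()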
/torchbenchmark/models/timm_vovnet/__init__.py: -------------------------------------------------------------------------------- 1 | from torchbenchmark.tasks import COMPUTER_VISION 2 | from torchbenchmark.util.framework.timm.model_factory import TimmModel 3 | 4 | 5 | class Model(TimmModel): 6 | task = COMPUTER_VISION.DETECTION 7 | 8 | DEFAULT_TRAIN_BSIZE = 32 9 | DEFAULT_EVAL_BSIZE = 32 10 | 11 | def __init__(self, test, device, batch_size=None, extra_args=[]): 12 | super().__init__( 13 | test=test, 14 | model_name="vovnet39a", 15 | device=device, 16 | batch_size=batch_size, 17 | extra_args=extra_args, 18 | ) 19 | -------------------------------------------------------------------------------- /torchbenchmark/models/timm_vovnet/install.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/models/timm_vovnet/install.py -------------------------------------------------------------------------------- /torchbenchmark/models/timm_vovnet/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 128 4 | eval_benchmark: true 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: true 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/torch_multimodal_clip/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 1 4 | eval_train_size: 1 5 | eval_benchmark: false 6 | eval_deterministic: false 7 | eval_nograd: true 8 | train_benchmark: false 9 | train_deterministic: false 10 | -------------------------------------------------------------------------------- /torchbenchmark/models/torch_multimodal_clip/requirements.txt: -------------------------------------------------------------------------------- 1 | git+https://github.com/facebookresearch/multimodal.git 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/tts_angular/install.py: -------------------------------------------------------------------------------- 1 | from utils.python_utils import pip_install_requirements 2 | 3 | if __name__ == "__main__": 4 | pip_install_requirements() 5 | -------------------------------------------------------------------------------- /torchbenchmark/models/tts_angular/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 512 4 | eval_benchmark: true 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: true 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/tts_angular/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | scipy 3 | phonemizer 4 | unidecode 5 | Pillow 6 | flask 7 | tqdm 8 | inflect 9 | bokeh 10 | pysbd 11 | soundfile 12 | nose 13 | cardboardlint 14 | pylint 15 | gdown 16 | pyyaml 17 | librosa 18 | -------------------------------------------------------------------------------- /torchbenchmark/models/vgg16/install.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/models/vgg16/install.py -------------------------------------------------------------------------------- /torchbenchmark/models/vgg16/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 8 4 | eval_benchmark: true 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: true 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/vision_maskrcnn/install.py: -------------------------------------------------------------------------------- 1 | from utils import s3_utils 2 | from utils.python_utils import pip_install_requirements 3 | 4 | if __name__ == "__main__": 5 | s3_utils.checkout_s3_data( 6 | "INPUT_TARBALLS", "coco2017-minimal.tar.gz", decompress=True 7 | ) 8 | pip_install_requirements() 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/vision_maskrcnn/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 1 4 | eval_benchmark: false 5 | eval_deterministic: false 6 | eval_nograd: true 7 | train_benchmark: false 8 | train_deterministic: false 9 | -------------------------------------------------------------------------------- /torchbenchmark/models/vision_maskrcnn/origin: -------------------------------------------------------------------------------- 1 | origin https://github.com/pytorch/vision/blob/main/torchvision/models/detection/mask_rcnn.py 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/vision_maskrcnn/requirements.txt: -------------------------------------------------------------------------------- 1 | pycocotools 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/yolov3/check.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import torch 4 | 5 | a = torch.load(sys.argv[1]) 6 | b = torch.load(sys.argv[2]) 7 | torch.testing.assert_allclose(a, b, rtol=0.01, atol=0.01) 8 | -------------------------------------------------------------------------------- /torchbenchmark/models/yolov3/install.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | 4 | from utils import s3_utils 5 | from utils.python_utils import pip_install_requirements 6 | 7 | 8 | def setup_data_dir(): 9 | current_dir = Path(os.path.dirname(os.path.realpath(__file__))) 10 | coco128_data_dir = os.path.join( 11 | current_dir.parent.parent, "data", ".data", "coco128" 12 | ) 13 | assert os.path.exists( 14 | coco128_data_dir 15 | ), "Couldn't find coco128 data dir, please run install.py again." 
16 | 17 | 18 | if __name__ == "__main__": 19 | s3_utils.checkout_s3_data("INPUT_TARBALLS", "coco128.tar.gz", decompress=True) 20 | pip_install_requirements() 21 | -------------------------------------------------------------------------------- /torchbenchmark/models/yolov3/install.sh: -------------------------------------------------------------------------------- 1 | python -m pip install -r requirements.txt 2 | -------------------------------------------------------------------------------- /torchbenchmark/models/yolov3/metadata.yaml: -------------------------------------------------------------------------------- 1 | devices: 2 | NVIDIA A100-SXM4-40GB: 3 | eval_batch_size: 8 4 | eval_benchmark: true 5 | eval_deterministic: false 6 | eval_nograd: true 7 | not_implemented: 8 | - device: cpu 9 | test: train 10 | train_benchmark: true 11 | train_deterministic: false 12 | -------------------------------------------------------------------------------- /torchbenchmark/models/yolov3/run.sh: -------------------------------------------------------------------------------- 1 | debug_arg="" 2 | if [ $# -gt 1 ]; then 3 | if [ "$1" == "--debug" ]; then 4 | debug_arg="--debug $2" 5 | fi 6 | fi 7 | python3.8 train.py --data coco128.data --img 416 --batch 8 --nosave --notest --epochs 10 --weights '' $debug_arg 8 | -------------------------------------------------------------------------------- /torchbenchmark/models/yolov3/train_batch0.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/models/yolov3/train_batch0.jpg -------------------------------------------------------------------------------- /torchbenchmark/models/yolov3/yolo_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/models/yolov3/yolo_utils/__init__.py -------------------------------------------------------------------------------- /torchbenchmark/util/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/util/__init__.py -------------------------------------------------------------------------------- /torchbenchmark/util/distributed/requirements.txt: -------------------------------------------------------------------------------- 1 | deepspeed 2 | evaluate 3 | datasets 4 | scikit-learn 5 | tensorboard 6 | -------------------------------------------------------------------------------- /torchbenchmark/util/framework/detectron2/config.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | 4 | def parse_tb_args(args): 5 | parser = argparse.ArgumentParser() 6 | # default resolution: 800x1333 7 | parser.add_argument( 8 | "--resize", 9 | choices=["default", "448x608"], 10 | default="default", 11 | help="Resize the image to specified size", 12 | ) 13 | args, unknown_args = parser.parse_known_args(args) 14 | return args, unknown_args 15 | -------------------------------------------------------------------------------- /torchbenchmark/util/framework/detectron2/requirements.txt: -------------------------------------------------------------------------------- 1 | 
git+https://github.com/facebookresearch/detectron2.git@0df2d73d0013db7de629602c23cc120219b4f2b8 2 | omegaconf==2.3.0 3 | numpy 4 | -------------------------------------------------------------------------------- /torchbenchmark/util/framework/diffusers/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | 4 | from utils.python_utils import pip_install_requirements 5 | 6 | CURRENT_DIR = Path(os.path.dirname(os.path.realpath(__file__))) 7 | 8 | 9 | def install_diffusers(): 10 | requirements_file = os.path.join(CURRENT_DIR, "requirements.txt") 11 | pip_install_requirements(requirements_txt=requirements_file) 12 | -------------------------------------------------------------------------------- /torchbenchmark/util/framework/diffusers/requirements.txt: -------------------------------------------------------------------------------- 1 | diffusers==0.30.3 2 | -------------------------------------------------------------------------------- /torchbenchmark/util/framework/gnn/__init__.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | 3 | from utils.python_utils import pip_install_requirements 4 | 5 | CURRENT_DIR = os.path.dirname(os.path.realpath(__file__)) 6 | 7 | 8 | def install_pytorch_geometric(): 9 | pip_install_requirements(os.path.join(CURRENT_DIR, "requirements.txt")) 10 | -------------------------------------------------------------------------------- /torchbenchmark/util/framework/gnn/args.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/torchbenchmark/util/framework/gnn/args.py -------------------------------------------------------------------------------- /torchbenchmark/util/framework/gnn/config.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | 4 | def parse_tb_args(args): 5 | parser = argparse.ArgumentParser() 6 | parser.add_argument( 7 | "--graph_type", 8 | choices=["dense", "sparse"], 9 | default="dense", 10 | help="Determine dense graph or sparse graph", 11 | ) 12 | args, unknown_args = parser.parse_known_args(args) 13 | return args, unknown_args 14 | -------------------------------------------------------------------------------- /torchbenchmark/util/framework/gnn/requirements.txt: -------------------------------------------------------------------------------- 1 | torch_geometric @ git+https://github.com/pyg-team/pytorch_geometric.git@cabcd4097442ba60aa1efa11e1619dd9bb8fb527 2 | -------------------------------------------------------------------------------- /torchbenchmark/util/hardware/__init__.py: -------------------------------------------------------------------------------- 1 | from .roofline import HW_ROOFLINE_SPECS 2 | -------------------------------------------------------------------------------- /torchbenchmark/util/hardware/roofline.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | 3 | # NVIDIA A100 GPU Spec: 4 | # https://www.nvidia.com/content/dam/en-zz/Solutions/Data-Center/a100/pdf/nvidia-a100-datasheet-us-nvidia-1758950-r4-web.pdf 5 | NV_A100 = { 6 | "fp32": 19.5, 7 | "tf32": 156, 8 | "bf16": 312, 9 | "fp16": 312, 10 | } 11 | 12 | # NVIDIA H100 GPU Datasheet: 13 | # https://nvdam.widen.net/content/vuzumiozpb/original/h100-datasheet-2287922.pdf 14 | NV_H100 = { 15 | 
"fp32": 51, 16 | "tf32": 756, 17 | "bf16": 1513, 18 | "fp16": 1513, 19 | "fp8": 3026, 20 | } 21 | 22 | 23 | HW_ROOFLINE_SPECS: Dict[str, Dict[str, float]] = { 24 | "NVIDIA A100-SXM4-40GB": NV_A100, 25 | "NVIDIA A100-PG509-200": NV_A100, 26 | "NVIDIA H100": NV_H100, 27 | } 28 | -------------------------------------------------------------------------------- /userbenchmark/api-coverage/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /userbenchmark/cpu/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/userbenchmark/cpu/__init__.py -------------------------------------------------------------------------------- /userbenchmark/cpu/cpu_test.yaml: -------------------------------------------------------------------------------- 1 | test: eval 2 | model: resnet50,mobilenet_v2 3 | extra_args: --backend torchscript --fuser fuser3 4 | -------------------------------------------------------------------------------- /userbenchmark/cuda-compare/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/userbenchmark/cuda-compare/__init__.py -------------------------------------------------------------------------------- /userbenchmark/ddp_experiments/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/userbenchmark/ddp_experiments/__init__.py -------------------------------------------------------------------------------- /userbenchmark/distributed/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/userbenchmark/distributed/__init__.py -------------------------------------------------------------------------------- /userbenchmark/distributed/ci.yaml: -------------------------------------------------------------------------------- 1 | platform: "ai_cluster" 2 | schedule: "nightly" 3 | -------------------------------------------------------------------------------- /userbenchmark/distributed/install.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import sys 3 | 4 | 5 | def pip_install_requirements(): 6 | subprocess.check_call( 7 | [sys.executable, "-m", "pip", "install", "-q", "-r", "requirements.txt"] 8 | ) 9 | 10 | 11 | if __name__ == "__main__": 12 | pip_install_requirements() 13 | -------------------------------------------------------------------------------- /userbenchmark/dynamo/__init__.py: -------------------------------------------------------------------------------- 1 | BM_NAME = "dynamo" 2 | -------------------------------------------------------------------------------- /userbenchmark/dynamo/dynamobench/torchbench_models_list.txt: -------------------------------------------------------------------------------- 1 | BERT_pytorch,128 2 | Background_Matting, 16 3 | LearningToPaint,1024 4 | alexnet,1024 5 | dcgan,1024 6 | densenet121,64 7 | hf_Albert,32 8 | hf_Bart,16 9 | hf_Bert,16 10 | hf_GPT2,16 11 | hf_T5,4 12 | mnasnet1_0,256 13 | mobilenet_v2,128 14 | 
mobilenet_v3_large,256 15 | nvidia_deeprecommender,1024 16 | pytorch_unet,8 17 | resnet18,512 18 | resnet50,128 19 | resnext50_32x4d,128 20 | shufflenet_v2_x1_0,512 21 | squeezenet1_1,512 22 | timm_nfnet,256 23 | timm_efficientnet,128 24 | timm_regnet,128 25 | timm_resnest,256 26 | timm_vision_transformer,256 27 | timm_vovnet,128 28 | vgg16,128 29 | -------------------------------------------------------------------------------- /userbenchmark/functorch/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/userbenchmark/functorch/__init__.py -------------------------------------------------------------------------------- /userbenchmark/functorch/ci.yaml: -------------------------------------------------------------------------------- 1 | platform: "aws_t4_metal" 2 | schedule: "nightly" 3 | -------------------------------------------------------------------------------- /userbenchmark/group_bench/__init__.py: -------------------------------------------------------------------------------- 1 | BM_NAME = "group_bench" 2 | -------------------------------------------------------------------------------- /userbenchmark/group_bench/configs/torch_ao.yaml: -------------------------------------------------------------------------------- 1 | model: "*" 2 | extended_models: 3 | - huggingface 4 | - timm 5 | test: eval 6 | device: cuda 7 | extra_args: --precision bf16 --torchdynamo inductor --inductor-compile-mode max-autotune 8 | metrics: 9 | - latencies 10 | test_group: 11 | test_batch_size_default: 12 | subgroup: 13 | - extra_args: 14 | - extra_args: --quantization int8dynamic 15 | - extra_args: --quantization int8weightonly 16 | - extra_args: --quantization int4weightonly 17 | -------------------------------------------------------------------------------- /userbenchmark/instruction-count/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/userbenchmark/instruction-count/__init__.py -------------------------------------------------------------------------------- /userbenchmark/model-stableness/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/userbenchmark/model-stableness/__init__.py -------------------------------------------------------------------------------- /userbenchmark/nvfuser/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/userbenchmark/nvfuser/__init__.py -------------------------------------------------------------------------------- /userbenchmark/optim/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/userbenchmark/optim/__init__.py -------------------------------------------------------------------------------- /userbenchmark/release-test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/userbenchmark/release-test/__init__.py 
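torchbench_models_list.txt above pairs each model name with a default batch size, one comma-separated entry per line (some entries carry a stray space after the comma, e.g. "Background_Matting, 16"). A minimal parsing sketch follows; the helper name is hypothetical and the function is illustrative only, not part of the repository.

def parse_models_list(path):
    """Yield (model_name, batch_size) pairs from a torchbench_models_list.txt-style file."""
    with open(path) as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            name, batch = line.split(",", 1)
            yield name.strip(), int(batch.strip())

# Example: dict(parse_models_list("torchbench_models_list.txt"))["BERT_pytorch"] == 128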
-------------------------------------------------------------------------------- /userbenchmark/release-test/configs/1.12.1.yaml: -------------------------------------------------------------------------------- 1 | cuda: 2 | - version: 11.3 3 | magma_version: magma-cuda113 4 | - version: 11.6 5 | magma_version: magma-cuda116 6 | pytorch: 7 | - version: 1.12.1 8 | conda_channel: pytorch-test 9 | - version: 1.12.0 10 | conda_channel: pytorch 11 | -------------------------------------------------------------------------------- /userbenchmark/release-test/configs/1.13.0.yaml: -------------------------------------------------------------------------------- 1 | cuda: 2 | - version: 11.6 3 | magma_version: magma-cuda116 4 | pytorch: 5 | - version: 1.13.0 6 | conda_channel: pytorch-test 7 | - version: 1.12.1 8 | conda_channel: pytorch 9 | -------------------------------------------------------------------------------- /userbenchmark/release-test/configs/2.0.1.yaml: -------------------------------------------------------------------------------- 1 | cuda: 2 | - version: 11.7 3 | magma_version: magma-cuda117 4 | pytorch: 5 | - version: 2.1.0 6 | conda_channel: pytorch-test 7 | - version: 2.0.1 8 | conda_channel: pytorch 9 | -------------------------------------------------------------------------------- /userbenchmark/release-test/configs/2.1.0.yaml: -------------------------------------------------------------------------------- 1 | cuda: 2 | - version: 11.8 3 | magma_version: magma-cuda118 4 | pytorch: 5 | - version: 2.1.0 6 | conda_channel: pytorch-test 7 | - version: 2.0.1 8 | conda_channel: pytorch 9 | -------------------------------------------------------------------------------- /userbenchmark/release-test/configs/2.1.1.yaml: -------------------------------------------------------------------------------- 1 | cuda: 2 | - version: 12.1 3 | magma_version: magma-cuda121 4 | pytorch: 5 | - version: 2.1.1 6 | conda_channel: pytorch 7 | - version: 2.1.0 8 | conda_channel: pytorch 9 | -------------------------------------------------------------------------------- /userbenchmark/release-test/configs/2.1.2.yaml: -------------------------------------------------------------------------------- 1 | cuda: 2 | - version: 12.1 3 | magma_version: magma-cuda121 4 | pytorch: 5 | - version: 2.1.2 6 | conda_channel: pytorch-test 7 | - version: 2.1.1 8 | conda_channel: pytorch 9 | -------------------------------------------------------------------------------- /userbenchmark/release-test/configs/2.5.0.yaml: -------------------------------------------------------------------------------- 1 | cuda: 2 | - version: 12.4 3 | magma_version: magma-cuda124 4 | pytorch: 5 | - version: 2.4.1 6 | conda_channel: pytorch 7 | - version: 2.5.0 8 | conda_channel: pytorch-test 9 | -------------------------------------------------------------------------------- /userbenchmark/release-test/configs/2.5.1.yaml: -------------------------------------------------------------------------------- 1 | cuda: 2 | - version: 12.4 3 | magma_version: magma-cuda124 4 | pytorch: 5 | - version: 2.5.0 6 | conda_channel: pytorch 7 | - version: 2.5.1 8 | conda_channel: pytorch-test 9 | -------------------------------------------------------------------------------- /userbenchmark/release-test/configs/2.6.0.yaml: -------------------------------------------------------------------------------- 1 | cuda: 2 | - version: 12.4 3 | magma_version: magma-cuda124 4 | pytorch: 5 | - version: 2.5.1 6 | conda_channel: pytorch 7 | - version: 2.6.0 8 | 
conda_channel: pytorch-test 9 | -------------------------------------------------------------------------------- /userbenchmark/release-test/version.txt: -------------------------------------------------------------------------------- 1 | 2.6.0 2 | -------------------------------------------------------------------------------- /userbenchmark/rocm-test/__init__.py: -------------------------------------------------------------------------------- 1 | BM_NAME = "rocm-test" 2 | -------------------------------------------------------------------------------- /userbenchmark/test-user-invoke/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/userbenchmark/test-user-invoke/__init__.py -------------------------------------------------------------------------------- /userbenchmark/test_bench/__init__.py: -------------------------------------------------------------------------------- 1 | BM_NAME = "test_bench" 2 | -------------------------------------------------------------------------------- /userbenchmark/torch-nightly/__init__.py: -------------------------------------------------------------------------------- 1 | BM_NAME = "torch-nightly" 2 | -------------------------------------------------------------------------------- /userbenchmark/torch_trt/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch/benchmark/68cf9dffe5df4eedac42512dc92598fffff335ae/userbenchmark/torch_trt/__init__.py -------------------------------------------------------------------------------- /userbenchmark/torch_trt/ci.yaml: -------------------------------------------------------------------------------- 1 | platform: "gcp_a100" 2 | schedule: "nightly" 3 | -------------------------------------------------------------------------------- /userbenchmark/torchao/__init__.py: -------------------------------------------------------------------------------- 1 | BM_NAME = "torchao" 2 | -------------------------------------------------------------------------------- /utils/build_requirements.txt: -------------------------------------------------------------------------------- 1 | # We need to pin numpy version to the same as the torch testing environment 2 | # which still supports python 3.8 3 | numpy==1.21.2; python_version < '3.11' 4 | numpy==1.26.0; python_version >= '3.11' 5 | psutil 6 | pyyaml --------------------------------------------------------------------------------
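The numpy pins in build_requirements.txt rely on PEP 508 environment markers, so pip installs exactly one of the two lines depending on the interpreter version. The sketch below evaluates the same markers outside of pip; it assumes the packaging library is installed, and the requirement strings are copied from the file above.

from packaging.requirements import Requirement

requirements = [
    "numpy==1.21.2; python_version < '3.11'",
    "numpy==1.26.0; python_version >= '3.11'",
]

# Keep only the pins whose marker matches the running interpreter.
active = [
    str(Requirement(r).specifier)
    for r in requirements
    if Requirement(r).marker is None or Requirement(r).marker.evaluate()
]
print("numpy pin selected for this Python:", active)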