├── Glow ├── ExecutorCore │ └── ExecutorCore.cpp ├── README.md ├── glow_tracing.sh ├── glow_tracing_parser.py ├── images │ └── cat_285.png └── run_glow_end2end.sh ├── README.md ├── TC_perlayer ├── README.md ├── img │ └── image-20200718110200812.png ├── mobilenet │ ├── autotune │ │ ├── autotune_msg │ │ ├── mobilenet0.25.cc │ │ ├── mobilenet0.5.cc │ │ ├── mobilenet0.75.cc │ │ ├── mobilenet1.0.cc │ │ ├── mobilenetV2_0.25.cc │ │ ├── mobilenetV2_0.5.cc │ │ ├── mobilenetV2_0.75.cc │ │ ├── mobilenetV2_1.0.cc │ │ └── mobilenetV2_1.0_autotuneByKernel.cc │ └── untune │ │ ├── mobilenet0.25.cc │ │ ├── mobilenet0.5.cc │ │ ├── mobilenet0.75.cc │ │ ├── mobilenet1.0.cc │ │ ├── mobilenetV2_0.25.cc │ │ ├── mobilenetV2_0.5.cc │ │ ├── mobilenetV2_0.75.cc │ │ └── mobilenetV2_1.0.cc └── resnet │ ├── autotune │ ├── autotune_msg │ ├── resnet101.cc │ ├── resnet152.cc │ ├── resnet18.cc │ ├── resnet26.cc │ ├── resnet26d.cc │ ├── resnet34.cc │ ├── resnet50.cc │ └── resnet50_autotuneByKernel.cc │ └── untune │ ├── resnet101.cc │ ├── resnet152.cc │ ├── resnet18.cc │ ├── resnet26.cc │ ├── resnet26d.cc │ ├── resnet34.cc │ └── resnet50.cc ├── TVM ├── README.md ├── TVM_perlayer │ ├── conv2d_depth_profile.py │ ├── res │ │ ├── broadwell_v100 │ │ │ ├── res_gpu_1batch_mobilenet_tuned_ljj │ │ │ ├── res_gpu_1batch_resnet50_tuned_ljj │ │ │ ├── res_x86_broadwell_1batch_1thread_mobilenetv2_1.0_tuned_ljj │ │ │ ├── res_x86_broadwell_1batch_1thread_resnet50_tuned_ljj │ │ │ ├── res_x86_broadwell_1batch_multhread_mobilenetv2_1.0_tuned_ljj │ │ │ └── res_x86_broadwell_1batch_multhread_resnet50_tuned_ljj │ │ └── skylake_2080ti │ │ │ ├── res_gpu_1batch_mobilenetv2_1.0_tuned_ljj │ │ │ ├── res_gpu_1batch_resnet50_tuned_ljj │ │ │ ├── res_x86_skylake_1batch_1thread_mobilenetv2_1.0_tuned_ljj │ │ │ ├── res_x86_skylake_1batch_1thread_resnet50_tuned_ljj │ │ │ ├── res_x86_skylake_1batch_Multhreads_mobilenetv2_1.0_tuned_ljj │ │ │ └── res_x86_skylake_1batch_Multhreads_resnet50_tuned_ljj │ ├── run_tvm_perlayer.sh │ └── utils.py ├── run_tvm_1thread_tuned.sh ├── tvm-log │ ├── 2080Ti │ │ ├── 1thread-log │ │ │ └── x86 │ │ │ │ └── onnx │ │ │ │ └── 1batch │ │ │ │ ├── x86_onnx_1batch_alexnet.log │ │ │ │ ├── x86_onnx_1batch_densenet121.log │ │ │ │ ├── x86_onnx_1batch_densenet161.log │ │ │ │ ├── x86_onnx_1batch_densenet169.log.tmp │ │ │ │ ├── x86_onnx_1batch_googlenet.log │ │ │ │ ├── x86_onnx_1batch_inceptionv3.log │ │ │ │ ├── x86_onnx_1batch_mnasnet0_5.log │ │ │ │ ├── x86_onnx_1batch_mnasnet1_0.log │ │ │ │ ├── x86_onnx_1batch_mobilenet0.25.log │ │ │ │ ├── x86_onnx_1batch_mobilenet0.5.log │ │ │ │ ├── x86_onnx_1batch_mobilenet0.75.log │ │ │ │ ├── x86_onnx_1batch_mobilenet1.0.log │ │ │ │ ├── x86_onnx_1batch_mobilenetv2_0.25.log │ │ │ │ ├── x86_onnx_1batch_mobilenetv2_0.5.log │ │ │ │ ├── x86_onnx_1batch_mobilenetv2_0.75.log │ │ │ │ ├── x86_onnx_1batch_mobilenetv2_1.0.log │ │ │ │ ├── x86_onnx_1batch_resnet101.log │ │ │ │ ├── x86_onnx_1batch_resnet101_v2.log │ │ │ │ ├── x86_onnx_1batch_resnet152.log │ │ │ │ ├── x86_onnx_1batch_resnet152_v2.log │ │ │ │ ├── x86_onnx_1batch_resnet34_v2.log.tmp │ │ │ │ ├── x86_onnx_1batch_resnet50.log │ │ │ │ ├── x86_onnx_1batch_resnet50_v2.log │ │ │ │ ├── x86_onnx_1batch_resnext101_32x8d.log │ │ │ │ ├── x86_onnx_1batch_resnext50_32x4d.log │ │ │ │ ├── x86_onnx_1batch_shufflenet_v2_x0_5.log │ │ │ │ ├── x86_onnx_1batch_shufflenet_v2_x1_0.log │ │ │ │ ├── x86_onnx_1batch_squeezenet1_0.log │ │ │ │ ├── x86_onnx_1batch_squeezenet1_1.log │ │ │ │ ├── x86_onnx_1batch_vgg11.log │ │ │ │ ├── x86_onnx_1batch_vgg11_bn.log │ │ │ │ ├── x86_onnx_1batch_vgg13.log │ │ │ │ ├── x86_onnx_1batch_vgg13_bn.log │ │ │ │ ├── x86_onnx_1batch_vgg16.log │ │ │ │ ├── x86_onnx_1batch_vgg16_bn.log │ │ │ │ ├── x86_onnx_1batch_vgg19.log │ │ │ │ ├── x86_onnx_1batch_vgg19_bn.log │ │ │ │ ├── x86_onnx_1batch_wide_resnet101_2.log │ │ │ │ └── x86_onnx_1batch_wide_resnet50_2.log │ │ ├── gpu │ │ │ └── onnx │ │ │ │ └── 1batch │ │ │ │ ├── gpu_onnx_1batch_densenet121.log │ │ │ │ ├── gpu_onnx_1batch_densenet161.log.tmp │ │ │ │ ├── gpu_onnx_1batch_mnasnet0_5.log │ │ │ │ ├── gpu_onnx_1batch_mnasnet1_0.log │ │ │ │ ├── gpu_onnx_1batch_mobilenet0.25.log │ │ │ │ ├── gpu_onnx_1batch_mobilenet0.5.log │ │ │ │ ├── gpu_onnx_1batch_mobilenet0.75.log │ │ │ │ ├── gpu_onnx_1batch_mobilenet1.0.log │ │ │ │ ├── gpu_onnx_1batch_mobilenetv2_0.25.log │ │ │ │ ├── gpu_onnx_1batch_mobilenetv2_0.5.log │ │ │ │ ├── gpu_onnx_1batch_mobilenetv2_0.75.log │ │ │ │ ├── gpu_onnx_1batch_mobilenetv2_1.0.log │ │ │ │ ├── gpu_onnx_1batch_resnet101.log │ │ │ │ ├── gpu_onnx_1batch_resnet101_v2.log │ │ │ │ ├── gpu_onnx_1batch_resnet152.log │ │ │ │ ├── gpu_onnx_1batch_resnet152_v2.log │ │ │ │ ├── gpu_onnx_1batch_resnet18.log │ │ │ │ ├── gpu_onnx_1batch_resnet18_v2.log │ │ │ │ ├── gpu_onnx_1batch_resnet34.log │ │ │ │ ├── gpu_onnx_1batch_resnet34_v2.log │ │ │ │ ├── gpu_onnx_1batch_resnet50.log │ │ │ │ ├── gpu_onnx_1batch_resnet50_v2.log │ │ │ │ ├── gpu_onnx_1batch_squeezenet1_0.log.tmp │ │ │ │ ├── gpu_onnx_1batch_vgg16.log │ │ │ │ └── gpu_onnx_1batch_vgg19.log │ │ └── x86 │ │ │ └── onnx │ │ │ └── 1batch │ │ │ ├── x86_onnx_1batch_alexnet.log │ │ │ ├── x86_onnx_1batch_densenet121.log │ │ │ ├── x86_onnx_1batch_densenet161.log │ │ │ ├── x86_onnx_1batch_densenet169.log │ │ │ ├── x86_onnx_1batch_densenet201.log │ │ │ ├── x86_onnx_1batch_googlenet.log │ │ │ ├── x86_onnx_1batch_inceptionv3.log │ │ │ ├── x86_onnx_1batch_mnasnet0_5.log │ │ │ ├── x86_onnx_1batch_mnasnet1_0.log │ │ │ ├── x86_onnx_1batch_mobilenet0.25.log │ │ │ ├── x86_onnx_1batch_mobilenet0.5.log │ │ │ ├── x86_onnx_1batch_mobilenet0.75.log │ │ │ ├── x86_onnx_1batch_mobilenet1.0.log │ │ │ ├── x86_onnx_1batch_mobilenetv2_0.25.log │ │ │ ├── x86_onnx_1batch_mobilenetv2_0.5.log │ │ │ ├── x86_onnx_1batch_mobilenetv2_0.75.log │ │ │ ├── x86_onnx_1batch_mobilenetv2_1.0.log │ │ │ ├── x86_onnx_1batch_resnet101.log │ │ │ ├── x86_onnx_1batch_resnet101_v2.log │ │ │ ├── x86_onnx_1batch_resnet152.log │ │ │ ├── x86_onnx_1batch_resnet152_v2.log │ │ │ ├── x86_onnx_1batch_resnet18.log │ │ │ ├── x86_onnx_1batch_resnet18_v2.log │ │ │ ├── x86_onnx_1batch_resnet34.log │ │ │ ├── x86_onnx_1batch_resnet34_v2.log │ │ │ ├── x86_onnx_1batch_resnet50.log │ │ │ ├── x86_onnx_1batch_resnet50_v2.log │ │ │ ├── x86_onnx_1batch_resnext101_32x8d.log │ │ │ ├── x86_onnx_1batch_resnext50_32x4d.log │ │ │ ├── x86_onnx_1batch_shufflenet_v2_x0_5.log │ │ │ ├── x86_onnx_1batch_shufflenet_v2_x1_0.log │ │ │ ├── x86_onnx_1batch_squeezenet1_0.log │ │ │ ├── x86_onnx_1batch_squeezenet1_1.log │ │ │ ├── x86_onnx_1batch_vgg11.log │ │ │ ├── x86_onnx_1batch_vgg11_bn.log │ │ │ ├── x86_onnx_1batch_vgg13.log │ │ │ ├── x86_onnx_1batch_vgg13_bn.log │ │ │ ├── x86_onnx_1batch_vgg16.log │ │ │ ├── x86_onnx_1batch_vgg16_bn.log │ │ │ ├── x86_onnx_1batch_vgg19.log │ │ │ ├── x86_onnx_1batch_vgg19_bn.log │ │ │ ├── x86_onnx_1batch_wide_resnet101_2.log │ │ │ └── x86_onnx_1batch_wide_resnet50_2.log │ └── V100 │ │ ├── 1thread-log │ │ └── x86 │ │ │ └── onnx │ │ │ └── 1batch │ │ │ ├── x86_onnx_1batch_alexnet.log │ │ │ ├── x86_onnx_1batch_densenet121.log │ │ │ ├── x86_onnx_1batch_densenet161.log │ │ │ ├── x86_onnx_1batch_densenet169.log │ │ │ ├── x86_onnx_1batch_densenet201.log │ │ │ ├── x86_onnx_1batch_googlenet.log │ │ │ ├── x86_onnx_1batch_inceptionv3.log │ │ │ ├── x86_onnx_1batch_mnasnet0_5.log │ │ │ ├── x86_onnx_1batch_mnasnet1_0.log │ │ │ ├── x86_onnx_1batch_mobilenet0.25.log │ │ │ ├── x86_onnx_1batch_mobilenet0.5.log │ │ │ ├── x86_onnx_1batch_mobilenet0.75.log │ │ │ ├── x86_onnx_1batch_mobilenet1.0.log │ │ │ ├── x86_onnx_1batch_mobilenetv2_0.25.log │ │ │ ├── x86_onnx_1batch_mobilenetv2_0.5.log │ │ │ ├── x86_onnx_1batch_mobilenetv2_0.75.log │ │ │ ├── x86_onnx_1batch_mobilenetv2_1.0.log │ │ │ ├── x86_onnx_1batch_resnet101.log │ │ │ ├── x86_onnx_1batch_resnet101_v2.log │ │ │ ├── x86_onnx_1batch_resnet152.log │ │ │ ├── x86_onnx_1batch_resnet152_v2.log │ │ │ ├── x86_onnx_1batch_resnet18.log │ │ │ ├── x86_onnx_1batch_resnet18_v2.log │ │ │ ├── x86_onnx_1batch_resnet50.log │ │ │ ├── x86_onnx_1batch_resnet50_v2.log │ │ │ ├── x86_onnx_1batch_resnext101_32x8d.log │ │ │ ├── x86_onnx_1batch_resnext50_32x4d.log │ │ │ ├── x86_onnx_1batch_shufflenet_v2_x0_5.log │ │ │ ├── x86_onnx_1batch_shufflenet_v2_x1_0.log │ │ │ ├── x86_onnx_1batch_squeezenet1_0.log │ │ │ ├── x86_onnx_1batch_squeezenet1_1.log │ │ │ ├── x86_onnx_1batch_vgg11.log │ │ │ ├── x86_onnx_1batch_vgg11_bn.log │ │ │ ├── x86_onnx_1batch_vgg13.log │ │ │ ├── x86_onnx_1batch_vgg13_bn.log │ │ │ ├── x86_onnx_1batch_vgg16.log │ │ │ ├── x86_onnx_1batch_vgg16_bn.log │ │ │ ├── x86_onnx_1batch_vgg19.log │ │ │ ├── x86_onnx_1batch_vgg19_bn.log │ │ │ ├── x86_onnx_1batch_wide_resnet101_2.log │ │ │ └── x86_onnx_1batch_wide_resnet50_2.log │ │ ├── gpu │ │ └── onnx │ │ │ └── 1batch │ │ │ ├── gpu_onnx_1batch_alexnet.log │ │ │ ├── gpu_onnx_1batch_densenet121.log │ │ │ ├── gpu_onnx_1batch_densenet161.log.tmp │ │ │ ├── gpu_onnx_1batch_mnasnet0_5.log │ │ │ ├── gpu_onnx_1batch_mnasnet1_0.log │ │ │ ├── gpu_onnx_1batch_mobilenet0.25.log │ │ │ ├── gpu_onnx_1batch_mobilenet0.5.log │ │ │ ├── gpu_onnx_1batch_mobilenet0.75.log │ │ │ ├── gpu_onnx_1batch_mobilenet1.0.log │ │ │ ├── gpu_onnx_1batch_mobilenetv2_0.25.log │ │ │ ├── gpu_onnx_1batch_mobilenetv2_0.5.log │ │ │ ├── gpu_onnx_1batch_mobilenetv2_0.75.log │ │ │ ├── gpu_onnx_1batch_mobilenetv2_1.0.log │ │ │ ├── gpu_onnx_1batch_resnet101.log │ │ │ ├── gpu_onnx_1batch_resnet101_v2.log │ │ │ ├── gpu_onnx_1batch_resnet152.log │ │ │ ├── gpu_onnx_1batch_resnet152_v2.log │ │ │ ├── gpu_onnx_1batch_resnet18.log │ │ │ ├── gpu_onnx_1batch_resnet18_v2.log │ │ │ ├── gpu_onnx_1batch_resnet34.log │ │ │ ├── gpu_onnx_1batch_resnet34_v2.log │ │ │ ├── gpu_onnx_1batch_resnet50.log │ │ │ ├── gpu_onnx_1batch_resnet50_v2.log │ │ │ ├── gpu_onnx_1batch_squeezenet1_0.log.tmp │ │ │ ├── gpu_onnx_1batch_vgg16.log │ │ │ └── gpu_onnx_1batch_vgg19.log │ │ └── x86 │ │ └── onnx │ │ └── 1batch │ │ ├── x86_onnx_1batch_alexnet.log │ │ ├── x86_onnx_1batch_densenet121.log │ │ ├── x86_onnx_1batch_googlenet.log │ │ ├── x86_onnx_1batch_inceptionv3.log │ │ ├── x86_onnx_1batch_mnasnet0_5.log │ │ ├── x86_onnx_1batch_mnasnet1_0.log │ │ ├── x86_onnx_1batch_mobilenet0.25.log │ │ ├── x86_onnx_1batch_mobilenet0.5.log │ │ ├── x86_onnx_1batch_mobilenet0.75.log │ │ ├── x86_onnx_1batch_mobilenet1.0.log │ │ ├── x86_onnx_1batch_mobilenetv2_0.25.log │ │ ├── x86_onnx_1batch_mobilenetv2_0.5.log │ │ ├── x86_onnx_1batch_mobilenetv2_0.75.log │ │ ├── x86_onnx_1batch_mobilenetv2_1.0.log │ │ ├── x86_onnx_1batch_resnet101.log │ │ ├── x86_onnx_1batch_resnet101_v2.log │ │ ├── x86_onnx_1batch_resnet152.log │ │ ├── x86_onnx_1batch_resnet152_v2.log │ │ ├── x86_onnx_1batch_resnet50.log │ │ ├── x86_onnx_1batch_resnet50_v2.log │ │ ├── x86_onnx_1batch_resnext101_32x8d.log │ │ ├── x86_onnx_1batch_resnext50_32x4d.log │ │ ├── x86_onnx_1batch_shufflenet_v2_x0_5.log │ │ ├── x86_onnx_1batch_shufflenet_v2_x1_0.log │ │ ├── x86_onnx_1batch_squeezenet1_0.log │ │ ├── x86_onnx_1batch_squeezenet1_1.log │ │ ├── x86_onnx_1batch_vgg11_bn.log │ │ ├── x86_onnx_1batch_vgg13_bn.log │ │ ├── x86_onnx_1batch_vgg16.log │ │ ├── x86_onnx_1batch_vgg16_bn.log │ │ ├── x86_onnx_1batch_vgg19.log │ │ ├── x86_onnx_1batch_vgg19_bn.log │ │ ├── x86_onnx_1batch_wide_resnet101_2.log │ │ └── x86_onnx_1batch_wide_resnet50_2.log ├── tvm_tuned.py └── tvm_untuned.py ├── XLA ├── README.md ├── run_keras_models.py ├── run_nsys_profile.sh ├── run_tf2xla_end2end.py ├── run_tfprofiler.py └── tf2xla.py ├── micro-models ├── README.md ├── convert_onnx_to_pb.py ├── gen_conv_mobilenetv2_1.0.py └── gen_conv_resnet50.py ├── nGraph ├── README.md ├── ng.py ├── run_ng_end2end.py └── run_ng_perlayer_tracing.py └── utils ├── __init__.py ├── gather_data.py ├── list ├── list2 └── utils.py /Glow/README.md: -------------------------------------------------------------------------------- 1 | ## Glow 2 | 3 | ### Install 4 | - Replace the original glow/tools/loader/ExecutorCore.cpp with our modified file (ExecutorCore/ExecutorCore.cpp) 5 | - re-compile Glow 6 | 7 | ### Run end-to-end evaluation 8 | ```bash 9 | sh run_glow_end2end.sh 10 | ``` 11 | ### Run per-layer tracting 12 | ```bash 13 | # tracing, generate a json file 14 | sh glow_tracing.sh 15 | 16 | # parsing the json file 17 | python glow_tracing_parser.py 18 | ``` 19 | -------------------------------------------------------------------------------- /Glow/glow_tracing.sh: -------------------------------------------------------------------------------- 1 | ./bin/image-classifier ./images/cat_285.png -image-mode=0to1 -m mobilenetv2_1.0.onnx -model-input-name=data -backend=$1 --trace-path=mobilenet.json --auto-instrument 2 | -------------------------------------------------------------------------------- /Glow/images/cat_285.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/buaa-hipo/dlcompiler-comparison/b0dca60d45e22cd8847a4ca5a4e38e24db7fc346/Glow/images/cat_285.png -------------------------------------------------------------------------------- /Glow/run_glow_end2end.sh: -------------------------------------------------------------------------------- 1 | log_path=../logs/glow-$1-2080Ti 2 | for i in `cat ../utils/list` 3 | do 4 | echo $i | tee -a $log_path/$i 5 | ./bin/image-classifier ./images/cat_285.png -image-mode=0to1 -m ./models/${i}.onnx -model-input-name=data -backend=$1 | tee -a $log_path/$i 6 | done 7 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DL compiler comparison 2 | 3 | The performance comparison acrossing widely used deep learning compilers (e.g., TVM, nGraph, Tensor Comprehension, Glow and XLA) 4 | 5 | For more information about the DL compilers, please refer to our survey paper **The Deep Learning Compiler: A Comprehensive Survey** [on arXiv](https://arxiv.org/abs/2002.03794). 6 | 7 | We have compared the end-to-end and per-layer (convolution) performance among DL compilers on CNN models. We upload the corresponding scripts in this repo, and we hope to save time for the practitioners. 8 | 9 | ## Usage 10 | Please refer to the `README` in the following directories. 11 | ``` 12 | |-- TVM 13 | |-- nGraph 14 | |-- TC_perlayer 15 | |-- Glow 16 | |-- XLA 17 | |-- micro-models 18 | ``` 19 | -------------------------------------------------------------------------------- /TC_perlayer/README.md: -------------------------------------------------------------------------------- 1 | ## TC per-layer 2 | 3 | ### Install 4 | 5 | ### How-to-run 6 | 7 | 1. Without autotuning 8 | 9 | ```bash 10 | ./build/tc/example/resnet50 |tee output 11 | ``` 12 | 2. With autotuning 13 | 14 | ```bash 15 | ./build/tc/example/resnet50 --tuner_threads=28 --tuner_gen_pop_size=20 --tuner_gen_generations=25 --tuner_gen_number_elites=4 |tee output 16 | ``` 17 | 18 | 3. Run each convolution kernel for 15 times, and collect the last 10 times 19 | 20 | ![image-20200718110200812](img/image-20200718110200812.png) 21 | 22 | 4. Get the time data 23 | 24 | ```bash 25 | grep Time output | grep -oE '(conv|depthwise)_[a-zA-Z0-9_]+' # get conv name 26 | grep Time output |awk '{print $2}' # get time 27 | ``` 28 | -------------------------------------------------------------------------------- /TC_perlayer/img/image-20200718110200812.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/buaa-hipo/dlcompiler-comparison/b0dca60d45e22cd8847a4ca5a4e38e24db7fc346/TC_perlayer/img/image-20200718110200812.png -------------------------------------------------------------------------------- /TC_perlayer/mobilenet/autotune/autotune_msg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/buaa-hipo/dlcompiler-comparison/b0dca60d45e22cd8847a4ca5a4e38e24db7fc346/TC_perlayer/mobilenet/autotune/autotune_msg -------------------------------------------------------------------------------- /TC_perlayer/resnet/autotune/autotune_msg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/buaa-hipo/dlcompiler-comparison/b0dca60d45e22cd8847a4ca5a4e38e24db7fc346/TC_perlayer/resnet/autotune/autotune_msg -------------------------------------------------------------------------------- /TVM/README.md: -------------------------------------------------------------------------------- 1 | # TVM 2 | 3 | ## run end-to-end evaluation 4 | Refer to `tvm_tuned.py` and `tvm_untuned.py` for usage. 5 | 6 | `run_tvm_1thread_tuned.sh` provides an example. 7 | 8 | 9 | ## run per-layer evaluation 10 | Corresponding files in `TVM_perlayer` 11 | 12 | For usage, refer to the 'TVM_perlayer/run_tvm_perlayer.sh' 13 | -------------------------------------------------------------------------------- /TVM/TVM_perlayer/run_tvm_perlayer.sh: -------------------------------------------------------------------------------- 1 | 2 | #skylake-2080ti 3 | #export TVM_HOME=/root/lib/tvm-0.6 4 | 5 | #broadwell-v100 6 | export TVM_HOME=/root/tvm-0.6 7 | 8 | #export TVM_HOM=/root/tvm-0.7 9 | export PYTHONPATH=$TVM_HOME/python:$TVM_HOME/topi/python:${PYTHONPATH} 10 | export TVM_NUM_THREADS=1 11 | 12 | ## -->> resnet50 <<-- ## 13 | 14 | ## x86-avx2 15 | #python3 conv2d_depth_profile.py -d x86-avx2 -t 200 -b 1 -f broadwell -p false -thread 1 -model resnet50 -l ./logs/broadwell_v100/x86_onnx_1batch_1thread_resnet50.log > res/broadwell_v100/res_x86_broadwell_1batch_1thread_resnet50_tuned_ljj 16 | 17 | #python3 conv2d_depth_profile.py -d x86-avx2 -t 200 -b 1 -f broadwell -p false -thread 28 -model resnet50 -l ./logs/broadwell_v100/x86_onnx_1batch_multhread_resnet50.log > res/broadwell_v100/res_x86_broadwell_1batch_multhread_resnet50_tuned_ljj 18 | 19 | 20 | # gpu-v100 21 | #python3 conv2d_depth_profile.py -d gpu -t 200 -b 1 -f v100 -p false -model resnet50 -l ./logs/broadwell_v100/gpu_onnx_1batch_resnet50.log > res/broadwell_v100/res_gpu_1batch_resnet50_tuned_ljj 22 | 23 | ## -->> mobilenet <<-- ## 24 | 25 | ## x86-avx2 26 | #python3 conv2d_depth_profile.py -d x86-avx2 -t 200 -b 1 -f broadwell -p false -thread 1 -usedepthlog false -path ./data_results/perlayer_conv2d_depthwise_case.csv -model mobilenetv2_1.0 -l ./logs/broadwell_v100/x86_onnx_1batch_1thread_mobilenetv2_1.0.log > res/broadwell_v100/res_x86_broadwell_1batch_1thread_mobilenetv2_1.0_tuned_ljj 27 | 28 | #python3 conv2d_depth_profile.py -d x86-avx2 -t 200 -b 1 -f broadwell -p false -thread 28 -usedepthlog false -path ./data_results/perlayer_conv2d_depthwise_case.csv -model mobilenetv2_1.0 -l ./logs/broadwell_v100/x86_onnx_1batch_multhread_mobilenetv2_1.0.log > res/broadwell_v100/res_x86_broadwell_1batch_multhread_mobilenetv2_1.0_tuned_ljj 29 | 30 | # gpu-v100 31 | python3 conv2d_depth_profile.py -d gpu -t 200 -b 1 -f v100 -p false -path ./data_results/perlayer_conv2d_depthwise_case.csv -model mobilenetv2_1.0 -l ./logs/broadwell_v100/gpu_onnx_1batch_mobilenetv2_1.0.log > res/broadwell_v100/res_gpu_1batch_mobilenet_tuned_ljj 32 | 33 | 34 | ## -->> resnet50 <<-- ## 35 | 36 | ## x86-avx512 37 | # avx-512 is not set when tuning 38 | #python3 conv2d_depth_profile.py -d x86-avx512 -t 200 -b 1 -f skylake -p false -thread 1 -l ./logs/skylake_2080ti/x86_onnx_1batch_1thread_resnet50.log > res/skylake_2080ti/res_x86_skylake_1batch_1thread_resnet50_tuned_ljj 39 | # use this one 40 | #python3 conv2d_depth_profile.py -d x86-avx2 -t 200 -b 1 -f skylake -p false -thread 1 -l ./logs/skylake_2080ti/x86_onnx_1batch_1thread_resnet50.log > res/skylake_2080ti/res_x86_skylake_1batch_1thread_resnet50_tuned_ljj 41 | 42 | #python3 conv2d_depth_profile.py -d x86-avx2 -t 200 -b 1 -f skylake -p false -thread 16 -l ./logs/skylake_2080ti/x86_onnx_1batch_Multhreads_resnet50.log > res/skylake_2080ti/res_x86_skylake_1batch_Multhreads_resnet50_tuned_ljj 43 | 44 | # gpu-2080ti 45 | #python3 conv2d_depth_profile.py -d gpu -t 200 -b 1 -f 2080ti -p false -l ./logs/skylake_2080ti/gpu_onnx_1batch_resnet50.log > res/skylake_2080ti/res_gpu_1batch_resnet50_tuned_ljj 46 | 47 | 48 | ## -->> mobilenet <<-- ## 49 | 50 | # x86-avx512 51 | #python3 conv2d_depth_profile.py -d x86-avx2 -t 200 -b 1 -f skylake -p false -thread 1 -usedepthlog false -path ./data_results/perlayer_conv2d_depthwise_case.csv -model mobilenetv2_1.0 -l ./logs/skylake_2080ti/x86_onnx_1batch_1thread_mobilenetv2_1.0.log > res/skylake_2080ti/res_x86_skylake_1batch_1thread_mobilenetv2_1.0_tuned_ljj 52 | 53 | #python3 conv2d_depth_profile.py -d x86-avx2 -t 200 -b 1 -f skylake -p false -thread 16 -usedepthlog false -path ./data_results/perlayer_conv2d_depthwise_case.csv -model mobilenetv2_1.0 -l ./logs/skylake_2080ti/x86_onnx_1batch_Multhreads_mobilenetv2_1.0.log > res/skylake_2080ti/res_x86_skylake_1batch_Multhreads_mobilenetv2_1.0_tuned_ljj 54 | 55 | # gpu-2080ti 56 | #python3 conv2d_depth_profile.py -d gpu -t 200 -b 1 -f 2080ti -p false -path ./data_results/perlayer_conv2d_depthwise_case.csv -model mobilenetv2_1.0 -l ./logs/skylake_2080ti/gpu_onnx_1batch_mobilenetv2_1.0.log > res/skylake_2080ti/res_gpu_1batch_mobilenetv2_1.0_tuned_ljj 57 | 58 | 59 | 60 | 61 | 62 | 63 | -------------------------------------------------------------------------------- /TVM/TVM_perlayer/utils.py: -------------------------------------------------------------------------------- 1 | import tvm.relay as relay 2 | import tvm 3 | 4 | def create_target(device): 5 | if device == "x86": 6 | print("from x86") 7 | target = tvm.target.create("llvm -mcpu=core-avx2") 8 | elif device == "x86-avx2": 9 | print("from x86-avx2") 10 | target = tvm.target.create("llvm -mcpu=core-avx2") 11 | elif device == "x86-avx512": 12 | print("from x86-avx-512") 13 | target = tvm.target.create("llvm -mcpu=skylake-avx512") 14 | elif device == "gpu": 15 | target = tvm.target.cuda() 16 | elif device == "aarch64": 17 | target = tvm.target.create('llvm -device=arm_cpu -target=aarch64-linux-gnu -mattr=+neon') 18 | elif device == "arm": 19 | target = tvm.target.create('llvm -device=arm_cpu -target=armv7l-linux-gnueabihf -mattr=+neon') 20 | return target 21 | 22 | 23 | def create_ctx(device, did = 0): 24 | print('ctx on device ' + device) 25 | if 'x86' in device : 26 | ctx = tvm.cpu(did) 27 | elif device == "gpu": 28 | ctx = tvm.gpu(did) 29 | return ctx 30 | 31 | def speed(graph, lib, params, ctx): 32 | import numpy as np 33 | import tvm.contrib.graph_runtime as runtime 34 | import json 35 | graph_dict = json.loads(graph) 36 | input_shape = graph_dict["attrs"]["shape"][1][0] 37 | input_name = graph_dict["nodes"][0]["name"] 38 | data_tvm = tvm.nd.array(np.random.uniform(size = input_shape).astype("float32")) 39 | module = runtime.create(graph, lib, ctx) 40 | module.set_input(input_name, data_tvm) 41 | #module.load_params(params) 42 | module.set_input(**params) 43 | ftimer = module.module.time_evaluator("run", ctx, number = 1, repeat = 100) 44 | prof_res = np.array(ftimer().results) * 1000 45 | return np.mean(prof_res) 46 | 47 | def speed_profile(graph, lib, params, ctx): 48 | import numpy as np 49 | #import tvm.contrib.graph_runtime as runtime 50 | from tvm.contrib.debugger import debug_runtime as runtime 51 | import json 52 | graph_dict = json.loads(graph) 53 | input_shape = graph_dict["attrs"]["shape"][1][0] 54 | input_name = graph_dict["nodes"][0]["name"] 55 | data_tvm = tvm.nd.array(np.random.uniform(size = input_shape).astype("float32")) 56 | module = runtime.create(graph, lib, ctx) 57 | module.set_input(input_name, data_tvm) 58 | #module.load_params(params) 59 | module.set_input(**params) 60 | ftimer = module.module.time_evaluator("run", ctx, number = 1, repeat = 100) 61 | prof_res = np.array(ftimer().results) * 1000 62 | # profile 63 | module.run() 64 | # 65 | return np.mean(prof_res) 66 | 67 | def get_onnx(path, batch=1): 68 | import onnx 69 | on = onnx.load(open(path, "rb")) 70 | name = on.graph.input[0].name 71 | input_shape = [i.dim_value for i in on.graph.input[0].type.tensor_type.shape.dim] 72 | input_shape[0] = batch 73 | return on, {name : input_shape} 74 | 75 | def get_model(path): 76 | graph = open(path + ".json").read() 77 | lib = tvm.runtime.module.load_module(path + ".tar") 78 | params = bytearray(open(path + ".params", "rb").read()) 79 | return graph, lib, params 80 | 81 | def build_model_from_onnx(onnx_model, input_shape, target, log = ""): 82 | from tvm import autotvm 83 | import os 84 | model, relay_params = relay.frontend.from_onnx(onnx_model, input_shape) 85 | func = model["main"] 86 | if os.path.isfile(log): 87 | with autotvm.apply_history_best(log): 88 | with relay.build_config(opt_level=4): 89 | graph, lib, params = relay.build(func , target, params = relay_params) 90 | else: 91 | with relay.build_config(opt_level=4): 92 | graph, lib, params = relay.build(func , target, params = relay_params) 93 | 94 | 95 | return graph, lib , params 96 | 97 | def save_model(graph, lib, params, prefix = "relay"): 98 | deploy_name = prefix 99 | import os 100 | dir_name = os.path.dirname(deploy_name) 101 | try: 102 | os.mkdir(dir_name) 103 | except: 104 | pass 105 | print("save to %s" % (deploy_name)) 106 | lib.export_library(deploy_name + '.tar' ) 107 | with open(deploy_name + ".json", "w") as fo: 108 | fo.write(graph) 109 | with open(deploy_name + ".params", "wb") as fo: 110 | fo.write(relay.save_param_dict(params)) 111 | return True 112 | -------------------------------------------------------------------------------- /TVM/run_tvm_1thread_tuned.sh: -------------------------------------------------------------------------------- 1 | export TVM_NUM_THREADS=1 2 | 3 | log_path=./logs/tvm-x86-1thread-V100-tuned 4 | for i in `cat list` 5 | do 6 | python3 tvm_tuned.py -m models/$i.onnx -t x86 -l ./tvm-log/1thread-log/x86/onnx/1batch/x86_onnx_1batch_$i.log | tee $log_path/$i 7 | done 8 | 9 | unset TVM_NUM_THREADS 10 | -------------------------------------------------------------------------------- /TVM/tvm-log/2080Ti/1thread-log/x86/onnx/1batch/x86_onnx_1batch_alexnet.log: -------------------------------------------------------------------------------- 1 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 3, 224, 224], "float32"], ["TENSOR", [64, 3, 11, 11], "float32"], [4, 4], [2, 2], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 3, 224, 224, "float32"], [64, 3, 11, 11, "float32"], [4, 4], [2, 2], [1, 1], "NCHW", "float32"], {"i": 79, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 3]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 5]], ["unroll_kw", "ot", false]]}], "r": [[0.002752385894409938], 0, 2.7186777591705322, 1596115482.905654], "v": 0.1} 2 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 27, 27], "float32"], ["TENSOR", [192, 64, 5, 5], "float32"], [1, 1], [2, 2], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 27, 27, "float32"], [192, 64, 5, 5, "float32"], [1, 1], [2, 2], [1, 1], "NCHW", "float32"], {"i": 236, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 32]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 9]], ["unroll_kw", "ot", true]]}], "r": [[0.009511141642857143], 0, 3.5630879402160645, 1596115874.7679815], "v": 0.1} 3 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 192, 13, 13], "float32"], ["TENSOR", [384, 192, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 192, 13, 13, "float32"], [384, 192, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 744, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 3]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 13]], ["unroll_kw", "ot", false]]}], "r": [[0.006757987746724891], 0, 3.0896570682525635, 1596116201.6735425], "v": 0.1} 4 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 384, 13, 13], "float32"], ["TENSOR", [256, 384, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 384, 13, 13, "float32"], [256, 384, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 482, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 3]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 13]], ["unroll_kw", "ot", false]]}], "r": [[0.009081062823863636], 0, 3.2910845279693604, 1596116250.236479], "v": 0.1} 5 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 13, 13], "float32"], ["TENSOR", [256, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 13, 13, "float32"], [256, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 109, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 2]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 13]], ["unroll_kw", "ot", true]]}], "r": [[0.006212036381526104], 0, 3.1253952980041504, 1596116652.764027], "v": 0.1} 6 | -------------------------------------------------------------------------------- /TVM/tvm-log/2080Ti/1thread-log/x86/onnx/1batch/x86_onnx_1batch_vgg11.log: -------------------------------------------------------------------------------- 1 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 3, 224, 224], "float32"], ["TENSOR", [64, 3, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 3, 224, 224, "float32"], [64, 3, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 266, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 1]], ["tile_ow", "sp", [-1, 56]], ["unroll_kw", "ot", false]]}], "r": [[0.006918500872807018], 0, 3.296509265899658, 1596117065.9077153], "v": 0.1} 2 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 112, 112], "float32"], ["TENSOR", [128, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 112, 112, "float32"], [128, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 142, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 4]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.03377314466666667], 0, 2.1923346519470215, 1596117450.1103528], "v": 0.1} 3 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 56, 56], "float32"], ["TENSOR", [256, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 56, 56, "float32"], [256, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 179, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 8]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.0329542789032258], 0, 2.2047877311706543, 1596117948.0741732], "v": 0.1} 4 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 56, 56], "float32"], ["TENSOR", [256, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 56, 56, "float32"], [256, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 202, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 16]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.066816388125], 0, 2.686286211013794, 1596118346.064727], "v": 0.1} 5 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 28, 28], "float32"], ["TENSOR", [512, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 28, 28, "float32"], [512, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 220, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 16]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.033198615258064515], 0, 2.3146960735321045, 1596118574.0945194], "v": 0.1} 6 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 28, 28], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 28, 28, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 245, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 32]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.06677948875], 0, 2.563065528869629, 1596118787.7118487], "v": 0.1} 7 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 14, 14], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 14, 14, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 234, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 16]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.020346158], 0, 3.442610740661621, 1596119030.5664802], "v": 0.1} 8 | -------------------------------------------------------------------------------- /TVM/tvm-log/2080Ti/1thread-log/x86/onnx/1batch/x86_onnx_1batch_vgg11_bn.log: -------------------------------------------------------------------------------- 1 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 3, 224, 224], "float32"], ["TENSOR", [64, 3, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 3, 224, 224, "float32"], [64, 3, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 266, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 1]], ["tile_ow", "sp", [-1, 56]], ["unroll_kw", "ot", false]]}], "r": [[0.006836010458515284], 0, 3.3540902137756348, 1596128612.2345247], "v": 0.1} 2 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 112, 112], "float32"], ["TENSOR", [128, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 112, 112, "float32"], [128, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 644, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", false]]}], "r": [[0.03340126686666667], 0, 1.9902942180633545, 1596129194.943994], "v": 0.1} 3 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 56, 56], "float32"], ["TENSOR", [256, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 56, 56, "float32"], [256, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 179, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 8]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.03300625596774194], 0, 2.1245760917663574, 1596129539.0478356], "v": 0.1} 4 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 56, 56], "float32"], ["TENSOR", [256, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 56, 56, "float32"], [256, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 200, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 4]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.0651163904375], 0, 2.567615032196045, 1596129790.5840647], "v": 0.1} 5 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 28, 28], "float32"], ["TENSOR", [512, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 28, 28, "float32"], [512, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 220, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 16]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.033263295354166666], 0, 3.8218603134155273, 1596130006.6182668], "v": 0.1} 6 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 28, 28], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 28, 28, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 245, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 32]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.06700168725], 0, 2.5436770915985107, 1596130240.0707273], "v": 0.1} 7 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 14, 14], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 14, 14, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 237, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 128]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.020296259721518987], 0, 3.4179916381835938, 1596130464.1363254], "v": 0.1} 8 | -------------------------------------------------------------------------------- /TVM/tvm-log/2080Ti/1thread-log/x86/onnx/1batch/x86_onnx_1batch_vgg13.log: -------------------------------------------------------------------------------- 1 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 3, 224, 224], "float32"], ["TENSOR", [64, 3, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 3, 224, 224, "float32"], [64, 3, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 267, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 3]], ["tile_oc", "sp", [-1, 1]], ["tile_ow", "sp", [-1, 56]], ["unroll_kw", "ot", false]]}], "r": [[0.006898583454148472], 0, 3.3767552375793457, 1596119487.713077], "v": 0.1} 2 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 224, 224], "float32"], ["TENSOR", [64, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 224, 224, "float32"], [64, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 169, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 2]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.081739508], 0, 2.836487293243408, 1596120010.167118], "v": 0.1} 3 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 112, 112], "float32"], ["TENSOR", [128, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 112, 112, "float32"], [128, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 142, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 4]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.0338081361], 0, 2.247675895690918, 1596120416.4209824], "v": 0.1} 4 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 112, 112], "float32"], ["TENSOR", [128, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 112, 112, "float32"], [128, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 162, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 4]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.0665116335625], 0, 2.6345653533935547, 1596120930.915767], "v": 0.1} 5 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 56, 56], "float32"], ["TENSOR", [256, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 56, 56, "float32"], [256, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 179, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 8]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.03311267677419355], 0, 2.1344892978668213, 1596121172.5803533], "v": 0.1} 6 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 56, 56], "float32"], ["TENSOR", [256, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 56, 56, "float32"], [256, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 200, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 4]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.065049608625], 0, 2.6100494861602783, 1596121402.4667797], "v": 0.1} 7 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 28, 28], "float32"], ["TENSOR", [512, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 28, 28, "float32"], [512, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 220, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 16]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.03290496412903226], 0, 2.2973506450653076, 1596121648.4773412], "v": 0.1} 8 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 28, 28], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 28, 28, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 245, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 32]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.0668443998125], 0, 2.546466112136841, 1596121853.3508365], "v": 0.1} 9 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 14, 14], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 14, 14, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 237, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 128]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.020334838860759492], 0, 3.483874797821045, 1596122107.928696], "v": 0.1} 10 | -------------------------------------------------------------------------------- /TVM/tvm-log/2080Ti/1thread-log/x86/onnx/1batch/x86_onnx_1batch_vgg13_bn.log: -------------------------------------------------------------------------------- 1 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 3, 224, 224], "float32"], ["TENSOR", [64, 3, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 3, 224, 224, "float32"], [64, 3, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 62, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 8]], ["unroll_kw", "ot", true]]}], "r": [[0.0073040463853211], 0, 3.184107780456543, 1596131034.2524643], "v": 0.1} 2 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 224, 224], "float32"], ["TENSOR", [64, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 224, 224, "float32"], [64, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 131, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 32]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.08783118725], 0, 3.130889654159546, 1596131293.7537804], "v": 0.1} 3 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 112, 112], "float32"], ["TENSOR", [128, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 112, 112, "float32"], [128, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 142, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 4]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.03402641443333333], 0, 2.3585143089294434, 1596131845.4983296], "v": 0.1} 4 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 112, 112], "float32"], ["TENSOR", [128, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 112, 112, "float32"], [128, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 162, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 4]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.0664316], 0, 2.610201358795166, 1596132256.908666], "v": 0.1} 5 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 56, 56], "float32"], ["TENSOR", [256, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 56, 56, "float32"], [256, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 179, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 8]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.03297800370967742], 0, 2.1585347652435303, 1596132486.495907], "v": 0.1} 6 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 56, 56], "float32"], ["TENSOR", [256, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 56, 56, "float32"], [256, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 202, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 16]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.066795277625], 0, 2.6188759803771973, 1596132707.2719684], "v": 0.1} 7 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 28, 28], "float32"], ["TENSOR", [512, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 28, 28, "float32"], [512, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 220, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 16]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.03315443938709678], 0, 2.5341038703918457, 1596132965.1325734], "v": 0.1} 8 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 28, 28], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 28, 28, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 245, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 32]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.0667315534375], 0, 2.5224661827087402, 1596133170.459807], "v": 0.1} 9 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 14, 14], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 14, 14, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 237, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 128]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.020348928443037973], 0, 3.4437501430511475, 1596133413.5796592], "v": 0.1} 10 | -------------------------------------------------------------------------------- /TVM/tvm-log/2080Ti/1thread-log/x86/onnx/1batch/x86_onnx_1batch_vgg16.log: -------------------------------------------------------------------------------- 1 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 3, 224, 224], "float32"], ["TENSOR", [64, 3, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 3, 224, 224, "float32"], [64, 3, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 266, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 1]], ["tile_ow", "sp", [-1, 56]], ["unroll_kw", "ot", false]]}], "r": [[0.006870585986607143], 0, 3.288921594619751, 1596122564.750097], "v": 0.1} 2 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 224, 224], "float32"], ["TENSOR", [64, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 224, 224, "float32"], [64, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 217, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 8]], ["unroll_kw", "ot", true]]}], "r": [[0.0785388715625], 0, 2.7927939891815186, 1596123095.9288073], "v": 0.1} 3 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 112, 112], "float32"], ["TENSOR", [128, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 112, 112, "float32"], [128, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 142, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 4]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.034002204700000004], 0, 2.209871530532837, 1596123441.8793747], "v": 0.1} 4 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 112, 112], "float32"], ["TENSOR", [128, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 112, 112, "float32"], [128, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 162, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 4]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.0664630863125], 0, 2.63623309135437, 1596124004.795023], "v": 0.1} 5 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 56, 56], "float32"], ["TENSOR", [256, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 56, 56, "float32"], [256, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 179, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 8]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.03304497941935484], 0, 2.189822196960449, 1596124233.2152858], "v": 0.1} 6 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 56, 56], "float32"], ["TENSOR", [256, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 56, 56, "float32"], [256, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 200, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 4]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.065117898125], 0, 2.5823304653167725, 1596124473.8088424], "v": 0.1} 7 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 28, 28], "float32"], ["TENSOR", [512, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 28, 28, "float32"], [512, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 220, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 16]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.03307271177419355], 0, 2.344104051589966, 1596124686.905911], "v": 0.1} 8 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 28, 28], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 28, 28, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 245, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 32]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.0667468598125], 0, 2.534515380859375, 1596124916.6428285], "v": 0.1} 9 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 14, 14], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 14, 14, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 237, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 128]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.020307563227848102], 0, 3.439610242843628, 1596125173.2293992], "v": 0.1} 10 | -------------------------------------------------------------------------------- /TVM/tvm-log/2080Ti/1thread-log/x86/onnx/1batch/x86_onnx_1batch_vgg16_bn.log: -------------------------------------------------------------------------------- 1 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 3, 224, 224], "float32"], ["TENSOR", [64, 3, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 3, 224, 224, "float32"], [64, 3, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 202, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 8]], ["unroll_kw", "ot", false]]}], "r": [[0.007245901681818182], 0, 3.1858508586883545, 1596133947.9010143], "v": 0.1} 2 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 224, 224], "float32"], ["TENSOR", [64, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 224, 224, "float32"], [64, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 217, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 8]], ["unroll_kw", "ot", true]]}], "r": [[0.0783156791875], 0, 2.7045326232910156, 1596134563.8508098], "v": 0.1} 3 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 112, 112], "float32"], ["TENSOR", [128, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 112, 112, "float32"], [128, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 644, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", false]]}], "r": [[0.0333800842], 0, 1.8803057670593262, 1596135185.079038], "v": 0.1} 4 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 112, 112], "float32"], ["TENSOR", [128, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 112, 112, "float32"], [128, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 736, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", false]]}], "r": [[0.065042288375], 0, 2.3006300926208496, 1596135202.7929401], "v": 0.1} 5 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 56, 56], "float32"], ["TENSOR", [256, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 56, 56, "float32"], [256, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 179, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 8]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.03298403467741935], 0, 2.107243776321411, 1596135446.8200831], "v": 0.1} 6 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 56, 56], "float32"], ["TENSOR", [256, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 56, 56, "float32"], [256, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 202, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 16]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.0669208055625], 0, 2.671328544616699, 1596135673.993506], "v": 0.1} 7 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 28, 28], "float32"], ["TENSOR", [512, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 28, 28, "float32"], [512, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 220, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 16]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.03315498325806451], 0, 2.305938959121704, 1596135911.3425171], "v": 0.1} 8 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 28, 28], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 28, 28, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 245, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 32]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.066678249375], 0, 2.577552080154419, 1596136139.1942682], "v": 0.1} 9 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 14, 14], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 14, 14, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 230, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.02027193198734177], 0, 3.3808937072753906, 1596136439.1873512], "v": 0.1} 10 | -------------------------------------------------------------------------------- /TVM/tvm-log/2080Ti/1thread-log/x86/onnx/1batch/x86_onnx_1batch_vgg19.log: -------------------------------------------------------------------------------- 1 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 3, 224, 224], "float32"], ["TENSOR", [64, 3, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 3, 224, 224, "float32"], [64, 3, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 266, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 1]], ["tile_ow", "sp", [-1, 56]], ["unroll_kw", "ot", false]]}], "r": [[0.006783066541125541], 0, 3.3270492553710938, 1596125672.1265714], "v": 0.1} 2 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 224, 224], "float32"], ["TENSOR", [64, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 224, 224, "float32"], [64, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 217, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 8]], ["unroll_kw", "ot", true]]}], "r": [[0.0784305589375], 0, 2.704773187637329, 1596126293.9119365], "v": 0.1} 3 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 112, 112], "float32"], ["TENSOR", [128, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 112, 112, "float32"], [128, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 644, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", false]]}], "r": [[0.03322247622580645], 0, 2.0303592681884766, 1596126560.968603], "v": 0.1} 4 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 112, 112], "float32"], ["TENSOR", [128, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 112, 112, "float32"], [128, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 162, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 4]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.0662816479375], 0, 2.6218159198760986, 1596126827.2101874], "v": 0.1} 5 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 56, 56], "float32"], ["TENSOR", [256, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 56, 56, "float32"], [256, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 179, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 8]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.03313581316129032], 0, 2.1373229026794434, 1596127171.2113478], "v": 0.1} 6 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 56, 56], "float32"], ["TENSOR", [256, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 56, 56, "float32"], [256, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 200, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 4]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.0650433295], 0, 2.541339635848999, 1596127408.8611462], "v": 0.1} 7 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 28, 28], "float32"], ["TENSOR", [512, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 28, 28, "float32"], [512, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 220, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 16]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.033222887483870964], 0, 2.311842441558838, 1596127653.5192695], "v": 0.1} 8 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 28, 28], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 28, 28, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 245, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 32]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.06667492475], 0, 2.5315325260162354, 1596127860.7733378], "v": 0.1} 9 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 14, 14], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 14, 14, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 237, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 128]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.02035761794936709], 0, 3.4973130226135254, 1596128161.502795], "v": 0.1} 10 | -------------------------------------------------------------------------------- /TVM/tvm-log/2080Ti/1thread-log/x86/onnx/1batch/x86_onnx_1batch_vgg19_bn.log: -------------------------------------------------------------------------------- 1 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 3, 224, 224], "float32"], ["TENSOR", [64, 3, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 3, 224, 224, "float32"], [64, 3, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 267, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 3]], ["tile_oc", "sp", [-1, 1]], ["tile_ow", "sp", [-1, 56]], ["unroll_kw", "ot", false]]}], "r": [[0.006898931712389381], 0, 3.388451099395752, 1596136934.0417387], "v": 0.1} 2 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 224, 224], "float32"], ["TENSOR", [64, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 224, 224, "float32"], [64, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 217, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 8]], ["unroll_kw", "ot", true]]}], "r": [[0.078498422125], 0, 2.799562692642212, 1596137513.2174776], "v": 0.1} 3 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 112, 112], "float32"], ["TENSOR", [128, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 112, 112, "float32"], [128, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 142, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 4]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.033801018333333335], 0, 2.224395513534546, 1596137883.3176696], "v": 0.1} 4 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 112, 112], "float32"], ["TENSOR", [128, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 112, 112, "float32"], [128, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 736, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", false]]}], "r": [[0.0649253149375], 0, 2.3282406330108643, 1596138433.9413586], "v": 0.1} 5 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 56, 56], "float32"], ["TENSOR", [256, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 56, 56, "float32"], [256, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 179, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 8]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.033109787096774194], 0, 2.1387908458709717, 1596138604.0975952], "v": 0.1} 6 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 56, 56], "float32"], ["TENSOR", [256, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 56, 56, "float32"], [256, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 202, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 16]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.06508972325], 0, 2.673032283782959, 1596138823.7356472], "v": 0.1} 7 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 28, 28], "float32"], ["TENSOR", [512, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 28, 28, "float32"], [512, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 220, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 16]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.03328783151612903], 0, 2.344836711883545, 1596139077.4176269], "v": 0.1} 8 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 28, 28], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 28, 28, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 245, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 32]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.0670596299375], 0, 2.5426127910614014, 1596139286.752205], "v": 0.1} 9 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 14, 14], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 14, 14, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 234, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 16]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.02034344264], 0, 1.8967463970184326, 1596139538.188148], "v": 0.1} 10 | -------------------------------------------------------------------------------- /TVM/tvm-log/2080Ti/gpu/onnx/1batch/gpu_onnx_1batch_vgg16.log: -------------------------------------------------------------------------------- 1 | {"i": ["cuda -model=unknown", "topi_nn_conv2d", [["TENSOR", [1, 3, 224, 224], "float32"], ["TENSOR", [64, 3, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 3, 224, 224, "float32"], [64, 3, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 18743260, "t": "direct", "c": null, "e": [["tile_f", "sp", [-1, 16, 1, 1]], ["tile_y", "sp", [-1, 1, 7, 1]], ["tile_x", "sp", [-1, 1, 16, 2]], ["tile_rc", "sp", [-1, 1]], ["tile_ry", "sp", [-1, 1]], ["tile_rx", "sp", [-1, 3]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]]}], "r": [[3.766086570650059e-05], 0, 12.015217065811157, 1596998908.4320843], "v": 0.1} 2 | {"i": ["cuda -model=unknown", "topi_nn_conv2d", [["TENSOR", [1, 64, 224, 224], "float32"], ["TENSOR", [64, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 224, 224, "float32"], [64, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 2127357, "t": "winograd", "c": null, "e": [["tile_b", "sp", [-1, 1, 1, 1]], ["tile_y", "sp", [-1, 8, 2, 4]], ["tile_x", "sp", [-1, 2, 56, 1]], ["tile_rc", "sp", [-1, 4]], ["auto_unroll_max_step", "ot", 128], ["unroll_explicit", "ot", 1]]}], "r": [[0.000297244889182058], 0, 72.19263315200806, 1596999807.9383483], "v": 0.1} 3 | {"i": ["cuda -model=unknown", "topi_nn_conv2d", [["TENSOR", [1, 64, 112, 112], "float32"], ["TENSOR", [128, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 112, 112, "float32"], [128, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 1603772, "t": "winograd", "c": null, "e": [["tile_b", "sp", [-1, 1, 1, 1]], ["tile_y", "sp", [-1, 4, 2, 8]], ["tile_x", "sp", [-1, 2, 28, 1]], ["tile_rc", "sp", [-1, 8]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[0.00012498887370977535], 0, 22.81765079498291, 1597001034.1881225], "v": 0.1} 4 | {"i": ["cuda -model=unknown", "topi_nn_conv2d", [["TENSOR", [1, 128, 112, 112], "float32"], ["TENSOR", [128, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 112, 112, "float32"], [128, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 1443531, "t": "winograd", "c": null, "e": [["tile_b", "sp", [-1, 1, 1, 1]], ["tile_y", "sp", [-1, 4, 4, 2]], ["tile_x", "sp", [-1, 2, 14, 2]], ["tile_rc", "sp", [-1, 4]], ["auto_unroll_max_step", "ot", 128], ["unroll_explicit", "ot", 1]]}], "r": [[0.00021629896336206896], 0, 91.57132840156555, 1597002032.119416], "v": 0.1} 5 | {"i": ["cuda -model=unknown", "topi_nn_conv2d", [["TENSOR", [1, 128, 56, 56], "float32"], ["TENSOR", [256, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 56, 56, "float32"], [256, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 712213, "t": "winograd", "c": null, "e": [["tile_b", "sp", [-1, 1, 1, 1]], ["tile_y", "sp", [-1, 4, 16, 2]], ["tile_x", "sp", [-1, 7, 4, 1]], ["tile_rc", "sp", [-1, 8]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[9.161358231420508e-05], 0, 69.0690507888794, 1597002477.4916365], "v": 0.1} 6 | {"i": ["cuda -model=unknown", "topi_nn_conv2d", [["TENSOR", [1, 256, 56, 56], "float32"], ["TENSOR", [256, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 56, 56, "float32"], [256, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 794740, "t": "winograd", "c": null, "e": [["tile_b", "sp", [-1, 1, 1, 1]], ["tile_y", "sp", [-1, 2, 8, 4]], ["tile_x", "sp", [-1, 7, 4, 1]], ["tile_rc", "sp", [-1, 8]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[0.0001517770930599369], 0, 4.66903829574585, 1597003125.9010572], "v": 0.1} 7 | {"i": ["cuda -model=unknown", "topi_nn_conv2d", [["TENSOR", [1, 256, 28, 28], "float32"], ["TENSOR", [512, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 28, 28, "float32"], [512, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 1059674, "t": "winograd", "c": null, "e": [["tile_b", "sp", [-1, 1, 1, 1]], ["tile_y", "sp", [-1, 1, 8, 8]], ["tile_x", "sp", [-1, 7, 4, 1]], ["tile_rc", "sp", [-1, 8]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[0.0001232116068601583], 0, 9.381632328033447, 1597003642.5013134], "v": 0.1} 8 | {"i": ["cuda -model=unknown", "topi_nn_conv2d", [["TENSOR", [1, 512, 28, 28], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 28, 28, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 1169675, "t": "winograd", "c": null, "e": [["tile_b", "sp", [-1, 1, 1, 1]], ["tile_y", "sp", [-1, 2, 8, 8]], ["tile_x", "sp", [-1, 7, 4, 1]], ["tile_rc", "sp", [-1, 8]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[0.00022329477909738716], 0, 26.150113821029663, 1597004305.8602326], "v": 0.1} 9 | {"i": ["cuda -model=unknown", "topi_nn_conv2d", [["TENSOR", [1, 512, 14, 14], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 14, 14, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 119767, "t": "winograd", "c": null, "e": [["tile_b", "sp", [-1, 1, 1, 1]], ["tile_y", "sp", [-1, 4, 16, 2]], ["tile_x", "sp", [-1, 7, 7, 1]], ["tile_rc", "sp", [-1, 16]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[7.593891006160164e-05], 0, 23.8491849899292, 1597005653.891277], "v": 0.1} 10 | -------------------------------------------------------------------------------- /TVM/tvm-log/2080Ti/gpu/onnx/1batch/gpu_onnx_1batch_vgg19.log: -------------------------------------------------------------------------------- 1 | {"i": ["cuda -model=unknown", "topi_nn_conv2d", [["TENSOR", [1, 3, 224, 224], "float32"], ["TENSOR", [64, 3, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 3, 224, 224, "float32"], [64, 3, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 119896601, "t": "direct", "c": null, "e": [["tile_f", "sp", [-1, 4, 4, 2]], ["tile_y", "sp", [-1, 1, 2, 1]], ["tile_x", "sp", [-1, 1, 16, 2]], ["tile_rc", "sp", [-1, 1]], ["tile_ry", "sp", [-1, 1]], ["tile_rx", "sp", [-1, 3]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]]}], "r": [[3.6780037783868445e-05], 0, 13.70903730392456, 1597006707.025582], "v": 0.1} 2 | {"i": ["cuda -model=unknown", "topi_nn_conv2d", [["TENSOR", [1, 64, 224, 224], "float32"], ["TENSOR", [64, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 224, 224, "float32"], [64, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 1644608, "t": "winograd", "c": null, "e": [["tile_b", "sp", [-1, 1, 1, 1]], ["tile_y", "sp", [-1, 4, 2, 4]], ["tile_x", "sp", [-1, 1, 32, 2]], ["tile_rc", "sp", [-1, 4]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]]}], "r": [[0.0002919576458616011], 0, 4.956915616989136, 1597007579.07174], "v": 0.1} 3 | {"i": ["cuda -model=unknown", "topi_nn_conv2d", [["TENSOR", [1, 64, 112, 112], "float32"], ["TENSOR", [128, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 112, 112, "float32"], [128, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 479158, "t": "winograd", "c": null, "e": [["tile_b", "sp", [-1, 1, 1, 1]], ["tile_y", "sp", [-1, 1, 2, 64]], ["tile_x", "sp", [-1, 1, 56, 2]], ["tile_rc", "sp", [-1, 16]], ["auto_unroll_max_step", "ot", 128], ["unroll_explicit", "ot", 0]]}], "r": [[0.00013000896490147784], 0, 14.737946271896362, 1597007882.0644283], "v": 0.1} 4 | {"i": ["cuda -model=unknown", "topi_nn_conv2d", [["TENSOR", [1, 128, 112, 112], "float32"], ["TENSOR", [128, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 112, 112, "float32"], [128, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 1434557, "t": "winograd", "c": null, "e": [["tile_b", "sp", [-1, 1, 1, 1]], ["tile_y", "sp", [-1, 4, 4, 4]], ["tile_x", "sp", [-1, 4, 14, 1]], ["tile_rc", "sp", [-1, 4]], ["auto_unroll_max_step", "ot", 128], ["unroll_explicit", "ot", 1]]}], "r": [[0.00020006028187250998], 0, 17.8870792388916, 1597009778.4172528], "v": 0.1} 5 | {"i": ["cuda -model=unknown", "topi_nn_conv2d", [["TENSOR", [1, 128, 56, 56], "float32"], ["TENSOR", [256, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 56, 56, "float32"], [256, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 730390, "t": "winograd", "c": null, "e": [["tile_b", "sp", [-1, 1, 1, 1]], ["tile_y", "sp", [-1, 2, 8, 4]], ["tile_x", "sp", [-1, 7, 14, 1]], ["tile_rc", "sp", [-1, 16]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[8.948499953445065e-05], 0, 65.96505427360535, 1597010688.1196933], "v": 0.1} 6 | {"i": ["cuda -model=unknown", "topi_nn_conv2d", [["TENSOR", [1, 256, 56, 56], "float32"], ["TENSOR", [256, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 56, 56, "float32"], [256, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 794767, "t": "winograd", "c": null, "e": [["tile_b", "sp", [-1, 1, 1, 1]], ["tile_y", "sp", [-1, 1, 16, 8]], ["tile_x", "sp", [-1, 7, 4, 1]], ["tile_rc", "sp", [-1, 8]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[0.0001489962957746479], 0, 8.812845945358276, 1597010996.6035323], "v": 0.1} 7 | {"i": ["cuda -model=unknown", "topi_nn_conv2d", [["TENSOR", [1, 256, 28, 28], "float32"], ["TENSOR", [512, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 28, 28, "float32"], [512, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 1084534, "t": "winograd", "c": null, "e": [["tile_b", "sp", [-1, 1, 1, 1]], ["tile_y", "sp", [-1, 1, 8, 8]], ["tile_x", "sp", [-1, 7, 28, 1]], ["tile_rc", "sp", [-1, 16]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[0.00010693559343148358], 0, 4.54094123840332, 1597012316.3221564], "v": 0.1} 8 | {"i": ["cuda -model=unknown", "topi_nn_conv2d", [["TENSOR", [1, 512, 28, 28], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 28, 28, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 1194502, "t": "winograd", "c": null, "e": [["tile_b", "sp", [-1, 1, 1, 1]], ["tile_y", "sp", [-1, 2, 8, 4]], ["tile_x", "sp", [-1, 7, 28, 1]], ["tile_rc", "sp", [-1, 16]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[0.00019472063891779398], 0, 29.202327489852905, 1597013070.4277909], "v": 0.1} 9 | {"i": ["cuda -model=unknown", "topi_nn_conv2d", [["TENSOR", [1, 512, 14, 14], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 14, 14, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 119838, "t": "winograd", "c": null, "e": [["tile_b", "sp", [-1, 1, 1, 1]], ["tile_y", "sp", [-1, 1, 16, 8]], ["tile_x", "sp", [-1, 7, 7, 1]], ["tile_rc", "sp", [-1, 16]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[7.379193757503001e-05], 0, 14.88327431678772, 1597013417.2359698], "v": 0.1} 10 | -------------------------------------------------------------------------------- /TVM/tvm-log/2080Ti/x86/onnx/1batch/x86_onnx_1batch_alexnet.log: -------------------------------------------------------------------------------- 1 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 3, 224, 224], "float32"], ["TENSOR", [64, 3, 11, 11], "float32"], [4, 4], [2, 2], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 3, 224, 224, "float32"], [64, 3, 11, 11, "float32"], [4, 4], [2, 2], [1, 1], "NCHW", "float32"], {"i": 79, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 3]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 5]], ["unroll_kw", "ot", false]]}], "r": [[0.000253848834502551], 0, 3.5001540184020996, 1596448939.1776137], "v": 0.1} 2 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 27, 27], "float32"], ["TENSOR", [192, 64, 5, 5], "float32"], [1, 1], [2, 2], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 27, 27, "float32"], [192, 64, 5, 5, "float32"], [1, 1], [2, 2], [1, 1], "NCHW", "float32"], {"i": 232, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 2]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 9]], ["unroll_kw", "ot", true]]}], "r": [[0.0006887817277056277], 0, 3.6009926795959473, 1596449515.573171], "v": 0.1} 3 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 192, 13, 13], "float32"], ["TENSOR", [384, 192, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 192, 13, 13, "float32"], [384, 192, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 299, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 8]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 13]], ["unroll_kw", "ot", true]]}], "r": [[0.0005330535376128385], 0, 3.656599760055542, 1596449822.6626618], "v": 0.1} 4 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 384, 13, 13], "float32"], ["TENSOR", [256, 384, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 384, 13, 13, "float32"], [256, 384, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 193, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 2]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 13]], ["unroll_kw", "ot", true]]}], "r": [[0.0006518876530198622], 0, 3.668201446533203, 1596450090.7428856], "v": 0.1} 5 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 13, 13], "float32"], ["TENSOR", [256, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 13, 13, "float32"], [256, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 109, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 2]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 13]], ["unroll_kw", "ot", true]]}], "r": [[0.0004413544754738016], 0, 3.578256130218506, 1596450346.1886325], "v": 0.1} 6 | -------------------------------------------------------------------------------- /TVM/tvm-log/2080Ti/x86/onnx/1batch/x86_onnx_1batch_resnet18_v2.log: -------------------------------------------------------------------------------- 1 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 14, 14], "float32"], ["TENSOR", [512, 256, 1, 1], "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 14, 14, "float32"], [512, 256, 1, 1, "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {"i": 314, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 256]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 7]], ["tile_oh", "ot", 2]]}], "r": [[6.0030532797194804e-05], 0, 3.4430298805236816, 1596215184.6473992], "v": 0.1} 2 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 28, 28], "float32"], ["TENSOR", [256, 128, 1, 1], "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 28, 28, "float32"], [256, 128, 1, 1, "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {"i": 255, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 128]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 14]], ["tile_oh", "ot", 1]]}], "r": [[7.878945390035729e-05], 0, 3.445403575897217, 1596215928.4560626], "v": 0.1} 3 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 56, 56], "float32"], ["TENSOR", [128, 64, 1, 1], "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 56, 56, "float32"], [128, 64, 1, 1, "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {"i": 95, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 16]], ["tile_oc", "sp", [-1, 32]], ["tile_ow", "sp", [-1, 2]], ["tile_oh", "ot", 1]]}], "r": [[8.529370407095246e-05], 0, 4.921019792556763, 1596216567.0174406], "v": 0.1} 4 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 3, 224, 224], "float32"], ["TENSOR", [64, 3, 7, 7], "float32"], [2, 2], [3, 3], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 3, 224, 224, "float32"], [64, 3, 7, 7, "float32"], [2, 2], [3, 3], [1, 1], "NCHW", "float32"], {"i": 189, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 3]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 8]], ["unroll_kw", "ot", false]]}], "r": [[0.0004956358743748161], 0, 3.8440377712249756, 1596216982.7041843], "v": 0.1} 5 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 56, 56], "float32"], ["TENSOR", [64, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 56, 56, "float32"], [64, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 128, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 4]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.000336746521759367], 0, 3.4087235927581787, 1596217643.980005], "v": 0.1} 6 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 56, 56], "float32"], ["TENSOR", [128, 64, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 56, 56, "float32"], [128, 64, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "NCHW", "float32"], {"i": 477, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 2]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", false]]}], "r": [[0.00022946405085518814], 0, 2.180070638656616, 1596218295.3933637], "v": 0.1} 7 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 28, 28], "float32"], ["TENSOR", [128, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 28, 28, "float32"], [128, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 162, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 4]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.0003349891787646529], 0, 3.55096697807312, 1596218640.187958], "v": 0.1} 8 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 28, 28], "float32"], ["TENSOR", [256, 128, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 28, 28, "float32"], [256, 128, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "NCHW", "float32"], {"i": 170, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 4]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.00025194979168], 0, 3.8192741870880127, 1596219036.2410553], "v": 0.1} 9 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 14, 14], "float32"], ["TENSOR", [256, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 14, 14, "float32"], [256, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 190, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 2]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.0003661262847173761], 0, 3.559701919555664, 1596219431.2746332], "v": 0.1} 10 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 14, 14], "float32"], ["TENSOR", [512, 256, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 14, 14, "float32"], [512, 256, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "NCHW", "float32"], {"i": 125, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 256]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.00023886264709345107], 0, 4.620815277099609, 1596219725.9784842], "v": 0.1} 11 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 7, 7], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 7, 7, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 139, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 512]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.0004313715153638814], 0, 3.47411847114563, 1596220042.7528176], "v": 0.1} 12 | -------------------------------------------------------------------------------- /TVM/tvm-log/2080Ti/x86/onnx/1batch/x86_onnx_1batch_vgg11.log: -------------------------------------------------------------------------------- 1 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 3, 224, 224], "float32"], ["TENSOR", [64, 3, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 3, 224, 224, "float32"], [64, 3, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 188, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", false]]}], "r": [[0.0005826003064889919], 0, 1.9775025844573975, 1596450757.972092], "v": 0.1} 2 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 112, 112], "float32"], ["TENSOR", [128, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 112, 112, "float32"], [128, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 140, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.002313890138888889], 0, 2.5262463092803955, 1596451068.710304], "v": 0.1} 3 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 56, 56], "float32"], ["TENSOR", [256, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 56, 56, "float32"], [256, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 178, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 4]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.0022378739737903227], 0, 2.732792377471924, 1596451608.1730688], "v": 0.1} 4 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 56, 56], "float32"], ["TENSOR", [256, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 56, 56, "float32"], [256, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 201, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 8]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.004411388477477477], 0, 3.152791976928711, 1596452060.0578597], "v": 0.1} 5 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 28, 28], "float32"], ["TENSOR", [512, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 28, 28, "float32"], [512, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 219, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 8]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.0021849808103792416], 0, 2.6345927715301514, 1596452481.6561778], "v": 0.1} 6 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 28, 28], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 28, 28, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 244, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 16]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.004395779810810811], 0, 3.3129653930664062, 1596452799.4449801], "v": 0.1} 7 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 14, 14], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 14, 14, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 235, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 32]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.0013367123598937583], 0, 2.2028610706329346, 1596453072.970195], "v": 0.1} 8 | -------------------------------------------------------------------------------- /TVM/tvm-log/2080Ti/x86/onnx/1batch/x86_onnx_1batch_vgg11_bn.log: -------------------------------------------------------------------------------- 1 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 3, 224, 224], "float32"], ["TENSOR", [64, 3, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 3, 224, 224, "float32"], [64, 3, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 266, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 1]], ["tile_ow", "sp", [-1, 56]], ["unroll_kw", "ot", false]]}], "r": [[0.0005397101095596133], 0, 2.2414708137512207, 1596463791.0852258], "v": 0.1} 2 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 112, 112], "float32"], ["TENSOR", [128, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 112, 112, "float32"], [128, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 141, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 2]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.0023699294509018035], 0, 2.6867265701293945, 1596464214.3354626], "v": 0.1} 3 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 56, 56], "float32"], ["TENSOR", [256, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 56, 56, "float32"], [256, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 178, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 4]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.0022193137463917523], 0, 2.5859532356262207, 1596464729.015276], "v": 0.1} 4 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 56, 56], "float32"], ["TENSOR", [256, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 56, 56, "float32"], [256, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 200, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 4]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.004443172696666667], 0, 3.0507583618164062, 1596465194.741506], "v": 0.1} 5 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 28, 28], "float32"], ["TENSOR", [512, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 28, 28, "float32"], [512, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 220, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 16]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.002164654336088154], 0, 4.044808864593506, 1596465490.9938133], "v": 0.1} 6 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 28, 28], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 28, 28, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 244, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 16]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.004414946655688623], 0, 3.346822738647461, 1596465823.2958527], "v": 0.1} 7 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 14, 14], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 14, 14, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 236, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 64]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.0013476139504021447], 0, 2.200176239013672, 1596466079.0521598], "v": 0.1} 8 | -------------------------------------------------------------------------------- /TVM/tvm-log/2080Ti/x86/onnx/1batch/x86_onnx_1batch_vgg13.log: -------------------------------------------------------------------------------- 1 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 3, 224, 224], "float32"], ["TENSOR", [64, 3, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 3, 224, 224, "float32"], [64, 3, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 62, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 8]], ["unroll_kw", "ot", true]]}], "r": [[0.0005654399786056806], 0, 3.4489939212799072, 1596453678.9429088], "v": 0.1} 2 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 224, 224], "float32"], ["TENSOR", [64, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 224, 224, "float32"], [64, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 217, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 8]], ["unroll_kw", "ot", true]]}], "r": [[0.005414291594202899], 0, 3.0710275173187256, 1596454333.367365], "v": 0.1} 3 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 112, 112], "float32"], ["TENSOR", [128, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 112, 112, "float32"], [128, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 145, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 32]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.0023781288989473686], 0, 2.493635892868042, 1596454551.3946722], "v": 0.1} 4 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 112, 112], "float32"], ["TENSOR", [128, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 112, 112, "float32"], [128, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 736, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", false]]}], "r": [[0.004642617336448598], 0, 3.931882619857788, 1596454894.692462], "v": 0.1} 5 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 56, 56], "float32"], ["TENSOR", [256, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 56, 56, "float32"], [256, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 179, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 8]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.002216775920892495], 0, 2.5336952209472656, 1596455251.0348735], "v": 0.1} 6 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 56, 56], "float32"], ["TENSOR", [256, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 56, 56, "float32"], [256, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 200, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 4]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.004419781384105961], 0, 2.930107831954956, 1596455618.097451], "v": 0.1} 7 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 28, 28], "float32"], ["TENSOR", [512, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 28, 28, "float32"], [512, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 219, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 8]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.002173218911368015], 0, 2.6456236839294434, 1596455952.7075095], "v": 0.1} 8 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 28, 28], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 28, 28, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 243, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 8]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.004523682234920635], 0, 3.1252567768096924, 1596456229.0780573], "v": 0.1} 9 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 14, 14], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 14, 14, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 235, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 32]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.001338314540106952], 0, 2.165275812149048, 1596456485.3555408], "v": 0.1} 10 | -------------------------------------------------------------------------------- /TVM/tvm-log/2080Ti/x86/onnx/1batch/x86_onnx_1batch_vgg13_bn.log: -------------------------------------------------------------------------------- 1 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 3, 224, 224], "float32"], ["TENSOR", [64, 3, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 3, 224, 224, "float32"], [64, 3, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 62, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 8]], ["unroll_kw", "ot", true]]}], "r": [[0.0005616441849615806], 0, 1.9776067733764648, 1596466762.698668], "v": 0.1} 2 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 224, 224], "float32"], ["TENSOR", [64, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 224, 224, "float32"], [64, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 707, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 8]], ["unroll_kw", "ot", false]]}], "r": [[0.005486705585820896], 0, 2.9869773387908936, 1596467439.009753], "v": 0.1} 3 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 112, 112], "float32"], ["TENSOR", [128, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 112, 112, "float32"], [128, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 141, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 2]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.0023748547124183006], 0, 2.398829221725464, 1596467688.9778001], "v": 0.1} 4 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 112, 112], "float32"], ["TENSOR", [128, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 112, 112, "float32"], [128, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 164, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 16]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.0046958206699029125], 0, 3.3051044940948486, 1596468064.9306622], "v": 0.1} 5 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 56, 56], "float32"], ["TENSOR", [256, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 56, 56, "float32"], [256, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 754, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 4]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", false]]}], "r": [[0.0023274839330628804], 0, 2.6660547256469727, 1596468471.3632672], "v": 0.1} 6 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 56, 56], "float32"], ["TENSOR", [256, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 56, 56, "float32"], [256, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 200, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 4]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.00435849784660767], 0, 3.2968053817749023, 1596468873.82195], "v": 0.1} 7 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 28, 28], "float32"], ["TENSOR", [512, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 28, 28, "float32"], [512, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 220, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 16]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.0021571071166666665], 0, 2.795720338821411, 1596469185.8893511], "v": 0.1} 8 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 28, 28], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 28, 28, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 244, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 16]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.004404086359882006], 0, 4.401059865951538, 1596469442.8512094], "v": 0.1} 9 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 14, 14], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 14, 14, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 235, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 32]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.0013479397190435524], 0, 3.7837088108062744, 1596469704.0843005], "v": 0.1} 10 | -------------------------------------------------------------------------------- /TVM/tvm-log/2080Ti/x86/onnx/1batch/x86_onnx_1batch_vgg16.log: -------------------------------------------------------------------------------- 1 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 3, 224, 224], "float32"], ["TENSOR", [64, 3, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 3, 224, 224, "float32"], [64, 3, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 266, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 1]], ["tile_ow", "sp", [-1, 56]], ["unroll_kw", "ot", false]]}], "r": [[0.0005420128762341326], 0, 3.5438625812530518, 1596457012.0635018], "v": 0.1} 2 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 224, 224], "float32"], ["TENSOR", [64, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 224, 224, "float32"], [64, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 707, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 8]], ["unroll_kw", "ot", false]]}], "r": [[0.005505419100746268], 0, 3.0948729515075684, 1596457283.3590913], "v": 0.1} 3 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 112, 112], "float32"], ["TENSOR", [128, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 112, 112, "float32"], [128, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 140, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.0022997922027559055], 0, 2.520529270172119, 1596457718.1397002], "v": 0.1} 4 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 112, 112], "float32"], ["TENSOR", [128, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 112, 112, "float32"], [128, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 162, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 4]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.004621441058461538], 0, 3.2978649139404297, 1596458328.2870686], "v": 0.1} 5 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 56, 56], "float32"], ["TENSOR", [256, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 56, 56, "float32"], [256, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 179, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 8]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.0022161030403225804], 0, 2.5310370922088623, 1596458488.5800266], "v": 0.1} 6 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 56, 56], "float32"], ["TENSOR", [256, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 56, 56, "float32"], [256, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 201, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 8]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.004409806283536585], 0, 3.1523849964141846, 1596458776.949035], "v": 0.1} 7 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 28, 28], "float32"], ["TENSOR", [512, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 28, 28, "float32"], [512, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 219, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 8]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.0021946378481973437], 0, 2.671485424041748, 1596459050.7326703], "v": 0.1} 8 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 28, 28], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 28, 28, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 245, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 32]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.004453199081081082], 0, 3.229020595550537, 1596459331.9493787], "v": 0.1} 9 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 14, 14], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 14, 14, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 235, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 32]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.001339532734986945], 0, 2.3215527534484863, 1596459587.0863411], "v": 0.1} 10 | -------------------------------------------------------------------------------- /TVM/tvm-log/2080Ti/x86/onnx/1batch/x86_onnx_1batch_vgg16_bn.log: -------------------------------------------------------------------------------- 1 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 3, 224, 224], "float32"], ["TENSOR", [64, 3, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 3, 224, 224, "float32"], [64, 3, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 62, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 8]], ["unroll_kw", "ot", true]]}], "r": [[0.0005746727671043538], 0, 3.73333477973938, 1596470333.15616], "v": 0.1} 2 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 224, 224], "float32"], ["TENSOR", [64, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 224, 224, "float32"], [64, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 707, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 8]], ["unroll_kw", "ot", false]]}], "r": [[0.005426089123188406], 0, 3.1078243255615234, 1596471005.1041338], "v": 0.1} 3 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 112, 112], "float32"], ["TENSOR", [128, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 112, 112, "float32"], [128, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 145, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 32]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.0023847884822834646], 0, 2.7121999263763428, 1596471095.0450115], "v": 0.1} 4 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 112, 112], "float32"], ["TENSOR", [128, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 112, 112, "float32"], [128, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 162, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 4]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.004589705791666667], 0, 3.2991862297058105, 1596471564.4126856], "v": 0.1} 5 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 56, 56], "float32"], ["TENSOR", [256, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 56, 56, "float32"], [256, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 178, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 4]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.0022258902960662526], 0, 2.6052093505859375, 1596471942.1932373], "v": 0.1} 6 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 56, 56], "float32"], ["TENSOR", [256, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 56, 56, "float32"], [256, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 200, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 4]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.00434811261627907], 0, 3.3203492164611816, 1596472252.9024312], "v": 0.1} 7 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 28, 28], "float32"], ["TENSOR", [512, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 28, 28, "float32"], [512, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 218, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 4]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.002225635898734177], 0, 4.2301270961761475, 1596472526.9978697], "v": 0.1} 8 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 28, 28], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 28, 28, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 244, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 16]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.004396804903903904], 0, 3.2921416759490967, 1596472841.1925936], "v": 0.1} 9 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 14, 14], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 14, 14, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 234, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 16]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.00132970685770751], 0, 2.2065157890319824, 1596473070.3190806], "v": 0.1} 10 | -------------------------------------------------------------------------------- /TVM/tvm-log/2080Ti/x86/onnx/1batch/x86_onnx_1batch_vgg19.log: -------------------------------------------------------------------------------- 1 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 3, 224, 224], "float32"], ["TENSOR", [64, 3, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 3, 224, 224, "float32"], [64, 3, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 202, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 8]], ["unroll_kw", "ot", false]]}], "r": [[0.0005605813147691223], 0, 3.5352954864501953, 1596460233.946505], "v": 0.1} 2 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 224, 224], "float32"], ["TENSOR", [64, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 224, 224, "float32"], [64, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 217, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 8]], ["unroll_kw", "ot", true]]}], "r": [[0.00553282396641791], 0, 3.0758166313171387, 1596460879.817044], "v": 0.1} 3 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 112, 112], "float32"], ["TENSOR", [128, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 112, 112, "float32"], [128, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 142, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 4]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.002294525861283644], 0, 2.6598360538482666, 1596461189.2129443], "v": 0.1} 4 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 112, 112], "float32"], ["TENSOR", [128, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 112, 112, "float32"], [128, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 162, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 4]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.004593253292063493], 0, 3.2318713665008545, 1596461538.180827], "v": 0.1} 5 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 56, 56], "float32"], ["TENSOR", [256, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 56, 56, "float32"], [256, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 179, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 8]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.0022198052128514054], 0, 2.675957679748535, 1596461800.0109468], "v": 0.1} 6 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 56, 56], "float32"], ["TENSOR", [256, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 56, 56, "float32"], [256, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 201, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 8]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.004460844936599424], 0, 4.228254318237305, 1596462141.9335012], "v": 0.1} 7 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 28, 28], "float32"], ["TENSOR", [512, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 28, 28, "float32"], [512, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 220, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 16]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.002167871939688716], 0, 2.7116470336914062, 1596462460.1310673], "v": 0.1} 8 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 28, 28], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 28, 28, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 245, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 32]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.004438791208333333], 0, 3.0592589378356934, 1596462906.3693774], "v": 0.1} 9 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 14, 14], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 14, 14, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 235, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 32]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.0013481655369294604], 0, 3.894418716430664, 1596463242.7511413], "v": 0.1} 10 | -------------------------------------------------------------------------------- /TVM/tvm-log/2080Ti/x86/onnx/1batch/x86_onnx_1batch_vgg19_bn.log: -------------------------------------------------------------------------------- 1 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 3, 224, 224], "float32"], ["TENSOR", [64, 3, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 3, 224, 224, "float32"], [64, 3, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 202, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 8]], ["unroll_kw", "ot", false]]}], "r": [[0.0005597706106145251], 0, 1.962482213973999, 1596473704.2140877], "v": 0.1} 2 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 224, 224], "float32"], ["TENSOR", [64, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 224, 224, "float32"], [64, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 168, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.005495699186567164], 0, 3.0747392177581787, 1596474039.4404364], "v": 0.1} 3 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 112, 112], "float32"], ["TENSOR", [128, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 112, 112, "float32"], [128, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 644, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", false]]}], "r": [[0.0022443210338345867], 0, 2.510802745819092, 1596474677.3832624], "v": 0.1} 4 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 112, 112], "float32"], ["TENSOR", [128, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 112, 112, "float32"], [128, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 163, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 8]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.004670732993846153], 0, 3.260664224624634, 1596475089.1908534], "v": 0.1} 5 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 56, 56], "float32"], ["TENSOR", [256, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 56, 56, "float32"], [256, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 178, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 4]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.002234606993814433], 0, 2.664768695831299, 1596475302.9768763], "v": 0.1} 6 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 56, 56], "float32"], ["TENSOR", [256, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 56, 56, "float32"], [256, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 201, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 8]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.0044366165429447855], 0, 3.18804669380188, 1596475608.6015775], "v": 0.1} 7 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 28, 28], "float32"], ["TENSOR", [512, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 28, 28, "float32"], [512, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 219, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 8]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.0021697463851717905], 0, 2.779651165008545, 1596475872.5036128], "v": 0.1} 8 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 28, 28], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 28, 28, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 245, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 32]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.004449829482248521], 0, 3.1746253967285156, 1596476255.6267054], "v": 0.1} 9 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 14, 14], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 14, 14, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 234, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 16]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.001326771671345995], 0, 3.8857598304748535, 1596476527.9188766], "v": 0.1} 10 | -------------------------------------------------------------------------------- /TVM/tvm-log/V100/1thread-log/x86/onnx/1batch/x86_onnx_1batch_alexnet.log: -------------------------------------------------------------------------------- 1 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 3, 224, 224], "float32"], ["TENSOR", [64, 3, 11, 11], "float32"], [4, 4], [2, 2], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 3, 224, 224, "float32"], [64, 3, 11, 11, "float32"], [4, 4], [2, 2], [1, 1], "NCHW", "float32"], {"i": 79, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 3]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 5]], ["unroll_kw", "ot", false]]}], "r": [[0.0025747730810810813], 0, 2.203202486038208, 1596133170.978952], "v": 0.1} 2 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 27, 27], "float32"], ["TENSOR", [192, 64, 5, 5], "float32"], [1, 1], [2, 2], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 27, 27, "float32"], [192, 64, 5, 5, "float32"], [1, 1], [2, 2], [1, 1], "NCHW", "float32"], {"i": 625, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 4]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 9]], ["unroll_kw", "ot", false]]}], "r": [[0.008796612006756757], 0, 3.08322811126709, 1596133546.424947], "v": 0.1} 3 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 192, 13, 13], "float32"], ["TENSOR", [384, 192, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 192, 13, 13, "float32"], [384, 192, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 743, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 2]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 13]], ["unroll_kw", "ot", false]]}], "r": [[0.006918483318435755], 0, 2.5930190086364746, 1596133620.4638646], "v": 0.1} 4 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 384, 13, 13], "float32"], ["TENSOR", [256, 384, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 384, 13, 13, "float32"], [256, 384, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 193, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 2]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 13]], ["unroll_kw", "ot", true]]}], "r": [[0.008038808583850933], 0, 2.777975082397461, 1596134179.749869], "v": 0.1} 5 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 13, 13], "float32"], ["TENSOR", [256, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 13, 13, "float32"], [256, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 109, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 2]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 13]], ["unroll_kw", "ot", true]]}], "r": [[0.004970768831858407], 0, 2.466529607772827, 1596134493.358305], "v": 0.1} 6 | -------------------------------------------------------------------------------- /TVM/tvm-log/V100/1thread-log/x86/onnx/1batch/x86_onnx_1batch_resnet18.log: -------------------------------------------------------------------------------- 1 | {"i": ["llvm", "topi_nn_conv2d", [["TENSOR", [1, 256, 14, 14], "float32"], ["TENSOR", [512, 256, 1, 1], "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 14, 14, "float32"], [512, 256, 1, 1, "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {"i": 134, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 256]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 7]], ["tile_oh", "ot", 1]]}], "r": [[9.477298455851667e-05], 0, 3.496755361557007, 1595906288.1946592], "v": 0.1} 2 | {"i": ["llvm", "topi_nn_conv2d", [["TENSOR", [1, 128, 28, 28], "float32"], ["TENSOR", [256, 128, 1, 1], "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 28, 28, "float32"], [256, 128, 1, 1, "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {"i": 478, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 64]], ["tile_oc", "sp", [-1, 32]], ["tile_ow", "sp", [-1, 7]], ["tile_oh", "ot", 2]]}], "r": [[0.00010815299796876095], 0, 3.7016890048980713, 1595906913.388673], "v": 0.1} 3 | {"i": ["llvm", "topi_nn_conv2d", [["TENSOR", [1, 64, 56, 56], "float32"], ["TENSOR", [128, 64, 1, 1], "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 56, 56, "float32"], [128, 64, 1, 1, "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {"i": 265, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 64]], ["tile_oc", "sp", [-1, 32]], ["tile_ow", "sp", [-1, 14]], ["tile_oh", "ot", 1]]}], "r": [[0.00011712066744168604], 0, 3.683521032333374, 1595907379.6617298], "v": 0.1} 4 | {"i": ["llvm", "topi_nn_conv2d", [["TENSOR", [1, 3, 224, 224], "float32"], ["TENSOR", [64, 3, 7, 7], "float32"], [2, 2], [3, 3], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 3, 224, 224, "float32"], [64, 3, 7, 7, "float32"], [2, 2], [3, 3], [1, 1], "NCHW", "float32"], {"i": 10, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 32]], ["tile_ow", "sp", [-1, 1]], ["unroll_kw", "ot", true]]}], "r": [[0.0010180829289198608], 0, 3.456883668899536, 1595907877.6946037], "v": 0.1} 5 | {"i": ["llvm", "topi_nn_conv2d", [["TENSOR", [1, 64, 56, 56], "float32"], ["TENSOR", [64, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 56, 56, "float32"], [64, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 78, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 2]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 2]], ["unroll_kw", "ot", true]]}], "r": [[0.000999826429491307], 0, 3.6195385456085205, 1595908522.4381373], "v": 0.1} 6 | {"i": ["llvm", "topi_nn_conv2d", [["TENSOR", [1, 64, 56, 56], "float32"], ["TENSOR", [128, 64, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 56, 56, "float32"], [128, 64, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "NCHW", "float32"], {"i": 39, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 16]], ["tile_oc", "sp", [-1, 32]], ["tile_ow", "sp", [-1, 1]], ["unroll_kw", "ot", true]]}], "r": [[0.0005719154207994079], 0, 3.6505789756774902, 1595909285.8787022], "v": 0.1} 7 | {"i": ["llvm", "topi_nn_conv2d", [["TENSOR", [1, 128, 28, 28], "float32"], ["TENSOR", [128, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 28, 28, "float32"], [128, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 103, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 128]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 2]], ["unroll_kw", "ot", true]]}], "r": [[0.0009889903040152964], 0, 3.5320887565612793, 1595909536.7988524], "v": 0.1} 8 | {"i": ["llvm", "topi_nn_conv2d", [["TENSOR", [1, 128, 28, 28], "float32"], ["TENSOR", [256, 128, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 28, 28, "float32"], [256, 128, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "NCHW", "float32"], {"i": 175, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 128]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.0005480639250353607], 0, 3.6105761528015137, 1595909963.28719], "v": 0.1} 9 | {"i": ["llvm", "topi_nn_conv2d", [["TENSOR", [1, 256, 14, 14], "float32"], ["TENSOR", [256, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 14, 14, "float32"], [256, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 186, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 64]], ["tile_oc", "sp", [-1, 4]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.0009743919729899497], 0, 3.5893473625183105, 1595910285.3665104], "v": 0.1} 10 | {"i": ["llvm", "topi_nn_conv2d", [["TENSOR", [1, 256, 14, 14], "float32"], ["TENSOR", [512, 256, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 14, 14, "float32"], [512, 256, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "NCHW", "float32"], {"i": 122, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 32]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.0005820961204682779], 0, 3.6042888164520264, 1595910604.7514298], "v": 0.1} 11 | {"i": ["llvm", "topi_nn_conv2d", [["TENSOR", [1, 512, 7, 7], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 7, 7, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 129, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 512]], ["tile_oc", "sp", [-1, 4]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.0010519164945848377], 0, 3.4467480182647705, 1595910970.8724623], "v": 0.1} 12 | -------------------------------------------------------------------------------- /TVM/tvm-log/V100/1thread-log/x86/onnx/1batch/x86_onnx_1batch_resnet18_v2.log: -------------------------------------------------------------------------------- 1 | {"i": ["llvm", "topi_nn_conv2d", [["TENSOR", [1, 256, 14, 14], "float32"], ["TENSOR", [512, 256, 1, 1], "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 14, 14, "float32"], [512, 256, 1, 1, "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {"i": 44, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 256]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 1]], ["tile_oh", "ot", 1]]}], "r": [[9.716859522054306e-05], 0, 3.54321551322937, 1595911341.563304], "v": 0.1} 2 | {"i": ["llvm", "topi_nn_conv2d", [["TENSOR", [1, 128, 28, 28], "float32"], ["TENSOR", [256, 128, 1, 1], "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 28, 28, "float32"], [256, 128, 1, 1, "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {"i": 478, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 64]], ["tile_oc", "sp", [-1, 32]], ["tile_ow", "sp", [-1, 7]], ["tile_oh", "ot", 2]]}], "r": [[0.0001074218348642862], 0, 3.752678394317627, 1595911987.04302], "v": 0.1} 3 | {"i": ["llvm", "topi_nn_conv2d", [["TENSOR", [1, 64, 56, 56], "float32"], ["TENSOR", [128, 64, 1, 1], "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 56, 56, "float32"], [128, 64, 1, 1, "float32"], [2, 2], [0, 0], [1, 1], "NCHW", "float32"], {"i": 545, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 64]], ["tile_oc", "sp", [-1, 32]], ["tile_ow", "sp", [-1, 7]], ["tile_oh", "ot", 2]]}], "r": [[0.00011697326672674024], 0, 3.74175763130188, 1595912377.636888], "v": 0.1} 4 | {"i": ["llvm", "topi_nn_conv2d", [["TENSOR", [1, 3, 224, 224], "float32"], ["TENSOR", [64, 3, 7, 7], "float32"], [2, 2], [3, 3], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 3, 224, 224, "float32"], [64, 3, 7, 7, "float32"], [2, 2], [3, 3], [1, 1], "NCHW", "float32"], {"i": 10, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 32]], ["tile_ow", "sp", [-1, 1]], ["unroll_kw", "ot", true]]}], "r": [[0.0010160689629629628], 0, 3.448570489883423, 1595912660.8815222], "v": 0.1} 5 | {"i": ["llvm", "topi_nn_conv2d", [["TENSOR", [1, 64, 56, 56], "float32"], ["TENSOR", [64, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 56, 56, "float32"], [64, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 471, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 4]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 2]], ["unroll_kw", "ot", false]]}], "r": [[0.0009998604123097287], 0, 3.5596394538879395, 1595912938.0821111], "v": 0.1} 6 | {"i": ["llvm", "topi_nn_conv2d", [["TENSOR", [1, 64, 56, 56], "float32"], ["TENSOR", [128, 64, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 56, 56, "float32"], [128, 64, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "NCHW", "float32"], {"i": 40, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 32]], ["tile_oc", "sp", [-1, 32]], ["tile_ow", "sp", [-1, 1]], ["unroll_kw", "ot", true]]}], "r": [[0.0005760893048188271], 0, 3.5786449909210205, 1595913722.8773036], "v": 0.1} 7 | {"i": ["llvm", "topi_nn_conv2d", [["TENSOR", [1, 128, 28, 28], "float32"], ["TENSOR", [128, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 28, 28, "float32"], [128, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 101, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 32]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 2]], ["unroll_kw", "ot", true]]}], "r": [[0.0009879176953223768], 0, 3.6054670810699463, 1595914308.1394541], "v": 0.1} 8 | {"i": ["llvm", "topi_nn_conv2d", [["TENSOR", [1, 128, 28, 28], "float32"], ["TENSOR", [256, 128, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 28, 28, "float32"], [256, 128, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "NCHW", "float32"], {"i": 171, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 8]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.0005520035084199212], 0, 3.7702651023864746, 1595914649.1817927], "v": 0.1} 9 | {"i": ["llvm", "topi_nn_conv2d", [["TENSOR", [1, 256, 14, 14], "float32"], ["TENSOR", [256, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 14, 14, "float32"], [256, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 186, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 64]], ["tile_oc", "sp", [-1, 4]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.0009755636727386933], 0, 3.618600606918335, 1595914995.5120838], "v": 0.1} 10 | {"i": ["llvm", "topi_nn_conv2d", [["TENSOR", [1, 256, 14, 14], "float32"], ["TENSOR", [512, 256, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 14, 14, "float32"], [512, 256, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "NCHW", "float32"], {"i": 116, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 256]], ["tile_oc", "sp", [-1, 4]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.0005758580560328726], 0, 3.503413200378418, 1595915293.6945882], "v": 0.1} 11 | {"i": ["llvm", "topi_nn_conv2d", [["TENSOR", [1, 512, 7, 7], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 7, 7, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 126, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 64]], ["tile_oc", "sp", [-1, 4]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.0010544404164402174], 0, 3.521240472793579, 1595915603.832466], "v": 0.1} 12 | -------------------------------------------------------------------------------- /TVM/tvm-log/V100/1thread-log/x86/onnx/1batch/x86_onnx_1batch_vgg11.log: -------------------------------------------------------------------------------- 1 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 3, 224, 224], "float32"], ["TENSOR", [64, 3, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 3, 224, 224, "float32"], [64, 3, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 203, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 3]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 8]], ["unroll_kw", "ot", false]]}], "r": [[0.007358239859649123], 0, 2.6596744060516357, 1596134705.5011113], "v": 0.1} 2 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 112, 112], "float32"], ["TENSOR", [128, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 112, 112, "float32"], [128, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 644, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", false]]}], "r": [[0.03510063188888889], 0, 3.444300651550293, 1596135017.6982985], "v": 0.1} 3 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 56, 56], "float32"], ["TENSOR", [256, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 56, 56, "float32"], [256, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 754, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 4]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", false]]}], "r": [[0.03707884803703704], 0, 2.217472791671753, 1596135682.8805768], "v": 0.1} 4 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 56, 56], "float32"], ["TENSOR", [256, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 56, 56, "float32"], [256, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 999, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 8]], ["unroll_kw", "ot", false]]}], "r": [[0.0797559154375], 0, 2.635230779647827, 1596136176.4353237], "v": 0.1} 5 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 28, 28], "float32"], ["TENSOR", [512, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 28, 28, "float32"], [512, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 223, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 128]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.038768181307692315], 0, 2.0343985557556152, 1596136448.351628], "v": 0.1} 6 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 28, 28], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 28, 28, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 339, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 512]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.080956131125], 0, 2.7017478942871094, 1596136769.5456984], "v": 0.1} 7 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 14, 14], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 14, 14, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 239, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 512]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.020877420891891894], 0, 3.3145623207092285, 1596137087.8047817], "v": 0.1} 8 | -------------------------------------------------------------------------------- /TVM/tvm-log/V100/1thread-log/x86/onnx/1batch/x86_onnx_1batch_vgg11_bn.log: -------------------------------------------------------------------------------- 1 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 3, 224, 224], "float32"], ["TENSOR", [64, 3, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 3, 224, 224, "float32"], [64, 3, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 266, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 1]], ["tile_ow", "sp", [-1, 56]], ["unroll_kw", "ot", false]]}], "r": [[0.007021917396551724], 0, 2.696695327758789, 1596148555.6848376], "v": 0.1} 2 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 112, 112], "float32"], ["TENSOR", [128, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 112, 112, "float32"], [128, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 644, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", false]]}], "r": [[0.03504108593103448], 0, 1.8984274864196777, 1596149157.4369895], "v": 0.1} 3 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 56, 56], "float32"], ["TENSOR", [256, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 56, 56, "float32"], [256, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 752, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", false]]}], "r": [[0.03649664727906977], 0, 3.453077554702759, 1596149306.290492], "v": 0.1} 4 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 56, 56], "float32"], ["TENSOR", [256, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 56, 56, "float32"], [256, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 278, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 256]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.0881814373125], 0, 2.8889853954315186, 1596149678.886315], "v": 0.1} 5 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 28, 28], "float32"], ["TENSOR", [512, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 28, 28, "float32"], [512, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 305, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 256]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.039452785425], 0, 3.512899160385132, 1596150019.6466649], "v": 0.1} 6 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 28, 28], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 28, 28, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 338, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 256]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.083125186], 0, 2.893906593322754, 1596150304.2983167], "v": 0.1} 7 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 14, 14], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 14, 14, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 239, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 512]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.02112353975], 0, 3.259850025177002, 1596150617.1072066], "v": 0.1} 8 | -------------------------------------------------------------------------------- /TVM/tvm-log/V100/1thread-log/x86/onnx/1batch/x86_onnx_1batch_vgg13.log: -------------------------------------------------------------------------------- 1 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 3, 224, 224], "float32"], ["TENSOR", [64, 3, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 3, 224, 224, "float32"], [64, 3, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 62, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 8]], ["unroll_kw", "ot", true]]}], "r": [[0.007307158767741935], 0, 2.4879350662231445, 1596137637.8692858], "v": 0.1} 2 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 224, 224], "float32"], ["TENSOR", [64, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 224, 224, "float32"], [64, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 217, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 8]], ["unroll_kw", "ot", true]]}], "r": [[0.07957305875], 0, 2.663844108581543, 1596138417.459754], "v": 0.1} 3 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 112, 112], "float32"], ["TENSOR", [128, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 112, 112, "float32"], [128, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 146, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 64]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.036689013093023255], 0, 3.5124192237854004, 1596138782.764441], "v": 0.1} 4 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 112, 112], "float32"], ["TENSOR", [128, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 112, 112, "float32"], [128, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 736, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", false]]}], "r": [[0.072159919], 0, 2.436377763748169, 1596139350.107345], "v": 0.1} 5 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 56, 56], "float32"], ["TENSOR", [256, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 56, 56, "float32"], [256, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 183, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 128]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.038050872666666666], 0, 3.556368350982666, 1596139697.1255763], "v": 0.1} 6 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 56, 56], "float32"], ["TENSOR", [256, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 56, 56, "float32"], [256, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 285, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 64]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.090993940625], 0, 3.028719186782837, 1596140000.5493052], "v": 0.1} 7 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 28, 28], "float32"], ["TENSOR", [512, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 28, 28, "float32"], [512, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 224, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 256]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.03883270849999999], 0, 1.9730944633483887, 1596140302.7012825], "v": 0.1} 8 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 28, 28], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 28, 28, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 339, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 512]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.08228797675], 0, 2.7479629516601562, 1596140696.4466696], "v": 0.1} 9 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 14, 14], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 14, 14, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 238, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 256]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.02116520666666667], 0, 3.2789900302886963, 1596141008.2943335], "v": 0.1} 10 | -------------------------------------------------------------------------------- /TVM/tvm-log/V100/1thread-log/x86/onnx/1batch/x86_onnx_1batch_vgg13_bn.log: -------------------------------------------------------------------------------- 1 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 3, 224, 224], "float32"], ["TENSOR", [64, 3, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 3, 224, 224, "float32"], [64, 3, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 266, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 1]], ["tile_ow", "sp", [-1, 56]], ["unroll_kw", "ot", false]]}], "r": [[0.007073880627118645], 0, 2.7458322048187256, 1596151405.2268648], "v": 0.1} 2 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 224, 224], "float32"], ["TENSOR", [64, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 224, 224, "float32"], [64, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 217, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 8]], ["unroll_kw", "ot", true]]}], "r": [[0.0822564373125], 0, 2.7546753883361816, 1596152188.9520164], "v": 0.1} 3 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 112, 112], "float32"], ["TENSOR", [128, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 112, 112, "float32"], [128, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 142, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 4]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.03557825197777778], 0, 3.6877899169921875, 1596152489.9348638], "v": 0.1} 4 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 112, 112], "float32"], ["TENSOR", [128, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 112, 112, "float32"], [128, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 167, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 128]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.0753068940625], 0, 2.5952022075653076, 1596152796.9607468], "v": 0.1} 5 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 56, 56], "float32"], ["TENSOR", [256, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 56, 56, "float32"], [256, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 183, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 128]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.03734381251851852], 0, 1.949460744857788, 1596153159.5034854], "v": 0.1} 6 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 56, 56], "float32"], ["TENSOR", [256, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 56, 56, "float32"], [256, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 278, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 256]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.08763162775], 0, 2.8660693168640137, 1596153541.2163935], "v": 0.1} 7 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 28, 28], "float32"], ["TENSOR", [512, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 28, 28, "float32"], [512, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 304, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 128]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.0391143866923077], 0, 2.0202627182006836, 1596153859.644981], "v": 0.1} 8 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 28, 28], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 28, 28, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 338, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 256]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.0822540801875], 0, 2.791612386703491, 1596154134.7344446], "v": 0.1} 9 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 14, 14], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 14, 14, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 239, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 512]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.022463529015151515], 0, 3.310187816619873, 1596154519.1572359], "v": 0.1} 10 | -------------------------------------------------------------------------------- /TVM/tvm-log/V100/1thread-log/x86/onnx/1batch/x86_onnx_1batch_vgg16.log: -------------------------------------------------------------------------------- 1 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 3, 224, 224], "float32"], ["TENSOR", [64, 3, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 3, 224, 224, "float32"], [64, 3, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 266, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 1]], ["tile_ow", "sp", [-1, 56]], ["unroll_kw", "ot", false]]}], "r": [[0.007044244835227273], 0, 2.7331020832061768, 1596141662.6082323], "v": 0.1} 2 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 224, 224], "float32"], ["TENSOR", [64, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 224, 224, "float32"], [64, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 616, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", false]]}], "r": [[0.0816159559375], 0, 2.742471933364868, 1596141962.341647], "v": 0.1} 3 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 112, 112], "float32"], ["TENSOR", [128, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 112, 112, "float32"], [128, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 142, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 4]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.03582525989285714], 0, 2.0618093013763428, 1596142589.9639342], "v": 0.1} 4 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 112, 112], "float32"], ["TENSOR", [128, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 112, 112, "float32"], [128, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 738, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 4]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", false]]}], "r": [[0.0750169531875], 0, 2.6903719902038574, 1596142880.2414002], "v": 0.1} 5 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 56, 56], "float32"], ["TENSOR", [256, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 56, 56, "float32"], [256, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 183, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 128]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.037926500214285716], 0, 3.596472978591919, 1596143232.4639652], "v": 0.1} 6 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 56, 56], "float32"], ["TENSOR", [256, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 56, 56, "float32"], [256, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 278, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 256]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.0889731418125], 0, 2.941265821456909, 1596143489.0276618], "v": 0.1} 7 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 28, 28], "float32"], ["TENSOR", [512, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 28, 28, "float32"], [512, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 304, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 128]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.039238906615384615], 0, 2.0323891639709473, 1596143777.9096677], "v": 0.1} 8 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 28, 28], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 28, 28, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 338, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 256]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.0824505625625], 0, 2.8195383548736572, 1596144024.8881836], "v": 0.1} 9 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 14, 14], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 14, 14, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 237, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 128]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.021052652594594594], 0, 3.3929290771484375, 1596144325.17106], "v": 0.1} 10 | -------------------------------------------------------------------------------- /TVM/tvm-log/V100/1thread-log/x86/onnx/1batch/x86_onnx_1batch_vgg16_bn.log: -------------------------------------------------------------------------------- 1 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 3, 224, 224], "float32"], ["TENSOR", [64, 3, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 3, 224, 224, "float32"], [64, 3, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 202, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 8]], ["unroll_kw", "ot", false]]}], "r": [[0.007695260235], 0, 3.365144729614258, 1596155022.6977828], "v": 0.1} 2 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 224, 224], "float32"], ["TENSOR", [64, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 224, 224, "float32"], [64, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 168, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.0882103385], 0, 2.9706177711486816, 1596155953.323366], "v": 0.1} 3 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 112, 112], "float32"], ["TENSOR", [128, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 112, 112, "float32"], [128, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 646, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 4]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", false]]}], "r": [[0.03778872392592593], 0, 2.55721378326416, 1596156307.646097], "v": 0.1} 4 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 112, 112], "float32"], ["TENSOR", [128, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 112, 112, "float32"], [128, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 736, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", false]]}], "r": [[0.080892422875], 0, 2.7656009197235107, 1596156668.5983698], "v": 0.1} 5 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 56, 56], "float32"], ["TENSOR", [256, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 56, 56, "float32"], [256, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 752, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", false]]}], "r": [[0.039426820275], 0, 3.6095218658447266, 1596156976.1887949], "v": 0.1} 6 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 56, 56], "float32"], ["TENSOR", [256, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 56, 56, "float32"], [256, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 285, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 64]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.0940554159375], 0, 3.3694167137145996, 1596157390.9254239], "v": 0.1} 7 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 28, 28], "float32"], ["TENSOR", [512, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 28, 28, "float32"], [512, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 224, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 256]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.03917848638461538], 0, 2.051431179046631, 1596157821.6718287], "v": 0.1} 8 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 28, 28], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 28, 28, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 339, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 512]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.0839289921875], 0, 2.887434720993042, 1596158164.3100765], "v": 0.1} 9 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 14, 14], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 14, 14, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 237, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 128]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.021393914824324324], 0, 3.3672730922698975, 1596158536.8856122], "v": 0.1} 10 | -------------------------------------------------------------------------------- /TVM/tvm-log/V100/1thread-log/x86/onnx/1batch/x86_onnx_1batch_vgg19.log: -------------------------------------------------------------------------------- 1 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 3, 224, 224], "float32"], ["TENSOR", [64, 3, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 3, 224, 224, "float32"], [64, 3, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 266, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 1]], ["tile_ow", "sp", [-1, 56]], ["unroll_kw", "ot", false]]}], "r": [[0.007115023203389831], 0, 2.768251419067383, 1596144703.962029], "v": 0.1} 2 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 224, 224], "float32"], ["TENSOR", [64, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 224, 224, "float32"], [64, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 174, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 64]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.0841481314375], 0, 2.7844526767730713, 1596145414.3013794], "v": 0.1} 3 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 112, 112], "float32"], ["TENSOR", [128, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 112, 112, "float32"], [128, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 142, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 4]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.035597245155555554], 0, 3.674520969390869, 1596146071.3330612], "v": 0.1} 4 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 112, 112], "float32"], ["TENSOR", [128, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 112, 112, "float32"], [128, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 167, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 128]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.0745722280625], 0, 2.5429179668426514, 1596146288.8981009], "v": 0.1} 5 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 56, 56], "float32"], ["TENSOR", [256, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 56, 56, "float32"], [256, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 752, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", false]]}], "r": [[0.036539646674418606], 0, 3.466627597808838, 1596146623.0227365], "v": 0.1} 6 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 56, 56], "float32"], ["TENSOR", [256, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 56, 56, "float32"], [256, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 278, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 256]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.089879415875], 0, 2.984203577041626, 1596146950.6502557], "v": 0.1} 7 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 28, 28], "float32"], ["TENSOR", [512, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 28, 28, "float32"], [512, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 304, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 128]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.03938657192307692], 0, 2.0449047088623047, 1596147223.5301533], "v": 0.1} 8 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 28, 28], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 28, 28, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 338, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 256]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.083426568875], 0, 2.8214197158813477, 1596147490.0761108], "v": 0.1} 9 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 14, 14], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 14, 14, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 237, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 128]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.021148991291666665], 0, 3.2898383140563965, 1596147819.55299], "v": 0.1} 10 | -------------------------------------------------------------------------------- /TVM/tvm-log/V100/1thread-log/x86/onnx/1batch/x86_onnx_1batch_vgg19_bn.log: -------------------------------------------------------------------------------- 1 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 3, 224, 224], "float32"], ["TENSOR", [64, 3, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 3, 224, 224, "float32"], [64, 3, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 203, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 3]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 8]], ["unroll_kw", "ot", false]]}], "r": [[0.00742780922962963], 0, 1.8241431713104248, 1596159050.6248121], "v": 0.1} 2 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 224, 224], "float32"], ["TENSOR", [64, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 224, 224, "float32"], [64, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 128, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 4]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.0865517315], 0, 3.0230109691619873, 1596159548.2950113], "v": 0.1} 3 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 112, 112], "float32"], ["TENSOR", [128, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 112, 112, "float32"], [128, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 146, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 64]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.03846693814814815], 0, 2.253309488296509, 1596159892.515713], "v": 0.1} 4 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 112, 112], "float32"], ["TENSOR", [128, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 112, 112, "float32"], [128, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 163, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 8]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.078220444875], 0, 2.9490809440612793, 1596160534.6393447], "v": 0.1} 5 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 56, 56], "float32"], ["TENSOR", [256, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 56, 56, "float32"], [256, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 180, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 16]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.037962127725], 0, 3.7594969272613525, 1596160911.1891222], "v": 0.1} 6 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 56, 56], "float32"], ["TENSOR", [256, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 56, 56, "float32"], [256, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 351, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 8]], ["unroll_kw", "ot", true]]}], "r": [[0.0882403360625], 0, 3.1485836505889893, 1596161305.9530368], "v": 0.1} 7 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 28, 28], "float32"], ["TENSOR", [512, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 28, 28, "float32"], [512, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 305, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 256]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.03977562865384616], 0, 2.1624677181243896, 1596161686.9588068], "v": 0.1} 8 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 28, 28], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 28, 28, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 338, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 256]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.0838196716875], 0, 2.882793664932251, 1596162008.7756336], "v": 0.1} 9 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 14, 14], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 14, 14, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 238, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 256]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.021925893541666668], 0, 3.577045440673828, 1596162327.5734894], "v": 0.1} 10 | -------------------------------------------------------------------------------- /TVM/tvm-log/V100/gpu/onnx/1batch/gpu_onnx_1batch_alexnet.log: -------------------------------------------------------------------------------- 1 | {"i": ["cuda -model=unknown", "topi_nn_conv2d", [["TENSOR", [1, 3, 224, 224], "float32"], ["TENSOR", [64, 3, 11, 11], "float32"], [4, 4], [2, 2], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 3, 224, 224, "float32"], [64, 3, 11, 11, "float32"], [4, 4], [2, 2], [1, 1], "NCHW", "float32"], {"i": 43739, "t": "direct", "c": null, "e": [["tile_f", "sp", [-1, 2, 4, 4]], ["tile_y", "sp", [-1, 1, 55, 1]], ["tile_x", "sp", [-1, 1, 1, 1]], ["tile_rc", "sp", [-1, 1]], ["tile_ry", "sp", [-1, 11]], ["tile_rx", "sp", [-1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]]}], "r": [[7.151026968894771e-05], 0, 15.608054876327515, 1597021834.8837514], "v": 0.1} 2 | {"i": ["cuda -model=unknown", "topi_nn_conv2d", [["TENSOR", [1, 64, 27, 27], "float32"], ["TENSOR", [192, 64, 5, 5], "float32"], [1, 1], [2, 2], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 27, 27, "float32"], [192, 64, 5, 5, "float32"], [1, 1], [2, 2], [1, 1], "NCHW", "float32"], {"i": 581100, "t": "winograd", "c": null, "e": [["tile_b", "sp", [-1, 1, 1, 1]], ["tile_y", "sp", [-1, 4, 2, 3]], ["tile_x", "sp", [-1, 7, 28, 1]], ["tile_rc", "sp", [-1, 8]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]]}], "r": [[8.344756071564522e-05], 0, 50.94522190093994, 1597022077.3418329], "v": 0.1} 3 | {"i": ["cuda -model=unknown", "topi_nn_conv2d", [["TENSOR", [1, 192, 13, 13], "float32"], ["TENSOR", [384, 192, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 192, 13, 13, "float32"], [384, 192, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 377052, "t": "winograd", "c": null, "e": [["tile_b", "sp", [-1, 1, 1, 1]], ["tile_y", "sp", [-1, 4, 3, 4]], ["tile_x", "sp", [-1, 1, 49, 1]], ["tile_rc", "sp", [-1, 24]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[3.37322729844413e-05], 0, 109.75009059906006, 1597023365.1080105], "v": 0.1} 4 | {"i": ["cuda -model=unknown", "topi_nn_conv2d", [["TENSOR", [1, 384, 13, 13], "float32"], ["TENSOR", [256, 384, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 384, 13, 13, "float32"], [256, 384, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 116851, "t": "winograd", "c": null, "e": [["tile_b", "sp", [-1, 1, 1, 1]], ["tile_y", "sp", [-1, 2, 16, 1]], ["tile_x", "sp", [-1, 1, 7, 7]], ["tile_rc", "sp", [-1, 12]], ["auto_unroll_max_step", "ot", 128], ["unroll_explicit", "ot", 1]]}], "r": [[5.4399347150259065e-05], 0, 62.83663773536682, 1597024365.999884], "v": 0.1} 5 | {"i": ["cuda -model=unknown", "topi_nn_conv2d", [["TENSOR", [1, 256, 13, 13], "float32"], ["TENSOR", [256, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 13, 13, "float32"], [256, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 83420, "t": "winograd", "c": null, "e": [["tile_b", "sp", [-1, 1, 1, 1]], ["tile_y", "sp", [-1, 2, 4, 4]], ["tile_x", "sp", [-1, 1, 49, 1]], ["tile_rc", "sp", [-1, 32]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[3.1335635813404195e-05], 0, 20.848942756652832, 1597024558.8959937], "v": 0.1} 6 | -------------------------------------------------------------------------------- /TVM/tvm-log/V100/gpu/onnx/1batch/gpu_onnx_1batch_vgg16.log: -------------------------------------------------------------------------------- 1 | {"i": ["cuda -model=unknown", "topi_nn_conv2d", [["TENSOR", [1, 3, 224, 224], "float32"], ["TENSOR", [64, 3, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 3, 224, 224, "float32"], [64, 3, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 114839381, "t": "direct", "c": null, "e": [["tile_f", "sp", [-1, 4, 4, 2]], ["tile_y", "sp", [-1, 1, 1, 2]], ["tile_x", "sp", [-1, 1, 32, 1]], ["tile_rc", "sp", [-1, 3]], ["tile_ry", "sp", [-1, 3]], ["tile_rx", "sp", [-1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]]}], "r": [[2.3808705730337077e-05], 0, 3.950860023498535, 1597025847.6298623], "v": 0.1} 2 | {"i": ["cuda -model=unknown", "topi_nn_conv2d", [["TENSOR", [1, 64, 224, 224], "float32"], ["TENSOR", [64, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 224, 224, "float32"], [64, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 1644037, "t": "winograd", "c": null, "e": [["tile_b", "sp", [-1, 1, 1, 1]], ["tile_y", "sp", [-1, 1, 8, 8]], ["tile_x", "sp", [-1, 2, 28, 2]], ["tile_rc", "sp", [-1, 4]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]]}], "r": [[0.0002731917358708189], 0, 48.54783892631531, 1597025921.780228], "v": 0.1} 3 | {"i": ["cuda -model=unknown", "topi_nn_conv2d", [["TENSOR", [1, 64, 112, 112], "float32"], ["TENSOR", [128, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 112, 112, "float32"], [128, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 177675, "t": "winograd", "c": null, "e": [["tile_b", "sp", [-1, 1, 1, 1]], ["tile_y", "sp", [-1, 1, 4, 4]], ["tile_x", "sp", [-1, 4, 98, 1]], ["tile_rc", "sp", [-1, 16]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]]}], "r": [[0.00014082873934527485], 0, 4.8769402503967285, 1597026401.2567792], "v": 0.1} 4 | {"i": ["cuda -model=unknown", "topi_nn_conv2d", [["TENSOR", [1, 128, 112, 112], "float32"], ["TENSOR", [128, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 112, 112, "float32"], [128, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 1813777, "t": "winograd", "c": null, "e": [["tile_b", "sp", [-1, 1, 1, 1]], ["tile_y", "sp", [-1, 1, 8, 8]], ["tile_x", "sp", [-1, 2, 28, 1]], ["tile_rc", "sp", [-1, 8]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[0.00017694997385103012], 0, 6.031948566436768, 1597027895.914515], "v": 0.1} 5 | {"i": ["cuda -model=unknown", "topi_nn_conv2d", [["TENSOR", [1, 128, 56, 56], "float32"], ["TENSOR", [256, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 56, 56, "float32"], [256, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 86697, "t": "winograd", "c": null, "e": [["tile_b", "sp", [-1, 1, 1, 1]], ["tile_y", "sp", [-1, 2, 16, 2]], ["tile_x", "sp", [-1, 2, 14, 1]], ["tile_rc", "sp", [-1, 32]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]]}], "r": [[9.801227246732027e-05], 0, 94.47634816169739, 1597028136.972019], "v": 0.1} 6 | {"i": ["cuda -model=unknown", "topi_nn_conv2d", [["TENSOR", [1, 256, 56, 56], "float32"], ["TENSOR", [256, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 56, 56, "float32"], [256, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 664249, "t": "winograd", "c": null, "e": [["tile_b", "sp", [-1, 1, 1, 1]], ["tile_y", "sp", [-1, 1, 8, 8]], ["tile_x", "sp", [-1, 2, 14, 1]], ["tile_rc", "sp", [-1, 16]], ["auto_unroll_max_step", "ot", 128], ["unroll_explicit", "ot", 1]]}], "r": [[0.00015064380297901152], 0, 52.532723903656006, 1597029246.1794667], "v": 0.1} 7 | {"i": ["cuda -model=unknown", "topi_nn_conv2d", [["TENSOR", [1, 256, 28, 28], "float32"], ["TENSOR", [512, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 28, 28, "float32"], [512, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 885622, "t": "winograd", "c": null, "e": [["tile_b", "sp", [-1, 1, 1, 1]], ["tile_y", "sp", [-1, 2, 8, 4]], ["tile_x", "sp", [-1, 2, 14, 1]], ["tile_rc", "sp", [-1, 16]], ["auto_unroll_max_step", "ot", 128], ["unroll_explicit", "ot", 1]]}], "r": [[0.00011878040783034259], 0, 3.5120179653167725, 1597029828.1968377], "v": 0.1} 8 | {"i": ["cuda -model=unknown", "topi_nn_conv2d", [["TENSOR", [1, 512, 28, 28], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 28, 28, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 995622, "t": "winograd", "c": null, "e": [["tile_b", "sp", [-1, 1, 1, 1]], ["tile_y", "sp", [-1, 2, 8, 4]], ["tile_x", "sp", [-1, 2, 14, 1]], ["tile_rc", "sp", [-1, 32]], ["auto_unroll_max_step", "ot", 128], ["unroll_explicit", "ot", 1]]}], "r": [[0.00022082443843843844], 0, 43.46766757965088, 1597030487.723897], "v": 0.1} 9 | {"i": ["cuda -model=unknown", "topi_nn_conv2d", [["TENSOR", [1, 512, 14, 14], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 14, 14, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 103050, "t": "winograd", "c": null, "e": [["tile_b", "sp", [-1, 1, 1, 1]], ["tile_y", "sp", [-1, 1, 32, 2]], ["tile_x", "sp", [-1, 1, 7, 7]], ["tile_rc", "sp", [-1, 64]], ["auto_unroll_max_step", "ot", 128], ["unroll_explicit", "ot", 1]]}], "r": [[8.529888950058987e-05], 0, 82.1436219215393, 1597031797.7567012], "v": 0.1} 10 | -------------------------------------------------------------------------------- /TVM/tvm-log/V100/gpu/onnx/1batch/gpu_onnx_1batch_vgg19.log: -------------------------------------------------------------------------------- 1 | {"i": ["cuda -model=unknown", "topi_nn_conv2d", [["TENSOR", [1, 3, 224, 224], "float32"], ["TENSOR", [64, 3, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 3, 224, 224, "float32"], [64, 3, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 68120760, "t": "direct", "c": null, "e": [["tile_f", "sp", [-1, 4, 2, 2]], ["tile_y", "sp", [-1, 1, 4, 2]], ["tile_x", "sp", [-1, 1, 8, 1]], ["tile_rc", "sp", [-1, 1]], ["tile_ry", "sp", [-1, 1]], ["tile_rx", "sp", [-1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]]}], "r": [[4.19499814126394e-05], 0, 16.415100574493408, 1597032927.569081], "v": 0.1} 2 | {"i": ["cuda -model=unknown", "topi_nn_conv2d", [["TENSOR", [1, 64, 224, 224], "float32"], ["TENSOR", [64, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 224, 224, "float32"], [64, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 2621845, "t": "winograd", "c": null, "e": [["tile_b", "sp", [-1, 1, 1, 1]], ["tile_y", "sp", [-1, 8, 2, 2]], ["tile_x", "sp", [-1, 1, 64, 1]], ["tile_rc", "sp", [-1, 4]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[0.0002514178732083793], 0, 51.01491189002991, 1597033083.578129], "v": 0.1} 3 | {"i": ["cuda -model=unknown", "topi_nn_conv2d", [["TENSOR", [1, 64, 112, 112], "float32"], ["TENSOR", [128, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 112, 112, "float32"], [128, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 1352596, "t": "winograd", "c": null, "e": [["tile_b", "sp", [-1, 1, 1, 1]], ["tile_y", "sp", [-1, 2, 4, 4]], ["tile_x", "sp", [-1, 4, 49, 1]], ["tile_rc", "sp", [-1, 16]], ["auto_unroll_max_step", "ot", 128], ["unroll_explicit", "ot", 1]]}], "r": [[0.00012751478710394662], 0, 82.07050132751465, 1597033852.153047], "v": 0.1} 4 | {"i": ["cuda -model=unknown", "topi_nn_conv2d", [["TENSOR", [1, 128, 112, 112], "float32"], ["TENSOR", [128, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 112, 112, "float32"], [128, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 52153, "t": "winograd", "c": null, "e": [["tile_b", "sp", [-1, 1, 1, 1]], ["tile_y", "sp", [-1, 8, 2, 4]], ["tile_x", "sp", [-1, 1, 196, 1]], ["tile_rc", "sp", [-1, 2]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]]}], "r": [[0.00023743959940944882], 0, 39.90784311294556, 1597034473.6508565], "v": 0.1} 5 | {"i": ["cuda -model=unknown", "topi_nn_conv2d", [["TENSOR", [1, 128, 56, 56], "float32"], ["TENSOR", [256, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 56, 56, "float32"], [256, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 154091, "t": "winograd", "c": null, "e": [["tile_b", "sp", [-1, 1, 1, 1]], ["tile_y", "sp", [-1, 2, 1, 32]], ["tile_x", "sp", [-1, 1, 98, 1]], ["tile_rc", "sp", [-1, 2]], ["auto_unroll_max_step", "ot", 128], ["unroll_explicit", "ot", 0]]}], "r": [[0.00011284428790199081], 0, 19.839908361434937, 1597035115.0255501], "v": 0.1} 6 | {"i": ["cuda -model=unknown", "topi_nn_conv2d", [["TENSOR", [1, 256, 56, 56], "float32"], ["TENSOR", [256, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 56, 56, "float32"], [256, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 814226, "t": "winograd", "c": null, "e": [["tile_b", "sp", [-1, 1, 1, 1]], ["tile_y", "sp", [-1, 2, 2, 8]], ["tile_x", "sp", [-1, 2, 98, 1]], ["tile_rc", "sp", [-1, 16]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[0.0001496300081967213], 0, 49.112138509750366, 1597036702.1930034], "v": 0.1} 7 | {"i": ["cuda -model=unknown", "topi_nn_conv2d", [["TENSOR", [1, 256, 28, 28], "float32"], ["TENSOR", [512, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 28, 28, "float32"], [512, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 1085624, "t": "winograd", "c": null, "e": [["tile_b", "sp", [-1, 1, 1, 1]], ["tile_y", "sp", [-1, 2, 2, 8]], ["tile_x", "sp", [-1, 2, 98, 1]], ["tile_rc", "sp", [-1, 16]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[0.00012858194594594593], 0, 47.94846868515015, 1597037021.6955402], "v": 0.1} 8 | {"i": ["cuda -model=unknown", "topi_nn_conv2d", [["TENSOR", [1, 512, 28, 28], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 28, 28, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 533850, "t": "winograd", "c": null, "e": [["tile_b", "sp", [-1, 1, 1, 1]], ["tile_y", "sp", [-1, 1, 32, 4]], ["tile_x", "sp", [-1, 7, 14, 1]], ["tile_rc", "sp", [-1, 16]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]]}], "r": [[0.00022203016053169732], 0, 28.076951026916504, 1597038448.6288197], "v": 0.1} 9 | {"i": ["cuda -model=unknown", "topi_nn_conv2d", [["TENSOR", [1, 512, 14, 14], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 14, 14, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 97765, "t": "winograd", "c": null, "e": [["tile_b", "sp", [-1, 1, 1, 1]], ["tile_y", "sp", [-1, 1, 16, 2]], ["tile_x", "sp", [-1, 7, 7, 1]], ["tile_rc", "sp", [-1, 16]], ["auto_unroll_max_step", "ot", 128], ["unroll_explicit", "ot", 1]]}], "r": [[9.011374835255355e-05], 0, 66.90152382850647, 1597038746.4399135], "v": 0.1} 10 | -------------------------------------------------------------------------------- /TVM/tvm-log/V100/x86/onnx/1batch/x86_onnx_1batch_alexnet.log: -------------------------------------------------------------------------------- 1 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 3, 224, 224], "float32"], ["TENSOR", [64, 3, 11, 11], "float32"], [4, 4], [2, 2], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 3, 224, 224, "float32"], [64, 3, 11, 11, "float32"], [4, 4], [2, 2], [1, 1], "NCHW", "float32"], {"i": 79, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 3]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 5]], ["unroll_kw", "ot", false]]}], "r": [[0.00022956165229646823], 0, 3.2858760356903076, 1596418594.527846], "v": 0.1} 2 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 27, 27], "float32"], ["TENSOR", [192, 64, 5, 5], "float32"], [1, 1], [2, 2], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 27, 27, "float32"], [192, 64, 5, 5, "float32"], [1, 1], [2, 2], [1, 1], "NCHW", "float32"], {"i": 236, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 32]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 9]], ["unroll_kw", "ot", true]]}], "r": [[0.0007750931613070539], 0, 3.5438387393951416, 1596419078.6291635], "v": 0.1} 3 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 192, 13, 13], "float32"], ["TENSOR", [384, 192, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 192, 13, 13, "float32"], [384, 192, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 306, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 96]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 13]], ["unroll_kw", "ot", true]]}], "r": [[0.00046395220591715974], 0, 3.982252597808838, 1596419385.8983676], "v": 0.1} 4 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 384, 13, 13], "float32"], ["TENSOR", [256, 384, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 384, 13, 13, "float32"], [256, 384, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 204, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 96]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 13]], ["unroll_kw", "ot", true]]}], "r": [[0.0006165956966919092], 0, 3.5372023582458496, 1596419820.8565571], "v": 0.1} 5 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 13, 13], "float32"], ["TENSOR", [256, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 13, 13, "float32"], [256, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 115, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 128]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 13]], ["unroll_kw", "ot", true]]}], "r": [[0.0004181733251041047], 0, 3.4078328609466553, 1596419927.9211326], "v": 0.1} 6 | -------------------------------------------------------------------------------- /TVM/tvm-log/V100/x86/onnx/1batch/x86_onnx_1batch_vgg11_bn.log: -------------------------------------------------------------------------------- 1 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 3, 224, 224], "float32"], ["TENSOR", [64, 3, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 3, 224, 224, "float32"], [64, 3, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 216, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 14]], ["unroll_kw", "ot", false]]}], "r": [[0.0010033297200920245], 0, 2.515608549118042, 1596428849.8843882], "v": 0.1} 2 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 112, 112], "float32"], ["TENSOR", [128, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 112, 112, "float32"], [128, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 756, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 8]], ["unroll_kw", "ot", false]]}], "r": [[0.005383160529032258], 0, 3.7412190437316895, 1596429199.7756197], "v": 0.1} 3 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 56, 56], "float32"], ["TENSOR", [256, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 56, 56, "float32"], [256, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 245, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 32]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.006161490568047338], 0, 2.6498303413391113, 1596429510.6520622], "v": 0.1} 4 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 56, 56], "float32"], ["TENSOR", [256, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 56, 56, "float32"], [256, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 1010, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 4]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 8]], ["unroll_kw", "ot", false]]}], "r": [[0.008229506529411765], 0, 4.083755254745483, 1596430112.3462455], "v": 0.1} 5 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 28, 28], "float32"], ["TENSOR", [512, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 28, 28, "float32"], [512, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 217, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 2]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.005245289829145728], 0, 3.3366706371307373, 1596430552.1417232], "v": 0.1} 6 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 28, 28], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 28, 28, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 338, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 256]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.00778388508974359], 0, 3.645012140274048, 1596431264.9730694], "v": 0.1} 7 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 14, 14], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 14, 14, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 236, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 64]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.0018743747927927926], 0, 5.038111686706543, 1596431911.6931493], "v": 0.1} 8 | -------------------------------------------------------------------------------- /TVM/tvm-log/V100/x86/onnx/1batch/x86_onnx_1batch_vgg13_bn.log: -------------------------------------------------------------------------------- 1 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 3, 224, 224], "float32"], ["TENSOR", [64, 3, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 3, 224, 224, "float32"], [64, 3, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 48, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.0007749434295900179], 0, 4.261383533477783, 1596432716.6257236], "v": 0.1} 2 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 224, 224], "float32"], ["TENSOR", [64, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 224, 224, "float32"], [64, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 127, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 2]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.006494982375545851], 0, 5.016509532928467, 1596433392.8943868], "v": 0.1} 3 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 112, 112], "float32"], ["TENSOR", [128, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 112, 112, "float32"], [128, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 145, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 32]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.002789270905982906], 0, 4.009928226470947, 1596433859.4686456], "v": 0.1} 4 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 112, 112], "float32"], ["TENSOR", [128, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 112, 112, "float32"], [128, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 280, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 8]], ["unroll_kw", "ot", true]]}], "r": [[0.006484865973799127], 0, 4.803851127624512, 1596434775.1962597], "v": 0.1} 5 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 56, 56], "float32"], ["TENSOR", [256, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 56, 56, "float32"], [256, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 757, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 32]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", false]]}], "r": [[0.0032510859921875], 0, 4.586193561553955, 1596435210.412156], "v": 0.1} 6 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 56, 56], "float32"], ["TENSOR", [256, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 56, 56, "float32"], [256, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 351, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 8]], ["unroll_kw", "ot", true]]}], "r": [[0.007059429975903614], 0, 3.7002158164978027, 1596435557.5712738], "v": 0.1} 7 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 28, 28], "float32"], ["TENSOR", [512, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 28, 28, "float32"], [512, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 304, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 128]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.002921994344927536], 0, 2.798251152038574, 1596436403.524336], "v": 0.1} 8 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 28, 28], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 28, 28, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 337, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 128]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.006368897949748743], 0, 2.8624472618103027, 1596436720.9896698], "v": 0.1} 9 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 14, 14], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 14, 14, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 235, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 32]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.0017200022697899837], 0, 3.1631205081939697, 1596437095.2725296], "v": 0.1} 10 | -------------------------------------------------------------------------------- /TVM/tvm-log/V100/x86/onnx/1batch/x86_onnx_1batch_vgg16.log: -------------------------------------------------------------------------------- 1 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 3, 224, 224], "float32"], ["TENSOR", [64, 3, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 3, 224, 224, "float32"], [64, 3, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 266, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 1]], ["tile_ow", "sp", [-1, 56]], ["unroll_kw", "ot", false]]}], "r": [[0.0005424911252357601], 0, 3.4935929775238037, 1596420301.6117923], "v": 0.1} 2 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 224, 224], "float32"], ["TENSOR", [64, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 224, 224, "float32"], [64, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 707, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 8]], ["unroll_kw", "ot", false]]}], "r": [[0.006184083576923077], 0, 2.5293757915496826, 1596421060.1979296], "v": 0.1} 3 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 112, 112], "float32"], ["TENSOR", [128, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 112, 112, "float32"], [128, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 144, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 16]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.002703847277777778], 0, 4.202467679977417, 1596421447.5808868], "v": 0.1} 4 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 112, 112], "float32"], ["TENSOR", [128, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 112, 112, "float32"], [128, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 736, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", false]]}], "r": [[0.005944406802083333], 0, 2.4452080726623535, 1596421703.8202536], "v": 0.1} 5 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 56, 56], "float32"], ["TENSOR", [256, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 56, 56, "float32"], [256, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 312, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 8]], ["unroll_kw", "ot", true]]}], "r": [[0.002971506982404692], 0, 2.209097146987915, 1596422056.2404575], "v": 0.1} 6 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 56, 56], "float32"], ["TENSOR", [256, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 56, 56, "float32"], [256, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 351, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 8]], ["unroll_kw", "ot", true]]}], "r": [[0.0071268702202380955], 0, 2.560438632965088, 1596422272.0192564], "v": 0.1} 7 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 28, 28], "float32"], ["TENSOR", [512, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 28, 28, "float32"], [512, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 305, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 256]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.0029784808550295857], 0, 2.169175148010254, 1596422534.943569], "v": 0.1} 8 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 28, 28], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 28, 28, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 339, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 512]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.00616093003125], 0, 2.498378276824951, 1596422850.474881], "v": 0.1} 9 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 14, 14], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 14, 14, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 239, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 512]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.001602923093047035], 0, 3.6954805850982666, 1596423141.038578], "v": 0.1} 10 | -------------------------------------------------------------------------------- /TVM/tvm-log/V100/x86/onnx/1batch/x86_onnx_1batch_vgg16_bn.log: -------------------------------------------------------------------------------- 1 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 3, 224, 224], "float32"], ["TENSOR", [64, 3, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 3, 224, 224, "float32"], [64, 3, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 266, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 1]], ["tile_ow", "sp", [-1, 56]], ["unroll_kw", "ot", false]]}], "r": [[0.0005902194159811986], 0, 4.310074329376221, 1596437618.194491], "v": 0.1} 2 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 224, 224], "float32"], ["TENSOR", [64, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 224, 224, "float32"], [64, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 131, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 32]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.00676393948], 0, 3.8549344539642334, 1596438408.0532758], "v": 0.1} 3 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 112, 112], "float32"], ["TENSOR", [128, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 112, 112, "float32"], [128, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 644, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", false]]}], "r": [[0.0031944942763157897], 0, 3.7438743114471436, 1596439017.3408623], "v": 0.1} 4 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 112, 112], "float32"], ["TENSOR", [128, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 112, 112, "float32"], [128, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 161, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 2]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.00617631693], 0, 2.9232017993927, 1596439704.5337327], "v": 0.1} 5 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 56, 56], "float32"], ["TENSOR", [256, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 56, 56, "float32"], [256, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 752, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", false]]}], "r": [[0.00294134817260274], 0, 4.3671252727508545, 1596440070.4522736], "v": 0.1} 6 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 56, 56], "float32"], ["TENSOR", [256, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 56, 56, "float32"], [256, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 277, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 128]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.006901831700564971], 0, 2.611910104751587, 1596440681.1741369], "v": 0.1} 7 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 28, 28], "float32"], ["TENSOR", [512, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 28, 28, "float32"], [512, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 305, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 256]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.002868468911392405], 0, 3.7107415199279785, 1596441353.881557], "v": 0.1} 8 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 28, 28], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 28, 28, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 339, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 512]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.006260967634408602], 0, 2.5013175010681152, 1596441579.2225742], "v": 0.1} 9 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 14, 14], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 14, 14, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 239, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 512]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.0016046661731160896], 0, 3.7170960903167725, 1596441814.2593622], "v": 0.1} 10 | -------------------------------------------------------------------------------- /TVM/tvm-log/V100/x86/onnx/1batch/x86_onnx_1batch_vgg19.log: -------------------------------------------------------------------------------- 1 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 3, 224, 224], "float32"], ["TENSOR", [64, 3, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 3, 224, 224, "float32"], [64, 3, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 239, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 3]], ["tile_oc", "sp", [-1, 1]], ["tile_ow", "sp", [-1, 28]], ["unroll_kw", "ot", false]]}], "r": [[0.0010279065728862973], 0, 4.409317255020142, 1596423478.2075377], "v": 0.1} 2 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 224, 224], "float32"], ["TENSOR", [64, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 224, 224, "float32"], [64, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 131, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 32]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.008064502673076923], 0, 3.9076805114746094, 1596424179.2082515], "v": 0.1} 3 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 112, 112], "float32"], ["TENSOR", [128, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 112, 112, "float32"], [128, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 595, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 32]], ["tile_ow", "sp", [-1, 2]], ["unroll_kw", "ot", false]]}], "r": [[0.004441140848101266], 0, 3.8891024589538574, 1596424954.4591744], "v": 0.1} 4 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 112, 112], "float32"], ["TENSOR", [128, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 112, 112, "float32"], [128, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 229, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 32]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.008208767855421687], 0, 4.298424243927002, 1596425483.2251189], "v": 0.1} 5 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 56, 56], "float32"], ["TENSOR", [256, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 56, 56, "float32"], [256, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 744, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", false]]}], "r": [[0.005250375819905213], 0, 3.4994242191314697, 1596425904.071961], "v": 0.1} 6 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 56, 56], "float32"], ["TENSOR", [256, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 56, 56, "float32"], [256, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 436, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 16]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 14]], ["unroll_kw", "ot", true]]}], "r": [[0.0075408437393939395], 0, 3.0936758518218994, 1596426382.773889], "v": 0.1} 7 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 28, 28], "float32"], ["TENSOR", [512, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 28, 28, "float32"], [512, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 230, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 32]], ["tile_oc", "sp", [-1, 32]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.004070615406844107], 0, 4.061920166015625, 1596427130.5017076], "v": 0.1} 8 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 28, 28], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 28, 28, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 349, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 512]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.01022889992079208], 0, 2.470304012298584, 1596427624.1226761], "v": 0.1} 9 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 14, 14], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 14, 14, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 334, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 16]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 14]], ["unroll_kw", "ot", true]]}], "r": [[0.0019810853420669577], 0, 5.826848983764648, 1596428040.1580238], "v": 0.1} 10 | -------------------------------------------------------------------------------- /TVM/tvm-log/V100/x86/onnx/1batch/x86_onnx_1batch_vgg19_bn.log: -------------------------------------------------------------------------------- 1 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 3, 224, 224], "float32"], ["TENSOR", [64, 3, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 3, 224, 224, "float32"], [64, 3, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 203, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 3]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 8]], ["unroll_kw", "ot", false]]}], "r": [[0.0005884158176964149], 0, 3.5786807537078857, 1596442237.397887], "v": 0.1} 2 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 224, 224], "float32"], ["TENSOR", [64, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 224, 224, "float32"], [64, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 128, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 4]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.00664794424858757], 0, 2.6991360187530518, 1596442839.5263894], "v": 0.1} 3 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 64, 112, 112], "float32"], ["TENSOR", [128, 64, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 112, 112, "float32"], [128, 64, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 141, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 2]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", true]]}], "r": [[0.002783849520979021], 0, 3.789486885070801, 1596443348.0094779], "v": 0.1} 4 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 112, 112], "float32"], ["TENSOR", [128, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 112, 112, "float32"], [128, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 736, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 16]], ["tile_ow", "sp", [-1, 4]], ["unroll_kw", "ot", false]]}], "r": [[0.0060426112239583335], 0, 2.5039925575256348, 1596443571.820053], "v": 0.1} 5 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 128, 56, 56], "float32"], ["TENSOR", [256, 128, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 56, 56, "float32"], [256, 128, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 312, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 8]], ["unroll_kw", "ot", true]]}], "r": [[0.0029883460089285717], 0, 2.1618564128875732, 1596443918.93725], "v": 0.1} 6 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 56, 56], "float32"], ["TENSOR", [256, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 56, 56, "float32"], [256, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 351, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 8]], ["unroll_kw", "ot", true]]}], "r": [[0.0071266013869047615], 0, 2.5642168521881104, 1596444140.8450437], "v": 0.1} 7 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 256, 28, 28], "float32"], ["TENSOR", [512, 256, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 28, 28, "float32"], [512, 256, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 305, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 256]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.002980121360119048], 0, 2.163407564163208, 1596444394.0969992], "v": 0.1} 8 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 28, 28], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 28, 28, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 339, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 512]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.006320657708333333], 0, 2.583472490310669, 1596444711.9621189], "v": 0.1} 9 | {"i": ["llvm -mcpu=core-avx2", "topi_nn_conv2d", [["TENSOR", [1, 512, 14, 14], "float32"], ["TENSOR", [512, 512, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 14, 14, "float32"], [512, 512, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "NCHW", "float32"], {"i": 239, "t": "direct", "c": null, "e": [["tile_ic", "sp", [-1, 512]], ["tile_oc", "sp", [-1, 8]], ["tile_ow", "sp", [-1, 7]], ["unroll_kw", "ot", true]]}], "r": [[0.001613079911794872], 0, 3.6917200088500977, 1596444989.694601], "v": 0.1} 10 | -------------------------------------------------------------------------------- /TVM/tvm_untuned.py: -------------------------------------------------------------------------------- 1 | import tvm 2 | import onnx 3 | import numpy as np 4 | import tvm.relay as relay 5 | import tvm.contrib.graph_runtime as runtime 6 | 7 | 8 | def get_onnx(path): 9 | ox = onnx.load(path) 10 | name = ox.graph.input[0].name 11 | input_shape = [i.dim_value for i in ox.graph.input[0].type.tensor_type.shape.dim] 12 | input_shape[0] = 1 13 | return ox, {name: input_shape} 14 | 15 | def create_target(device): 16 | if device == 'x86' or device == 'CPU': 17 | target = tvm.target.create('llvm -mcpu=core-avx2') 18 | elif device == 'gpu': 19 | target = tvm.target.cuda() 20 | return target 21 | 22 | def create_ctx(device): 23 | if device == 'x86' or device == 'CPU': 24 | ctx = tvm.cpu() 25 | elif device == 'gpu': 26 | ctx = tvm.gpu() 27 | return ctx 28 | 29 | def speed(graph, lib, params, ctx, input_dict): 30 | for name, shape in input_dict.items(): 31 | input_name, input_shape = name, shape 32 | input_data = tvm.nd.array(np.random.uniform(size=input_shape).astype("float32")) 33 | 34 | module = runtime.create(graph, lib, ctx) 35 | module.set_input(input_name, input_data) 36 | module.set_input(**params) 37 | 38 | ftimer = module.module.time_evaluator('run', ctx, number=1, repeat=15) 39 | prof_res = np.array(ftimer().results) 40 | return prof_res 41 | 42 | 43 | if __name__ == "__main__": 44 | import argparse 45 | import os 46 | 47 | parser = argparse.ArgumentParser() 48 | parser.add_argument('-m', '--model', default="", type=str) 49 | parser.add_argument('-d', '--device', default='', type=str) 50 | args = parser.parse_args() 51 | 52 | model_name = os.path.basename(args.model).replace('.onnx', '') 53 | 54 | ox, input_shape = get_onnx(args.model) 55 | target = create_target(args.device) 56 | mod, relay_params = relay.frontend.from_onnx(ox, input_shape) 57 | func = mod['main'] 58 | with relay.build_config(opt_level=3): 59 | graph, lib, params = relay.build(func, target, params=relay_params) 60 | 61 | ctx = create_ctx(args.device) 62 | prof_res = speed(graph, lib, params, ctx, input_shape) 63 | import time 64 | print(time.strftime('[localtime] %Y-%m-%d %H:%M:%S', time.localtime())) 65 | print(model_name) 66 | print(input_shape) 67 | for i in range(5, 15): 68 | print('-- {}, iteration time(s) is {:.6f}'.format(i, prof_res[i])) 69 | 70 | print('@@ {}, average time(s) is {:.6f}'.format(model_name, np.mean(prof_res[5:]))) 71 | print('FINISH') 72 | 73 | -------------------------------------------------------------------------------- /XLA/README.md: -------------------------------------------------------------------------------- 1 | # XLA 2 | 3 | ## Run end-to-end evaluation 4 | 5 | ```bash 6 | # run a single onnx model 7 | python tf2xla.py /path/to/onnx/file --device ['x86', 'gpu'] --thread ['single', 'multiple'] 8 | 9 | # run onnx models in a dictory 10 | # Firstly, modify the onnx_path in the following .py file 11 | python run_tf2xla_end2end.py --device ['x86', 'gpu'] --thread ['single', 'multiple'] 12 | ``` 13 | 14 | ## Profiling Tensorflow and XLA 15 | 16 | ### Reference 17 | 18 | [NVIDIA DLProf]({https://docs.nvidia.com/deeplearning/frameworks/dlprof-user-guide/) 19 | 20 | [tensorflow.profiler.experimental](https://www.tensorflow.org/api_docs/python/tf/profiler/experimental) 21 | 22 | ### Profiling with DLProf 23 | ```bash 24 | # requirements: nvidia-docker 25 | 26 | docker pull nvcr.io/nvidia/tensorflow:20.07-tf1-py3 27 | docker run --rm --gpus=1 --shm-size=1g --ulimit memlock=-1 --ulimit stack=67108864 -it -p6006:6006 -v/path/to/local/dir:/path/to/mapped/dir nvcr.io/nvidia/tensorflow:20.07-tf1-py3 28 | 29 | # turn off automatic mixed precision 30 | export TF_ENABLE_AUTO_MIXED_PRECISION=0 31 | 32 | # run NN model 33 | dlprof python run_keras_models.py -m ['mobilenet', 'resnet'] 34 | 35 | # analyze the profiled data 36 | dlprof reports=detail file_formats =csv 37 | dlprof --reports=detail --nsys_database=nsys_profile.sqlite 38 | # get the dlprof_detailed.csv 39 | ``` 40 | 41 | ### Profiling with tensorflow.profiler.experimental 42 | ```bash 43 | python run_tfprofiler.py -m ['mobilenet', 'resnet'] 44 | 45 | # visualize with tensorboard 46 | tensorboard --logdir=/path/to/your/log --port=6006 --host=0.0.0.0 47 | ``` 48 | -------------------------------------------------------------------------------- /XLA/run_keras_models.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import os, argparse 4 | 5 | 6 | parser = argparse.ArgumentParser(description = "run tf nvtx model") 7 | parser.add_argument("-m", "--model", choices=["resnet", "mobilenet"]) 8 | arg = parser.parse_args() 9 | 10 | os.environ['TF_ENABLE_AUTO_MIXED_PRECISION'] = '0' 11 | os.environ["CUDA_VISIBLE_DEVICES"] = '0' 12 | #os.environ["TF_XLA_FLAGS"] = "--tf_xla_auto_jit=2 --tf_xla_cpu_global_jit" 13 | 14 | tf.keras.backend.clear_session() 15 | tf.config.optimizer.set_jit(True) # Enable XLA 16 | 17 | if arg.model == 'resnet': 18 | from resnet import ResNet50 19 | model = ResNet50(include_top=True, weights=None, input_tensor=None, input_shape=(224, 224, 3), 20 | pooling=None, classes=1000) 21 | 22 | if arg.model == 'mobilenet': 23 | from mobilenet_v2 import MobileNetV2 24 | model = MobileNetV2(alpha=1.0, include_top=True, weights=None, input_tensor=None, 25 | pooling=None, classes=1000, classifier_activation='softmax', input_shape=(224, 224, 3)) 26 | 27 | model.summary() 28 | 29 | shape=[1,224,224,3] 30 | picture = np.ones(shape, dtype=np.float32) 31 | 32 | nSteps=50 33 | for i in range(0, nSteps): 34 | ret = model.predict(picture1, batch_size=1) 35 | 36 | -------------------------------------------------------------------------------- /XLA/run_nsys_profile.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | export CUDA_LAUNCH_BLOCKING=1 4 | 5 | nsys profile \ 6 | -d 60 \ 7 | -w true \ 8 | --force-overwrite=true \ 9 | --sample=cpu \ 10 | -t 'nvtx,cuda,cublas,cudnn,openmp' \ 11 | --stop-on-exit=true \ 12 | --kill=sigkill \ 13 | -o $1"_nvtx_more" \ 14 | python run_keras_models.py -m $1 15 | -------------------------------------------------------------------------------- /XLA/run_tf2xla_end2end.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append("../") 3 | from utils.utils import * 4 | import argparse 5 | 6 | if __name__ == "__main__": 7 | 8 | parser = argparse.ArgumentParser(description = "run xla test") 9 | parser.add_argument("-d", "--device", choices=["x86", "gpu"]) 10 | parser.add_argument("-t", "--thread", default="multiple", choices=["multiple","single"]) 11 | arg = parser.parse_args() 12 | 13 | onnx_path='~/hub' 14 | 15 | if arg.device=='x86': 16 | device='x86' 17 | log_path='../logs/Appendix_xla2_broadwell_5_15' 18 | if arg.thread=='single': 19 | log_path+='_single' 20 | if arg.device=='gpu': 21 | device='gpu' 22 | log_path='../logs/Appendix_xla2_v100_5_15' 23 | if arg.thread=='single': 24 | log_path+='_single' 25 | 26 | mkdir(log_path) 27 | 28 | 29 | f = listdir(onnx_path, '') 30 | 31 | for i, name in enumerate(f): 32 | on = os.path.join(onnx_path, name.strip()) 33 | cmd = 'python tf2xla.py %s -d %s -t %s' % (on, device, arg.thread) 34 | cmd += ' 2>&1 | tee ' + os.path.join(log_path,name.strip()) 35 | 36 | print('##NO %d' % i) 37 | print(cmd) 38 | os.system(cmd) 39 | 40 | for i, name in enumerate(['densenet121', 'vgg16', 'vgg19']): 41 | on = name 42 | cmd = 'python tf2xla.py %s -d %s -t %s' % (on, device, arg.thread) 43 | cmd += ' 2>&1 | tee ' + os.path.join(log_path,name.strip()) 44 | 45 | print('##NO %d' % (len(f)+i)) 46 | print(cmd) 47 | os.system(cmd) 48 | 49 | 50 | -------------------------------------------------------------------------------- /XLA/run_tfprofiler.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import os, argparse 4 | from datetime import datetime 5 | 6 | 7 | parser = argparse.ArgumentParser(description = "run tf nvtx model") 8 | parser.add_argument("-m", "--model", choices=["resnet", "mobilenet"]) 9 | arg = parser.parse_args() 10 | 11 | os.environ["TF_XLA_FLAGS"] = "--tf_xla_auto_jit=2 --tf_xla_cpu_global_jit" 12 | 13 | 14 | tf.keras.backend.clear_session() 15 | tf.config.optimizer.set_jit(True) # Enable XLA 16 | 17 | if arg.model == 'resnet': 18 | from tensorflow.keras.applications import ResNet50 19 | model = ResNet50(include_top=True, weights=None, input_tensor=None, input_shape=(224, 224, 3), 20 | pooling=None, classes=1000) 21 | 22 | if arg.model == 'mobilenet': 23 | from tensorflow.keras.applications import MobileNetV2 24 | model = MobileNetV2(alpha=1.0, include_top=True, weights=None, input_tensor=None, 25 | pooling=None, classes=1000, classifier_activation='softmax', input_shape=(224, 224, 3)) 26 | 27 | model.summary() 28 | 29 | shape=[1,224,224,3] 30 | picture = np.ones(shape, dtype=np.float32) 31 | #picture1 = np.random.rand(1,224,224,3) 32 | 33 | nSteps=50 34 | for i in range(0, nSteps): 35 | #picture = np.random.rand(1,224,224,3) 36 | if i==nSteps-1: 37 | tf.profiler.experimental.start('mobilenet_logdir'+str(i)) 38 | ret = model.predict(picture, batch_size=1) 39 | if i==nSteps-1: 40 | tf.profiler.experimental.stop() 41 | 42 | -------------------------------------------------------------------------------- /XLA/tf2xla.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorflow_hub as hub 3 | import numpy as np 4 | import time 5 | import os, argparse 6 | 7 | 8 | def tf2xla(sm_path, nSteps=15): 9 | if sm_path == 'densenet121': 10 | model = tf.keras.applications.DenseNet121(weights='imagenet', classes=1000) 11 | elif sm_path == 'vgg16': 12 | model = tf.keras.applications.VGG16(weights='imagenet', classes=1000) 13 | elif sm_path == 'vgg19': 14 | model = tf.keras.applications.VGG19(weights='imagenet', classes=1000) 15 | 16 | else: 17 | model = tf.keras.Sequential([ 18 | hub.KerasLayer(sm_path) 19 | ]) 20 | 21 | shape=[1,224,224,3] 22 | picture = np.ones(shape, dtype=np.float32) 23 | 24 | avg_time=0 25 | for i in range(0, nSteps): 26 | time1 = time.time() 27 | ret = model.predict(picture, batch_size=1) 28 | time2 = time.time() 29 | if i < 5: 30 | continue 31 | avg_time += float(time2-time1) 32 | info = '-- %d, iteration time(s) is %.4f' %(i, float(time2-time1)) 33 | print(info) 34 | 35 | avg_time = avg_time / (nSteps-5) 36 | name = os.path.basename(sm_path) 37 | print("@@ %s, average time(s) is %.4f" % (name, avg_time)) 38 | print('FINISH') 39 | 40 | 41 | if __name__ == "__main__": 42 | 43 | parser = argparse.ArgumentParser(description = "run onnx xla model") 44 | parser.add_argument("onnx", help = "onnx model path") 45 | parser.add_argument("-d", "--device", default="x86", choices=["gpu","x86", "arm"]) 46 | parser.add_argument("-t", "--thread", default="multiple", choices=["multiple","single"]) 47 | arg = parser.parse_args() 48 | 49 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 50 | 51 | tf.keras.backend.clear_session() 52 | tf.config.optimizer.set_jit(True) # Enable XLA 53 | 54 | 55 | #if arg.device=='x86': 56 | if arg.thread=='single': 57 | tf.config.threading.set_inter_op_parallelism_threads(1) 58 | tf.config.threading.set_intra_op_parallelism_threads(1) 59 | 60 | 61 | print(time.strftime("[localtime] %Y-%m-%d %H:%M:%S", time.localtime()) ) 62 | 63 | tf2xla(arg.onnx) 64 | -------------------------------------------------------------------------------- /micro-models/README.md: -------------------------------------------------------------------------------- 1 | # Micro-models 2 | 3 | Extract the convolution layers from a ONNX model, and convert them to ONNX format. 4 | -------------------------------------------------------------------------------- /micro-models/convert_onnx_to_pb.py: -------------------------------------------------------------------------------- 1 | import sys, argparse 2 | sys.path.append("../") 3 | from utils.utils import * 4 | 5 | if __name__ == "__main__": 6 | 7 | onnx_path ='./mobilenet/' 8 | pb_path ='./mobilenet_pb/' 9 | 10 | f = listdir(onnx_path) 11 | for i, name in enumerate(f): 12 | on = os.path.join(onnx_path, name.strip()+'.onnx') 13 | pb = os.path.join(pb_path, name.strip()+'.pb') 14 | cmd = 'onnx-tf convert -i %s -o %s' % (on, pb) 15 | print('##NO %d' % i) 16 | print(cmd) 17 | os.system(cmd) 18 | 19 | 20 | -------------------------------------------------------------------------------- /micro-models/gen_conv_resnet50.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision 3 | 4 | 5 | def get_name2conv(model): 6 | name2conv = {} 7 | name2conv['conv1'] = model.conv1 8 | for layer_number in range(2, 6): 9 | layer = getattr(model, 'layer' + str(layer_number - 1)) 10 | for bottleneck_number in range(1, len(layer) + 1): 11 | bottleneck = layer[bottleneck_number - 1] 12 | nodes = vars(bottleneck)['_modules'] 13 | for node in nodes.keys(): 14 | if 'conv' in node: 15 | conv_name = 'conv' + str(layer_number) + '_x' + str(bottleneck_number) + '_' + node[4:] 16 | name2conv[conv_name] = nodes[node] 17 | elif 'downsample' == node: 18 | conv_name = 'conv' + str(layer_number) + '_x' + str(bottleneck_number) + '_shortcut' 19 | name2conv[conv_name] = nodes[node][0] 20 | 21 | return name2conv 22 | 23 | 24 | def get_name2inputSize(): 25 | name2inputSize = {'conv1': (1, 3, 224, 224), 26 | 'conv2_x1_1': (1, 64, 56, 56), 27 | 'conv2_x1_2': (1, 64, 56, 56), 28 | 'conv2_x2_2': (1, 64, 56, 56), 29 | 'conv2_x3_2': (1, 64, 56, 56), 30 | 'conv2_x1_3': (1, 64, 56, 56), 31 | 'conv2_x2_3': (1, 64, 56, 56), 32 | 'conv2_x3_3': (1, 64, 56, 56), 33 | 'conv2_x1_shortcut': (1, 64, 56, 56), 34 | 'conv2_x2_1': (1, 256, 56, 56), 35 | 'conv2_x3_1': (1, 256, 56, 56), 36 | 'conv3_x1_1': (1, 256, 56, 56), 37 | 'conv3_x1_2': (1, 128, 56, 56), 38 | 'conv3_x1_3': (1, 128, 28, 28), 39 | 'conv3_x2_3': (1, 128, 28, 28), 40 | 'conv3_x3_3': (1, 128, 28, 28), 41 | 'conv3_x4_3': (1, 128, 28, 28), 42 | 'conv3_x1_shortcut': (1, 256, 56, 56), 43 | 'conv3_x2_1': (1, 512, 28, 28), 44 | 'conv3_x3_1': (1, 512, 28, 28), 45 | 'conv3_x4_1': (1, 512, 28, 28), 46 | 'conv3_x2_2': (1, 128, 28, 28), 47 | 'conv3_x3_2': (1, 128, 28, 28), 48 | 'conv3_x4_2': (1, 128, 28, 28), 49 | 'conv4_x1_1': (1, 512, 28, 28), 50 | 'conv4_x1_2': (1, 256, 28, 28), 51 | 'conv4_x1_3': (1, 256, 14, 14), 52 | 'conv4_x2_3': (1, 256, 14, 14), 53 | 'conv4_x3_3': (1, 256, 14, 14), 54 | 'conv4_x4_3': (1, 256, 14, 14), 55 | 'conv4_x5_3': (1, 256, 14, 14), 56 | 'conv4_x6_3': (1, 256, 14, 14), 57 | 'conv4_x1_shortcut': (1, 512, 28, 28), 58 | 'conv4_x2_1': (1, 1024, 14, 14), 59 | 'conv4_x3_1': (1, 1024, 14, 14), 60 | 'conv4_x4_1': (1, 1024, 14, 14), 61 | 'conv4_x5_1': (1, 1024, 14, 14), 62 | 'conv4_x6_1': (1, 1024, 14, 14), 63 | 'conv4_x2_2': (1, 256, 14, 14), 64 | 'conv4_x3_2': (1, 256, 14, 14), 65 | 'conv4_x4_2': (1, 256, 14, 14), 66 | 'conv4_x5_2': (1, 256, 14, 14), 67 | 'conv4_x6_2': (1, 256, 14, 14), 68 | 'conv5_x1_1': (1, 1024, 14, 14), 69 | 'conv5_x1_2': (1, 512, 14, 14), 70 | 'conv5_x1_3': (1, 512, 7, 7), 71 | 'conv5_x2_3': (1, 512, 7, 7), 72 | 'conv5_x3_3': (1, 512, 7, 7), 73 | 'conv5_x1_shortcut': (1, 1024, 14, 14), 74 | 'conv5_x2_1': (1, 2048, 7, 7), 75 | 'conv5_x2_2': (1, 512, 7, 7), 76 | 'conv5_x3_1': (1, 2048, 7, 7), 77 | 'conv5_x3_2': (1, 512, 7, 7) 78 | } 79 | return name2inputSize 80 | 81 | 82 | def get_models_pytorch(model_name): 83 | model = getattr(torchvision.models, model_name)(pretrained=True).eval() 84 | 85 | return model 86 | 87 | 88 | def output_conv(conv, conv_name, input_size, onnx_path='./'): 89 | print(f'export {conv_name}') 90 | dummy_input = torch.randn(input_size) 91 | script = torch.jit.trace(conv, dummy_input) 92 | torch.onnx.export(script, dummy_input, onnx_path + conv_name + '.onnx', verbose=False, input_names=['data'], 93 | output_names=['output'], example_outputs=script(dummy_input)) 94 | 95 | 96 | if __name__ == '__main__': 97 | model = get_models_pytorch('resnet50') 98 | name2conv = get_name2conv(model) 99 | name2inputSize = get_name2inputSize() 100 | for name in name2conv.keys(): 101 | output_conv(name2conv[name], name, name2inputSize[name], onnx_path='./resnet50/') 102 | -------------------------------------------------------------------------------- /nGraph/README.md: -------------------------------------------------------------------------------- 1 | # nGraph 2 | 3 | ## Run end-to-end evaluation 4 | ```bash 5 | # run a single onnx model 6 | python ng.py /path/to/onnx/file --device ['x86', 'gpu'] --thread ['single', 'multiple'] 7 | 8 | # run onnx models in a dictory 9 | # Firstly, modify the onnx_path in the following .py file 10 | python run_ng_end2end.py --device ['x86', 'gpu'] --thread ['single', 'multiple'] 11 | ``` 12 | ## Run per-layer evaluation 13 | ```bash 14 | # generate the per-layer onnx micro-models 15 | cd micro-model 16 | python ['gen_conv_mobilenetv1_1.0.py', 'gen_conv_resnet50.py'] 17 | 18 | # nGraph can only generate timelines on CPU 19 | # modify the onnx_path in the following .py file 20 | python run_ng_perlayer.py --device ['x86', 'gpu'] --thread ['single', 'multiple'] 21 | ``` 22 | 23 | -------------------------------------------------------------------------------- /nGraph/ng.py: -------------------------------------------------------------------------------- 1 | if __name__ == "__main__": 2 | 3 | import argparse 4 | import os 5 | 6 | import onnx 7 | import ngraph as ng 8 | from ngraph_onnx.onnx_importer.importer import import_onnx_model 9 | import numpy as np 10 | 11 | import time 12 | from utils.utils import * 13 | 14 | parser = argparse.ArgumentParser(description = "run onnx ngraph model") 15 | parser.add_argument("onnx", help = "onnx model path") 16 | parser.add_argument("-d", "--device", default="x86", choices=["gpu","x86", "arm"]) 17 | parser.add_argument("-t", "--thread", default="multiple", choices=["multiple","single"]) 18 | arg = parser.parse_args() 19 | 20 | if arg.device=='x86': 21 | backend_name = 'CPU' 22 | if arg.thread=='single': 23 | os.environ["OMP_NUM_THREADS"] = '1' 24 | if arg.device=='gpu': 25 | backend_name = 'PlaidML' 26 | if arg.thread=='single': 27 | os.environ["OMP_NUM_THREADS"] = '1' 28 | os.environ["PLAIDML_DEVICE_IDS"] = 'opencl_nvidia_tesla_v100-pcie-32gb.1' 29 | #os.environ["PLAIDML_DEVICE_IDS"] = 'llvm_cpu.0' 30 | 31 | print(time.strftime("[localtime] %Y-%m-%d %H:%M:%S", time.localtime()) ) 32 | 33 | on, input_shape = get_onnx(arg.onnx) 34 | ng_function = import_onnx_model(on) 35 | print(ng_function) 36 | 37 | runtime = ng.runtime(backend_name) 38 | func = runtime.computation(ng_function) 39 | 40 | assert(len(input_shape) == 1) 41 | for value in input_shape.values(): 42 | shape = value 43 | 44 | print(shape) 45 | #shape=[1,3,224,224] 46 | picture = np.ones(shape, dtype=np.float32) 47 | 48 | nSteps=15 49 | avg_time=0 50 | for i in range(0, nSteps): 51 | time1 = time.time() 52 | ret = func(picture) 53 | time2 = time.time() 54 | if i < 5: 55 | continue 56 | avg_time+=float(time2-time1) 57 | info = '-- %d, iteration time(s) is %.4f' %(i, float(time2-time1)) 58 | print(info) 59 | avg_time = avg_time/10 60 | 61 | name = os.path.basename(arg.onnx) 62 | print("@@ %s, average time(s) is %.4f" % (name, avg_time)) 63 | print('FINISH') 64 | -------------------------------------------------------------------------------- /nGraph/run_ng_end2end.py: -------------------------------------------------------------------------------- 1 | import sys, argparse 2 | sys.path.append("../") 3 | from utils.utils import * 4 | 5 | if __name__ == "__main__": 6 | 7 | parser = argparse.ArgumentParser(description = "run ngraph test") 8 | parser.add_argument("-d", "--device", choices=["x86", "gpu"]) 9 | parser.add_argument("-t", "--thread", default="multiple", choices=["multiple","single"]) 10 | arg = parser.parse_args() 11 | 12 | onnx_path='~/onnx_2ng_0706/' 13 | 14 | if arg.device=='x86': 15 | device='x86' 16 | log_path='../logs/Appendix_ngraph_broadwell_5_15' 17 | if arg.thread=='single': 18 | log_path+='_single' 19 | if arg.device=='gpu': 20 | device='gpu' 21 | log_path='../logs/Appendix_ngraph_v100_5_15' 22 | if arg.thread=='single': 23 | log_path+='_single' 24 | 25 | mkdir(log_path) 26 | 27 | f = listdir(onnx_path) 28 | #f = ['resnet50-v1-7'] 29 | #f = ['resnet50_v2'] 30 | for i, name in enumerate(f): 31 | on = os.path.join(onnx_path, name.strip()+'.onnx') 32 | cmd = 'python ng.py %s -d %s -t %s' % (on, device, arg.thread) 33 | cmd += ' 2>&1 | tee ' + os.path.join(log_path,name.strip()) 34 | 35 | print('##NO %d' % i) 36 | print(cmd) 37 | os.system(cmd) 38 | 39 | 40 | -------------------------------------------------------------------------------- /nGraph/run_ng_perlayer_tracing.py: -------------------------------------------------------------------------------- 1 | import sys, argparse, time 2 | sys.path.append("../") 3 | from utils.utils import * 4 | 5 | if __name__ == "__main__": 6 | 7 | parser = argparse.ArgumentParser(description = "run ngraph test") 8 | parser.add_argument("-d", "--device", choices=["x86", "gpu"]) 9 | parser.add_argument("-t", "--thread", default="multiple", choices=["multiple","single"]) 10 | arg = parser.parse_args() 11 | 12 | onnx_path='../micro-models/mobilenet/' 13 | 14 | timeline_path='../logs/ng_timelines/mobilenet/' 15 | 16 | mkdir(timeline_path) 17 | 18 | if arg.device=='x86': 19 | device='x86' 20 | if arg.device=='gpu': 21 | device='gpu' 22 | 23 | f = listdir(onnx_path) 24 | #f = ['resnet50-v1-7'] 25 | #f = ['resnet50_v2'] 26 | os.system('rm Function_0.timeline.json') 27 | for i, name in enumerate(f): 28 | on = os.path.join(onnx_path, name.strip()+'.onnx') 29 | json = os.path.join(timeline_path, name.strip()+'.json') 30 | cmd = 'NGRAPH_CPU_TRACING=1 ' 31 | cmd += 'python ng.py %s -d %s -t %s' % (on, device, arg.thread) 32 | 33 | cmd2 = 'mv Function_0.timeline.json %s' % json 34 | 35 | print('##NO %d' % i) 36 | print(cmd) 37 | os.system(cmd) 38 | time.sleep(0.1) 39 | print(cmd2) 40 | os.system(cmd2) 41 | 42 | 43 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/buaa-hipo/dlcompiler-comparison/b0dca60d45e22cd8847a4ca5a4e38e24db7fc346/utils/__init__.py -------------------------------------------------------------------------------- /utils/gather_data.py: -------------------------------------------------------------------------------- 1 | # -*- coding: UTF-8 -*- 2 | import numpy as np 3 | import pandas as pd 4 | import argparse 5 | # 导入本py文件所在目录下的utils.py 6 | from utils import * 7 | 8 | ## Log输出格式 9 | ''' 10 | [localtime] 2020-07-16 11:51:46 11 | 12 | [1, 3, 224, 224] 13 | -- 5, iteration time(s) is 0.0149 14 | -- 6, iteration time(s) is 0.0148 15 | -- 7, iteration time(s) is 0.0149 16 | -- 8, iteration time(s) is 0.0151 17 | -- 9, iteration time(s) is 0.0150 18 | -- 10, iteration time(s) is 0.0149 19 | -- 11, iteration time(s) is 0.0149 20 | -- 12, iteration time(s) is 0.0148 21 | -- 13, iteration time(s) is 0.0145 22 | -- 14, iteration time(s) is 0.0118 23 | @@ resnet50_v2.onnx, average time(s) is 0.0146 24 | FINISH 25 | ''' 26 | 27 | def get_time(fileName=""): 28 | 29 | # 当跑15轮,抛弃前5轮做warmup时 30 | # 共有10个iteration time, 1个average time 31 | # 存到len=11的data list中 32 | data = [] 33 | file = open(fileName) 34 | for line in file: 35 | s='time(s) is ' 36 | fd = line.find(s) 37 | first = fd+len(s) 38 | if fd != -1: 39 | time = float(line[first:]) 40 | data.append(time) 41 | file.close() 42 | 43 | try: 44 | assert(len(data)==11) 45 | except: 46 | data = [-1 for i in range(0,11)] 47 | 48 | return data 49 | 50 | def get_time_dict(log_path): 51 | temp = {} 52 | for file in os.listdir(log_path): 53 | time = get_time(os.path.join(log_path, file)) 54 | temp.update({file:time}) 55 | return temp 56 | 57 | if __name__ == "__main__": 58 | 59 | parser = argparse.ArgumentParser(description = "collect and get csv") 60 | parser.add_argument("dir", help = "log path") 61 | parser.add_argument("-l","--list", help = "list path") 62 | arg = parser.parse_args() 63 | 64 | log = arg.dir 65 | csv = os.path.basename(log) 66 | data = get_time_dict(log) 67 | 68 | f = open(arg.list, 'r') 69 | s = {} 70 | a = list(f) 71 | for i in a: 72 | key = i.strip() 73 | if key in data: 74 | s.update({key:data[key]}) 75 | f.close() 76 | print(s) 77 | 78 | data = pd.DataFrame.from_dict(data=s, orient='index') 79 | 80 | label = ['it'+str(i) for i in range(0,10)] 81 | label.append('avg') 82 | data.columns=label 83 | 84 | 85 | data.to_csv('csv/' + csv +'.csv') 86 | -------------------------------------------------------------------------------- /utils/list: -------------------------------------------------------------------------------- 1 | resnet26 2 | resnet26d 3 | resnet18 4 | resnet18_v2 5 | resnet34 6 | resnet34_v2 7 | resnet50 8 | resnet50_v2 9 | resnet101 10 | resnet101_v2 11 | resnet152 12 | resnet152_v2 13 | mobilenet0.25 14 | mobilenet0.5 15 | mobilenet0.75 16 | mobilenet1.0 17 | mobilenetv2_0.25 18 | mobilenetv2_0.5 19 | mobilenetv2_0.75 20 | mobilenetv2_1.0 21 | squeezenet1_0 22 | squeezenet1_1 23 | densenet121 24 | densenet161 25 | densenet169 26 | densenet201 27 | alexnet 28 | vgg11 29 | vgg13 30 | vgg16 31 | vgg19 32 | vgg11_bn 33 | vgg13_bn 34 | vgg16_bn 35 | vgg19_bn 36 | wide_resnet50_2 37 | wide_resnet101_2 38 | shufflenet_v2_x0_5 39 | shufflenet_v2_x1_0 40 | mnasnet0_5 41 | mnasnet1_0 42 | resnext50_32x4d 43 | resnext101_32x8d 44 | mobilenet_v2 45 | googlenet 46 | inceptionv3 47 | -------------------------------------------------------------------------------- /utils/list2: -------------------------------------------------------------------------------- 1 | imagenet_resnet_v1_50_classification_3 2 | imagenet_resnet_v2_50_classification_3 3 | imagenet_resnet_v1_101_classification_3 4 | imagenet_resnet_v2_101_classification_3 5 | imagenet_resnet_v1_152_classification_3 6 | imagenet_resnet_v2_152_classification_3 7 | imagenet_mobilenet_v1_025_224_classification_3 8 | imagenet_mobilenet_v1_050_224_classification_3 9 | imagenet_mobilenet_v1_075_224_classification_3 10 | imagenet_mobilenet_v1_100_224_classification_3 11 | imagenet_mobilenet_v2_035_224_classification_3 12 | imagenet_mobilenet_v2_050_224_classification_3 13 | imagenet_mobilenet_v2_075_224_classification_3 14 | imagenet_mobilenet_v2_100_224_classification_3 15 | densenet121 16 | vgg16 17 | vgg19 18 | -------------------------------------------------------------------------------- /utils/utils.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | 3 | def get_onnx(path, batch=1): 4 | import onnx 5 | on = onnx.load(open(path, "rb")) 6 | name = on.graph.input[0].name 7 | input_shape = [i.dim_value for i in on.graph.input[0].type.tensor_type.shape.dim] 8 | input_shape[0] = batch 9 | return on, {name : input_shape} 10 | 11 | def convert_onnx_tf(onnx_path, pb_path): 12 | import onnx 13 | from onnx_tf.backend import prepare 14 | 15 | onnx_model = onnx.load(onnx_path) # load onnx model 16 | tf_rep = prepare(onnx_model, strict=False) # prepare tf representation 17 | 18 | tf_rep.export_graph(pb_path) # export the model 19 | 20 | def mkdir(path): 21 | path=path.strip() 22 | path=path.rstrip("/") 23 | isExists=os.path.exists(path) 24 | 25 | if not isExists: 26 | os.makedirs(path) 27 | 28 | print(path+' create folder') 29 | return True 30 | else: 31 | print(path+' already exists') 32 | return False 33 | 34 | # 列出文件夹下的所有后缀为suffix的文件(文件夹留空) 35 | def listdir(path, suffix='.onnx'): 36 | list_name = [] 37 | for file in os.listdir(path): 38 | #file_path = os.path.join(path, file) 39 | #if os.path.isdir(file_path): 40 | # listdir(file_path, list_name) 41 | if os.path.splitext(file)[1]==suffix: 42 | f = os.path.splitext(file)[0] 43 | f = os.path.basename(f) 44 | list_name.append(f) 45 | return list_name 46 | 47 | 48 | --------------------------------------------------------------------------------