├── .dockerignore ├── .github ├── autosa_flow.png ├── autosa_logo.png └── autosa_mm.png ├── .gitignore ├── .gitmodules ├── .libs └── lt-autosa ├── ChangeLog ├── Dockerfile ├── LICENSE ├── README.md ├── autosa_config ├── autosa_config.json ├── hw_info.json ├── hw_info_libs │ ├── hw_info.json.ku3 │ ├── hw_info.json.u200 │ └── hw_info.json.u250 ├── module_group.json ├── optimizer_settings.json └── optimizer_settings_libs │ ├── gemm3_fp32.json │ ├── gemm3_int16.json │ ├── gemm3_int16_32.json │ ├── gemm3_int8.json │ ├── gemm3_int8_64.json │ ├── gemm4_fp32.json │ ├── mm_small.json │ ├── mttkrp_fp32.json │ ├── ttm_fp32.json │ └── ttmc_fp32.json ├── autosa_scripts ├── autosa.py ├── codegen.py ├── hls_scripts │ ├── hls_script.tcl │ └── hls_script_synth.tcl ├── intel_opencl_scripts │ ├── Makefile │ ├── common │ │ ├── inc │ │ │ └── AOCLUtils │ │ │ │ ├── aocl_utils.h │ │ │ │ ├── opencl.h │ │ │ │ ├── options.h │ │ │ │ └── scoped_ptrs.h │ │ ├── readme.css │ │ └── src │ │ │ └── AOCLUtils │ │ │ ├── opencl.cpp │ │ │ └── options.cpp │ └── compile_design.sh ├── latency_model.py ├── module_group.py ├── odyssey │ ├── RL_utils.py │ ├── analyze.py │ ├── clean_up.sh │ ├── cst │ │ ├── hw_cst.json │ │ ├── single_test.json │ │ ├── u250.json │ │ └── vu9p.json │ ├── design.py │ ├── designs │ │ ├── kernel3.json │ │ └── register │ │ │ ├── kernel3.py │ │ │ ├── kernel3_0.py │ │ │ ├── kernel3_1.py │ │ │ └── kernel3_2.py │ ├── designs_lib │ │ ├── cnn │ │ │ ├── kernel0_0.json │ │ │ ├── kernel0_1.json │ │ │ ├── kernel0_2.json │ │ │ ├── kernel1_0.json │ │ │ ├── kernel1_1.json │ │ │ ├── kernel1_2.json │ │ │ ├── kernel2_0.json │ │ │ ├── kernel2_1.json │ │ │ ├── kernel2_2.json │ │ │ ├── kernel3_0.json │ │ │ ├── kernel3_1.json │ │ │ ├── kernel3_2.json │ │ │ ├── kernel4_0.json │ │ │ ├── kernel4_1.json │ │ │ ├── kernel4_2.json │ │ │ ├── kernel5_0.json │ │ │ ├── kernel5_1.json │ │ │ ├── kernel5_2.json │ │ │ ├── kernel6_0.json │ │ │ ├── kernel6_1.json │ │ │ ├── kernel6_2.json │ │ │ ├── kernel7_0.json │ │ │ ├── 
kernel7_1.json │ │ │ ├── kernel7_2.json │ │ │ ├── kernel8_0.json │ │ │ ├── kernel8_1.json │ │ │ ├── kernel8_2.json │ │ │ ├── kernel9_0.json │ │ │ ├── kernel9_1.json │ │ │ └── kernel9_2.json │ │ └── gemm │ │ │ ├── kernel0_0.json │ │ │ ├── kernel0_1.json │ │ │ ├── kernel0_2.json │ │ │ ├── kernel1_0.json │ │ │ ├── kernel1_1.json │ │ │ ├── kernel1_2.json │ │ │ ├── kernel2_0.json │ │ │ ├── kernel2_1.json │ │ │ ├── kernel2_2.json │ │ │ ├── kernel3_0.json │ │ │ ├── kernel3_1.json │ │ │ ├── kernel3_2.json │ │ │ ├── kernel4_0.json │ │ │ ├── kernel4_1.json │ │ │ ├── kernel4_2.json │ │ │ ├── kernel5_0.json │ │ │ ├── kernel5_1.json │ │ │ └── kernel5_2.json │ ├── explorer.py │ ├── main.py │ ├── requirements.txt │ ├── scripts │ │ ├── compute_network_info.py │ │ ├── grid_search_xgb_params.py │ │ ├── img2col.py │ │ ├── run_arch1.sh │ │ ├── run_arch1_free.sh │ │ ├── run_arch1_ml_cmp.sh │ │ ├── run_arch2.sh │ │ ├── run_arch3.sh │ │ ├── run_arch4.sh │ │ ├── run_dataflow_cmp_cnn.sh │ │ ├── run_dataflow_cmp_mm.sh │ │ ├── run_dataflow_cmp_mm_energy.sh │ │ ├── run_img2col_single.sh │ │ ├── run_method_cmp.sh │ │ ├── run_metric_cmp.sh │ │ ├── run_mutation_cmp.sh │ │ └── split_cnn_layers.py │ ├── search_task.py │ ├── solver.py │ ├── tuners.py │ ├── unit_test.py │ ├── utils.py │ └── workload │ │ ├── conv.json │ │ ├── mm.json │ │ ├── mm64.json │ │ ├── mobilenetv2.json │ │ ├── mobilenetv2_1.json │ │ ├── mobilenetv2_10.json │ │ ├── mobilenetv2_11.json │ │ ├── mobilenetv2_12.json │ │ ├── mobilenetv2_13.json │ │ ├── mobilenetv2_14.json │ │ ├── mobilenetv2_15.json │ │ ├── mobilenetv2_16.json │ │ ├── mobilenetv2_17.json │ │ ├── mobilenetv2_18.json │ │ ├── mobilenetv2_19.json │ │ ├── mobilenetv2_2.json │ │ ├── mobilenetv2_20.json │ │ ├── mobilenetv2_21.json │ │ ├── mobilenetv2_22.json │ │ ├── mobilenetv2_23.json │ │ ├── mobilenetv2_24.json │ │ ├── mobilenetv2_25.json │ │ ├── mobilenetv2_26.json │ │ ├── mobilenetv2_27.json │ │ ├── mobilenetv2_28.json │ │ ├── mobilenetv2_29.json │ │ ├── 
mobilenetv2_3.json │ │ ├── mobilenetv2_30.json │ │ ├── mobilenetv2_31.json │ │ ├── mobilenetv2_32.json │ │ ├── mobilenetv2_33.json │ │ ├── mobilenetv2_34.json │ │ ├── mobilenetv2_35.json │ │ ├── mobilenetv2_36.json │ │ ├── mobilenetv2_4.json │ │ ├── mobilenetv2_47.json │ │ ├── mobilenetv2_5.json │ │ ├── mobilenetv2_6.json │ │ ├── mobilenetv2_7.json │ │ ├── mobilenetv2_8.json │ │ ├── mobilenetv2_9.json │ │ ├── mobilenetv2_complete.json │ │ ├── mobilenetv2_conv3_1_0.json │ │ ├── mobilenetv2_first.json │ │ ├── mobilenetv2_first1.json │ │ ├── mobilenetv2_first2.json │ │ ├── mobilenetv2_half.json │ │ ├── mobilenetv2_img2col.json │ │ ├── mobilenetv2_no_first.json │ │ ├── mobilenetv2_original.json │ │ ├── mobilenetv2_test.json │ │ ├── mobilenetv2_test_single.json │ │ ├── resnet152.json │ │ ├── resnet50.json │ │ ├── resnet50_1.json │ │ ├── resnet50_10.json │ │ ├── resnet50_11.json │ │ ├── resnet50_12.json │ │ ├── resnet50_13.json │ │ ├── resnet50_14.json │ │ ├── resnet50_15.json │ │ ├── resnet50_16.json │ │ ├── resnet50_17.json │ │ ├── resnet50_18.json │ │ ├── resnet50_19.json │ │ ├── resnet50_2.json │ │ ├── resnet50_20.json │ │ ├── resnet50_21.json │ │ ├── resnet50_22.json │ │ ├── resnet50_23.json │ │ ├── resnet50_24.json │ │ ├── resnet50_25.json │ │ ├── resnet50_26.json │ │ ├── resnet50_27.json │ │ ├── resnet50_28.json │ │ ├── resnet50_29.json │ │ ├── resnet50_3.json │ │ ├── resnet50_30.json │ │ ├── resnet50_31.json │ │ ├── resnet50_32.json │ │ ├── resnet50_33.json │ │ ├── resnet50_34.json │ │ ├── resnet50_35.json │ │ ├── resnet50_36.json │ │ ├── resnet50_37.json │ │ ├── resnet50_38.json │ │ ├── resnet50_39.json │ │ ├── resnet50_4.json │ │ ├── resnet50_40.json │ │ ├── resnet50_41.json │ │ ├── resnet50_42.json │ │ ├── resnet50_43.json │ │ ├── resnet50_44.json │ │ ├── resnet50_45.json │ │ ├── resnet50_46.json │ │ ├── resnet50_47.json │ │ ├── resnet50_48.json │ │ ├── resnet50_49.json │ │ ├── resnet50_5.json │ │ ├── resnet50_6.json │ │ ├── resnet50_7.json │ │ ├── 
resnet50_8.json │ │ ├── resnet50_9.json │ │ ├── resnet50_batch4.json │ │ ├── resnet50_conv5_1.json │ │ ├── resnet50_img2col.json │ │ ├── resnet50_last.json │ │ ├── resnet50_last2.json │ │ ├── resnet50_original.json │ │ ├── vgg16-2-img2col.json │ │ ├── vgg16-3.json │ │ ├── vgg16-4.json │ │ ├── vgg16.json │ │ ├── vgg16_1.json │ │ ├── vgg16_10.json │ │ ├── vgg16_11.json │ │ ├── vgg16_12.json │ │ ├── vgg16_13.json │ │ ├── vgg16_2.json │ │ ├── vgg16_3.json │ │ ├── vgg16_4.json │ │ ├── vgg16_5.json │ │ ├── vgg16_6.json │ │ ├── vgg16_7.json │ │ ├── vgg16_8.json │ │ ├── vgg16_9.json │ │ └── vgg16_img2col.json ├── optimizer.py ├── optimizer_prune.py ├── pe_group.py ├── ppcg_changes │ ├── isl │ │ ├── ast_type.h │ │ ├── files.txt │ │ ├── isl_patch.sh │ │ ├── isl_schedule.c │ │ ├── isl_schedule_band.c │ │ ├── isl_schedule_band.h │ │ ├── isl_schedule_node.c │ │ ├── isl_schedule_tree.c │ │ ├── isl_schedule_tree.h │ │ ├── schedule.h │ │ ├── schedule_node.h │ │ └── vec.h │ └── ppcg │ │ └── files.txt ├── resource_model.py ├── tapa_scripts │ └── CMakeLists.txt ├── tuner │ ├── constraint.py │ ├── cst │ │ └── hw_cst.json │ ├── design.py │ ├── main.py │ ├── search_task.py │ ├── task │ │ ├── cnn.json │ │ ├── mm.json │ │ └── mm2.json │ ├── tuner.py │ ├── unit_test.py │ └── utils.py ├── tuning_scripts │ ├── cnn.sh │ ├── gemm.sh │ └── model_validate.sh └── vitis_scripts │ ├── Makefile │ └── connectivity.cfg ├── autosa_tests ├── cnn │ ├── Makefile │ ├── README.md │ ├── connectivity.cfg │ ├── hls_script.tcl │ ├── kernel.c │ ├── kernel.h │ ├── param_names.json │ └── simd_info.json ├── dnn_ops │ ├── dc_simd_info.json │ ├── fc_simd_info.json │ ├── hls_script.tcl │ ├── kernel.c │ ├── kernel.h │ └── pc_simd_info.json ├── large │ ├── cnn │ │ ├── Makefile │ │ ├── README.md │ │ ├── connectivity.cfg │ │ ├── hls_script.tcl │ │ ├── kernel.c │ │ ├── kernel.h │ │ ├── simd_info.json │ │ ├── step1-run-hls.tcl │ │ ├── step2-autobridge.py │ │ ├── step3-pack-xo.tcl │ │ └── step4-run-vitis.sh │ ├── mm │ │ ├── 
Makefile │ │ ├── README.md │ │ ├── connectivity.cfg │ │ ├── hls_script.tcl │ │ ├── kernel.c │ │ ├── kernel.h │ │ ├── simd_info.json │ │ ├── step1-run-hls.tcl │ │ ├── step2-autobridge.py │ │ ├── step3-pack-xo.tcl │ │ └── step4-run-vitis.sh │ ├── mm_block_sparse │ │ ├── Makefile │ │ ├── README.md │ │ ├── connectivity.cfg │ │ ├── hls_script.tcl │ │ ├── kernel.c │ │ ├── kernel.h │ │ └── simd_info.json │ ├── mm_int16 │ │ ├── Makefile │ │ ├── README.md │ │ ├── code.c │ │ ├── connectivity.cfg │ │ ├── hls_script.tcl │ │ ├── kernel.c │ │ ├── kernel.h │ │ ├── simd_info.json │ │ ├── step1-run-hls.tcl │ │ ├── step2-autobridge.py │ │ ├── step3-pack-xo.tcl │ │ ├── step4-run-vitis.sh │ │ └── unroll.py │ ├── mm_int8 │ │ ├── Makefile │ │ ├── README.md │ │ ├── code.c │ │ ├── connectivity.cfg │ │ ├── hls_script.tcl │ │ ├── kernel.c │ │ ├── kernel.h │ │ ├── kernel_kernel_opt.cpp │ │ ├── simd_info.json │ │ ├── step1-run-hls.tcl │ │ ├── step2-autobridge.py │ │ ├── step3-pack-xo.tcl │ │ ├── step4-run-vitis.sh │ │ └── unroll.py │ ├── mm_intel │ │ ├── Makefile │ │ ├── README.md │ │ ├── kernel.c │ │ ├── kernel.h │ │ └── simd_info.json │ ├── mttkrp │ │ ├── Makefile │ │ ├── README.md │ │ ├── connectivity.cfg │ │ ├── kernel.c │ │ ├── kernel.h │ │ ├── simd_info.json │ │ ├── step1-run-hls.tcl │ │ ├── step2-autobridge.py │ │ ├── step3-pack-xo.tcl │ │ └── step4-run-vitis.sh │ ├── ttm │ │ ├── Makefile │ │ ├── README.md │ │ ├── connectivity.cfg │ │ ├── kernel.c │ │ ├── kernel.h │ │ └── simd_info.json │ └── ttmc │ │ ├── Makefile │ │ ├── README.md │ │ ├── connectivity.cfg │ │ ├── kernel.c │ │ ├── kernel.h │ │ ├── simd_info.json │ │ ├── step1-run-hls.tcl │ │ ├── step2-autobridge.py │ │ ├── step3-pack-xo.tcl │ │ └── step4-run-vitis.sh ├── lu │ ├── Makefile │ ├── README.md │ ├── add_batch.py │ ├── hls_script.tcl │ ├── kernel.c │ ├── kernel.h │ └── simd_info.json ├── mm │ ├── Makefile │ ├── README.md │ ├── connectivity.cfg │ ├── hls_script.tcl │ ├── kernel.c │ ├── kernel.h │ ├── param_names.json │ └── 
simd_info.json ├── mm_block_sparse │ ├── Makefile │ ├── README.md │ ├── connectivity.cfg │ ├── hls_script.tcl │ ├── kernel.c │ ├── kernel.h │ └── simd_info.json ├── mm_catapult │ ├── README.md │ ├── directives.tcl │ ├── kernel.c │ ├── kernel.h │ ├── kernel_kernel_hw.h │ └── simd_info.json ├── mm_getting_started │ ├── Makefile │ ├── connectivity.cfg │ ├── hls_script.tcl │ ├── kernel.c │ ├── kernel.h │ └── simd_info.json ├── mm_hbm │ ├── Makefile │ ├── README.md │ ├── connectivity.cfg │ ├── hls_script.tcl │ ├── kernel.c │ ├── kernel.h │ └── simd_info.json ├── mm_hcl │ ├── README.md │ ├── hls_script.tcl │ ├── kernel.c │ ├── kernel.h │ └── simd_info.json ├── mm_hcl_intel │ ├── Makefile │ ├── README.md │ ├── kernel.c │ ├── kernel.h │ ├── kernel2.c │ └── simd_info.json ├── mm_int16 │ ├── Makefile │ ├── README.md │ ├── connectivity.cfg │ ├── hls_script.tcl │ ├── kernel.c │ ├── kernel.h │ └── simd_info.json └── mm_intel │ ├── Makefile │ ├── README.md │ ├── kernel.c │ ├── kernel.h │ └── simd_info.json ├── clean.sh ├── docs ├── Makefile ├── conf.py ├── docker_image.rst ├── examples │ ├── cnn.rst │ ├── cnn_large.rst │ ├── dnn_ops.rst │ ├── images │ │ ├── array_hbm.png │ │ ├── cnn0_array.png │ │ ├── cnn10_array.png │ │ ├── cnn1_array.png │ │ ├── cnn2_2_array.png │ │ ├── cnn2_array.png │ │ ├── cnn3_array.png │ │ ├── cnn4_array.png │ │ ├── cnn5_array.png │ │ ├── cnn6_array.png │ │ ├── cnn7_array.png │ │ ├── cnn8_array.png │ │ ├── cnn9_array.png │ │ ├── cnn_w_reuse.png │ │ ├── dconv.png │ │ ├── fc.png │ │ ├── gemm0_array.png │ │ ├── gemm1_array.png │ │ ├── gemm2_array.png │ │ ├── gemm3_array.png │ │ ├── gemm4_array.png │ │ ├── gemm5_array.png │ │ ├── mm_dse.png │ │ ├── pconv.png │ │ └── resource_model.png │ ├── index.rst │ ├── lu.rst │ ├── mm.rst │ ├── mm_block_sparse.rst │ ├── mm_hbm.rst │ ├── mm_int16.rst │ ├── mm_int16_large.rst │ ├── mm_int8_large.rst │ ├── mm_large.rst │ ├── mttkrp_large.rst │ └── ttmc_large.rst ├── index.rst ├── install_from_source.rst ├── installation.rst 
├── make.bat └── tutorials │ ├── auto_bridge.rst │ ├── auto_tuning_exhaustive.rst │ ├── auto_tuning_genetic.rst │ ├── catapult_backend.rst │ ├── getting_started.rst │ ├── hcl_integrate.rst │ ├── host_serialize.rst │ ├── images │ ├── 2d_array_mm.png │ ├── 2d_array_mm_schedule.png │ ├── ab_map.png │ ├── array_serialize.png │ ├── auto_tuner_flow.png │ ├── autobridge.jpg │ ├── catapult_0.png │ ├── catapult_1.png │ ├── catapult_2.png │ ├── catapult_3.png │ ├── catapult_4.png │ ├── catapult_5.png │ ├── catapult_6.png │ ├── catapult_7.png │ ├── catapult_sim.png │ ├── catapult_sim2.png │ ├── dense_array.png │ ├── dram_bw.png │ ├── flow.png │ ├── io_module_arch.png │ ├── mm_array_L1.png │ ├── mm_array_L2.png │ ├── mm_array_b.png │ ├── mm_array_opt.png │ ├── mm_array_unopt.png │ ├── mm_st_code.png │ ├── mm_tree.png │ ├── mm_tree_array_part.png │ ├── mm_tree_isolate.png │ ├── mm_tree_latency.png │ ├── mm_tree_param.png │ ├── mm_tree_simd.png │ ├── odyssey_flow.png │ ├── serialize_example.png │ ├── serialize_example2.png │ ├── sparse_array.png │ ├── sparse_example1.png │ ├── sparse_example2.png │ └── sparse_mm.png │ ├── index.rst │ ├── intel_backend.rst │ ├── matrix_multiplication.rst │ ├── optimize_array.rst │ ├── structural_sparsity.rst │ └── theory_background.rst ├── install.sh ├── ltmain.sh ├── requirements.txt └── src ├── ChangeLog ├── LICENSE ├── Makefile.am ├── README ├── autogen.sh ├── autosa_catapult_hls_c.cpp ├── autosa_catapult_hls_c.h ├── autosa_codegen.cpp ├── autosa_codegen.h ├── autosa_comm.cpp ├── autosa_comm.h ├── autosa_common.cpp ├── autosa_common.h ├── autosa_cpu.cpp ├── autosa_cpu.h ├── autosa_intel_opencl.cpp ├── autosa_intel_opencl.h ├── autosa_print.cpp ├── autosa_print.h ├── autosa_schedule_tree.cpp ├── autosa_schedule_tree.h ├── autosa_t2s.cpp ├── autosa_tapa_cpp.cpp ├── autosa_tapa_cpp.h ├── autosa_trans.cpp ├── autosa_trans.h ├── autosa_tuning.cpp ├── autosa_tuning.h ├── autosa_utils.cpp ├── autosa_utils.h ├── autosa_xilinx_hls_c.cpp ├── 
autosa_xilinx_hls_c.h ├── configure.ac ├── cpu.c ├── cpu.h ├── examples └── chemv.c ├── get_submodules.sh ├── grouping.c ├── grouping.h ├── hybrid.c ├── hybrid.h ├── json.hpp ├── m4 ├── ax_check_opencl.m4 ├── ax_check_openmp.m4 ├── ax_detect_git_head.m4 └── ax_submodule.m4 ├── main.cpp ├── ocl_utilities.c ├── ocl_utilities.h ├── opencl_test.sh.in ├── polybench_test.sh.in ├── ppcg.c ├── ppcg.h ├── ppcg_files ├── cuda.c ├── cuda.h ├── cuda_common.c ├── cuda_common.h ├── gpu.c ├── gpu.h ├── gpu_array_tile.c ├── gpu_array_tile.h ├── gpu_group.c ├── gpu_group.h ├── gpu_hybrid.c ├── gpu_hybrid.h ├── gpu_print.c ├── gpu_print.h ├── gpu_tree.c ├── gpu_tree.h ├── opencl.c └── opencl.h ├── ppcg_options.c ├── ppcg_options.h ├── print.c ├── print.h ├── schedule.c ├── schedule.h ├── tests ├── call.c ├── call2.c ├── call2_opencl_functions.cl ├── call3.c ├── call3_opencl_functions.cl ├── call4.c ├── call5.c ├── call_opencl_functions.cl ├── dead.c ├── iterator.c ├── live_out.c ├── local.c ├── loop.c ├── not_accessed.c ├── not_accessed_opencl_functions.cl ├── scalar.c ├── shared_sink.c ├── struct.c ├── struct2.c ├── struct3.c ├── struct4.c └── struct5.c ├── util.c ├── util.h └── version.c /.dockerignore: -------------------------------------------------------------------------------- 1 | src/.deps/ 2 | src/.libs/ 3 | src/Makefile 4 | src/Makefile.in 5 | src/aclocal.m4 6 | src/autom4te.cache/ 7 | src/compile 8 | src/config.guess 9 | src/config.log 10 | src/config.status 11 | src/config.sub 12 | src/configure 13 | src/depcomp 14 | src/gitversion.h 15 | src/install-sh 16 | src/libtool 17 | src/ltmain.sh 18 | src/m4/libtool.m4 19 | src/m4/ltoptions.m4 20 | src/m4/ltsugar.m4 21 | src/m4/ltversion.m4 22 | src/m4/lt~obsolete.m4 23 | src/missing 24 | src/ppcg 25 | src/test-driver 26 | src/build 27 | src/opencl_test.sh 28 | src/polybench_test.sh 29 | src/.nfs* 30 | src/*.o 31 | src/.vscode 32 | src/autosa 33 | src/tags 34 | 35 | autosa 36 | autosa.tmp 37 | .nfs* 38 | 
-------------------------------------------------------------------------------- /.github/autosa_flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCLA-VAST/AutoSA/b61a1b4132d631600696feba59eb606acb34d304/.github/autosa_flow.png -------------------------------------------------------------------------------- /.github/autosa_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCLA-VAST/AutoSA/b61a1b4132d631600696feba59eb606acb34d304/.github/autosa_logo.png -------------------------------------------------------------------------------- /.github/autosa_mm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCLA-VAST/AutoSA/b61a1b4132d631600696feba59eb606acb34d304/.github/autosa_mm.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | src/.deps/ 2 | src/.libs/ 3 | src/Makefile 4 | src/Makefile.in 5 | src/aclocal.m4 6 | src/autom4te.cache/ 7 | src/compile 8 | src/config.guess 9 | src/config.log 10 | src/config.status 11 | src/config.sub 12 | src/configure 13 | src/depcomp 14 | src/gitversion.h 15 | src/install-sh 16 | src/libtool 17 | src/ltmain.sh 18 | src/m4/libtool.m4 19 | src/m4/ltoptions.m4 20 | src/m4/ltsugar.m4 21 | src/m4/ltversion.m4 22 | src/m4/lt~obsolete.m4 23 | src/missing 24 | src/ppcg 25 | src/test-driver 26 | src/build 27 | src/opencl_test.sh 28 | src/polybench_test.sh 29 | src/.nfs* 30 | src/*.o 31 | src/.vscode 32 | src/autosa 33 | src/tags 34 | 35 | autosa 36 | autosa.tmp 37 | .nfs* 38 | .vscode 39 | .libs 40 | autosa_scripts/__pycache__ 41 | docs/_build 42 | autosa_scripts/tuner/__pycache__ 43 | autosa_scripts/tuner/outdir 44 | 45 | autosa_scripts/odyssey/db/* 46 | autosa_scripts/odyssey/outdir/* 47 | 
autosa_scripts/odyssey/__pycache__ 48 | autosa_scripts/odyssey/tmp/* 49 | autosa_scripts/odyssey/solver/* 50 | autosa_scripts/odyssey/designs/register 51 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "src/isl"] 2 | path = src/isl 3 | url = git://repo.or.cz/isl.git 4 | [submodule "src/pet"] 5 | path = src/pet 6 | url = git://repo.or.cz/pet.git 7 | [submodule "src/cJSON"] 8 | path = src/cJSON 9 | url = https://github.com/DaveGamble/cJSON.git 10 | [submodule "src/barvinok"] 11 | path = src/barvinok 12 | url = https://repo.or.cz/barvinok.git 13 | -------------------------------------------------------------------------------- /.libs/lt-autosa: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCLA-VAST/AutoSA/b61a1b4132d631600696feba59eb606acb34d304/.libs/lt-autosa -------------------------------------------------------------------------------- /ChangeLog: -------------------------------------------------------------------------------- 1 | version: 0.01 2 | 2020-5-10 Jie Wang 3 | changes: 4 | - initial release of AutoSA 5 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Get the base Ubuntu image from Docker Hub 2 | FROM ubuntu:latest 3 | LABEL maintainer="jiewang@cs.ucla.edu" 4 | ENV DEBIAN_FRONTEND=noninteractive 5 | 6 | # Update apps on the base image 7 | RUN apt-get -y update && apt-get install -y 8 | 9 | # Install the prerequisites 10 | RUN apt-get -y install apt-utils automake autoconf libtool libtool-bin pkg-config libgmp3-dev libyaml-dev python3.6 python3-pip git wget cmake vim gdb 11 | RUN apt-get -y install libllvm-9-ocaml-dev libllvm9 llvm-9 llvm-9-dev llvm-9-doc llvm-9-examples llvm-9-runtime clang-9 clang-tools-9 
clang-9-doc libclang-common-9-dev libclang-9-dev libclang1-9 clang-format-9 python-clang-9 clangd-9 12 | RUN ln -s /usr/bin/llvm-config-9 /usr/bin/llvm-config 13 | 14 | # Install NTL for barvinok 15 | RUN mkdir /ntl 16 | WORKDIR /ntl 17 | RUN wget https://www.shoup.net/ntl/ntl-11.4.3.tar.gz 18 | RUN gunzip ntl-11.4.3.tar.gz 19 | RUN tar xf ntl-11.4.3.tar 20 | WORKDIR /ntl/ntl-11.4.3/src 21 | RUN ./configure NTL_GMP_LIP=on 22 | RUN make -j4 23 | RUN make install 24 | 25 | # Copy the current folder to the Docker image 26 | COPY . /usr/src/docker_autosa 27 | 28 | # Specify the working directory 29 | WORKDIR /usr/src/docker_autosa 30 | 31 | # Install AutoSA 32 | RUN ./install.sh 33 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License (MIT) 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in 5 | the Software without restriction, including without limitation the rights to 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 7 | of the Software, and to permit persons to whom the Software is furnished to do 8 | so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. 
20 | -------------------------------------------------------------------------------- /autosa_config/autosa_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "space_time": { 3 | "mode": "manual" 4 | }, 5 | "array_part": { 6 | "enable": 1, 7 | "mode": "manual" 8 | }, 9 | "array_part_L2": { 10 | "enable": 1, 11 | "mode": "manual" 12 | }, 13 | "latency": { 14 | "enable": 1, 15 | "mode": "manual" 16 | }, 17 | "simd": { 18 | "enable": 1, 19 | "mode": "manual" 20 | }, 21 | "hbm": { 22 | "mode": "manual" 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /autosa_config/hw_info.json: -------------------------------------------------------------------------------- 1 | { 2 | "BRAM18K": 5376, 3 | "DSP": 12288, 4 | "FF": 3456000, 5 | "LUT": 1728000, 6 | "URAM": 1280 7 | } 8 | -------------------------------------------------------------------------------- /autosa_config/hw_info_libs/hw_info.json.ku3: -------------------------------------------------------------------------------- 1 | { 2 | "BRAM": 2160, 3 | "DSP": 2760, 4 | "FF": 663360, 5 | "LUT": 331680, 6 | "URAM": 0 7 | } 8 | -------------------------------------------------------------------------------- /autosa_config/hw_info_libs/hw_info.json.u200: -------------------------------------------------------------------------------- 1 | { 2 | "BRAM": 4320, 3 | "DSP": 6840, 4 | "FF": 2364480, 5 | "LUT": 1182240, 6 | "URAM": 960 7 | } 8 | -------------------------------------------------------------------------------- /autosa_config/hw_info_libs/hw_info.json.u250: -------------------------------------------------------------------------------- 1 | { 2 | "BRAM18K": 5376, 3 | "DSP": 12288, 4 | "FF": 3456000, 5 | "LUT": 1728000, 6 | "URAM": 1280 7 | } 8 | -------------------------------------------------------------------------------- /autosa_config/module_group.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "x": 8, 3 | "y": 1 4 | } 5 | -------------------------------------------------------------------------------- /autosa_scripts/hls_scripts/hls_script.tcl: -------------------------------------------------------------------------------- 1 | ############################################################ 2 | ## This file is generated automatically by Vivado HLS. 3 | ## Please DO NOT edit it. 4 | ## Copyright (C) 1986-2019 Xilinx, Inc. All Rights Reserved. 5 | ############################################################ 6 | open_project hls_prj 7 | set_top kernel0 8 | add_files src/kernel_kernel.h 9 | add_files src/kernel_kernel.cpp 10 | add_files -tb src/kernel_host.cpp 11 | open_solution "solution1" 12 | set_part {xcu200-fsgd2104-2-e} 13 | create_clock -period 5 -name default 14 | config_compile -name_max_length 50 15 | #source "./prj/solution1/directives.tcl" 16 | csim_design 17 | #csynth_design 18 | #cosim_design 19 | #cosim_design -trace_level all 20 | #cosim_design -setup -trace_level all 21 | #export_design -format ip_catalog 22 | exit 23 | -------------------------------------------------------------------------------- /autosa_scripts/hls_scripts/hls_script_synth.tcl: -------------------------------------------------------------------------------- 1 | ############################################################ 2 | ## This file is generated automatically by Vivado HLS. 3 | ## Please DO NOT edit it. 4 | ## Copyright (C) 1986-2019 Xilinx, Inc. All Rights Reserved. 
5 | ############################################################ 6 | open_project hls_prj 7 | set_top kernel0 8 | add_files src/kernel_kernel.h 9 | add_files src/kernel_kernel.cpp 10 | add_files -tb src/kernel_host.cpp 11 | open_solution "solution1" 12 | set_part {xcu200-fsgd2104-2-e} 13 | create_clock -period 5 -name default 14 | config_compile -name_max_length 50 15 | #source "./prj/solution1/directives.tcl" 16 | #csim_design 17 | csynth_design 18 | #cosim_design 19 | #cosim_design -trace_level all 20 | #cosim_design -setup -trace_level all 21 | #export_design -format ip_catalog 22 | exit 23 | -------------------------------------------------------------------------------- /autosa_scripts/intel_opencl_scripts/compile_design.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # - A script to compile and run the host program and bitstream on Intel OpenCL platform 4 | 5 | if [ $# != 1 ]; 6 | then 7 | echo "Usage: compile_design.sh [hw|emu|sim]" 8 | exit 9 | fi 10 | mode=$1 11 | echo $mode 12 | 13 | echo "Compiling the bitstream..." 14 | if [ "$mode" == "hw" ] 15 | then 16 | # Compile the bitstream 17 | # Change the board to your target board if necessary 18 | aoc src/kernel_kernel.cl -o bin/kernel_kernel.aocx -fp-relaxed -board=s10mx_hbm_es 19 | elif [ "$mode" == "emu" ] 20 | then 21 | # Compiling for emulator 22 | aoc -march=emulator src/kernel_kernel.cl -o bin/kernel_kernel.aocx -fp-relaxed -DEMULATE -legacy-emulator 23 | elif [ "$mode" == "sim" ] 24 | then 25 | # Compiling for simulator 26 | aoc -march=simulator src/kernel_kernel.cl -o bin/kernel_kernel.aocx -fp-relaxed 27 | else 28 | echo "Error: Unsupported mode" 29 | exit 30 | fi 31 | 32 | #echo "Compiling the host program..." 33 | ## Compile the host program 34 | #make 35 | 36 | #echo "Running the program..." 
37 | #case "$mode" in 38 | # "hw") 39 | # # Run the host program 40 | # bin/host 41 | # ;; 42 | # "emu") 43 | # # Run the host program with the emulator 44 | # bin/host -emulator 45 | # ;; 46 | # "sim") 47 | # # Run the host program with the simulator 48 | # CL_CONTEXT_MPSIM_DEVICE_INTELFPGA=1 bin/host 49 | # ;; 50 | #esac 51 | -------------------------------------------------------------------------------- /autosa_scripts/odyssey/clean_up.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | rm -rf db/* 4 | rm -rf opentuner.db 5 | rm -rf outdir/* 6 | rm -rf __pycache__ 7 | rm -rf tmp/* 8 | -------------------------------------------------------------------------------- /autosa_scripts/odyssey/cst/hw_cst.json: -------------------------------------------------------------------------------- 1 | { 2 | "BRAM18K": { 3 | "total": 5376, 4 | "ratio": 0.7 5 | }, 6 | "DSP": { 7 | "total": 12288, 8 | "ratio": 0.7 9 | }, 10 | "FF": { 11 | "total": 3456000, 12 | "ratio": 0.7 13 | }, 14 | "LUT": { 15 | "total": 1728000, 16 | "ratio": 0.7 17 | }, 18 | "URAM": { 19 | "total": 1280, 20 | "ratio": 0.7 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /autosa_scripts/odyssey/cst/single_test.json: -------------------------------------------------------------------------------- 1 | { 2 | "BRAM18K": { 3 | "total": 300, 4 | "ratio": 1.0 5 | }, 6 | "DSP": { 7 | "total": 800, 8 | "ratio": 1.0 9 | }, 10 | "FF": { 11 | "total": 3456000, 12 | "ratio": 0.7 13 | }, 14 | "LUT": { 15 | "total": 1728000, 16 | "ratio": 0.7 17 | }, 18 | "URAM": { 19 | "total": 1280, 20 | "ratio": 0.7 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /autosa_scripts/odyssey/cst/u250.json: -------------------------------------------------------------------------------- 1 | { 2 | "BRAM18K": { 3 | "total": 5376, 4 | "ratio": 0.7 5 | }, 6 | "DSP": { 7 | "total": 
12288, 8 | "ratio": 0.7 9 | }, 10 | "FF": { 11 | "total": 3456000, 12 | "ratio": 0.7 13 | }, 14 | "LUT": { 15 | "total": 1728000, 16 | "ratio": 0.7 17 | }, 18 | "URAM": { 19 | "total": 1280, 20 | "ratio": 0.7 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /autosa_scripts/odyssey/cst/vu9p.json: -------------------------------------------------------------------------------- 1 | { 2 | "BRAM18K": { 3 | "total": 4318, 4 | "ratio": 0.7 5 | }, 6 | "DSP": { 7 | "total": 6840, 8 | "ratio": 0.7 9 | }, 10 | "URAM": { 11 | "total": 960, 12 | "ratio": 0.7 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /autosa_scripts/odyssey/requirements.txt: -------------------------------------------------------------------------------- 1 | bayesian-optimization==1.1.0 2 | certifi==2021.10.8 3 | dill @ file:///home/conda/feedstock_root/build_artifacts/dill_1623610058511/work 4 | joblib @ file:///tmp/build/80754af9/joblib_1635411271373/work 5 | mkl-fft==1.3.1 6 | mkl-random @ file:///tmp/build/80754af9/mkl_random_1626186066731/work 7 | mkl-service==2.4.0 8 | multiprocess @ file:///home/conda/feedstock_root/build_artifacts/multiprocess_1623774446079/work 9 | numpy @ file:///tmp/build/80754af9/numpy_and_numpy_base_1634095651905/work 10 | pathos @ file:///home/conda/feedstock_root/build_artifacts/pathos_1623937754918/work 11 | pox @ file:///home/conda/feedstock_root/build_artifacts/pox_1623773830989/work 12 | ppft @ file:///home/conda/feedstock_root/build_artifacts/ppft_1623774454681/work 13 | scikit-learn @ file:///tmp/build/80754af9/scikit-learn_1635187048948/work 14 | scipy @ file:///tmp/build/80754af9/scipy_1630606796912/work 15 | six @ file:///tmp/build/80754af9/six_1623709665295/work 16 | threadpoolctl @ file:///Users/ktietz/demo/mc3/conda-bld/threadpoolctl_1629802263681/work 17 | xgboost==1.3.3 18 | -------------------------------------------------------------------------------- 
/autosa_scripts/odyssey/scripts/img2col.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | #with open('workload/vgg16.json') as f: 4 | #with open('workload/resnet50.json') as f: 5 | with open('workload/mobilenetv2.json') as f: 6 | data = json.load(f) 7 | 8 | for layer in data["workloads"]: 9 | i, o, r, c, p, q = layer["params"]["i"], layer["params"]["o"], layer["params"]["r"], \ 10 | layer["params"]["c"], layer["params"]["p"], layer["params"]["q"] 11 | gemm_i = o 12 | gemm_j = r * c 13 | gemm_k = i * p * q 14 | layer["params"] = {"i": gemm_i, "j": gemm_j, "k": gemm_k} 15 | layer["tags"] = ["gemm"] 16 | 17 | 18 | #with open("workload/vgg16_img2col.json", "w") as f: 19 | #with open("workload/resnet50_img2col.json", "w") as f: 20 | with open("workload/mobilenetv2_img2col.json", "w") as f: 21 | json.dump(data, f, indent=2) -------------------------------------------------------------------------------- /autosa_scripts/odyssey/scripts/run_arch1.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd .. 4 | rm -rf outdir/* 5 | rm -rf tmp/* 6 | for design_idx in 1 4 7 10 13 16 19 22 25 28 7 | do 8 | python main.py --workload=vgg16 --stop-after-time=10 --use-db=0 --n-worker=32 --design-idx=$design_idx 9 | python main.py --workload=resnet50 --stop-after-time=10 --use-db=0 --n-worker=32 --design-idx=$design_idx 10 | python main.py --workload=mobilenetv2 --stop-after-time=10 --use-db=0 --n-worker=32 --design-idx=$design_idx 11 | done 12 | cp -r outdir/* tmp/ 13 | cd - 14 | -------------------------------------------------------------------------------- /autosa_scripts/odyssey/scripts/run_arch1_free.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd .. 
4 | rm -rf outdir/* 5 | rm -rf tmp/* 6 | for design_idx in 1 4 7 10 13 16 19 22 25 28 7 | do 8 | #for layer_idx in {1..49} 9 | #do 10 | # python main.py --workload=resnet50_$layer_idx --stop-after-time=10 --use-db=0 --design-idx=$design_idx 11 | #done 12 | for layer_idx in {1..36} 13 | do 14 | python main.py --workload=mobilenetv2_$layer_idx --stop-after-time=10 --use-db=0 --design-idx=$design_idx 15 | done 16 | for layer_idx in {1..13} 17 | do 18 | python main.py --workload=vgg16_$layer_idx --stop-after-time=10 --use-db=0 --design-idx=$design_idx 19 | done 20 | done 21 | cp -r outdir/* tmp/ 22 | cd - 23 | -------------------------------------------------------------------------------- /autosa_scripts/odyssey/scripts/run_arch1_ml_cmp.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd .. 4 | rm -rf outdir/* 5 | #rm -rf tmp/* 6 | for design_idx in 1 4 7 10 13 16 19 22 25 28 7 | do 8 | #python main.py --workload=vgg16 --stop-after-time=10 --use-db=0 --n-worker=32 --design-idx=$design_idx 9 | python main.py --workload=resnet50 --stop-after-time=15 --use-db=0 --n-worker=32 --design-idx=$design_idx 10 | done 11 | cp -r outdir/* tmp/ 12 | cd - 13 | -------------------------------------------------------------------------------- /autosa_scripts/odyssey/scripts/run_arch3.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd .. 
4 | rm -rf outdir/* 5 | rm -rf tmp/* 6 | 7 | #python main.py --workload=vgg16 --stop-after-time=10 --use-db=0 --n-worker=32 --explore-multi-acc --explore-fusion --method=customized2 --max-n-array=8 8 | #python main.py --workload=resnet50 --stop-after-time=10 --use-db=0 --n-worker=32 --explore-multi-acc --explore-fusion --method=customized2 --max-n-array=8 9 | #python main.py --workload=mobilenetv2 --stop-after-time=10 --use-db=0 --n-worker=32 --explore-multi-acc --explore-fusion --method=customized2 --max-n-array=8 10 | 11 | python main.py --workload=vgg16 --stop-after-time=10 --use-db=0 --n-worker=32 --explore-multi-acc --explore-fusion --method=customized2 --max-n-array=8 --batch-size=16 12 | python main.py --workload=resnet50 --stop-after-time=10 --use-db=0 --n-worker=32 --explore-multi-acc --explore-fusion --method=customized2 --max-n-array=8 --batch-size=16 13 | python main.py --workload=mobilenetv2 --stop-after-time=10 --use-db=0 --n-worker=32 --explore-multi-acc --explore-fusion --method=customized2 --max-n-array=8 --batch-size=16 14 | 15 | cp -r outdir/* tmp/ 16 | cd - 17 | -------------------------------------------------------------------------------- /autosa_scripts/odyssey/scripts/run_dataflow_cmp_cnn.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd .. 4 | rm -rf outdir/* 5 | rm -rf tmp/* 6 | #for design_idx in {0..29} 7 | for design_idx in 6 7 8 15 16 17 27 28 29 8 | do 9 | for layer_idx in {1..13} 10 | do 11 | python main.py --workload=vgg16_$layer_idx --stop-after-time=10 --use-db=0 --unit-task-method=genetic --design-idx=$design_idx --profiling 12 | done 13 | done 14 | cp -r outdir/* tmp/ 15 | cd - 16 | -------------------------------------------------------------------------------- /autosa_scripts/odyssey/scripts/run_dataflow_cmp_mm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd .. 
4 | rm -rf outdir/* 5 | rm -rf tmp/* 6 | #for design_idx in {0..17} 7 | for design_idx in 0 8 | #for design_idx in {14..14} 9 | #for design_idx in 6 7 8 12 13 14 15 16 17 10 | do 11 | #python main.py --workload=mm --stop-after-time=10 --use-db=0 --unit-task-method=genetic --design-idx=$design_idx --profiling 12 | # Solver cmp 13 | python main.py --workload=mm --stop-after-time=20 --use-db=0 --unit-task-method=genetic --design-idx=$design_idx --profiling 14 | # Imperfect pruning 15 | #python main.py --workload=mm --stop-after-time=10 --use-db=0 --unit-task-method=genetic --design-idx=$design_idx --profiling --objective=off_chip_comm 16 | done 17 | 18 | cp -r outdir/* tmp/ 19 | cd - 20 | -------------------------------------------------------------------------------- /autosa_scripts/odyssey/scripts/run_dataflow_cmp_mm_energy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd .. 4 | rm -rf outdir/* 5 | rm -rf tmp/* 6 | for design_idx in {0..17} 7 | do 8 | python main.py --workload=mm --stop-after-time=10 --use-db=0 --unit-task-method=genetic --design-idx=$design_idx --objective=energy --profiling 9 | done 10 | 11 | cp -r outdir/* tmp/ 12 | cd - 13 | -------------------------------------------------------------------------------- /autosa_scripts/odyssey/scripts/run_img2col_single.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd .. 
4 | python main.py --workload=vgg16_img2col --stop-after-time=10 --use-db=0 --n-worker=32 5 | python main.py --workload=resnet50_img2col --stop-after-time=10 --use-db=0 --n-worker=32 6 | python main.py --workload=mobilenetv2_img2col --stop-after-time=10 --use-db=0 --n-worker=32 7 | cd - 8 | -------------------------------------------------------------------------------- /autosa_scripts/odyssey/scripts/run_method_cmp.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd .. 4 | rm -rf outdir/* 5 | rm -rf tmp/* 6 | for design_idx in {0..17} 7 | do 8 | python main.py --workload=mm --stop-after-time=300 --use-db=0 --unit-task-method=genetic --profiling --design-idx=$design_idx 9 | python main.py --workload=mm --stop-after-time=300 --use-db=0 --unit-task-method=random --profiling --design-idx=$design_idx 10 | python main.py --workload=mm --stop-after-time=300 --use-db=0 --unit-task-method=random_pruning --profiling --design-idx=$design_idx 11 | python main.py --workload=mm --stop-after-epoch=150000 --use-db=0 --unit-task-method=annealing --profiling --design-idx=$design_idx 12 | python main.py --workload=mm --stop-after-epoch=300 --use-db=0 --unit-task-method=bayesian --profiling --design-idx=$design_idx 13 | python main.py --workload=mm --stop-after-time=300 --use-db=0 --unit-task-method=open_tuner --profiling --design-idx=$design_idx 14 | python main.py --workload=mm --stop-after-epoch=50000 --use-db=0 --unit-task-method=RL --profiling --design-idx=$design_idx 15 | done 16 | cp -r outdir/* tmp/ 17 | cd - 18 | -------------------------------------------------------------------------------- /autosa_scripts/odyssey/scripts/run_metric_cmp.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd .. 
4 | rm -rf outdir/* 5 | rm -rf tmp/* 6 | #python main.py --workload=mm --stop-after-time=20 --use-db=0 --unit-task-method=genetic --profiling --design-idx=0 7 | #python main.py --workload=mm --stop-after-time=20 --use-db=0 --unit-task-method=genetic --profiling --design-idx=1 8 | #python main.py --workload=mm --stop-after-time=20 --use-db=0 --unit-task-method=genetic --profiling --design-idx=2 9 | #python main.py --workload=mm --stop-after-time=20 --use-db=0 --unit-task-method=genetic --profiling --design-idx=3 --objective=off_chip_comm 10 | python main.py --workload=mm --stop-after-time=20 --use-db=0 --unit-task-method=genetic --profiling --design-idx=3 --objective=dsp_num 11 | #python main.py --workload=mm --stop-after-time=20 --use-db=0 --unit-task-method=genetic --profiling --design-idx=4 12 | #python main.py --workload=mm --stop-after-time=20 --use-db=0 --unit-task-method=genetic --profiling --design-idx=5 13 | cp -r outdir/* tmp/ 14 | cd - 15 | -------------------------------------------------------------------------------- /autosa_scripts/odyssey/scripts/run_mutation_cmp.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Use solver by default 4 | # Set epsilon to 0 when only using the factorization mutation 5 | cd .. 
6 | rm -rf outdir/* 7 | rm -rf tmp/* 8 | python main.py --workload=mm --stop-after-time=20 --use-db=0 --unit-task-method=genetic --design-idx=3 --profiling 9 | cp -r outdir/* tmp/ 10 | cd - -------------------------------------------------------------------------------- /autosa_scripts/odyssey/scripts/split_cnn_layers.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import json 3 | 4 | #network = "resnet50" 5 | network = "mobilenetv2" 6 | with open(f"../workload/{network}.json", "r") as f: 7 | network_data = json.load(f) 8 | layer_idx = 1 9 | for layer in network_data["workloads"]: 10 | data = {} 11 | data["workloads"] = [layer] 12 | with open(f"../workload/{network}_{layer_idx}.json", "w") as f: 13 | json.dump(data, f, indent=4) 14 | layer_idx += 1 -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/conv.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv1-1", 5 | "tags": ["conv"], 6 | "params": { 7 | "i": 1, 8 | "o": 6, 9 | "r": 5, 10 | "c": 5, 11 | "p": 3, 12 | "q": 3 13 | } 14 | } 15 | ] 16 | } 17 | -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/mm.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "gemm", 5 | "tags": ["gemm"], 6 | "params": { 7 | "i": 1024, 8 | "j": 1024, 9 | "k": 1024 10 | } 11 | } 12 | ] 13 | } 14 | -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/mm64.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "gemm", 5 | "tags": ["gemm"], 6 | "params": { 7 | "i": 64, 8 | "j": 64, 9 | "k": 64 10 | } 11 | } 12 | ] 13 | } 14 | 
-------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/mobilenetv2_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv1", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 3, 10 | "o": 32, 11 | "r": 112, 12 | "c": 112, 13 | "p": 3, 14 | "q": 3 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/mobilenetv2_10.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv4_1-1", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 32, 10 | "o": 144, 11 | "r": 28, 12 | "c": 28, 13 | "p": 1, 14 | "q": 1 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/mobilenetv2_11.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv4_3-1", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 144, 10 | "o": 32, 11 | "r": 28, 12 | "c": 28, 13 | "p": 1, 14 | "q": 1 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/mobilenetv2_12.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv4_1-2", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 32, 10 | "o": 144, 11 | "r": 28, 12 | "c": 28, 13 | "p": 1, 14 | "q": 1 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/mobilenetv2_13.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv4_3-2", 5 | "tags": [ 6 
| "conv" 7 | ], 8 | "params": { 9 | "i": 144, 10 | "o": 32, 11 | "r": 28, 12 | "c": 28, 13 | "p": 1, 14 | "q": 1 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/mobilenetv2_14.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv5_1-0", 5 | "tags": [ 6 | "conv", 7 | "maxpool_2" 8 | ], 9 | "params": { 10 | "i": 32, 11 | "o": 192, 12 | "r": 28, 13 | "c": 28, 14 | "p": 1, 15 | "q": 1 16 | } 17 | } 18 | ] 19 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/mobilenetv2_15.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv5_3-0", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 192, 10 | "o": 64, 11 | "r": 14, 12 | "c": 14, 13 | "p": 1, 14 | "q": 1 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/mobilenetv2_16.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv5_1-1", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 64, 10 | "o": 192, 11 | "r": 14, 12 | "c": 14, 13 | "p": 1, 14 | "q": 1 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/mobilenetv2_17.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv5_3-1", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 192, 10 | "o": 64, 11 | "r": 14, 12 | "c": 14, 13 | "p": 1, 14 | "q": 1 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- 
/autosa_scripts/odyssey/workload/mobilenetv2_18.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv5_1-2", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 64, 10 | "o": 192, 11 | "r": 14, 12 | "c": 14, 13 | "p": 1, 14 | "q": 1 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/mobilenetv2_19.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv5_3-2", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 192, 10 | "o": 64, 11 | "r": 14, 12 | "c": 14, 13 | "p": 1, 14 | "q": 1 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/mobilenetv2_2.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv2_1-0", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 32, 10 | "o": 32, 11 | "r": 112, 12 | "c": 112, 13 | "p": 1, 14 | "q": 1 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/mobilenetv2_20.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv5_1-3", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 64, 10 | "o": 192, 11 | "r": 14, 12 | "c": 14, 13 | "p": 1, 14 | "q": 1 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/mobilenetv2_21.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv5_3-3", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 192, 10 | "o": 64, 11 | "r": 14, 12 
| "c": 14, 13 | "p": 1, 14 | "q": 1 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/mobilenetv2_22.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv6_1-0", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 64, 10 | "o": 384, 11 | "r": 14, 12 | "c": 14, 13 | "p": 1, 14 | "q": 1 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/mobilenetv2_23.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv6_3-0", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 384, 10 | "o": 96, 11 | "r": 14, 12 | "c": 14, 13 | "p": 1, 14 | "q": 1 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/mobilenetv2_24.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv6_1-1", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 96, 10 | "o": 384, 11 | "r": 14, 12 | "c": 14, 13 | "p": 1, 14 | "q": 1 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/mobilenetv2_25.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv6_3-1", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 384, 10 | "o": 96, 11 | "r": 14, 12 | "c": 14, 13 | "p": 1, 14 | "q": 1 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/mobilenetv2_26.json: -------------------------------------------------------------------------------- 1 | 
{ 2 | "workloads": [ 3 | { 4 | "name": "conv6_1-2", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 96, 10 | "o": 384, 11 | "r": 14, 12 | "c": 14, 13 | "p": 1, 14 | "q": 1 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/mobilenetv2_27.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv6_3-2", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 384, 10 | "o": 96, 11 | "r": 14, 12 | "c": 14, 13 | "p": 1, 14 | "q": 1 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/mobilenetv2_28.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv7_1-0", 5 | "tags": [ 6 | "conv", 7 | "maxpool_2" 8 | ], 9 | "params": { 10 | "i": 96, 11 | "o": 576, 12 | "r": 14, 13 | "c": 14, 14 | "p": 1, 15 | "q": 1 16 | } 17 | } 18 | ] 19 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/mobilenetv2_29.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv7_3-0", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 576, 10 | "o": 160, 11 | "r": 7, 12 | "c": 7, 13 | "p": 1, 14 | "q": 1 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/mobilenetv2_3.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv2_3-0", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 32, 10 | "o": 16, 11 | "r": 112, 12 | "c": 112, 13 | "p": 1, 14 | "q": 1 15 | } 16 | } 17 | ] 18 | } 
-------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/mobilenetv2_30.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv7_1-1", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 160, 10 | "o": 576, 11 | "r": 7, 12 | "c": 7, 13 | "p": 1, 14 | "q": 1 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/mobilenetv2_31.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv7_3-1", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 576, 10 | "o": 160, 11 | "r": 7, 12 | "c": 7, 13 | "p": 1, 14 | "q": 1 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/mobilenetv2_32.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv7_1-2", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 160, 10 | "o": 576, 11 | "r": 7, 12 | "c": 7, 13 | "p": 1, 14 | "q": 1 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/mobilenetv2_33.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv7_3-2", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 576, 10 | "o": 160, 11 | "r": 7, 12 | "c": 7, 13 | "p": 1, 14 | "q": 1 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/mobilenetv2_34.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv8_1-0", 5 | "tags": [ 6 
| "conv" 7 | ], 8 | "params": { 9 | "i": 160, 10 | "o": 960, 11 | "r": 7, 12 | "c": 7, 13 | "p": 1, 14 | "q": 1 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/mobilenetv2_35.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv8_3-0", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 960, 10 | "o": 320, 11 | "r": 7, 12 | "c": 7, 13 | "p": 1, 14 | "q": 1 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/mobilenetv2_36.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv9", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 320, 10 | "o": 1280, 11 | "r": 7, 12 | "c": 7, 13 | "p": 1, 14 | "q": 1 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/mobilenetv2_4.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv3_1-0", 5 | "tags": [ 6 | "conv", 7 | "maxpool_2" 8 | ], 9 | "params": { 10 | "i": 16, 11 | "o": 96, 12 | "r": 112, 13 | "c": 112, 14 | "p": 1, 15 | "q": 1 16 | } 17 | } 18 | ] 19 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/mobilenetv2_47.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv3_3-0", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 96, 10 | "o": 24, 11 | "r": 56, 12 | "c": 56, 13 | "p": 1, 14 | "q": 1 15 | } 16 | }, 17 | { 18 | "name": "conv3_1-1", 19 | "tags": [ 20 | "conv" 21 | ], 22 | "params": { 23 | "i": 24, 24 | "o": 96, 25 | "r": 56, 26 | "c": 56, 27 
| "p": 1, 28 | "q": 1 29 | } 30 | }, 31 | { 32 | "name": "conv3_3-1", 33 | "tags": [ 34 | "conv" 35 | ], 36 | "params": { 37 | "i": 96, 38 | "o": 24, 39 | "r": 56, 40 | "c": 56, 41 | "p": 1, 42 | "q": 1 43 | } 44 | }, 45 | { 46 | "name": "conv4_1-0", 47 | "tags": [ 48 | "conv", 49 | "maxpool_2" 50 | ], 51 | "params": { 52 | "i": 24, 53 | "o": 144, 54 | "r": 56, 55 | "c": 56, 56 | "p": 1, 57 | "q": 1 58 | } 59 | } 60 | ] 61 | } 62 | -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/mobilenetv2_5.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv3_3-0", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 96, 10 | "o": 24, 11 | "r": 56, 12 | "c": 56, 13 | "p": 1, 14 | "q": 1 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/mobilenetv2_6.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv3_1-1", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 24, 10 | "o": 96, 11 | "r": 56, 12 | "c": 56, 13 | "p": 1, 14 | "q": 1 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/mobilenetv2_7.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv3_3-1", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 96, 10 | "o": 24, 11 | "r": 56, 12 | "c": 56, 13 | "p": 1, 14 | "q": 1 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/mobilenetv2_8.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv4_1-0", 5 | "tags": 
[ 6 | "conv", 7 | "maxpool_2" 8 | ], 9 | "params": { 10 | "i": 24, 11 | "o": 144, 12 | "r": 56, 13 | "c": 56, 14 | "p": 1, 15 | "q": 1 16 | } 17 | } 18 | ] 19 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/mobilenetv2_9.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv4_3-0", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 144, 10 | "o": 32, 11 | "r": 28, 12 | "c": 28, 13 | "p": 1, 14 | "q": 1 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/mobilenetv2_conv3_1_0.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv3_1-0", 5 | "tags": [ 6 | "conv", 7 | "maxpool_2" 8 | ], 9 | "params": { 10 | "i": 16, 11 | "o": 96, 12 | "r": 112, 13 | "c": 112, 14 | "p": 1, 15 | "q": 1 16 | } 17 | } 18 | ] 19 | } 20 | -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/mobilenetv2_first.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv3_1-0", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 16, 10 | "o": 96, 11 | "r": 112, 12 | "c": 112, 13 | "p": 3, 14 | "q": 3 15 | } 16 | }, 17 | { 18 | "name": "conv3_3-0", 19 | "tags": [ 20 | "conv" 21 | ], 22 | "params": { 23 | "i": 96, 24 | "o": 24, 25 | "r": 56, 26 | "c": 56, 27 | "p": 3, 28 | "q": 3 29 | } 30 | }, 31 | { 32 | "name": "conv3_1-1", 33 | "tags": [ 34 | "conv" 35 | ], 36 | "params": { 37 | "i": 16, 38 | "o": 96, 39 | "r": 112, 40 | "c": 112, 41 | "p": 3, 42 | "q": 3 43 | } 44 | }, 45 | { 46 | "name": "conv3_3-1", 47 | "tags": [ 48 | "conv" 49 | ], 50 | "params": { 51 | "i": 96, 52 | "o": 24, 53 | "r": 56, 54 | "c": 56, 55 | "p": 3, 56 | "q": 
3 57 | } 58 | } 59 | ] 60 | } 61 | -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/mobilenetv2_first1.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv3_1-0", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 16, 10 | "o": 96, 11 | "r": 112, 12 | "c": 112, 13 | "p": 3, 14 | "q": 3 15 | } 16 | }, 17 | { 18 | "name": "conv3_1-1", 19 | "tags": [ 20 | "conv" 21 | ], 22 | "params": { 23 | "i": 16, 24 | "o": 96, 25 | "r": 112, 26 | "c": 112, 27 | "p": 3, 28 | "q": 3 29 | } 30 | } 31 | ] 32 | } 33 | -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/mobilenetv2_first2.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv3_3-0", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 96, 10 | "o": 24, 11 | "r": 56, 12 | "c": 56, 13 | "p": 3, 14 | "q": 3 15 | } 16 | }, 17 | { 18 | "name": "conv3_3-1", 19 | "tags": [ 20 | "conv" 21 | ], 22 | "params": { 23 | "i": 96, 24 | "o": 24, 25 | "r": 56, 26 | "c": 56, 27 | "p": 3, 28 | "q": 3 29 | } 30 | } 31 | ] 32 | } 33 | -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/mobilenetv2_test_single.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv3_1-0", 5 | "tags": [ 6 | "conv", 7 | "maxpool_2" 8 | ], 9 | "params": { 10 | "i": 16, 11 | "o": 96, 12 | "r": 112, 13 | "c": 112, 14 | "p": 1, 15 | "q": 1 16 | } 17 | } 18 | ] 19 | } 20 | -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/resnet50_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": 
"conv1", 5 | "tags": [ 6 | "conv", 7 | "maxpool_2" 8 | ], 9 | "params": { 10 | "i": 3, 11 | "o": 64, 12 | "r": 112, 13 | "c": 112, 14 | "p": 7, 15 | "q": 7 16 | } 17 | } 18 | ] 19 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/resnet50_10.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv2_3-2", 5 | "tags": [ 6 | "conv", 7 | "maxpool_2" 8 | ], 9 | "params": { 10 | "i": 64, 11 | "o": 256, 12 | "r": 56, 13 | "c": 56, 14 | "p": 1, 15 | "q": 1 16 | } 17 | } 18 | ] 19 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/resnet50_11.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv3_1-0", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 256, 10 | "o": 128, 11 | "r": 28, 12 | "c": 28, 13 | "p": 1, 14 | "q": 1 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/resnet50_12.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv3_2-0", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 128, 10 | "o": 128, 11 | "r": 28, 12 | "c": 28, 13 | "p": 3, 14 | "q": 3 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/resnet50_13.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv3_3-0", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 128, 10 | "o": 512, 11 | "r": 28, 12 | "c": 28, 13 | "p": 1, 14 | "q": 1 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- 
/autosa_scripts/odyssey/workload/resnet50_14.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv3_1-1", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 512, 10 | "o": 128, 11 | "r": 28, 12 | "c": 28, 13 | "p": 1, 14 | "q": 1 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/resnet50_15.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv3_2-1", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 128, 10 | "o": 128, 11 | "r": 28, 12 | "c": 28, 13 | "p": 3, 14 | "q": 3 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/resnet50_16.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv3_3-1", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 128, 10 | "o": 512, 11 | "r": 28, 12 | "c": 28, 13 | "p": 1, 14 | "q": 1 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/resnet50_17.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv3_1-2", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 512, 10 | "o": 128, 11 | "r": 28, 12 | "c": 28, 13 | "p": 1, 14 | "q": 1 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/resnet50_18.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv3_2-2", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 128, 10 | "o": 128, 11 | "r": 28, 12 | "c": 
28, 13 | "p": 3, 14 | "q": 3 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/resnet50_19.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv3_3-2", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 128, 10 | "o": 512, 11 | "r": 28, 12 | "c": 28, 13 | "p": 1, 14 | "q": 1 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/resnet50_2.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv2_1-0", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 64, 10 | "o": 64, 11 | "r": 56, 12 | "c": 56, 13 | "p": 1, 14 | "q": 1 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/resnet50_20.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv3_1-3", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 512, 10 | "o": 128, 11 | "r": 28, 12 | "c": 28, 13 | "p": 1, 14 | "q": 1 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/resnet50_21.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv3_2-3", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 128, 10 | "o": 128, 11 | "r": 28, 12 | "c": 28, 13 | "p": 3, 14 | "q": 3 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/resnet50_22.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 
3 | { 4 | "name": "conv3_3-3", 5 | "tags": [ 6 | "conv", 7 | "maxpool_2" 8 | ], 9 | "params": { 10 | "i": 128, 11 | "o": 512, 12 | "r": 28, 13 | "c": 28, 14 | "p": 1, 15 | "q": 1 16 | } 17 | } 18 | ] 19 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/resnet50_23.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv4_1-0", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 512, 10 | "o": 256, 11 | "r": 14, 12 | "c": 14, 13 | "p": 1, 14 | "q": 1 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/resnet50_24.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv4_2-0", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 256, 10 | "o": 256, 11 | "r": 14, 12 | "c": 14, 13 | "p": 3, 14 | "q": 3 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/resnet50_25.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv4_3-0", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 256, 10 | "o": 1024, 11 | "r": 14, 12 | "c": 14, 13 | "p": 1, 14 | "q": 1 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/resnet50_26.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv4_1-1", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 1024, 10 | "o": 256, 11 | "r": 14, 12 | "c": 14, 13 | "p": 1, 14 | "q": 1 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- 
/autosa_scripts/odyssey/workload/resnet50_27.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv4_2-1", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 256, 10 | "o": 256, 11 | "r": 14, 12 | "c": 14, 13 | "p": 3, 14 | "q": 3 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/resnet50_28.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv4_3-1", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 256, 10 | "o": 1024, 11 | "r": 14, 12 | "c": 14, 13 | "p": 1, 14 | "q": 1 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/resnet50_29.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv4_1-2", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 1024, 10 | "o": 256, 11 | "r": 14, 12 | "c": 14, 13 | "p": 1, 14 | "q": 1 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/resnet50_3.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv2_2-0", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 64, 10 | "o": 64, 11 | "r": 56, 12 | "c": 56, 13 | "p": 3, 14 | "q": 3 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/resnet50_30.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv4_2-2", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 256, 10 | "o": 256, 11 | "r": 14, 12 | "c": 14, 
13 | "p": 3, 14 | "q": 3 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/resnet50_31.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv4_3-2", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 256, 10 | "o": 1024, 11 | "r": 14, 12 | "c": 14, 13 | "p": 1, 14 | "q": 1 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/resnet50_32.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv4_1-3", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 1024, 10 | "o": 256, 11 | "r": 14, 12 | "c": 14, 13 | "p": 1, 14 | "q": 1 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/resnet50_33.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv4_2-3", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 256, 10 | "o": 256, 11 | "r": 14, 12 | "c": 14, 13 | "p": 3, 14 | "q": 3 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/resnet50_34.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv4_3-3", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 256, 10 | "o": 1024, 11 | "r": 14, 12 | "c": 14, 13 | "p": 1, 14 | "q": 1 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/resnet50_35.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": 
[ 3 | { 4 | "name": "conv4_1-4", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 1024, 10 | "o": 256, 11 | "r": 14, 12 | "c": 14, 13 | "p": 1, 14 | "q": 1 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/resnet50_36.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv4_2-4", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 256, 10 | "o": 256, 11 | "r": 14, 12 | "c": 14, 13 | "p": 3, 14 | "q": 3 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/resnet50_37.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv4_3-4", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 256, 10 | "o": 1024, 11 | "r": 14, 12 | "c": 14, 13 | "p": 1, 14 | "q": 1 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/resnet50_38.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv4_1-5", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 1024, 10 | "o": 256, 11 | "r": 14, 12 | "c": 14, 13 | "p": 1, 14 | "q": 1 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/resnet50_39.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv4_2-5", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 256, 10 | "o": 256, 11 | "r": 14, 12 | "c": 14, 13 | "p": 3, 14 | "q": 3 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- 
/autosa_scripts/odyssey/workload/resnet50_4.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv2_3-0", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 64, 10 | "o": 256, 11 | "r": 56, 12 | "c": 56, 13 | "p": 1, 14 | "q": 1 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/resnet50_40.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv4_3-5", 5 | "tags": [ 6 | "conv", 7 | "maxpool_2" 8 | ], 9 | "params": { 10 | "i": 256, 11 | "o": 1024, 12 | "r": 14, 13 | "c": 14, 14 | "p": 1, 15 | "q": 1 16 | } 17 | } 18 | ] 19 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/resnet50_41.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv5_1-0", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 1024, 10 | "o": 512, 11 | "r": 7, 12 | "c": 7, 13 | "p": 1, 14 | "q": 1 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/resnet50_42.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv5_2-0", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 512, 10 | "o": 512, 11 | "r": 7, 12 | "c": 7, 13 | "p": 3, 14 | "q": 3 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/resnet50_43.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv5_3-0", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 512, 10 | "o": 2048, 11 | "r": 
7, 12 | "c": 7, 13 | "p": 1, 14 | "q": 1 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/resnet50_44.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv5_1-1", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 2048, 10 | "o": 512, 11 | "r": 7, 12 | "c": 7, 13 | "p": 1, 14 | "q": 1 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/resnet50_45.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv5_2-1", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 512, 10 | "o": 512, 11 | "r": 7, 12 | "c": 7, 13 | "p": 3, 14 | "q": 3 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/resnet50_46.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv5_3-1", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 512, 10 | "o": 2048, 11 | "r": 7, 12 | "c": 7, 13 | "p": 1, 14 | "q": 1 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/resnet50_47.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv5_1-2", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 2048, 10 | "o": 512, 11 | "r": 7, 12 | "c": 7, 13 | "p": 1, 14 | "q": 1 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/resnet50_48.json: -------------------------------------------------------------------------------- 1 | { 2 | 
"workloads": [ 3 | { 4 | "name": "conv5_2-2", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 512, 10 | "o": 512, 11 | "r": 7, 12 | "c": 7, 13 | "p": 3, 14 | "q": 3 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/resnet50_49.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv5_3-2", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 512, 10 | "o": 2048, 11 | "r": 7, 12 | "c": 7, 13 | "p": 1, 14 | "q": 1 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/resnet50_5.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv2_1-1", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 256, 10 | "o": 64, 11 | "r": 56, 12 | "c": 56, 13 | "p": 1, 14 | "q": 1 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/resnet50_6.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv2_2-1", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 64, 10 | "o": 64, 11 | "r": 56, 12 | "c": 56, 13 | "p": 3, 14 | "q": 3 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/resnet50_7.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv2_3-1", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 64, 10 | "o": 256, 11 | "r": 56, 12 | "c": 56, 13 | "p": 1, 14 | "q": 1 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- 
/autosa_scripts/odyssey/workload/resnet50_8.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv2_1-2", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 256, 10 | "o": 64, 11 | "r": 56, 12 | "c": 56, 13 | "p": 1, 14 | "q": 1 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/resnet50_9.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv2_2-2", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 64, 10 | "o": 64, 11 | "r": 56, 12 | "c": 56, 13 | "p": 3, 14 | "q": 3 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/resnet50_conv5_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv5_1-1", 5 | "tags": [ 6 | "conv" 7 | ], 8 | "params": { 9 | "i": 2048, 10 | "o": 512, 11 | "r": 7, 12 | "c": 7, 13 | "p": 1, 14 | "q": 1 15 | } 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/vgg16-2-img2col.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv1-1", 5 | "tags": ["gemm", "img2col"], 6 | "params": { 7 | "p0": 64, 8 | "p1": 50176, 9 | "p2": 27 10 | } 11 | }, 12 | { 13 | "name": "conv1-2", 14 | "tags": ["gemm", "img2col"], 15 | "params": { 16 | "p0": 64, 17 | "p1": 50176, 18 | "p2": 576 19 | } 20 | } 21 | ] 22 | } 23 | -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/vgg16-3.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": 
"conv1-1", 5 | "tags": ["conv"], 6 | "params": { 7 | "i": 3, 8 | "o": 64, 9 | "r": 224, 10 | "c": 224, 11 | "p": 3, 12 | "q": 3 13 | } 14 | }, 15 | { 16 | "name": "conv1-2", 17 | "tags": ["conv", "maxpool_2"], 18 | "params": { 19 | "i": 64, 20 | "o": 64, 21 | "r": 224, 22 | "c": 224, 23 | "p": 3, 24 | "q": 3 25 | } 26 | }, 27 | { 28 | "name": "conv2-1", 29 | "tags": ["conv"], 30 | "params": { 31 | "i": 64, 32 | "o": 128, 33 | "r": 112, 34 | "c": 112, 35 | "p": 3, 36 | "q": 3 37 | } 38 | } 39 | ] 40 | } 41 | -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/vgg16-4.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv1-1", 5 | "tags": ["conv"], 6 | "params": { 7 | "i": 3, 8 | "o": 64, 9 | "r": 224, 10 | "c": 224, 11 | "p": 3, 12 | "q": 3 13 | } 14 | }, 15 | { 16 | "name": "conv1-2", 17 | "tags": ["conv", "maxpool_2"], 18 | "params": { 19 | "i": 64, 20 | "o": 64, 21 | "r": 224, 22 | "c": 224, 23 | "p": 3, 24 | "q": 3 25 | } 26 | }, 27 | { 28 | "name": "conv2-1", 29 | "tags": ["conv"], 30 | "params": { 31 | "i": 64, 32 | "o": 128, 33 | "r": 112, 34 | "c": 112, 35 | "p": 3, 36 | "q": 3 37 | } 38 | }, 39 | { 40 | "name": "conv2-2", 41 | "tags": ["conv", "maxpool_2"], 42 | "params": { 43 | "i": 128, 44 | "o": 128, 45 | "r": 112, 46 | "c": 112, 47 | "p": 3, 48 | "q": 3 49 | } 50 | } 51 | ] 52 | } 53 | -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/vgg16_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv1-1", 5 | "tags": ["conv"], 6 | "params": { 7 | "i": 3, 8 | "o": 64, 9 | "r": 224, 10 | "c": 224, 11 | "p": 3, 12 | "q": 3 13 | } 14 | } 15 | ] 16 | } 17 | -------------------------------------------------------------------------------- 
/autosa_scripts/odyssey/workload/vgg16_10.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv4-3", 5 | "tags": ["conv", "maxpool_2"], 6 | "params": { 7 | "i": 512, 8 | "o": 512, 9 | "r": 28, 10 | "c": 28, 11 | "p": 3, 12 | "q": 3 13 | } 14 | } 15 | ] 16 | } 17 | -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/vgg16_11.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv5-1", 5 | "tags": ["conv"], 6 | "params": { 7 | "i": 512, 8 | "o": 512, 9 | "r": 14, 10 | "c": 14, 11 | "p": 3, 12 | "q": 3 13 | } 14 | } 15 | ] 16 | } 17 | -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/vgg16_12.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv5-2", 5 | "tags": ["conv"], 6 | "params": { 7 | "i": 512, 8 | "o": 512, 9 | "r": 14, 10 | "c": 14, 11 | "p": 3, 12 | "q": 3 13 | } 14 | } 15 | ] 16 | } 17 | -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/vgg16_13.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv5-3", 5 | "tags": ["conv", "maxpool_2"], 6 | "params": { 7 | "i": 512, 8 | "o": 512, 9 | "r": 14, 10 | "c": 14, 11 | "p": 3, 12 | "q": 3 13 | } 14 | } 15 | ] 16 | } 17 | -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/vgg16_2.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv1-2", 5 | "tags": ["conv", "maxpool_2"], 6 | "params": { 7 | "i": 64, 8 | "o": 64, 9 | "r": 224, 10 | "c": 224, 11 | "p": 3, 12 | "q": 3 
13 | } 14 | } 15 | ] 16 | } 17 | -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/vgg16_3.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv2-1", 5 | "tags": ["conv"], 6 | "params": { 7 | "i": 64, 8 | "o": 128, 9 | "r": 112, 10 | "c": 112, 11 | "p": 3, 12 | "q": 3 13 | } 14 | } 15 | ] 16 | } 17 | -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/vgg16_4.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv2-2", 5 | "tags": ["conv", "maxpool_2"], 6 | "params": { 7 | "i": 128, 8 | "o": 128, 9 | "r": 112, 10 | "c": 112, 11 | "p": 3, 12 | "q": 3 13 | } 14 | } 15 | ] 16 | } 17 | -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/vgg16_5.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv3-1", 5 | "tags": ["conv"], 6 | "params": { 7 | "i": 128, 8 | "o": 256, 9 | "r": 56, 10 | "c": 56, 11 | "p": 3, 12 | "q": 3 13 | } 14 | } 15 | ] 16 | } 17 | -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/vgg16_6.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv3-2", 5 | "tags": ["conv"], 6 | "params": { 7 | "i": 256, 8 | "o": 256, 9 | "r": 56, 10 | "c": 56, 11 | "p": 3, 12 | "q": 3 13 | } 14 | } 15 | ] 16 | } 17 | -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/vgg16_7.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv3-3", 5 | "tags": ["conv", 
"maxpool_2"], 6 | "params": { 7 | "i": 256, 8 | "o": 256, 9 | "r": 56, 10 | "c": 56, 11 | "p": 3, 12 | "q": 3 13 | } 14 | } 15 | ] 16 | } 17 | -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/vgg16_8.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv4-1", 5 | "tags": ["conv"], 6 | "params": { 7 | "i": 256, 8 | "o": 512, 9 | "r": 28, 10 | "c": 28, 11 | "p": 3, 12 | "q": 3 13 | } 14 | } 15 | ] 16 | } 17 | -------------------------------------------------------------------------------- /autosa_scripts/odyssey/workload/vgg16_9.json: -------------------------------------------------------------------------------- 1 | { 2 | "workloads": [ 3 | { 4 | "name": "conv4-2", 5 | "tags": ["conv"], 6 | "params": { 7 | "i": 512, 8 | "o": 512, 9 | "r": 28, 10 | "c": 28, 11 | "p": 3, 12 | "q": 3 13 | } 14 | } 15 | ] 16 | } 17 | -------------------------------------------------------------------------------- /autosa_scripts/ppcg_changes/isl/files.txt: -------------------------------------------------------------------------------- 1 | include/isl/schedule_node.h 2 | include/isl/ast_type.h 3 | include/isl/schedule.h 4 | isl_schedule_tree.c 5 | isl_schedule_tree.h 6 | isl_schedule_node.c 7 | isl_schedule_band.c 8 | isl_schedule_band.h 9 | isl_schedule.c 10 | -------------------------------------------------------------------------------- /autosa_scripts/ppcg_changes/isl/isl_patch.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | cp ast_type.h ../../../src/isl/include/isl/ 3 | cp schedule_node.h ../../../src/isl/include/isl/ 4 | cp schedule.h ../../../src/isl/include/isl/ 5 | cp vec.h ../../../src/isl/include/isl/ 6 | cp isl_schedule_tree.c ../../../src/isl/ 7 | cp isl_schedule_tree.h ../../../src/isl/ 8 | cp isl_schedule_node.c ../../../src/isl/ 9 | cp isl_schedule_band.c 
../../../src/isl/ 10 | cp isl_schedule_band.h ../../../src/isl/ 11 | cp isl_schedule.c ../../../src/isl/ 12 | -------------------------------------------------------------------------------- /autosa_scripts/ppcg_changes/ppcg/files.txt: -------------------------------------------------------------------------------- 1 | cpu.h 2 | cuda.h 3 | opencl.h 4 | ppcg_options.h 5 | ppcg_options.c 6 | ppcg.c 7 | ppcg.h 8 | util.h 9 | print.h 10 | schedule.h 11 | gpu.h 12 | -------------------------------------------------------------------------------- /autosa_scripts/tuner/constraint.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | class Constraint(object): 4 | def __init__(self, cst_path): 5 | with open(cst_path) as f: 6 | data = json.load(f) 7 | self.hw_cst = {} 8 | for res in data: 9 | self.hw_cst[res] = data[res]["total"] * data[res]["ratio"] 10 | self.hw_cst[f'{res}_total'] = data[res]["total"] 11 | 12 | def __repr__(self): 13 | ret = "" 14 | ret += f"b{int(self.hw_cst['BRAM18K'])}" 15 | ret += f"d{int(self.hw_cst['DSP'])}" 16 | return ret -------------------------------------------------------------------------------- /autosa_scripts/tuner/cst/hw_cst.json: -------------------------------------------------------------------------------- 1 | { 2 | "BRAM18K": { 3 | "total": 5376, 4 | "ratio": 0.7 5 | }, 6 | "DSP": { 7 | "total": 12288, 8 | "ratio": 0.7 9 | }, 10 | "FF": { 11 | "total": 3456000, 12 | "ratio": 0.7 13 | }, 14 | "LUT": { 15 | "total": 1728000, 16 | "ratio": 0.7 17 | }, 18 | "URAM": { 19 | "total": 1280, 20 | "ratio": 0.7 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /autosa_scripts/tuner/task/cnn.json: -------------------------------------------------------------------------------- 1 | { 2 | "tasks": [ 3 | { 4 | "name": "conv", 5 | "params": { 6 | "o": 6, 7 | "i": 1, 8 | "r": 5, 9 | "c": 5, 10 | "p": 3, 11 | "q": 3 12 | } 13 | } 14 | ] 15 | } 16 | 
-------------------------------------------------------------------------------- /autosa_scripts/tuner/task/mm.json: -------------------------------------------------------------------------------- 1 | { 2 | "tasks": [ 3 | { 4 | "name": "gemm1", 5 | "params": { 6 | "p0": 1024, 7 | "p1": 1024, 8 | "p2": 1024 9 | } 10 | } 11 | ] 12 | } 13 | -------------------------------------------------------------------------------- /autosa_scripts/tuner/task/mm2.json: -------------------------------------------------------------------------------- 1 | { 2 | "tasks": [ 3 | { 4 | "name": "gemm1", 5 | "params": { 6 | "p0": 1024, 7 | "p1": 1024, 8 | "p2": 1024 9 | } 10 | }, 11 | { 12 | "name": "gemm2", 13 | "params": { 14 | "p0": 512, 15 | "p1": 512, 16 | "p2": 512 17 | } 18 | } 19 | ] 20 | } 21 | -------------------------------------------------------------------------------- /autosa_scripts/vitis_scripts/connectivity.cfg: -------------------------------------------------------------------------------- 1 | [connectivity] 2 | sp=kernel0_1.A:DDR[0] 3 | sp=kernel0_1.B:DDR[1] 4 | sp=kernel0_1.C:DDR[2] 5 | 6 | -------------------------------------------------------------------------------- /autosa_tests/cnn/README.md: -------------------------------------------------------------------------------- 1 | # Convolutional Neural Network (Single Layer, Small) 2 | 3 | Board | Software Version 4 | -------------|----------------- 5 | Xilinx Alveo U250 | Xilinx Vitis 2019.2 6 | 7 | __Files__: 8 | ``` 9 | autosa_tests/cnn/kernel.c 10 | autosa_tests/cnn/kernel.h 11 | autosa_tests/cnn/simd_info.json 12 | autosa_tests/cnn/Makefile 13 | autosa_tests/cnn/connectivity.cfg 14 | ``` 15 | 16 | __Command__: 17 | ```c 18 | ./autosa ./autosa_tests/cnn/kernel.c --config=./autosa_config/autosa_config.json --target=autosa_hls_c --output-dir=./autosa.tmp/output --sa-sizes="{kernel[]->space_time[4];kernel[]->array_part[8,8,4,8];kernel[]->latency[4,2,4];kernel[]->simd[1,1,1,2]}" 
--simd-info=./autosa_tests/cnn/simd_info.json --host-serialize 19 | ``` 20 | 21 | After compilation, you will find all generated files under the directory `autosa.tmp/output/src`. Copy the `Makefile` and `connectivity.cfg` to the directory `autosa.tmp/output`. 22 | 23 | ``` 24 | cp autosa_tests/cnn/Makefile autosa.tmp/output/ 25 | cp autosa_tests/cnn/connectivity.cfg autosa.tmp/output/ 26 | ``` 27 | 28 | Execute the makefile to build the design. 29 | 30 | ``` 31 | cd autosa.tmp/output 32 | make all 33 | ``` 34 | -------------------------------------------------------------------------------- /autosa_tests/cnn/connectivity.cfg: -------------------------------------------------------------------------------- 1 | [connectivity] 2 | sp=kernel0_1.cin:DDR[0] 3 | sp=kernel0_1.w:DDR[1] 4 | sp=kernel0_1.cout:DDR[2] 5 | -------------------------------------------------------------------------------- /autosa_tests/cnn/hls_script.tcl: -------------------------------------------------------------------------------- 1 | ############################################################ 2 | ## This file is generated automatically by Vivado HLS. 3 | ## Please DO NOT edit it. 4 | ## Copyright (C) 1986-2019 Xilinx, Inc. All Rights Reserved. 
5 | ############################################################ 6 | open_project hls_prj 7 | set_top kernel0 8 | add_files src/kernel_kernel.h 9 | add_files src/kernel_kernel.cpp 10 | add_files -tb src/kernel_host.cpp 11 | open_solution "solution1" 12 | set_part {xcu200-fsgd2104-2-e} 13 | create_clock -period 5 -name default 14 | config_compile -name_max_length 50 15 | #source "./prj/solution1/directives.tcl" 16 | csim_design 17 | #csynth_design 18 | #cosim_design 19 | #cosim_design -trace_level all 20 | #cosim_design -setup -trace_level all 21 | #export_design -format ip_catalog 22 | exit 23 | -------------------------------------------------------------------------------- /autosa_tests/cnn/kernel.h: -------------------------------------------------------------------------------- 1 | #include "stdio.h" 2 | #include "stdlib.h" 3 | #include "math.h" 4 | 5 | typedef float data_t; 6 | #define O 16 7 | #define I 16 8 | #define R 16 9 | #define C 16 10 | #define K 3 11 | 12 | //#define O 6 13 | //#define I 1 14 | //#define R 5 15 | //#define C 5 16 | //#define K 3 17 | -------------------------------------------------------------------------------- /autosa_tests/cnn/param_names.json: -------------------------------------------------------------------------------- 1 | { 2 | "kernel0": ["q", "p", "o", "r", "c", "i"], 3 | "kernel1": ["q", "p", "o", "r", "c", "i"], 4 | "kernel2": ["q", "p", "o", "r", "c", "i"], 5 | "kernel3": ["q", "p", "o", "r", "c", "i"], 6 | "kernel4": ["q", "p", "o", "r", "c", "i"], 7 | "kernel5": ["q", "p", "o", "r", "c", "i"], 8 | "kernel6": ["q", "p", "o", "r", "c", "i"], 9 | "kernel7": ["q", "p", "o", "r", "c", "i"], 10 | "kernel8": ["q", "p", "o", "r", "c", "i"], 11 | "kernel9": ["q", "p", "o", "r", "c", "i"] 12 | } 13 | -------------------------------------------------------------------------------- /autosa_tests/cnn/simd_info.json: -------------------------------------------------------------------------------- 1 | { 2 | "kernel0": { 3 | 
"reduction": ["y", "y", "y"] 4 | }, 5 | "kernel1": { 6 | "reduction": ["y", "y", "y"] 7 | }, 8 | "kernel2": { 9 | "reduction": ["y", "y", "y"] 10 | }, 11 | "kernel3": { 12 | "reduction": ["y", "y", "y"] 13 | }, 14 | "kernel4": { 15 | "reduction": ["y", "y", "y"] 16 | }, 17 | "kernel5": { 18 | "reduction": ["y", "y", "y"] 19 | }, 20 | "kernel6": { 21 | "reduction": ["y", "y", "y"] 22 | }, 23 | "kernel7": { 24 | "reduction": ["y", "y", "y"] 25 | }, 26 | "kernel8": { 27 | "reduction": ["y", "y", "y"] 28 | }, 29 | "kernel9": { 30 | "reduction": ["y", "y", "y"] 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /autosa_tests/dnn_ops/dc_simd_info.json: -------------------------------------------------------------------------------- 1 | { 2 | "kernel4": { 3 | "reduction": ["y", "y"] 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /autosa_tests/dnn_ops/fc_simd_info.json: -------------------------------------------------------------------------------- 1 | { 2 | "kernel2": { 3 | "reduction": ["y"] 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /autosa_tests/dnn_ops/hls_script.tcl: -------------------------------------------------------------------------------- 1 | ############################################################ 2 | ## This file is generated automatically by Vivado HLS. 3 | ## Please DO NOT edit it. 4 | ## Copyright (C) 1986-2019 Xilinx, Inc. All Rights Reserved. 
5 | ############################################################ 6 | open_project hls_prj 7 | set_top kernel0 8 | add_files src/kernel_kernel.h 9 | add_files src/kernel_kernel.cpp 10 | add_files -tb src/kernel_host.cpp 11 | open_solution "solution1" 12 | set_part {xcu200-fsgd2104-2-e} 13 | create_clock -period 5 -name default 14 | config_compile -name_max_length 50 15 | #source "./prj/solution1/directives.tcl" 16 | csim_design 17 | #csynth_design 18 | #cosim_design 19 | #cosim_design -trace_level all 20 | #cosim_design -setup -trace_level all 21 | #export_design -format ip_catalog 22 | exit 23 | -------------------------------------------------------------------------------- /autosa_tests/dnn_ops/kernel.h: -------------------------------------------------------------------------------- 1 | #include "stdio.h" 2 | #include "stdlib.h" 3 | #include "math.h" 4 | 5 | //#define PC 6 | //#define DC 7 | #define FC 8 | 9 | typedef float data_t; 10 | // point-wise conv 11 | #define PC_O 16 12 | #define PC_I 16 13 | #define PC_R 8 14 | #define PC_C 8 15 | #define PC_K 3 16 | 17 | // depth-wise conv 18 | #define DC_O 16 19 | #define DC_I 16 20 | #define DC_R 8 21 | #define DC_C 8 22 | #define DC_K 3 23 | 24 | // fc 25 | #define FC_I 16 26 | #define FC_J 16 27 | -------------------------------------------------------------------------------- /autosa_tests/dnn_ops/pc_simd_info.json: -------------------------------------------------------------------------------- 1 | { 2 | "kernel4": { 3 | "reduction": ["y", "y", "y"] 4 | }, 5 | "kernel5": { 6 | "reduction": ["y", "y", "y"] 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /autosa_tests/large/cnn/README.md: -------------------------------------------------------------------------------- 1 | # Convolutional Neural Network (Single Layer, Large) 2 | 3 | Board | Software Version 4 | -------------|----------------- 5 | Xilinx Alveo U250 | Xilinx Vitis 2019.2 6 | 7 | __Files__: 8 | ``` 9 | 
autosa_tests/large/cnn/kernel.c 10 | autosa_tests/large/cnn/kernel.h 11 | autosa_tests/large/cnn/simd_info.json 12 | autosa_tests/large/cnn/Makefile 13 | autosa_tests/large/cnn/connectivity.cfg 14 | ``` 15 | 16 | __Command__: 17 | ```c 18 | ./autosa ./autosa_tests/large/cnn/kernel.c --config=./autosa_config/autosa_config.json --target=autosa_hls_c --output-dir=./autosa.tmp/output --sa-sizes="{kernel[]->space_time[4];kernel[]->array_part[64,56,14,64];kernel[]->latency[4,4,7];kernel[]->simd[1,1,8]}" --simd-info=./autosa_tests/large/cnn/simd_info.json 19 | ``` 20 | 21 | After compilation, you will find all generated files under the directory `autosa.tmp/output/src`. Copy the `Makefile` and `connectivity.cfg` to the directory `autosa.tmp/output`. 22 | 23 | ``` 24 | cp autosa_tests/large/cnn/Makefile autosa.tmp/output/ 25 | cp autosa_tests/large/cnn/connectivity.cfg autosa.tmp/output/ 26 | ``` 27 | 28 | Execute the makefile to build the design. 29 | 30 | ``` 31 | cd autosa.tmp/output 32 | make all 33 | ``` -------------------------------------------------------------------------------- /autosa_tests/large/cnn/connectivity.cfg: -------------------------------------------------------------------------------- 1 | [connectivity] 2 | sp=kernel0_1.cin:DDR[0] 3 | sp=kernel0_1.w:DDR[1] 4 | sp=kernel0_1.cout:DDR[3] 5 | -------------------------------------------------------------------------------- /autosa_tests/large/cnn/hls_script.tcl: -------------------------------------------------------------------------------- 1 | ############################################################ 2 | ## This file is generated automatically by Vivado HLS. 3 | ## Please DO NOT edit it. 4 | ## Copyright (C) 1986-2019 Xilinx, Inc. All Rights Reserved. 
5 | ############################################################ 6 | open_project hls_prj 7 | set_top kernel0 8 | add_files src/kernel_kernel.h 9 | add_files src/kernel_kernel.cpp 10 | add_files -tb src/kernel_host.cpp 11 | open_solution "solution1" 12 | set_part {xcu200-fsgd2104-2-e} 13 | create_clock -period 5 -name default 14 | config_compile -name_max_length 50 15 | #source "./prj/solution1/directives.tcl" 16 | csim_design 17 | #csynth_design 18 | #cosim_design 19 | #cosim_design -trace_level all 20 | #cosim_design -setup -trace_level all 21 | #export_design -format ip_catalog 22 | exit 23 | -------------------------------------------------------------------------------- /autosa_tests/large/cnn/kernel.h: -------------------------------------------------------------------------------- 1 | #include "stdio.h" 2 | #include "stdlib.h" 3 | #include "math.h" 4 | 5 | typedef float data_t; 6 | //#define O 512 7 | #define O 640 8 | #define I 512 9 | //#define R 60 10 | #define R 56 11 | #define C 56 12 | #define K 3 13 | 14 | //#define O 264 15 | //#define I 256 16 | //#define R 224 17 | //#define C 224 18 | //#define K 5 19 | -------------------------------------------------------------------------------- /autosa_tests/large/cnn/simd_info.json: -------------------------------------------------------------------------------- 1 | { 2 | "kernel4": { 3 | "reduction": ["y", "y", "y"] 4 | }, 5 | "kernel5": { 6 | "reduction": ["y", "y", "y"] 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /autosa_tests/large/cnn/step1-run-hls.tcl: -------------------------------------------------------------------------------- 1 | open_project kernel0 2 | set_top kernel0 3 | add_files "src/kernel_kernel.cpp" 4 | #add_files -tb PATH_TO_TESTBENCH_FILE 5 | 6 | open_solution solution 7 | 8 | #u250 9 | set_part xcu250-figd2104-2L-e 10 | 11 | # u280 12 | #set_part xcu280-fsvh2892-2L-e 13 | 14 | # 300 MHz 15 | create_clock -period 3.333 16 | 17 | 
config_dataflow -strict_mode warning 18 | set_clock_uncertainty 27.000000% 19 | config_rtl -enable_maxiConservative=1 20 | config_interface -m_axi_addr64 21 | 22 | # to enable integration with Vitis 23 | config_sdx -target xocc 24 | 25 | #csim_design 26 | csynth_design 27 | close_project 28 | exit 29 | -------------------------------------------------------------------------------- /autosa_tests/large/cnn/step3-pack-xo.tcl: -------------------------------------------------------------------------------- 1 | open_project kernel0 2 | open_solution solution 3 | export_design -rtl verilog -format ip_catalog -xo kernel0.xo 4 | 5 | close_project 6 | puts "Pack XO successfully" 7 | exit 8 | -------------------------------------------------------------------------------- /autosa_tests/large/mm/README.md: -------------------------------------------------------------------------------- 1 | # Matrix Multiplication (Large) 2 | 3 | Board | Software Version 4 | -------------|----------------- 5 | Xilinx Alveo U250 | Xilinx Vitis 2019.2 6 | 7 | __Files__: 8 | ``` 9 | autosa_tests/large/mm/kernel.c 10 | autosa_tests/large/mm/kernel.h 11 | autosa_tests/large/mm/simd_info.json 12 | autosa_tests/large/mm/Makefile 13 | autosa_tests/large/mm/connectivity.cfg 14 | ``` 15 | 16 | __Command__: 17 | ```c 18 | ./autosa ./autosa_tests/large/mm/kernel.c --config=./autosa_config/autosa_config.json --target=autosa_hls_c --output-dir=./autosa.tmp/output --sa-sizes="{kernel[]->space_time[3];kernel[]->array_part[260,256,512];kernel[]->latency[20,16];kernel[]->simd[8]}" --simd-info=./autosa_tests/large/mm/simd_info.json --host-serialize 19 | ``` 20 | 21 | After compilation, you will find all generated files under the directory `autosa.tmp/output/src`. Copy the `Makefile` and `connectivity.cfg` to the directory `autosa.tmp/output`. 
22 | 23 | ``` 24 | cp autosa_tests/large/mm/Makefile autosa.tmp/output/ 25 | cp autosa_tests/large/mm/connectivity.cfg autosa.tmp/output/ 26 | ``` 27 | 28 | Execute the makefile to build the design. 29 | 30 | ``` 31 | cd autosa.tmp/output 32 | make all 33 | ``` -------------------------------------------------------------------------------- /autosa_tests/large/mm/connectivity.cfg: -------------------------------------------------------------------------------- 1 | [connectivity] 2 | sp=kernel0_1.A:DDR[0] 3 | sp=kernel0_1.B:DDR[1] 4 | sp=kernel0_1.C:DDR[3] 5 | -------------------------------------------------------------------------------- /autosa_tests/large/mm/hls_script.tcl: -------------------------------------------------------------------------------- 1 | ############################################################ 2 | ## This file is generated automatically by Vivado HLS. 3 | ## Please DO NOT edit it. 4 | ## Copyright (C) 1986-2019 Xilinx, Inc. All Rights Reserved. 5 | ############################################################ 6 | open_project hls_prj 7 | set_top kernel0 8 | add_files src/kernel_kernel.h 9 | add_files src/kernel_kernel.cpp 10 | add_files -tb src/kernel_host.cpp 11 | open_solution "solution1" 12 | set_part {xcu200-fsgd2104-2-e} 13 | create_clock -period 5 -name default 14 | config_compile -name_max_length 50 15 | #source "./prj/solution1/directives.tcl" 16 | csim_design 17 | #csynth_design 18 | #cosim_design 19 | #cosim_design -trace_level all 20 | #cosim_design -setup -trace_level all 21 | #export_design -format ip_catalog 22 | exit 23 | -------------------------------------------------------------------------------- /autosa_tests/large/mm/kernel.h: -------------------------------------------------------------------------------- 1 | #include "stdio.h" 2 | #include "stdlib.h" 3 | #include "math.h" 4 | 5 | //typedef float data_t; 6 | typedef int data_t; 7 | //#define I 1024 8 | //#define J 1024 9 | //#define K 1024 10 | 11 | //#define I 1040 
12 | //#define J 1024 13 | //#define K 1024 14 | 15 | #define I 208 16 | #define J 512 17 | #define K 256 18 | 19 | //#define I 1032 20 | //#define J 1024 21 | //#define K 1024 22 | 23 | //#define I 1024 24 | //#define J 1032 25 | //#define K 1024 26 | 27 | //#define I 1024 28 | //#define J 1024 29 | //#define K 1032 30 | 31 | //#define I 1060 32 | //#define J 1024 33 | //#define K 1024 34 | 35 | //#define I 1040 36 | //#define J 1024 37 | //#define K 1024 38 | 39 | //#define I 1024 40 | //#define J 1056 41 | //#define K 1080 42 | -------------------------------------------------------------------------------- /autosa_tests/large/mm/simd_info.json: -------------------------------------------------------------------------------- 1 | { 2 | "kernel0": { 3 | "reduction": ["y"] 4 | }, 5 | "kernel1": { 6 | "reduction": ["y"] 7 | }, 8 | "kernel2": { 9 | "reduction": ["y"] 10 | }, 11 | "kernel3": { 12 | "reduction": ["y"] 13 | }, 14 | "kernel4": { 15 | "reduction": ["y"] 16 | }, 17 | "kernel5": { 18 | "reduction": ["y"] 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /autosa_tests/large/mm/step1-run-hls.tcl: -------------------------------------------------------------------------------- 1 | open_project kernel0 2 | set_top kernel0 3 | add_files "src/kernel_kernel.cpp" 4 | #add_files -tb PATH_TO_TESTBENCH_FILE 5 | 6 | open_solution solution 7 | 8 | #u250 9 | set_part xcu250-figd2104-2L-e 10 | 11 | # u280 12 | #set_part xcu280-fsvh2892-2L-e 13 | 14 | # 300 MHz 15 | create_clock -period 3.333 16 | 17 | config_dataflow -strict_mode warning 18 | set_clock_uncertainty 27.000000% 19 | config_rtl -enable_maxiConservative=1 20 | config_interface -m_axi_addr64 21 | 22 | # to enable integration with Vitis 23 | config_sdx -target xocc 24 | 25 | #csim_design 26 | csynth_design 27 | close_project 28 | exit 29 | -------------------------------------------------------------------------------- /autosa_tests/large/mm/step3-pack-xo.tcl: 
-------------------------------------------------------------------------------- 1 | open_project kernel0 2 | open_solution solution 3 | export_design -rtl verilog -format ip_catalog -xo kernel0.xo 4 | 5 | close_project 6 | puts "Pack XO successfully" 7 | exit 8 | -------------------------------------------------------------------------------- /autosa_tests/large/mm_block_sparse/connectivity.cfg: -------------------------------------------------------------------------------- 1 | [connectivity] 2 | sp=kernel0_1.A:DDR[0] 3 | sp=kernel0_1.B:DDR[1] 4 | sp=kernel0_1.C:DDR[2] 5 | -------------------------------------------------------------------------------- /autosa_tests/large/mm_block_sparse/hls_script.tcl: -------------------------------------------------------------------------------- 1 | ############################################################ 2 | ## This file is generated automatically by Vivado HLS. 3 | ## Please DO NOT edit it. 4 | ## Copyright (C) 1986-2019 Xilinx, Inc. All Rights Reserved. 
5 | ############################################################ 6 | open_project hls_prj 7 | set_top kernel0 8 | add_files src/kernel_kernel.h 9 | add_files src/kernel_kernel.cpp 10 | add_files -tb src/kernel_host.cpp 11 | open_solution "solution1" 12 | set_part {xcu200-fsgd2104-2-e} 13 | create_clock -period 5 -name default 14 | config_compile -name_max_length 50 15 | #source "./prj/solution1/directives.tcl" 16 | csim_design 17 | #csynth_design 18 | #cosim_design 19 | #cosim_design -trace_level all 20 | #cosim_design -setup -trace_level all 21 | #export_design -format ip_catalog 22 | exit 23 | -------------------------------------------------------------------------------- /autosa_tests/large/mm_block_sparse/simd_info.json: -------------------------------------------------------------------------------- 1 | { 2 | "kernel0": { 3 | "reduction": ["y"] 4 | }, 5 | "kernel1": { 6 | "reduction": ["y"] 7 | }, 8 | "kernel2": { 9 | "reduction": ["y"] 10 | }, 11 | "kernel3": { 12 | "reduction": ["y"] 13 | }, 14 | "kernel4": { 15 | "reduction": ["y"] 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /autosa_tests/large/mm_int16/README.md: -------------------------------------------------------------------------------- 1 | # Matrix Multiplication in int16 (Large) 2 | 3 | Board | Software Version 4 | -------------|----------------- 5 | Xilinx Alveo U250 | Xilinx Vitis 2019.2 6 | 7 | __Files__: 8 | ``` 9 | autosa_tests/large/mm_int16/kernel.c 10 | autosa_tests/large/mm_int16/kernel.h 11 | autosa_tests/large/mm_int16/simd_info.json 12 | autosa_tests/large/mm_int16/Makefile 13 | autosa_tests/large/mm_int16/connectivity.cfg 14 | ``` 15 | 16 | __Command__: 17 | ```c 18 | ./autosa ./autosa_tests/large/mm_int16/kernel.c --config=./autosa_config/autosa_config.json --target=autosa_hls_c --output-dir=./autosa.tmp/output --sa-sizes="{kernel[]->space_time[3];kernel[]->array_part[256,256,32];kernel[]->latency[16,16];kernel[]->simd[32]}" 
--simd-info=./autosa_tests/large/mm_int16/simd_info.json --host-serialize --data-pack-sizes="{kernel[]->A[32,32,64];kernel[]->B[32,32,64];kernel[]->C[32,32,64]}" 19 | ``` 20 | 21 | After compilation, you will find all generated files under the directory `autosa.tmp/output/src`. Copy the `Makefile` and `connectivity.cfg` to the directory `autosa.tmp/output`. 22 | 23 | ``` 24 | cp autosa_tests/large/mm_int16/Makefile autosa.tmp/output/ 25 | cp autosa_tests/large/mm_int16/connectivity.cfg autosa.tmp/output/ 26 | ``` 27 | 28 | Execute the makefile to build the design. 29 | 30 | ``` 31 | cd autosa.tmp/output 32 | make all 33 | ``` -------------------------------------------------------------------------------- /autosa_tests/large/mm_int16/connectivity.cfg: -------------------------------------------------------------------------------- 1 | [connectivity] 2 | sp=kernel0_1.A:DDR[0] 3 | sp=kernel0_1.B:DDR[1] 4 | sp=kernel0_1.C:DDR[3] 5 | -------------------------------------------------------------------------------- /autosa_tests/large/mm_int16/hls_script.tcl: -------------------------------------------------------------------------------- 1 | ############################################################ 2 | ## This file is generated automatically by Vivado HLS. 3 | ## Please DO NOT edit it. 4 | ## Copyright (C) 1986-2019 Xilinx, Inc. All Rights Reserved. 
#include "kernel.h"

/*
 * Host-side test program for the int16 matrix-multiplication kernel.
 *
 * Fills A and B with pseudo-random data, computes C = A * B^T twice (once
 * inside the scop region and once as a plain reference), and reports how
 * many elements differ. data_t and the sizes I, J, K come from kernel.h.
 * Always returns 0; the pass/fail result is only printed.
 */
int main(int argc, char **argv) {
  // Earlier layout kept for reference: B was indexed [K][J]. The active
  // declaration stores B as [J][K], so both operands are read along k.
//  data_t A[I][K], B[K][J], C[I][J], C_golden[I][J]; 
  // static storage duration: the four arrays are not automatic (stack) objects.
  static data_t A[I][K], B[J][K], C[I][J], C_golden[I][J]; 

  // Initialise A with values in [0, 99].
  for (int i = 0; i < I; i++) 
    for (int k = 0; k < K; k++) {
      A[i][k] = rand() % 100;
    }

  // Initialise B with values in [0, 99].
  for (int j = 0; j < J; j++)
    for (int k = 0; k < K; k++) {
      B[j][k] = rand() % 100;
    }

  // Kernel under test. NOTE(review): presumably this scop region is what the
  // AutoSA compiler extracts and maps to hardware -- keep the loop nest in
  // this exact form unless the tool flow is re-validated.
#pragma scop
  for (int i = 0; i < I; i++)
    for (int j = 0; j < J; j++) {
      C[i][j] = 0;
      for (int k = 0; k < K; k++) {
        C[i][j] = C[i][j] + A[i][k] * B[j][k];
      }
    }
#pragma endscop

  // Reference result: the same loop nest, written outside the scop region.
  for (int i = 0; i < I; i++)
    for (int j = 0; j < J; j++) {
      C_golden[i][j] = 0;
      for (int k = 0; k < K; k++) {
        C_golden[i][j] = C_golden[i][j] + A[i][k] * B[j][k];
      }
    }

  // Count mismatching elements. With an integer data_t the 0.001 tolerance
  // means any non-zero difference is counted as an error.
  int err = 0;
  for (int i = 0; i < I; i++)
    for (int j = 0; j < J; j++) {
      if (abs(C_golden[i][j] - C[i][j]) > 0.001)
        err++;
    }

  if (err)
    printf("Failed with %d errors!\n", err);
  else
    printf("Passed!\n");

  return 0;
}
-------------------------------------------------------------------------------- /autosa_tests/large/mm_int16/kernel.h: -------------------------------------------------------------------------------- 1 | #include "stdio.h" 2 | #include "stdlib.h" 3 | #include "math.h" 4 | 5 | typedef unsigned short data_t; 6 | #define I 1024 7 | #define J 1024 8 | #define K 1024 -------------------------------------------------------------------------------- /autosa_tests/large/mm_int16/simd_info.json: -------------------------------------------------------------------------------- 1 | { 2 | "kernel0": { 3 | "reduction": ["y"] 4 | }, 5 | "kernel1": { 6 | "reduction": ["y"] 7 | }, 8 | "kernel3": { 9 | "reduction": ["y"] 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /autosa_tests/large/mm_int16/step1-run-hls.tcl: -------------------------------------------------------------------------------- 1 | open_project kernel0 2 | set_top kernel0 3 | add_files "src/kernel_kernel.cpp" 4 | #add_files -tb PATH_TO_TESTBENCH_FILE 5 | 6 | open_solution solution 7 | 8 | #u250 9 | set_part xcu250-figd2104-2L-e 10 | 11 | # u280 12 | #set_part xcu280-fsvh2892-2L-e 13 | 14 | # 300 MHz 15 | create_clock -period 3.333 16 | 17 | config_dataflow -strict_mode warning 18 | set_clock_uncertainty 27.000000% 19 | config_rtl -enable_maxiConservative=1 20 | config_interface -m_axi_addr64 21 | 22 | # to enable integration with Vitis 23 | config_sdx -target xocc 24 | 25 | #csim_design 26 | csynth_design 27 | close_project 28 | exit 29 | -------------------------------------------------------------------------------- /autosa_tests/large/mm_int16/step3-pack-xo.tcl: -------------------------------------------------------------------------------- 1 | open_project kernel0 2 | open_solution solution 3 | export_design -rtl verilog -format ip_catalog -xo kernel0.xo 4 | 5 | close_project 6 | puts "Pack XO successfully" 7 | exit 8 | 
"""Print the unrolled multiply/add reduction tree for the int16 MM kernel.

Running the script writes one C statement per line to stdout: the leaf
layer multiplies pairs of ``local_A``/``local_B`` elements, every higher
layer adds two results of the layer below, and the final statement
accumulates the root ``add_0_0`` into ``local_C[c7][c6]``.

NOTE(review): the emitted text is presumably pasted by hand into the
generated PE code -- confirm against the kernel sources.
"""
import math

# Modify the parameters here
UNROLL_FACTOR = 32
DATA_T = 'unsigned short'


def generate_reduction_tree(unroll_factor=UNROLL_FACTOR, data_type=DATA_T):
    """Return the reduction-tree statements as a list of strings.

    Args:
        unroll_factor: Number of element-wise products to reduce. Must be a
            power of two and at least 2; anything else would silently drop
            operands or reference an undefined ``add_0_0``.
        data_type: C type used for every emitted temporary.

    Returns:
        The C statements in emission order, ending with the accumulate
        into ``local_C[c7][c6]``.

    Raises:
        ValueError: If ``unroll_factor`` is not an integer power of two >= 2.
    """
    if (not isinstance(unroll_factor, int) or unroll_factor < 2
            or unroll_factor & (unroll_factor - 1)):
        raise ValueError(
            f'UNROLL_FACTOR must be a power of two >= 2, got {unroll_factor!r}')

    # Exact for powers of two, which is all that reaches this point.
    level = int(math.log2(unroll_factor))
    leaf = level - 1

    lines = []
    for layer in range(leaf, -1, -1):
        # Layer `layer` holds 2**layer partial results.
        for i in range(1 << layer):
            if layer == leaf:
                # Leaf: multiply elements 2i and 2i+1 and add the products.
                lines.append(
                    f'{data_type} mul_{layer}_{i}_0 = '
                    f'local_A[0][{i*2}] * local_B[0][{i*2}];')
                lines.append(
                    f'{data_type} add_{layer}_{i} = mul_{layer}_{i}_0 + '
                    f'local_A[0][{i*2+1}] * local_B[0][{i*2+1}];')
            else:
                # add_[layer]_[i] = add_[layer+1]_[2i] + add_[layer+1]_[2i+1]
                lines.append(
                    f'{data_type} add_{layer}_{i} = '
                    f'add_{layer+1}_{i*2} + add_{layer+1}_{i*2+1};')
    lines.append('local_C[c7][c6] += add_0_0;')
    return lines


if __name__ == '__main__':
    print('\n'.join(generate_reduction_tree()))
--data-pack-sizes="{kernel[]->A[32,32,64];kernel[]->B[32,32,64];kernel[]->C[32,32,64]}" 19 | ``` 20 | 21 | After compilation, you will find all generated files under the directory `autosa.tmp/output/src`. Copy the `Makefile` and `connectivity.cfg` to the directory `autosa.tmp/output`. 22 | 23 | ``` 24 | cp autosa_tests/large/mm_int8/Makefile autosa.tmp/output/ 25 | cp autosa_tests/large/mm_int8/connectivity.cfg autosa.tmp/output/ 26 | ``` 27 | 28 | Execute the makefile to build the design. 29 | 30 | ``` 31 | cd autosa.tmp/output 32 | make all 33 | ``` -------------------------------------------------------------------------------- /autosa_tests/large/mm_int8/connectivity.cfg: -------------------------------------------------------------------------------- 1 | [connectivity] 2 | sp=kernel0_1.A:DDR[0] 3 | sp=kernel0_1.B:DDR[1] 4 | sp=kernel0_1.C:DDR[3] 5 | -------------------------------------------------------------------------------- /autosa_tests/large/mm_int8/hls_script.tcl: -------------------------------------------------------------------------------- 1 | ############################################################ 2 | ## This file is generated automatically by Vivado HLS. 3 | ## Please DO NOT edit it. 4 | ## Copyright (C) 1986-2019 Xilinx, Inc. All Rights Reserved. 
#include "kernel.h"

/*
 * Host-side test program for the int8 matrix-multiplication kernel.
 *
 * Fills A and B with ones, computes C = A * B^T twice (once inside the scop
 * region and once as a plain reference), and reports how many elements
 * differ. data_t and the sizes I, J, K come from kernel.h.
 * Always returns 0; the pass/fail result is only printed.
 */
int main(int argc, char **argv) {
  // Earlier layout kept for reference: B was indexed [K][J]. The active
  // declaration stores B as [J][K], so both operands are read along k.
//  data_t A[I][K], B[K][J], C[I][J], C_golden[I][J]; 
  // static storage duration: the four arrays are not automatic (stack) objects.
  static data_t A[I][K], B[J][K], C[I][J], C_golden[I][J]; 

  // Every element of A is 1.
  for (int i = 0; i < I; i++) 
    for (int k = 0; k < K; k++) {
      A[i][k] = 1;
    }

  // Every element of B is 1.
  for (int j = 0; j < J; j++)
    for (int k = 0; k < K; k++) {
      B[j][k] = 1;
    }

  // Kernel under test. NOTE(review): presumably this scop region is what the
  // AutoSA compiler extracts and maps to hardware -- keep the loop nest in
  // this exact form unless the tool flow is re-validated.
  // NOTE(review): kernel.h sets data_t to char and K to 1024, so each sum
  // exceeds the range of char when stored back; the reference below narrows
  // the same way, so the comparison is unaffected -- confirm this is intended.
#pragma scop
  for (int i = 0; i < I; i++)
    for (int j = 0; j < J; j++) {
      C[i][j] = 0;
      for (int k = 0; k < K; k++) {
        C[i][j] = C[i][j] + A[i][k] * B[j][k];
      }
    }
#pragma endscop

  // Reference result: the same loop nest, written outside the scop region.
  for (int i = 0; i < I; i++)
    for (int j = 0; j < J; j++) {
      C_golden[i][j] = 0;
      for (int k = 0; k < K; k++) {
        C_golden[i][j] = C_golden[i][j] + A[i][k] * B[j][k];
      }
    }

  // Count mismatching elements. With an integer data_t the 0.001 tolerance
  // means any non-zero difference is counted as an error.
  int err = 0;
  for (int i = 0; i < I; i++)
    for (int j = 0; j < J; j++) {
      if (abs(C_golden[i][j] - C[i][j]) > 0.001)
        err++;
    }

  if (err)
    printf("Failed with %d errors!\n", err);
  else
    printf("Passed!\n");

  return 0;
}
/autosa_tests/large/mm_int8/kernel.h: -------------------------------------------------------------------------------- 1 | #include "stdio.h" 2 | #include "stdlib.h" 3 | #include "math.h" 4 | 5 | typedef char data_t; 6 | //#define I 1024 7 | //#define J 1024 8 | //#define K 1024 9 | 10 | // Test case 1 11 | // kernel3 2D IxJ 12 | #define I 1056 13 | #define J 1024 14 | #define K 1024 -------------------------------------------------------------------------------- /autosa_tests/large/mm_int8/simd_info.json: -------------------------------------------------------------------------------- 1 | { 2 | "kernel0": { 3 | "reduction": ["y"] 4 | }, 5 | "kernel1": { 6 | "reduction": ["y"] 7 | }, 8 | "kernel3": { 9 | "reduction": ["y"] 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /autosa_tests/large/mm_int8/step1-run-hls.tcl: -------------------------------------------------------------------------------- 1 | open_project kernel0 2 | set_top kernel0 3 | add_files "src/kernel_kernel.cpp" 4 | #add_files -tb PATH_TO_TESTBENCH_FILE 5 | 6 | open_solution solution 7 | 8 | #u250 9 | set_part xcu250-figd2104-2L-e 10 | 11 | # u280 12 | #set_part xcu280-fsvh2892-2L-e 13 | 14 | # 300 MHz 15 | create_clock -period 3.333 16 | 17 | config_dataflow -strict_mode warning 18 | set_clock_uncertainty 27.000000% 19 | config_rtl -enable_maxiConservative=1 20 | config_interface -m_axi_addr64 21 | 22 | # to enable integration with Vitis 23 | config_sdx -target xocc 24 | 25 | #csim_design 26 | csynth_design 27 | close_project 28 | exit 29 | -------------------------------------------------------------------------------- /autosa_tests/large/mm_int8/step3-pack-xo.tcl: -------------------------------------------------------------------------------- 1 | open_project kernel0 2 | open_solution solution 3 | export_design -rtl verilog -format ip_catalog -xo kernel0.xo 4 | 5 | close_project 6 | puts "Pack XO successfully" 7 | exit 8 | 
"""Print the unrolled multiply/add reduction tree for the int8 MM kernel.

Running the script writes to stdout, in this order:

1. the C declarations of the tree (leaf layer multiplies pairs of
   ``local_A``/``local_B`` elements, higher layers add two results of the
   layer below),
2. one ``#pragma HLS RESOURCE`` line per leaf multiplier (``Mul_LUT``) and
   per non-leaf adder (``AddSub``),
3. the accumulate of the root ``add_0_0`` into ``local_C[c7][c6]``.

NOTE(review): leaf-layer ``add_*`` variables get no RESOURCE pragma; this
mirrors the original script -- confirm that is intended.
"""
import math

# Modify the parameters here
UNROLL_FACTOR = 64
DATA_T = 'char'


def generate_reduction_tree(unroll_factor=UNROLL_FACTOR, data_type=DATA_T):
    """Return the reduction-tree statements and pragmas as a list of strings.

    Args:
        unroll_factor: Number of element-wise products to reduce. Must be a
            power of two and at least 2; anything else would silently drop
            operands or reference an undefined ``add_0_0``.
        data_type: C type used for every emitted temporary.

    Returns:
        The output lines in emission order: declarations, resource pragmas,
        then the accumulate into ``local_C[c7][c6]``.

    Raises:
        ValueError: If ``unroll_factor`` is not an integer power of two >= 2.
    """
    if (not isinstance(unroll_factor, int) or unroll_factor < 2
            or unroll_factor & (unroll_factor - 1)):
        raise ValueError(
            f'UNROLL_FACTOR must be a power of two >= 2, got {unroll_factor!r}')

    # Exact for powers of two, which is all that reaches this point.
    level = int(math.log2(unroll_factor))
    leaf = level - 1
    # (layer, index) of every tree node, leaf layer first; layer L has 2**L nodes.
    nodes = [(layer, i)
             for layer in range(leaf, -1, -1)
             for i in range(1 << layer)]

    lines = []
    # Generate the code
    for layer, i in nodes:
        if layer == leaf:
            # Leaf: multiply elements 2i and 2i+1 and add the products.
            lines.append(
                f'{data_type} mul_{layer}_{i}_0 = '
                f'local_A[0][{i*2}] * local_B[0][{i*2}];')
            lines.append(
                f'{data_type} add_{layer}_{i} = mul_{layer}_{i}_0 + '
                f'local_A[0][{i*2+1}] * local_B[0][{i*2+1}];')
        else:
            # add_[layer]_[i] = add_[layer+1]_[2i] + add_[layer+1]_[2i+1]
            lines.append(
                f'{data_type} add_{layer}_{i} = '
                f'add_{layer+1}_{i*2} + add_{layer+1}_{i*2+1};')

    # Add resource
    for layer, i in nodes:
        if layer == leaf:
            lines.append(
                f'#pragma HLS RESOURCE variable=mul_{layer}_{i}_0 core=Mul_LUT')
        else:
            lines.append(
                f'#pragma HLS RESOURCE variable=add_{layer}_{i} core=AddSub')

    lines.append('local_C[c7][c6] += add_0_0;')
    return lines


if __name__ == '__main__':
    print('\n'.join(generate_reduction_tree()))
--output-dir=./autosa.tmp/output --sa-sizes="{kernel[]->space_time[3];kernel[]->array_part[260,256,512];kernel[]->latency[20,16];kernel[]->simd[8]}" --simd-info=./autosa_tests/large/mm_intel/simd_info.json --host-serialize --loop-infinitize --double-buffer-style=0 --mem-port-map="{kernel[]->A[0];kernel[]->B[1];kernel[]->C[2]}" 18 | ``` 19 | 20 | After compilation, you will find all generated files under the directory `autosa.tmp/output/src`. Copy the `Makefile` to the directory `autosa.tmp/output`. 21 | 22 | ``` 23 | cp autosa_tests/large/mm_intel/Makefile autosa.tmp/output/ 24 | ``` 25 | 26 | Execute the makefile to perform software emulation 27 | ``` 28 | make sw_emu_check 29 | ``` 30 | or synthesize the design to RTL 31 | ``` 32 | make hls 33 | ``` 34 | or generate the bitstream 35 | ``` 36 | make hw 37 | ``` -------------------------------------------------------------------------------- /autosa_tests/large/mm_intel/kernel.h: -------------------------------------------------------------------------------- 1 | #include "stdio.h" 2 | #include "stdlib.h" 3 | #include "math.h" 4 | 5 | typedef float data_t; 6 | 7 | #define I 1040 8 | #define J 1024 9 | #define K 1024 10 | -------------------------------------------------------------------------------- /autosa_tests/large/mm_intel/simd_info.json: -------------------------------------------------------------------------------- 1 | { 2 | "kernel0": { 3 | "reduction": ["y"] 4 | }, 5 | "kernel1": { 6 | "reduction": ["y"] 7 | }, 8 | "kernel2": { 9 | "reduction": ["y"] 10 | }, 11 | "kernel3": { 12 | "reduction": ["y"] 13 | }, 14 | "kernel4": { 15 | "reduction": ["y"] 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /autosa_tests/large/mttkrp/README.md: -------------------------------------------------------------------------------- 1 | # Matricized Tensor Times Khatri-Rao Product (MTTKRP) 2 | 3 | Board | Software Version 4 | 
-------------|----------------- 5 | Xilinx Alveo U250 | Xilinx Vitis 2019.2 6 | 7 | __Files__: 8 | ``` 9 | autosa_tests/large/mttkrp/kernel.c 10 | autosa_tests/large/mttkrp/kernel.h 11 | autosa_tests/large/mttkrp/simd_info.json 12 | autosa_tests/large/mttkrp/Makefile 13 | autosa_tests/large/mttkrp/connectivity.cfg 14 | ``` 15 | 16 | __Command__: 17 | ```c 18 | ./autosa ./autosa_tests/large/mttkrp/kernel.c --config=./autosa_config/autosa_config.json --target=autosa_hls_c --output-dir=./autosa.tmp/output --sa-sizes="{kernel[]->space_time[3];kernel[]->array_part[128,128,2];kernel[]->latency[16,8];kernel[]->simd[8,1]}" --simd-info=./autosa_tests/large/mttkrp/simd_info.json --host-serialize 19 | ``` 20 | 21 | After compilation, you will find all generated files under the directory `autosa.tmp/output/src`. Copy the `Makefile` and `connectivity.cfg` to the directory `autosa.tmp/output`. 22 | 23 | ``` 24 | cp autosa_tests/large/mttkrp/Makefile autosa.tmp/output/ 25 | cp autosa_tests/large/mttkrp/connectivity.cfg autosa.tmp/output/ 26 | ``` 27 | 28 | Execute the makefile to build the design. 
29 | 30 | ``` 31 | cd autosa.tmp/output 32 | make all 33 | ``` -------------------------------------------------------------------------------- /autosa_tests/large/mttkrp/connectivity.cfg: -------------------------------------------------------------------------------- 1 | [connectivity] 2 | sp=kernel0_1.A:DDR[0] 3 | sp=kernel0_1.B:DDR[1] 4 | sp=kernel0_1.C:DDR[2] 5 | sp=kernel0_1.D:DDR[3] -------------------------------------------------------------------------------- /autosa_tests/large/mttkrp/kernel.h: -------------------------------------------------------------------------------- 1 | #include "stdio.h" 2 | #include "stdlib.h" 3 | #include "math.h" 4 | 5 | typedef float data_t; 6 | #define I 256 7 | //#define J 256 8 | #define J 336 9 | #define K 256 10 | #define L 256 11 | -------------------------------------------------------------------------------- /autosa_tests/large/mttkrp/simd_info.json: -------------------------------------------------------------------------------- 1 | { 2 | "kernel3": { 3 | "reduction": ["y", "y"] 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /autosa_tests/large/mttkrp/step1-run-hls.tcl: -------------------------------------------------------------------------------- 1 | open_project kernel0 2 | set_top kernel0 3 | add_files "src/kernel_kernel.cpp" 4 | #add_files -tb PATH_TO_TESTBENCH_FILE 5 | 6 | open_solution solution 7 | 8 | #u250 9 | set_part xcu250-figd2104-2L-e 10 | 11 | # u280 12 | #set_part xcu280-fsvh2892-2L-e 13 | 14 | # 300 MHz 15 | create_clock -period 3.333 16 | 17 | config_dataflow -strict_mode warning 18 | set_clock_uncertainty 27.000000% 19 | config_rtl -enable_maxiConservative=1 20 | config_interface -m_axi_addr64 21 | 22 | # to enable integration with Vitis 23 | config_sdx -target xocc 24 | 25 | #csim_design 26 | csynth_design 27 | close_project 28 | exit 29 | -------------------------------------------------------------------------------- 
/autosa_tests/large/mttkrp/step3-pack-xo.tcl: -------------------------------------------------------------------------------- 1 | open_project kernel0 2 | open_solution solution 3 | export_design -rtl verilog -format ip_catalog -xo kernel0.xo 4 | 5 | close_project 6 | puts "Pack XO successfully" 7 | exit 8 | -------------------------------------------------------------------------------- /autosa_tests/large/ttm/README.md: -------------------------------------------------------------------------------- 1 | # Tensor Times Matrix (TTM) 2 | 3 | Board | Software Version 4 | -------------|----------------- 5 | Xilinx Alveo U250 | Xilinx Vitis 2019.2 6 | 7 | __Files__: 8 | ``` 9 | autosa_tests/large/ttm/kernel.c 10 | autosa_tests/large/ttm/kernel.h 11 | autosa_tests/large/ttm/simd_info.json 12 | autosa_tests/large/ttm/Makefile 13 | autosa_tests/large/ttm/connectivity.cfg 14 | ``` 15 | 16 | __Command__: 17 | ```c 18 | ``` 19 | 20 | After compilation, you will find all generated files under the directory `autosa.tmp/output/src`. Copy the `Makefile` and `connectivity.cfg` to the directory `autosa.tmp/output`. 21 | 22 | ``` 23 | cp autosa_tests/large/ttm/Makefile autosa.tmp/output/ 24 | cp autosa_tests/large/ttm/connectivity.cfg autosa.tmp/output/ 25 | ``` 26 | 27 | Execute the makefile to build the design. 
28 | 29 | ``` 30 | cd autosa.tmp/output 31 | make all 32 | ``` -------------------------------------------------------------------------------- /autosa_tests/large/ttm/connectivity.cfg: -------------------------------------------------------------------------------- 1 | [connectivity] 2 | sp=kernel0_1.A:DDR[0] 3 | sp=kernel0_1.B:DDR[1] 4 | sp=kernel0_1.C:DDR[2] 5 | -------------------------------------------------------------------------------- /autosa_tests/large/ttm/kernel.h: -------------------------------------------------------------------------------- 1 | #include "stdio.h" 2 | #include "stdlib.h" 3 | #include "math.h" 4 | 5 | typedef float data_t; 6 | //#define I 256 7 | //#define J 256 8 | //#define K 256 9 | //#define L 256 10 | 11 | #define I 264 12 | #define J 256 13 | #define K 256 14 | #define L 256 15 | -------------------------------------------------------------------------------- /autosa_tests/large/ttm/simd_info.json: -------------------------------------------------------------------------------- 1 | { 2 | "kernel4": { 3 | "reduction": ["y"] 4 | }, 5 | "kernel5": { 6 | "reduction": ["y"] 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /autosa_tests/large/ttmc/README.md: -------------------------------------------------------------------------------- 1 | # Chain of Tensor-matrix multiplications (TTMc) 2 | 3 | Board | Software Version 4 | -------------|----------------- 5 | Xilinx Alveo U250 | Xilinx Vitis 2019.2 6 | 7 | __Files__: 8 | ``` 9 | autosa_tests/large/ttmc/kernel.c 10 | autosa_tests/large/ttmc/kernel.h 11 | autosa_tests/large/ttmc/simd_info.json 12 | autosa_tests/large/ttmc/Makefile 13 | autosa_tests/large/ttmc/connectivity.cfg 14 | ``` 15 | 16 | __Command__: 17 | ```c 18 | ./autosa ./autosa_tests/large/ttmc/kernel.c --config=./autosa_config/autosa_config.json --target=autosa_hls_c --output-dir=./autosa.tmp/output 
--sa-sizes="{kernel[]->space_time[4];kernel[]->array_part[16,64,16,32];kernel[]->latency[1,8,8];kernel[]->simd[8,1]}" --simd-info=./autosa_tests/large/ttmc/simd_info.json --host-serialize 19 | ``` 20 | 21 | After compilation, you will find all generated files under the directory `autosa.tmp/output/src`. Copy the `Makefile` and `connectivity.cfg` to the directory `autosa.tmp/output`. 22 | 23 | ``` 24 | cp autosa_tests/large/ttmc/Makefile autosa.tmp/output/ 25 | cp autosa_tests/large/ttmc/connectivity.cfg autosa.tmp/output/ 26 | ``` 27 | 28 | Execute the makefile to build the design. 29 | 30 | ``` 31 | cd autosa.tmp/output 32 | make all 33 | ``` -------------------------------------------------------------------------------- /autosa_tests/large/ttmc/connectivity.cfg: -------------------------------------------------------------------------------- 1 | [connectivity] 2 | sp=kernel0_1.A:DDR[0] 3 | sp=kernel0_1.B:DDR[1] 4 | sp=kernel0_1.C:DDR[2] 5 | sp=kernel0_1.D:DDR[3] 6 | -------------------------------------------------------------------------------- /autosa_tests/large/ttmc/kernel.h: -------------------------------------------------------------------------------- 1 | #include "stdio.h" 2 | #include "stdlib.h" 3 | #include "math.h" 4 | 5 | typedef float data_t; 6 | #define I 128 7 | #define J 128 8 | #define K 128 9 | #define L 128 10 | #define M 128 11 | -------------------------------------------------------------------------------- /autosa_tests/large/ttmc/simd_info.json: -------------------------------------------------------------------------------- 1 | { 2 | "kernel4": { 3 | "reduction": ["y", "y"] 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /autosa_tests/large/ttmc/step1-run-hls.tcl: -------------------------------------------------------------------------------- 1 | open_project kernel0 2 | set_top kernel0 3 | add_files "src/kernel_kernel.cpp" 4 | #add_files -tb PATH_TO_TESTBENCH_FILE 5 | 6 | open_solution 
solution 7 | 8 | #u250 9 | set_part xcu250-figd2104-2L-e 10 | 11 | # u280 12 | #set_part xcu280-fsvh2892-2L-e 13 | 14 | # 300 MHz 15 | create_clock -period 3.333 16 | 17 | config_dataflow -strict_mode warning 18 | set_clock_uncertainty 27.000000% 19 | config_rtl -enable_maxiConservative=1 20 | config_interface -m_axi_addr64 21 | 22 | # to enable integration with Vitis 23 | config_sdx -target xocc 24 | 25 | #csim_design 26 | csynth_design 27 | close_project 28 | exit 29 | -------------------------------------------------------------------------------- /autosa_tests/large/ttmc/step3-pack-xo.tcl: -------------------------------------------------------------------------------- 1 | open_project kernel0 2 | open_solution solution 3 | export_design -rtl verilog -format ip_catalog -xo kernel0.xo 4 | 5 | close_project 6 | puts "Pack XO successfully" 7 | exit 8 | -------------------------------------------------------------------------------- /autosa_tests/lu/README.md: -------------------------------------------------------------------------------- 1 | # LU Decomposition (Small) 2 | 3 | Board | Software Version 4 | -------------|----------------- 5 | Xilinx Alveo U250 | Xilinx Vitis 2019.2 6 | 7 | __Files__: 8 | ``` 9 | autosa_tests/lu/kernel.c 10 | autosa_tests/lu/kernel.h 11 | autosa_tests/lu/simd_info.json 12 | autosa_tests/lu/Makefile 13 | autosa_tests/lu/connectivity.cfg 14 | ``` 15 | 16 | __Command__: 17 | ```bash 18 | ./autosa ./autosa_tests/lu/kernel.c --config=./autosa_config/autosa_config.json --target=autosa_hls_c --output-dir=./autosa.tmp/output --sa-sizes="{kernel[]->space_time[3];kernel[]->array_part[-1,-1,-1];kernel[]->latency[]}" --simd-info=./autosa_tests/lu/simd_info.json --use-cplusplus-template --no-reschedule --live-range-reordering 19 | ``` 20 | 21 | After compilation, you will find all generated files under the directory `autosa.tmp/output/src`. Copy the `Makefile` and `connectivity.cfg` to the directory `autosa.tmp/output`. 
22 | 23 | ``` 24 | cp autosa_tests/lu/Makefile autosa.tmp/output/ 25 | cp autosa_tests/lu/connectivity.cfg autosa.tmp/output/ 26 | ``` 27 | 28 | Execute the makefile to build the design. 29 | 30 | ``` 31 | cd autosa.tmp/output 32 | make all 33 | ``` -------------------------------------------------------------------------------- /autosa_tests/lu/hls_script.tcl: -------------------------------------------------------------------------------- 1 | ############################################################ 2 | ## This file is generated automatically by Vivado HLS. 3 | ## Please DO NOT edit it. 4 | ## Copyright (C) 1986-2019 Xilinx, Inc. All Rights Reserved. 5 | ############################################################ 6 | open_project hls_prj 7 | set_top kernel0 8 | add_files src/kernel_kernel.h 9 | add_files src/kernel_kernel.cpp 10 | add_files -tb src/kernel_host.cpp 11 | open_solution "solution1" 12 | set_part {xcu200-fsgd2104-2-e} 13 | create_clock -period 5 -name default 14 | config_compile -name_max_length 50 15 | #source "./prj/solution1/directives.tcl" 16 | csim_design 17 | #csynth_design 18 | #cosim_design 19 | #cosim_design -trace_level all 20 | #cosim_design -setup -trace_level all 21 | #export_design -format ip_catalog 22 | exit 23 | -------------------------------------------------------------------------------- /autosa_tests/lu/kernel.h: -------------------------------------------------------------------------------- 1 | #include <stdio.h> 2 | #include <stdlib.h> 3 | #include <math.h> 4 | 5 | typedef float data_t; 6 | //#define N 3 7 | #define N 32 8 | -------------------------------------------------------------------------------- /autosa_tests/lu/simd_info.json: -------------------------------------------------------------------------------- 1 | { 2 | "kernel3": { 3 | "reduction": ["n"] 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /autosa_tests/mm/connectivity.cfg:
-------------------------------------------------------------------------------- 1 | [connectivity] 2 | sp=kernel0_1.A:DDR[0] 3 | sp=kernel0_1.B:DDR[1] 4 | sp=kernel0_1.C:DDR[2] 5 | -------------------------------------------------------------------------------- /autosa_tests/mm/hls_script.tcl: -------------------------------------------------------------------------------- 1 | ############################################################ 2 | ## This file is generated automatically by Vivado HLS. 3 | ## Please DO NOT edit it. 4 | ## Copyright (C) 1986-2019 Xilinx, Inc. All Rights Reserved. 5 | ############################################################ 6 | open_project hls_prj 7 | set_top kernel0 8 | add_files src/kernel_kernel.h 9 | add_files src/kernel_kernel.cpp 10 | add_files -tb src/kernel_host.cpp 11 | open_solution "solution1" 12 | set_part {xcu200-fsgd2104-2-e} 13 | create_clock -period 5 -name default 14 | config_compile -name_max_length 50 15 | #source "./prj/solution1/directives.tcl" 16 | csim_design 17 | #csynth_design 18 | #cosim_design 19 | #cosim_design -trace_level all 20 | #cosim_design -setup -trace_level all 21 | #export_design -format ip_catalog 22 | exit 23 | -------------------------------------------------------------------------------- /autosa_tests/mm/kernel.c: -------------------------------------------------------------------------------- 1 | #include "kernel.h" 2 | // Computes C[i][j] = sum_k A[i][k] * B[j][k] inside the scop region and checks it against a host-computed reference. 3 | int main(int argc, char **argv) { 4 | data_t A[I][K], B[J][K], C[I][J], C_golden[I][J]; 5 | // Fill A and B with pseudo-random values. 6 | for (int i = 0; i < I; i++) 7 | for (int k = 0; k < K; k++) { 8 | A[i][k] = (data_t)rand() / RAND_MAX; 9 | } 10 | 11 | for (int j = 0; j < J; j++) 12 | for (int k = 0; k < K; k++) { 13 | B[j][k] = (data_t)rand() / RAND_MAX; 14 | } 15 | // C is an uninitialized stack array, so it must be zeroed before it is accumulated into. 16 | #pragma scop 17 | for (int i = 0; i < I; i++) 18 | for (int j = 0; j < J; j++) { 19 | C[i][j] = 0; 20 | for (int k = 0; k < K; k++) { 21 | C[i][j] = C[i][j] + A[i][k] * B[j][k]; 22 | } 23 | } 24 | #pragma endscop 25 | // Recompute the same product as the golden reference. 26 | for (int i = 0; i
< I; i++) 27 | for (int j = 0; j < J; j++) { 28 | C_golden[i][j] = 0; 29 | for (int k = 0; k < K; k++) { 30 | C_golden[i][j] = C_golden[i][j] + A[i][k] * B[j][k]; 31 | } 32 | } 33 | // Count entries that differ from the reference by more than 0.001. 34 | int err = 0; 35 | for (int i = 0; i < I; i++) 36 | for (int j = 0; j < J; j++) { 37 | if (fabs((float)C_golden[i][j] - (float)C[i][j]) > 0.001) 38 | err++; 39 | } 40 | 41 | if (err) 42 | printf("Failed with %d errors!\n", err); 43 | else 44 | printf("Passed!\n"); 45 | 46 | return 0; 47 | } 48 | -------------------------------------------------------------------------------- /autosa_tests/mm/kernel.h: -------------------------------------------------------------------------------- 1 | #include <stdio.h> 2 | #include <stdlib.h> 3 | #include <math.h> 4 | 5 | typedef float data_t; 6 | //#define I 256 7 | //#define J 264 8 | //#define K 256 9 | 10 | //#define I 128 11 | //#define J 128 12 | //#define K 128 13 | 14 | #define I 64 15 | #define J 64 16 | #define K 64 17 | -------------------------------------------------------------------------------- /autosa_tests/mm/param_names.json: -------------------------------------------------------------------------------- 1 | { 2 | "kernel0": ["i", "j", "k"], 3 | "kernel1": ["i", "j", "k"], 4 | "kernel2": ["i", "j", "k"], 5 | "kernel3": ["i", "j", "k"], 6 | "kernel4": ["i", "j", "k"], 7 | "kernel5": ["i", "j", "k"] 8 | } 9 | -------------------------------------------------------------------------------- /autosa_tests/mm/simd_info.json: -------------------------------------------------------------------------------- 1 | { 2 | "kernel0": { 3 | "reduction": ["y"] 4 | }, 5 | "kernel1": { 6 | "reduction": ["y"] 7 | }, 8 | "kernel2": { 9 | "reduction": ["y"] 10 | }, 11 | "kernel3": { 12 | "reduction": ["y"] 13 | }, 14 | "kernel4": { 15 | "reduction": ["y"] 16 | }, 17 | "kernel5": { 18 | "reduction": ["y"] 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /autosa_tests/mm_block_sparse/connectivity.cfg:
-------------------------------------------------------------------------------- 1 | [connectivity] 2 | sp=kernel0_1.A:DDR[0] 3 | sp=kernel0_1.B:DDR[1] 4 | sp=kernel0_1.C:DDR[2] 5 | -------------------------------------------------------------------------------- /autosa_tests/mm_block_sparse/hls_script.tcl: -------------------------------------------------------------------------------- 1 | ############################################################ 2 | ## This file is generated automatically by Vivado HLS. 3 | ## Please DO NOT edit it. 4 | ## Copyright (C) 1986-2019 Xilinx, Inc. All Rights Reserved. 5 | ############################################################ 6 | open_project hls_prj 7 | set_top kernel0 8 | add_files src/kernel_kernel.h 9 | add_files src/kernel_kernel.cpp 10 | add_files -tb src/kernel_host.cpp 11 | open_solution "solution1" 12 | set_part {xcu200-fsgd2104-2-e} 13 | create_clock -period 5 -name default 14 | config_compile -name_max_length 50 15 | #source "./prj/solution1/directives.tcl" 16 | csim_design 17 | #csynth_design 18 | #cosim_design 19 | #cosim_design -trace_level all 20 | #cosim_design -setup -trace_level all 21 | #export_design -format ip_catalog 22 | exit 23 | -------------------------------------------------------------------------------- /autosa_tests/mm_block_sparse/kernel.h: -------------------------------------------------------------------------------- 1 | #include <stdio.h> 2 | #include <stdlib.h> 3 | #include <math.h> 4 | 5 | typedef float data_t; 6 | #define I 64 7 | #define J 64 8 | #define K 64 9 | 10 | //#define VEC_LEN 4 11 | //#define NON_ZERO_NUM 3 12 | //#define COMPRESS_RATIO (VEC_LEN/NON_ZERO_NUM) 13 | //#define META_DATA_NUM 1 14 | //#define EFF_COMPRESS_RATIO (VEC_LEN/(NON_ZERO_NUM+META_DATA_NUM)) 15 | 16 | #define VEC_LEN 4 17 | #define NON_ZERO_NUM 2 18 | #define COMPRESS_RATIO (VEC_LEN/NON_ZERO_NUM) 19 | #define META_DATA_NUM 2 20 | #define EFF_COMPRESS_RATIO (VEC_LEN/(NON_ZERO_NUM+META_DATA_NUM)) 21 | 22 | //#define VEC_LEN 4 23 |
//#define NON_ZERO_NUM 1 24 | //#define COMPRESS_RATIO (VEC_LEN/NON_ZERO_NUM) 25 | //#define META_DATA_NUM 1 26 | //#define EFF_COMPRESS_RATIO (VEC_LEN/(NON_ZERO_NUM+META_DATA_NUM)) 27 | 28 | //#define VEC_LEN 8 29 | //#define NON_ZERO_NUM 4 30 | //#define COMPRESS_RATIO (VEC_LEN/NON_ZERO_NUM) 31 | //#define META_DATA_NUM 4 32 | //#define EFF_COMPRESS_RATIO (VEC_LEN/(NON_ZERO_NUM+META_DATA_NUM)) 33 | 34 | //#define VEC_LEN 8 35 | //#define NON_ZERO_NUM 3 36 | //#define COMPRESS_RATIO (VEC_LEN/NON_ZERO_NUM) 37 | //#define META_DATA_NUM 1 38 | //#define EFF_COMPRESS_RATIO (VEC_LEN/(NON_ZERO_NUM+META_DATA_NUM)) 39 | 40 | //#define VEC_LEN 8 41 | //#define NON_ZERO_NUM 2 42 | //#define COMPRESS_RATIO (VEC_LEN/NON_ZERO_NUM) 43 | //#define META_DATA_NUM 2 44 | //#define EFF_COMPRESS_RATIO (VEC_LEN/(NON_ZERO_NUM+META_DATA_NUM)) 45 | -------------------------------------------------------------------------------- /autosa_tests/mm_block_sparse/simd_info.json: -------------------------------------------------------------------------------- 1 | { 2 | "kernel0": { 3 | "reduction": ["y"] 4 | }, 5 | "kernel1": { 6 | "reduction": ["y"] 7 | }, 8 | "kernel2": { 9 | "reduction": ["y"] 10 | }, 11 | "kernel3": { 12 | "reduction": ["y"] 13 | }, 14 | "kernel4": { 15 | "reduction": ["y"] 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /autosa_tests/mm_catapult/kernel.c: -------------------------------------------------------------------------------- 1 | #include "kernel.h" 2 | 3 | int main(int argc, char **argv) { 4 | data_t A[I_P][K_P], B[J_P][K_P], C[I_P][J_P], C_golden[I_P][J_P]; // gemm0,3 5 | 6 | for (int i = 0; i < I_P; i++) 7 | for (int k = 0; k < K_P; k++) { 8 | //A[i][k] = (data_t)rand() / RAND_MAX; 9 | A[i][k] = (data_t)1; 10 | } 11 | 12 | for (int j = 0; j < J_P; j++) 13 | for (int k = 0; k < K_P; k++) { 14 | //B[j][k] = (data_t)rand() / RAND_MAX; 15 | B[j][k] = (data_t)1; 16 | } 17 | 18 | #pragma scop 19 | for (int i = 0; i 
< I_P; i++) 20 | for (int j = 0; j < J_P; j++) { 21 | C[i][j] = 0; 22 | for (int k = 0; k < K_P; k++) { 23 | C[i][j] = C[i][j] + A[i][k] * B[j][k]; 24 | } 25 | } 26 | #pragma endscop 27 | 28 | for (int i = 0; i < I_P; i++) 29 | for (int j = 0; j < J_P; j++) { 30 | C_golden[i][j] = 0; 31 | for (int k = 0; k < K_P; k++) { 32 | C_golden[i][j] = C_golden[i][j] + A[i][k] * B[j][k]; 33 | } 34 | } 35 | 36 | int err = 0; 37 | for (int i = 0; i < I_P; i++) 38 | for (int j = 0; j < J_P; j++) { 39 | if (fabs((float)C_golden[i][j] - (float)C[i][j]) > 0.001) 40 | err++; 41 | } 42 | 43 | if (err) 44 | printf("Failed with %d errors!\n", err); 45 | else 46 | printf("Passed!\n"); 47 | 48 | return 0; 49 | } 50 | -------------------------------------------------------------------------------- /autosa_tests/mm_catapult/kernel.h: -------------------------------------------------------------------------------- 1 | #include <stdio.h> 2 | #include <stdlib.h> 3 | #include <math.h> 4 | 5 | //typedef float data_t; 6 | typedef unsigned int data_t; 7 | #define I_P 64 8 | #define J_P 64 9 | #define K_P 64 10 | -------------------------------------------------------------------------------- /autosa_tests/mm_catapult/simd_info.json: -------------------------------------------------------------------------------- 1 | { 2 | "kernel0": { 3 | "reduction": ["y"] 4 | }, 5 | "kernel1": { 6 | "reduction": ["y"] 7 | }, 8 | "kernel2": { 9 | "reduction": ["y"] 10 | }, 11 | "kernel3": { 12 | "reduction": ["y"] 13 | }, 14 | "kernel4": { 15 | "reduction": ["y"] 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /autosa_tests/mm_getting_started/connectivity.cfg: -------------------------------------------------------------------------------- 1 | [connectivity] 2 | sp=kernel0_1.A:DDR[0] 3 | sp=kernel0_1.B:DDR[1] 4 | sp=kernel0_1.C:DDR[2] 5 | -------------------------------------------------------------------------------- /autosa_tests/mm_getting_started/hls_script.tcl:
-------------------------------------------------------------------------------- 1 | ############################################################ 2 | ## This file is generated automatically by Vivado HLS. 3 | ## Please DO NOT edit it. 4 | ## Copyright (C) 1986-2019 Xilinx, Inc. All Rights Reserved. 5 | ############################################################ 6 | open_project hls_prj 7 | set_top kernel0 8 | add_files src/kernel_kernel.h 9 | add_files src/kernel_kernel.cpp 10 | add_files -tb src/kernel_host.cpp 11 | open_solution "solution1" 12 | set_part {xcu200-fsgd2104-2-e} 13 | create_clock -period 5 -name default 14 | config_compile -name_max_length 50 15 | #source "./prj/solution1/directives.tcl" 16 | csim_design 17 | #csynth_design 18 | #cosim_design 19 | #cosim_design -trace_level all 20 | #cosim_design -setup -trace_level all 21 | #export_design -format ip_catalog 22 | exit 23 | -------------------------------------------------------------------------------- /autosa_tests/mm_getting_started/kernel.h: -------------------------------------------------------------------------------- 1 | #include <stdio.h> 2 | #include <stdlib.h> 3 | #include <math.h> 4 | 5 | typedef float data_t; 6 | #define I 64 7 | #define J 64 8 | #define K 64 9 | -------------------------------------------------------------------------------- /autosa_tests/mm_getting_started/simd_info.json: -------------------------------------------------------------------------------- 1 | { 2 | "kernel0": { 3 | "reduction": ["y"] 4 | }, 5 | "kernel1": { 6 | "reduction": ["y"] 7 | }, 8 | "kernel2": { 9 | "reduction": ["y"] 10 | }, 11 | "kernel3": { 12 | "reduction": ["y"] 13 | }, 14 | "kernel4": { 15 | "reduction": ["y"] 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /autosa_tests/mm_hbm/README.md: -------------------------------------------------------------------------------- 1 | # Matrix Multiplication (HBM) 2 | 3 | This is an example of small-size matrix multiplication using
high-bandwidth memory (HBM). 4 | 5 | Board | Software Version 6 | -------------|----------------- 7 | Xilinx Alveo U280 | Xilinx Vitis 2019.2 8 | 9 | __Files__: 10 | ``` 11 | autosa_tests/mm_hbm/kernel.c 12 | autosa_tests/mm_hbm/kernel.h 13 | autosa_tests/mm_hbm/simd_info.json 14 | autosa_tests/mm_hbm/Makefile 15 | autosa_tests/mm_hbm/connectivity.cfg 16 | ``` 17 | 18 | __Command__: 19 | ```c 20 | ./autosa ./autosa_tests/mm_hbm/kernel.c --config=./autosa_config/autosa_config.json --target=autosa_hls_c --output-dir=./autosa.tmp/output --sa-sizes="{kernel[]->space_time[3];kernel[]->array_part[32,32,32];kernel[]->latency[8,8];kernel[]->simd[2];kernel[]->hbm_A[2];kernel[]->hbm_B[2];kernel[]->hbm_C_drain[2]}" --simd-info=./autosa_tests/mm_hbm/simd_info.json --hbm 21 | ``` 22 | 23 | After compilation, you will find all generated files under the directory `autosa.tmp/output/src`. Copy the `Makefile` and `connectivity.cfg` to the directory `autosa.tmp/output`. 24 | 25 | ``` 26 | cp autosa_tests/mm_hbm/Makefile autosa.tmp/output/ 27 | cp autosa_tests/mm_hbm/connectivity.cfg autosa.tmp/output/ 28 | ``` 29 | 30 | Execute the makefile to build the design. 31 | 32 | ``` 33 | cd autosa.tmp/output 34 | make all 35 | ``` -------------------------------------------------------------------------------- /autosa_tests/mm_hbm/connectivity.cfg: -------------------------------------------------------------------------------- 1 | [connectivity] 2 | sp=kernel0_1.A_0:HBM[0] 3 | sp=kernel0_1.A_1:HBM[1] 4 | sp=kernel0_1.B_0:HBM[2] 5 | sp=kernel0_1.B_1:HBM[3] 6 | sp=kernel0_1.C_0:HBM[4] 7 | sp=kernel0_1.C_1:HBM[5] 8 | -------------------------------------------------------------------------------- /autosa_tests/mm_hbm/hls_script.tcl: -------------------------------------------------------------------------------- 1 | ############################################################ 2 | ## This file is generated automatically by Vivado HLS. 3 | ## Please DO NOT edit it. 
4 | ## Copyright (C) 1986-2019 Xilinx, Inc. All Rights Reserved. 5 | ############################################################ 6 | open_project hls_prj 7 | set_top kernel0 8 | add_files src/kernel_kernel.h 9 | add_files src/kernel_kernel.cpp 10 | add_files -tb src/kernel_host.cpp 11 | open_solution "solution1" 12 | set_part {xcu200-fsgd2104-2-e} 13 | create_clock -period 5 -name default 14 | config_compile -name_max_length 50 15 | #source "./prj/solution1/directives.tcl" 16 | csim_design 17 | #csynth_design 18 | #cosim_design 19 | #cosim_design -trace_level all 20 | #cosim_design -setup -trace_level all 21 | #export_design -format ip_catalog 22 | exit 23 | -------------------------------------------------------------------------------- /autosa_tests/mm_hbm/kernel.c: -------------------------------------------------------------------------------- 1 | #include "kernel.h" 2 | 3 | int main(int argc, char **argv) { 4 | // data_t A[I][K], B[K][J], C[I][J], C_golden[I][J]; 5 | data_t A[I][K], B[J][K], C[I][J], C_golden[I][J]; 6 | 7 | for (int i = 0; i < I; i++) 8 | for (int k = 0; k < K; k++) { 9 | A[i][k] = k; 10 | } 11 | 12 | for (int j = 0; j < J; j++) 13 | for (int k = 0; k < K; k++) { 14 | B[j][k] = k; 15 | } 16 | 17 | #pragma scop 18 | for (int i = 0; i < I; i++) 19 | for (int j = 0; j < J; j++) { 20 | C[i][j] = 0; 21 | for (int k = 0; k < K; k++) { 22 | C[i][j] = C[i][j] + A[i][k] * B[j][k]; 23 | } 24 | } 25 | #pragma endscop 26 | 27 | for (int i = 0; i < I; i++) 28 | for (int j = 0; j < J; j++) { 29 | C_golden[i][j] = 0; 30 | for (int k = 0; k < K; k++) { 31 | C_golden[i][j] = C_golden[i][j] + A[i][k] * B[j][k]; 32 | } 33 | } 34 | 35 | int err = 0; 36 | for (int i = 0; i < I; i++) 37 | for (int j = 0; j < J; j++) { 38 | if (fabs((float)C_golden[i][j] - (float)C[i][j]) > 0.001) 39 | err++; 40 | } 41 | 42 | if (err) 43 | printf("Failed with %d errors!\n", err); 44 | else 45 | printf("Passed!\n"); 46 | 47 | return 0; 48 | } 49 | 
-------------------------------------------------------------------------------- /autosa_tests/mm_hbm/kernel.h: -------------------------------------------------------------------------------- 1 | #include "stdio.h" 2 | #include "stdlib.h" 3 | #include "math.h" 4 | 5 | typedef float data_t; 6 | #define I 64 7 | #define J 64 8 | #define K 64 9 | -------------------------------------------------------------------------------- /autosa_tests/mm_hbm/simd_info.json: -------------------------------------------------------------------------------- 1 | { 2 | "kernel3": { 3 | "reduction": ["y"] 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /autosa_tests/mm_hcl/hls_script.tcl: -------------------------------------------------------------------------------- 1 | ############################################################ 2 | ## This file is generated automatically by Vivado HLS. 3 | ## Please DO NOT edit it. 4 | ## Copyright (C) 1986-2019 Xilinx, Inc. All Rights Reserved. 
5 | ############################################################ 6 | open_project hls_prj 7 | set_top kernel0 8 | add_files src/kernel_kernel.h 9 | add_files src/kernel_kernel.cpp 10 | add_files -tb src/kernel_host.cpp 11 | open_solution "solution1" 12 | set_part {xcu200-fsgd2104-2-e} 13 | create_clock -period 5 -name default 14 | config_compile -name_max_length 50 15 | #source "./prj/solution1/directives.tcl" 16 | csim_design 17 | #csynth_design 18 | #cosim_design 19 | #cosim_design -trace_level all 20 | #cosim_design -setup -trace_level all 21 | #export_design -format ip_catalog 22 | exit 23 | -------------------------------------------------------------------------------- /autosa_tests/mm_hcl/kernel.h: -------------------------------------------------------------------------------- 1 | #include <stdio.h> 2 | #include <stdlib.h> 3 | #include <math.h> 4 | 5 | typedef float data_t; 6 | #define I 64 7 | #define J 64 8 | #define K 64 9 | -------------------------------------------------------------------------------- /autosa_tests/mm_hcl/simd_info.json: -------------------------------------------------------------------------------- 1 | { 2 | "kernel0": { 3 | "reduction": ["y"] 4 | }, 5 | "kernel1": { 6 | "reduction": ["y"] 7 | }, 8 | "kernel2": { 9 | "reduction": ["y"] 10 | }, 11 | "kernel3": { 12 | "reduction": ["y"] 13 | }, 14 | "kernel4": { 15 | "reduction": ["y"] 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /autosa_tests/mm_hcl_intel/kernel.c: -------------------------------------------------------------------------------- 1 | #include "kernel.h" 2 | 3 | int main(int argc, char **argv) { 4 | // data_t A[I][K], B[K][J], C[I][J], C_golden[I][J]; 5 | data_t A[I][K], B[J][K], C[I][J], C_golden[I][J]; 6 | 7 | for (int i = 0; i < I; i++) 8 | for (int k = 0; k < K; k++) { 9 | A[i][k] = k; 10 | } 11 | 12 | for (int j = 0; j < J; j++) 13 | for (int k = 0; k < K; k++) { 14 | B[j][k] = k; 15 | } 16 | 17 | #pragma scop 18 | for (int i = 0; i < I;
i++) 19 | for (int j = 0; j < J; j++) { 20 | C[i][j] = 0; 21 | for (int k = 0; k < K; k++) { 22 | C[i][j] = C[i][j] + A[i][k] * B[j][k]; 23 | } 24 | } 25 | #pragma endscop 26 | 27 | for (int i = 0; i < I; i++) 28 | for (int j = 0; j < J; j++) { 29 | C_golden[i][j] = 0; 30 | for (int k = 0; k < K; k++) { 31 | C_golden[i][j] = C_golden[i][j] + A[i][k] * B[j][k]; 32 | } 33 | } 34 | 35 | int err = 0; 36 | for (int i = 0; i < I; i++) 37 | for (int j = 0; j < J; j++) { 38 | if (fabs((float)C_golden[i][j] - (float)C[i][j]) > 0.001) 39 | err++; 40 | } 41 | 42 | if (err) 43 | printf("Failed with %d errors!\n", err); 44 | else 45 | printf("Passed!\n"); 46 | 47 | return 0; 48 | } 49 | -------------------------------------------------------------------------------- /autosa_tests/mm_hcl_intel/kernel.h: -------------------------------------------------------------------------------- 1 | #include <stdio.h> 2 | #include <stdlib.h> 3 | #include <math.h> 4 | 5 | typedef float data_t; 6 | #define I 64 7 | #define J 64 8 | #define K 64 9 | -------------------------------------------------------------------------------- /autosa_tests/mm_hcl_intel/kernel2.c: -------------------------------------------------------------------------------- 1 | #include <stdio.h> 2 | int main(int argc, char **argv) { 3 | static float Y0[1024][1024]; 4 | static float A[1024][1024]; 5 | static float B[1024][1024]; 6 | 7 | #pragma scop 8 | for (int i = 0; i < 1024; ++i) { 9 | for (int j = 0; j < 1024; ++j) { 10 | Y0[i][j] = 0.000000e+00f; 11 | for (int k = 0; k < 1024; ++k) { 12 | Y0[i][j] = (Y0[i][j] + (A[i][k] * B[j][k])); 13 | } 14 | } 15 | } 16 | #pragma endscop 17 | 18 | printf("%f", Y0[0][0]); 19 | printf("%f", A[0][0]); 20 | printf("%f", B[0][0]); 21 | } -------------------------------------------------------------------------------- /autosa_tests/mm_hcl_intel/simd_info.json: -------------------------------------------------------------------------------- 1 | { 2 | "kernel0": { 3 | "reduction": ["y"] 4 | }, 5 | "kernel1": { 6 | "reduction":
["y"] 7 | }, 8 | "kernel2": { 9 | "reduction": ["y"] 10 | }, 11 | "kernel3": { 12 | "reduction": ["y"] 13 | }, 14 | "kernel4": { 15 | "reduction": ["y"] 16 | }, 17 | "kernel5": { 18 | "reduction": ["y"] 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /autosa_tests/mm_int16/connectivity.cfg: -------------------------------------------------------------------------------- 1 | [connectivity] 2 | sp=kernel0_1.A:DDR[0] 3 | sp=kernel0_1.B:DDR[1] 4 | sp=kernel0_1.C:DDR[2] 5 | -------------------------------------------------------------------------------- /autosa_tests/mm_int16/hls_script.tcl: -------------------------------------------------------------------------------- 1 | ############################################################ 2 | ## This file is generated automatically by Vivado HLS. 3 | ## Please DO NOT edit it. 4 | ## Copyright (C) 1986-2019 Xilinx, Inc. All Rights Reserved. 5 | ############################################################ 6 | open_project hls_prj 7 | set_top kernel0 8 | add_files src/kernel_kernel.h 9 | add_files src/kernel_kernel.cpp 10 | add_files -tb src/kernel_host.cpp 11 | open_solution "solution1" 12 | set_part {xcu200-fsgd2104-2-e} 13 | create_clock -period 5 -name default 14 | config_compile -name_max_length 50 15 | #source "./prj/solution1/directives.tcl" 16 | csim_design 17 | #csynth_design 18 | #cosim_design 19 | #cosim_design -trace_level all 20 | #cosim_design -setup -trace_level all 21 | #export_design -format ip_catalog 22 | exit 23 | -------------------------------------------------------------------------------- /autosa_tests/mm_int16/kernel.c: -------------------------------------------------------------------------------- 1 | #include "kernel.h" 2 | 3 | int main(int argc, char **argv) { 4 | // data_t A[I][K], B[K][J], C[I][J], C_golden[I][J]; 5 | data_t A[I][K], B[J][K], C[I][J], C_golden[I][J]; // gemm0,3 6 | // data_t A[K][I], B[K][J], C[I][J], C_golden[I][J]; // gemm4 7 
| 8 | for (int i = 0; i < I; i++) 9 | for (int k = 0; k < K; k++) { 10 | A[i][k] = k; 11 | // A[k][i] = k; 12 | } 13 | 14 | for (int j = 0; j < J; j++) 15 | for (int k = 0; k < K; k++) { 16 | B[j][k] = k; 17 | // B[k][j] = k; 18 | } 19 | 20 | #pragma scop 21 | for (int i = 0; i < I; i++) 22 | for (int j = 0; j < J; j++) { 23 | C[i][j] = 0; 24 | for (int k = 0; k < K; k++) { 25 | C[i][j] = C[i][j] + A[i][k] * B[j][k]; 26 | // C[i][j] = C[i][j] + A[k][i] * B[k][j]; 27 | } 28 | } 29 | #pragma endscop 30 | 31 | for (int i = 0; i < I; i++) 32 | for (int j = 0; j < J; j++) { 33 | C_golden[i][j] = 0; 34 | for (int k = 0; k < K; k++) { 35 | C_golden[i][j] = C_golden[i][j] + A[i][k] * B[j][k]; 36 | // C_golden[i][j] = C_golden[i][j] + A[k][i] * B[k][j]; 37 | } 38 | } 39 | 40 | int err = 0; 41 | for (int i = 0; i < I; i++) 42 | for (int j = 0; j < J; j++) { 43 | if (fabs((float)C_golden[i][j] - (float)C[i][j]) > 0.001) 44 | err++; 45 | } 46 | 47 | if (err) 48 | printf("Failed with %d errors!\n", err); 49 | else 50 | printf("Passed!\n"); 51 | 52 | return 0; 53 | } 54 | -------------------------------------------------------------------------------- /autosa_tests/mm_int16/kernel.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | typedef unsigned short data_t; 6 | #define I 64 7 | #define J 64 8 | #define K 64 9 | -------------------------------------------------------------------------------- /autosa_tests/mm_int16/simd_info.json: -------------------------------------------------------------------------------- 1 | { 2 | "kernel0": { 3 | "reduction": ["y"] 4 | }, 5 | "kernel1": { 6 | "reduction": ["y"] 7 | }, 8 | "kernel2": { 9 | "reduction": ["y"] 10 | }, 11 | "kernel3": { 12 | "reduction": ["y"] 13 | }, 14 | "kernel4": { 15 | "reduction": ["y"] 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /autosa_tests/mm_intel/README.md: 
-------------------------------------------------------------------------------- 1 | # Matrix Multiplication (Small) 2 | 3 | Board | Software Version 4 | -------------|----------------- 5 | Stratix 10 | Intel FPGA SDK for OpenCL 19.4 6 | 7 | __Files__: 8 | ``` 9 | autosa_tests/mm_intel/kernel.c 10 | autosa_tests/mm_intel/kernel.h 11 | autosa_tests/mm_intel/simd_info.json 12 | autosa_tests/mm_intel/Makefile 13 | ``` 14 | 15 | __Command__: 16 | ```c 17 | ./autosa ./autosa_tests/mm_intel/kernel.c --config=./autosa_config/autosa_config.json --target=autosa_opencl --output-dir=./autosa.tmp/output --sa-sizes="{kernel[]->space_time[3];kernel[]->array_part[16,16,16];kernel[]->array_part_L2[2,2,2];kernel[]->latency[8,8];kernel[]->simd[2]}" --simd-info=./autosa_tests/mm_intel/simd_info.json --host-serialize --loop-infinitize --double-buffer-style=0 --mem-port-map="{kernel[]->A[0];kernel[]->B[1];kernel[]->C[2]}" 18 | ``` 19 | 20 | After compilation, you will find all generated files under the directory `autosa.tmp/output/src`. Copy the `Makefile` to the directory `autosa.tmp/output`. 
21 | 22 | ``` 23 | cp autosa_tests/mm_intel/Makefile autosa.tmp/output/ 24 | ``` 25 | 26 | Execute the makefile to perform software emulation 27 | ``` 28 | make sw_emu_check 29 | ``` 30 | or synthesize the design to RTL 31 | ``` 32 | make hls 33 | ``` 34 | or generate the bitstream 35 | ``` 36 | make hw 37 | ``` 38 | -------------------------------------------------------------------------------- /autosa_tests/mm_intel/kernel.c: -------------------------------------------------------------------------------- 1 | #include "kernel.h" 2 | 3 | int main(int argc, char **argv) { 4 | // data_t A[I][K], B[K][J], C[I][J], C_golden[I][J]; 5 | data_t A[I][K], B[J][K], C[I][J], C_golden[I][J]; 6 | 7 | for (int i = 0; i < I; i++) 8 | for (int k = 0; k < K; k++) { 9 | A[i][k] = k; 10 | } 11 | 12 | for (int j = 0; j < J; j++) 13 | for (int k = 0; k < K; k++) { 14 | B[j][k] = k; 15 | } 16 | 17 | #pragma scop 18 | for (int i = 0; i < I; i++) 19 | for (int j = 0; j < J; j++) { 20 | C[i][j] = 0; 21 | for (int k = 0; k < K; k++) { 22 | C[i][j] = C[i][j] + A[i][k] * B[j][k]; 23 | } 24 | } 25 | #pragma endscop 26 | 27 | for (int i = 0; i < I; i++) 28 | for (int j = 0; j < J; j++) { 29 | C_golden[i][j] = 0; 30 | for (int k = 0; k < K; k++) { 31 | C_golden[i][j] = C_golden[i][j] + A[i][k] * B[j][k]; 32 | } 33 | } 34 | 35 | int err = 0; 36 | for (int i = 0; i < I; i++) 37 | for (int j = 0; j < J; j++) { 38 | if (fabs((float)C_golden[i][j] - (float)C[i][j]) > 0.001) 39 | err++; 40 | } 41 | 42 | if (err) 43 | printf("Failed with %d errors!\n", err); 44 | else 45 | printf("Passed!\n"); 46 | 47 | return 0; 48 | } 49 | -------------------------------------------------------------------------------- /autosa_tests/mm_intel/kernel.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | typedef float data_t; 6 | #define I 64 7 | #define J 64 8 | #define K 64 9 | 
-------------------------------------------------------------------------------- /autosa_tests/mm_intel/simd_info.json: -------------------------------------------------------------------------------- 1 | { 2 | "kernel3": { 3 | "reduction": ["y"] 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /clean.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | rm ./autosa 3 | rm -rf ./autosa.tmp 4 | cd src 5 | make clean 6 | cd - 7 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/docker_image.rst: -------------------------------------------------------------------------------- 1 | .. _docker-image-label: 2 | 3 | Docker Image 4 | ============ 5 | 6 | We provide a docker image to quickly try out the features of AutoSA. 7 | 8 | Pull the Docker image using the following command. 9 | 10 | .. 
code:: bash 11 | 12 | docker pull whbldhwj/autosa:latest -------------------------------------------------------------------------------- /docs/examples/images/array_hbm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCLA-VAST/AutoSA/b61a1b4132d631600696feba59eb606acb34d304/docs/examples/images/array_hbm.png -------------------------------------------------------------------------------- /docs/examples/images/cnn0_array.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCLA-VAST/AutoSA/b61a1b4132d631600696feba59eb606acb34d304/docs/examples/images/cnn0_array.png -------------------------------------------------------------------------------- /docs/examples/images/cnn10_array.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCLA-VAST/AutoSA/b61a1b4132d631600696feba59eb606acb34d304/docs/examples/images/cnn10_array.png -------------------------------------------------------------------------------- /docs/examples/images/cnn1_array.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCLA-VAST/AutoSA/b61a1b4132d631600696feba59eb606acb34d304/docs/examples/images/cnn1_array.png -------------------------------------------------------------------------------- /docs/examples/images/cnn2_2_array.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCLA-VAST/AutoSA/b61a1b4132d631600696feba59eb606acb34d304/docs/examples/images/cnn2_2_array.png -------------------------------------------------------------------------------- /docs/examples/images/cnn2_array.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/UCLA-VAST/AutoSA/b61a1b4132d631600696feba59eb606acb34d304/docs/examples/images/cnn2_array.png -------------------------------------------------------------------------------- /docs/examples/images/cnn3_array.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCLA-VAST/AutoSA/b61a1b4132d631600696feba59eb606acb34d304/docs/examples/images/cnn3_array.png -------------------------------------------------------------------------------- /docs/examples/images/cnn4_array.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCLA-VAST/AutoSA/b61a1b4132d631600696feba59eb606acb34d304/docs/examples/images/cnn4_array.png -------------------------------------------------------------------------------- /docs/examples/images/cnn5_array.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCLA-VAST/AutoSA/b61a1b4132d631600696feba59eb606acb34d304/docs/examples/images/cnn5_array.png -------------------------------------------------------------------------------- /docs/examples/images/cnn6_array.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCLA-VAST/AutoSA/b61a1b4132d631600696feba59eb606acb34d304/docs/examples/images/cnn6_array.png -------------------------------------------------------------------------------- /docs/examples/images/cnn7_array.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCLA-VAST/AutoSA/b61a1b4132d631600696feba59eb606acb34d304/docs/examples/images/cnn7_array.png -------------------------------------------------------------------------------- /docs/examples/images/cnn8_array.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/UCLA-VAST/AutoSA/b61a1b4132d631600696feba59eb606acb34d304/docs/examples/images/cnn8_array.png -------------------------------------------------------------------------------- /docs/examples/images/cnn9_array.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCLA-VAST/AutoSA/b61a1b4132d631600696feba59eb606acb34d304/docs/examples/images/cnn9_array.png -------------------------------------------------------------------------------- /docs/examples/images/cnn_w_reuse.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCLA-VAST/AutoSA/b61a1b4132d631600696feba59eb606acb34d304/docs/examples/images/cnn_w_reuse.png -------------------------------------------------------------------------------- /docs/examples/images/dconv.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCLA-VAST/AutoSA/b61a1b4132d631600696feba59eb606acb34d304/docs/examples/images/dconv.png -------------------------------------------------------------------------------- /docs/examples/images/fc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCLA-VAST/AutoSA/b61a1b4132d631600696feba59eb606acb34d304/docs/examples/images/fc.png -------------------------------------------------------------------------------- /docs/examples/images/gemm0_array.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCLA-VAST/AutoSA/b61a1b4132d631600696feba59eb606acb34d304/docs/examples/images/gemm0_array.png -------------------------------------------------------------------------------- /docs/examples/images/gemm1_array.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/UCLA-VAST/AutoSA/b61a1b4132d631600696feba59eb606acb34d304/docs/examples/images/gemm1_array.png -------------------------------------------------------------------------------- /docs/examples/images/gemm2_array.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCLA-VAST/AutoSA/b61a1b4132d631600696feba59eb606acb34d304/docs/examples/images/gemm2_array.png -------------------------------------------------------------------------------- /docs/examples/images/gemm3_array.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCLA-VAST/AutoSA/b61a1b4132d631600696feba59eb606acb34d304/docs/examples/images/gemm3_array.png -------------------------------------------------------------------------------- /docs/examples/images/gemm4_array.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCLA-VAST/AutoSA/b61a1b4132d631600696feba59eb606acb34d304/docs/examples/images/gemm4_array.png -------------------------------------------------------------------------------- /docs/examples/images/gemm5_array.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCLA-VAST/AutoSA/b61a1b4132d631600696feba59eb606acb34d304/docs/examples/images/gemm5_array.png -------------------------------------------------------------------------------- /docs/examples/images/mm_dse.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCLA-VAST/AutoSA/b61a1b4132d631600696feba59eb606acb34d304/docs/examples/images/mm_dse.png -------------------------------------------------------------------------------- /docs/examples/images/pconv.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/UCLA-VAST/AutoSA/b61a1b4132d631600696feba59eb606acb34d304/docs/examples/images/pconv.png -------------------------------------------------------------------------------- /docs/examples/images/resource_model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCLA-VAST/AutoSA/b61a1b4132d631600696feba59eb606acb34d304/docs/examples/images/resource_model.png -------------------------------------------------------------------------------- /docs/examples/index.rst: -------------------------------------------------------------------------------- 1 | AutoSA Examples 2 | =============== 3 | 4 | This page covers a list of design examples to get you familiar with the AutoSA 5 | compilation process. Examples are divided into two categories: 6 | 7 | * Small Designs: These designs are limited in the problem size so that you could 8 | easily verify and synthesize the design within hours. 9 | * Large Designs: These designs are used for demonstrating the performance of AutoSA-generated 10 | designs, and it may take more than days for verification and synthesis flow. 11 | 12 | Small Designs 13 | ------------- 14 | 15 | .. toctree:: 16 | :maxdepth: 1 17 | 18 | mm 19 | cnn 20 | lu 21 | mm_int16 22 | mm_hbm 23 | dnn_ops 24 | 25 | Large Designs 26 | ------------- 27 | 28 | .. 
toctree:: 29 | :maxdepth: 1 30 | 31 | mm_large 32 | cnn_large 33 | mm_int16_large 34 | mm_int8_large 35 | mttkrp_large 36 | ttmc_large -------------------------------------------------------------------------------- /docs/examples/mm_block_sparse.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCLA-VAST/AutoSA/b61a1b4132d631600696feba59eb606acb34d304/docs/examples/mm_block_sparse.rst -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. AutoSA documentation master file, created by 2 | sphinx-quickstart on Sun Jan 17 15:06:11 2021. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to AutoSA's documentation! 7 | ================================== 8 | 9 | AutoSA is an end-to-end systolic array compiler for FPGAs based on the polyhedral model. 10 | It takes algorithms in high-level programming languages (C) as inputs, 11 | performs polyhedral transformation and other architecture optimizations to map algorithms 12 | to systolic array architecture. 13 | 14 | 15 | Getting Started 16 | --------------- 17 | 18 | .. toctree:: 19 | :maxdepth: 1 20 | 21 | installation 22 | tutorials/index 23 | examples/index 24 | 25 | Resources 26 | --------- 27 | * `AutoSA Paper `_ 28 | * `Github Project `_ 29 | * `Docker Image `_ 30 | * `FCCM 2021 Tutorial Slides `_ 31 | 32 | Indices and tables 33 | ================== 34 | 35 | * :ref:`genindex` 36 | * :ref:`modindex` 37 | * :ref:`search` 38 | -------------------------------------------------------------------------------- /docs/installation.rst: -------------------------------------------------------------------------------- 1 | Installation 2 | ============ 3 | 4 | To install AutoSA, please read :ref:`install-from-source-label`. 
Alternatively, 5 | if you would like to quickly try out AutoSA, please check the 6 | :ref:`docker-image-label`. 7 | 8 | .. toctree:: 9 | :maxdepth: 1 10 | 11 | install_from_source 12 | docker_image -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/tutorials/images/2d_array_mm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCLA-VAST/AutoSA/b61a1b4132d631600696feba59eb606acb34d304/docs/tutorials/images/2d_array_mm.png -------------------------------------------------------------------------------- /docs/tutorials/images/2d_array_mm_schedule.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCLA-VAST/AutoSA/b61a1b4132d631600696feba59eb606acb34d304/docs/tutorials/images/2d_array_mm_schedule.png 
-------------------------------------------------------------------------------- /docs/tutorials/images/ab_map.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCLA-VAST/AutoSA/b61a1b4132d631600696feba59eb606acb34d304/docs/tutorials/images/ab_map.png -------------------------------------------------------------------------------- /docs/tutorials/images/array_serialize.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCLA-VAST/AutoSA/b61a1b4132d631600696feba59eb606acb34d304/docs/tutorials/images/array_serialize.png -------------------------------------------------------------------------------- /docs/tutorials/images/auto_tuner_flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCLA-VAST/AutoSA/b61a1b4132d631600696feba59eb606acb34d304/docs/tutorials/images/auto_tuner_flow.png -------------------------------------------------------------------------------- /docs/tutorials/images/autobridge.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCLA-VAST/AutoSA/b61a1b4132d631600696feba59eb606acb34d304/docs/tutorials/images/autobridge.jpg -------------------------------------------------------------------------------- /docs/tutorials/images/catapult_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCLA-VAST/AutoSA/b61a1b4132d631600696feba59eb606acb34d304/docs/tutorials/images/catapult_0.png -------------------------------------------------------------------------------- /docs/tutorials/images/catapult_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCLA-VAST/AutoSA/b61a1b4132d631600696feba59eb606acb34d304/docs/tutorials/images/catapult_1.png 
-------------------------------------------------------------------------------- /docs/tutorials/images/catapult_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCLA-VAST/AutoSA/b61a1b4132d631600696feba59eb606acb34d304/docs/tutorials/images/catapult_2.png -------------------------------------------------------------------------------- /docs/tutorials/images/catapult_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCLA-VAST/AutoSA/b61a1b4132d631600696feba59eb606acb34d304/docs/tutorials/images/catapult_3.png -------------------------------------------------------------------------------- /docs/tutorials/images/catapult_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCLA-VAST/AutoSA/b61a1b4132d631600696feba59eb606acb34d304/docs/tutorials/images/catapult_4.png -------------------------------------------------------------------------------- /docs/tutorials/images/catapult_5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCLA-VAST/AutoSA/b61a1b4132d631600696feba59eb606acb34d304/docs/tutorials/images/catapult_5.png -------------------------------------------------------------------------------- /docs/tutorials/images/catapult_6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCLA-VAST/AutoSA/b61a1b4132d631600696feba59eb606acb34d304/docs/tutorials/images/catapult_6.png -------------------------------------------------------------------------------- /docs/tutorials/images/catapult_7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCLA-VAST/AutoSA/b61a1b4132d631600696feba59eb606acb34d304/docs/tutorials/images/catapult_7.png 
-------------------------------------------------------------------------------- /docs/tutorials/images/catapult_sim.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCLA-VAST/AutoSA/b61a1b4132d631600696feba59eb606acb34d304/docs/tutorials/images/catapult_sim.png -------------------------------------------------------------------------------- /docs/tutorials/images/catapult_sim2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCLA-VAST/AutoSA/b61a1b4132d631600696feba59eb606acb34d304/docs/tutorials/images/catapult_sim2.png -------------------------------------------------------------------------------- /docs/tutorials/images/dense_array.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCLA-VAST/AutoSA/b61a1b4132d631600696feba59eb606acb34d304/docs/tutorials/images/dense_array.png -------------------------------------------------------------------------------- /docs/tutorials/images/dram_bw.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCLA-VAST/AutoSA/b61a1b4132d631600696feba59eb606acb34d304/docs/tutorials/images/dram_bw.png -------------------------------------------------------------------------------- /docs/tutorials/images/flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCLA-VAST/AutoSA/b61a1b4132d631600696feba59eb606acb34d304/docs/tutorials/images/flow.png -------------------------------------------------------------------------------- /docs/tutorials/images/io_module_arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCLA-VAST/AutoSA/b61a1b4132d631600696feba59eb606acb34d304/docs/tutorials/images/io_module_arch.png 
-------------------------------------------------------------------------------- /docs/tutorials/images/mm_array_L1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCLA-VAST/AutoSA/b61a1b4132d631600696feba59eb606acb34d304/docs/tutorials/images/mm_array_L1.png -------------------------------------------------------------------------------- /docs/tutorials/images/mm_array_L2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCLA-VAST/AutoSA/b61a1b4132d631600696feba59eb606acb34d304/docs/tutorials/images/mm_array_L2.png -------------------------------------------------------------------------------- /docs/tutorials/images/mm_array_b.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCLA-VAST/AutoSA/b61a1b4132d631600696feba59eb606acb34d304/docs/tutorials/images/mm_array_b.png -------------------------------------------------------------------------------- /docs/tutorials/images/mm_array_opt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCLA-VAST/AutoSA/b61a1b4132d631600696feba59eb606acb34d304/docs/tutorials/images/mm_array_opt.png -------------------------------------------------------------------------------- /docs/tutorials/images/mm_array_unopt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCLA-VAST/AutoSA/b61a1b4132d631600696feba59eb606acb34d304/docs/tutorials/images/mm_array_unopt.png -------------------------------------------------------------------------------- /docs/tutorials/images/mm_st_code.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCLA-VAST/AutoSA/b61a1b4132d631600696feba59eb606acb34d304/docs/tutorials/images/mm_st_code.png 
-------------------------------------------------------------------------------- /docs/tutorials/images/mm_tree.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCLA-VAST/AutoSA/b61a1b4132d631600696feba59eb606acb34d304/docs/tutorials/images/mm_tree.png -------------------------------------------------------------------------------- /docs/tutorials/images/mm_tree_array_part.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCLA-VAST/AutoSA/b61a1b4132d631600696feba59eb606acb34d304/docs/tutorials/images/mm_tree_array_part.png -------------------------------------------------------------------------------- /docs/tutorials/images/mm_tree_isolate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCLA-VAST/AutoSA/b61a1b4132d631600696feba59eb606acb34d304/docs/tutorials/images/mm_tree_isolate.png -------------------------------------------------------------------------------- /docs/tutorials/images/mm_tree_latency.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCLA-VAST/AutoSA/b61a1b4132d631600696feba59eb606acb34d304/docs/tutorials/images/mm_tree_latency.png -------------------------------------------------------------------------------- /docs/tutorials/images/mm_tree_param.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCLA-VAST/AutoSA/b61a1b4132d631600696feba59eb606acb34d304/docs/tutorials/images/mm_tree_param.png -------------------------------------------------------------------------------- /docs/tutorials/images/mm_tree_simd.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/UCLA-VAST/AutoSA/b61a1b4132d631600696feba59eb606acb34d304/docs/tutorials/images/mm_tree_simd.png -------------------------------------------------------------------------------- /docs/tutorials/images/odyssey_flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCLA-VAST/AutoSA/b61a1b4132d631600696feba59eb606acb34d304/docs/tutorials/images/odyssey_flow.png -------------------------------------------------------------------------------- /docs/tutorials/images/serialize_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCLA-VAST/AutoSA/b61a1b4132d631600696feba59eb606acb34d304/docs/tutorials/images/serialize_example.png -------------------------------------------------------------------------------- /docs/tutorials/images/serialize_example2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCLA-VAST/AutoSA/b61a1b4132d631600696feba59eb606acb34d304/docs/tutorials/images/serialize_example2.png -------------------------------------------------------------------------------- /docs/tutorials/images/sparse_array.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCLA-VAST/AutoSA/b61a1b4132d631600696feba59eb606acb34d304/docs/tutorials/images/sparse_array.png -------------------------------------------------------------------------------- /docs/tutorials/images/sparse_example1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCLA-VAST/AutoSA/b61a1b4132d631600696feba59eb606acb34d304/docs/tutorials/images/sparse_example1.png -------------------------------------------------------------------------------- /docs/tutorials/images/sparse_example2.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCLA-VAST/AutoSA/b61a1b4132d631600696feba59eb606acb34d304/docs/tutorials/images/sparse_example2.png -------------------------------------------------------------------------------- /docs/tutorials/images/sparse_mm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCLA-VAST/AutoSA/b61a1b4132d631600696feba59eb606acb34d304/docs/tutorials/images/sparse_mm.png -------------------------------------------------------------------------------- /docs/tutorials/index.rst: -------------------------------------------------------------------------------- 1 | AutoSA Tutorials 2 | ================ 3 | 4 | This page contains a series of tutorials to get you familiar with the systolic array 5 | architectures and the compilation process of AutoSA. 6 | 7 | .. toctree:: 8 | :maxdepth: 1 9 | 10 | theory_background 11 | optimize_array 12 | getting_started 13 | matrix_multiplication 14 | auto_tuning_exhaustive 15 | auto_tuning_genetic 16 | auto_bridge 17 | structural_sparsity 18 | intel_backend 19 | catapult_backend 20 | host_serialize 21 | hcl_integrate -------------------------------------------------------------------------------- /install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # Initialize ISL and PET 3 | git submodule init 4 | git submodule update 5 | (cd src/isl; git submodule init imath; git submodule update imath) 6 | (cd src/barvinok; ./get_submodules.sh) 7 | 8 | # Install python packages 9 | pip3 install -r requirements.txt 10 | 11 | # Patch ISL 12 | echo "Patch ISL" 13 | (cd ./autosa_scripts/ppcg_changes/isl; ./isl_patch.sh) 14 | 15 | # Compilation 16 | (cd src; echo "autogen"; ./autogen.sh; echo "configure"; ./configure; echo "make"; make -j4) 17 | 18 | # Cleanup 19 | cp ./autosa_scripts/autosa.py ./autosa 20 | (mkdir autosa.tmp; cd 
autosa.tmp; mkdir output optimizer; cd output; mkdir src latency_est resource_est tuning) 21 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | decorator>=4.3.0 2 | joblib>=0.14.1 3 | numpy>=1.18.2 4 | pandas>=1.0.3 5 | scipy>=1.4.1 6 | sklearn>=0.0 7 | sympy>=1.4 8 | xgboost>=0.81 9 | -------------------------------------------------------------------------------- /src/ChangeLog: -------------------------------------------------------------------------------- 1 | version: 0.08.3 2 | date: Wed Nov 13 11:39:01 CET 2019 3 | changes: 4 | - support recent versions of clang 5 | - fix OpenMP support when contraction is enabled 6 | --- 7 | version: 0.08.2 8 | date: Thu Mar 28 18:36:52 CET 2019 9 | changes: 10 | - support recent versions of clang 11 | --- 12 | version: 0.08.1 13 | date: Mon Jul 30 23:05:04 CEST 2018 14 | changes: 15 | - move some functionality to isl 16 | --- 17 | version: 0.08 18 | date: Sat Mar 3 15:31:38 CET 2018 19 | changes: 20 | - minor fixes 21 | --- 22 | version: 0.07 23 | date: Tue Feb 7 17:23:22 CET 2017 24 | changes: 25 | - support hybrid tiling 26 | --- 27 | version: 0.06 28 | date: Fri May 6 12:08:50 CEST 2016 29 | changes: 30 | - use PPCG specific macro names in generated code 31 | - complete transition to schedule trees 32 | - maximize coincidence by default 33 | - map arrays with constant index expressions to private memory 34 | - optionally group chains of statements 35 | --- 36 | version: 0.05 37 | date: Fri Jan 15 09:30:23 CET 2016 38 | changes: 39 | - fix live-out computation 40 | - optionally compute schedule for C target 41 | - optionally perform tiling for C target 42 | - create single kernel for non-permutable subtree 43 | --- 44 | version: 0.04 45 | date: Wed Jun 17 10:52:58 CEST 2015 46 | changes: 47 | - use schedule trees 48 | - fix live-range reordering 49 | - improve generation of 
synchronization 50 | - exploit independences during dependence analysis 51 | -------------------------------------------------------------------------------- /src/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License (MIT) 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in 5 | the Software without restriction, including without limitation the rights to 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 7 | of the Software, and to permit persons to whom the Software is furnished to do 8 | so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. 
20 | -------------------------------------------------------------------------------- /src/autogen.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | autoreconf -i 3 | if test -f isl/autogen.sh; then 4 | (cd isl; ./autogen.sh) 5 | fi 6 | if test -f barvinok/autogen.sh; then 7 | (cd barvinok; ./autogen.sh) 8 | fi 9 | if test -f pet/autogen.sh; then 10 | (cd pet; ./autogen.sh) 11 | fi 12 | -------------------------------------------------------------------------------- /src/autosa_catapult_hls_c.h: -------------------------------------------------------------------------------- 1 | #ifndef _AUTOSA_CATAPULT_HLS_C_H 2 | #define _AUTOSA_CATAPULT_HLS_C_H 3 | 4 | #include 5 | #include "ppcg_options.h" 6 | #include "ppcg.h" 7 | 8 | #ifdef __cplusplus 9 | extern "C" 10 | { 11 | #endif 12 | 13 | int generate_autosa_catapult_hls_c(isl_ctx *ctx, struct ppcg_options *options, 14 | const char *input); 15 | 16 | #ifdef __cplusplus 17 | } 18 | #endif 19 | 20 | #endif -------------------------------------------------------------------------------- /src/autosa_cpu.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCLA-VAST/AutoSA/b61a1b4132d631600696feba59eb606acb34d304/src/autosa_cpu.cpp -------------------------------------------------------------------------------- /src/autosa_cpu.h: -------------------------------------------------------------------------------- 1 | #ifndef _AUTOSA_CPU_H 2 | #define _AUTOSA_CPU_H 3 | 4 | #include 5 | 6 | #include "ppcg.h" 7 | 8 | struct ppcg_options; 9 | 10 | int generate_autosa_cpu(isl_ctx *ctx, struct ppcg_options *options, 11 | const char *input); 12 | 13 | #endif -------------------------------------------------------------------------------- /src/autosa_intel_opencl.h: -------------------------------------------------------------------------------- 1 | #ifndef _AUTOSA_INTEL_OPENCL_H 2 | #define _AUTOSA_INTEL_OPENCL_H 3 | 
4 | #include 5 | #include "ppcg_options.h" 6 | #include "ppcg.h" 7 | 8 | #ifdef __cplusplus 9 | extern "C" { 10 | #endif 11 | 12 | int generate_autosa_intel_opencl(isl_ctx *ctx, struct ppcg_options *options, 13 | const char *input); 14 | 15 | #ifdef __cplusplus 16 | } 17 | #endif 18 | 19 | #endif -------------------------------------------------------------------------------- /src/autosa_t2s.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCLA-VAST/AutoSA/b61a1b4132d631600696feba59eb606acb34d304/src/autosa_t2s.cpp -------------------------------------------------------------------------------- /src/autosa_tapa_cpp.h: -------------------------------------------------------------------------------- 1 | #ifndef _AUTOSA_TAPA_CPP_H 2 | #define _AUTOSA_TAPA_CPP_H 3 | 4 | #include 5 | #include "ppcg_options.h" 6 | #include "ppcg.h" 7 | 8 | #ifdef __cplusplus 9 | extern "C" 10 | { 11 | #endif 12 | 13 | int generate_autosa_tapa_cpp(isl_ctx *ctx, struct ppcg_options *options, 14 | const char *input); 15 | 16 | #ifdef __cplusplus 17 | } 18 | #endif 19 | 20 | #endif 21 | -------------------------------------------------------------------------------- /src/autosa_xilinx_hls_c.h: -------------------------------------------------------------------------------- 1 | #ifndef _AUTOSA_XILINX_HLS_C_H 2 | #define _AUTOSA_XILINX_HLS_C_H 3 | 4 | #include 5 | #include "ppcg_options.h" 6 | #include "ppcg.h" 7 | 8 | #ifdef __cplusplus 9 | extern "C" 10 | { 11 | #endif 12 | 13 | int generate_autosa_xilinx_hls_c(isl_ctx *ctx, struct ppcg_options *options, 14 | const char *input); 15 | 16 | #ifdef __cplusplus 17 | } 18 | #endif 19 | 20 | #endif -------------------------------------------------------------------------------- /src/cpu.h: -------------------------------------------------------------------------------- 1 | #ifndef _CPU_H 2 | #define _CPU_H 3 | 4 | #include 5 | 6 | #include "ppcg.h" 7 | 8 | #ifdef __cplusplus 9 
| extern "C" 10 | { 11 | #endif 12 | 13 | struct ppcg_options; 14 | 15 | __isl_give isl_printer *print_cpu(__isl_take isl_printer *p, 16 | struct ppcg_scop *ps, struct ppcg_options *options); 17 | int generate_cpu(isl_ctx *ctx, struct ppcg_options *options, 18 | const char *input, const char *output); 19 | 20 | #ifdef __cplusplus 21 | } 22 | #endif 23 | 24 | #endif 25 | -------------------------------------------------------------------------------- /src/get_submodules.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | git submodule init 3 | git submodule update 4 | (cd isl; git submodule init imath; git submodule update imath) 5 | -------------------------------------------------------------------------------- /src/grouping.h: -------------------------------------------------------------------------------- 1 | #ifndef PPCG_GROUPING_H 2 | 3 | #include 4 | 5 | #include "ppcg_options.h" 6 | 7 | __isl_give isl_schedule *ppcg_compute_grouping_schedule( 8 | __isl_take isl_schedule_constraints *sc, 9 | __isl_keep isl_schedule *schedule, struct ppcg_options *options); 10 | 11 | #endif 12 | -------------------------------------------------------------------------------- /src/m4/ax_check_opencl.m4: -------------------------------------------------------------------------------- 1 | # Check if OpenCL is available and that it supports a CPU device. 
2 | # The check for a CPU device is the same check that is performed 3 | # by opencl_create_device in ocl_utilities.c 4 | AC_DEFUN([AX_CHECK_OPENCL], [ 5 | AC_SUBST(HAVE_OPENCL) 6 | HAVE_OPENCL=no 7 | AC_CHECK_HEADER([CL/opencl.h], [ 8 | AC_CHECK_LIB([OpenCL], [clGetPlatformIDs], [ 9 | SAVE_LIBS=$LIBS 10 | LIBS="$LIBS -lOpenCL" 11 | AC_MSG_CHECKING([for OpenCL CPU device]) 12 | AC_RUN_IFELSE([AC_LANG_PROGRAM( 13 | [[#include ]], [[ 14 | cl_platform_id platform; 15 | cl_device_id dev; 16 | 17 | if (clGetPlatformIDs(1, &platform, NULL) < 0) 18 | return 1; 19 | if (clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 1, &dev, NULL) < 0) 20 | return 1; 21 | ]])], [HAVE_OPENCL=yes]) 22 | AC_MSG_RESULT($HAVE_OPENCL) 23 | LIBS=$SAVE_LIBS 24 | ])]) 25 | ]) 26 | -------------------------------------------------------------------------------- /src/m4/ax_check_openmp.m4: -------------------------------------------------------------------------------- 1 | # Check if $CC supports openmp. 2 | AC_DEFUN([AX_CHECK_OPENMP], [ 3 | AC_SUBST(HAVE_OPENMP) 4 | HAVE_OPENMP=no 5 | AC_MSG_CHECKING([for OpenMP support by $CC]) 6 | echo | $CC -x c - -fsyntax-only -fopenmp -Werror >/dev/null 2>/dev/null 7 | if test $? -eq 0; then 8 | HAVE_OPENMP=yes 9 | fi 10 | AC_MSG_RESULT($HAVE_OPENMP) 11 | 12 | if test $HAVE_OPENMP = yes; then 13 | SAVE_CFLAGS=$CFLAGS 14 | CFLAGS="$CFLAGS -fopenmp" 15 | # Using some version of clang, the value of "m" becomes zero 16 | # after the parallel for loop. 
17 | AC_RUN_IFELSE([AC_LANG_PROGRAM([[ 18 | #include 19 | 20 | static void f(int m, double A[m]) 21 | { 22 | #pragma omp parallel for 23 | for (int c0 = 0; c0 < m; c0 += 1) 24 | A[c0] = 0.; 25 | if (m != 100) 26 | abort(); 27 | } 28 | ]],[[ 29 | double A[100]; 30 | 31 | f(100, A); 32 | ]])],[],[ 33 | AC_MSG_NOTICE([OpenMP support broken, disabling]) 34 | HAVE_OPENMP=no 35 | ],[]) 36 | CFLAGS=$SAVE_CFLAGS 37 | fi 38 | ]) 39 | -------------------------------------------------------------------------------- /src/m4/ax_detect_git_head.m4: -------------------------------------------------------------------------------- 1 | AC_DEFUN([AX_DETECT_GIT_HEAD], [ 2 | AC_SUBST(GIT_HEAD_ID) 3 | AC_SUBST(GIT_HEAD) 4 | AC_SUBST(GIT_HEAD_VERSION) 5 | if test -f $srcdir/.git; then 6 | gitdir=`GIT_DIR=$srcdir/.git git rev-parse --git-dir` 7 | GIT_HEAD="$gitdir/index" 8 | GIT_REPO="$gitdir" 9 | GIT_HEAD_ID=`GIT_DIR=$GIT_REPO git describe --always` 10 | elif test -f $srcdir/.git/HEAD; then 11 | GIT_HEAD="$srcdir/.git/index" 12 | GIT_REPO="$srcdir/.git" 13 | GIT_HEAD_ID=`GIT_DIR=$GIT_REPO git describe --always` 14 | elif test -f $srcdir/GIT_HEAD_ID; then 15 | GIT_HEAD_ID=`cat $srcdir/GIT_HEAD_ID` 16 | else 17 | mysrcdir=`(cd $srcdir; pwd)` 18 | head=`basename $mysrcdir | sed -e 's/.*-//'` 19 | head2=`echo $head | sed -e 's/[^0-9a-f]//'` 20 | head3=`echo $head2 | sed -e 's/........................................//'` 21 | if test "x$head3" = "x" -a "x$head" = "x$head2"; then 22 | GIT_HEAD_ID="$head" 23 | else 24 | GIT_HEAD_ID="UNKNOWN" 25 | fi 26 | fi 27 | if test -z "$GIT_REPO" ; then 28 | GIT_HEAD_VERSION="$GIT_HEAD_ID" 29 | else 30 | GIT_HEAD_VERSION="\`GIT_DIR=$GIT_REPO git describe --always\`" 31 | fi 32 | ]) 33 | -------------------------------------------------------------------------------- /src/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 
#include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include "ppcg.h" 23 | #include "ppcg_options.h" 24 | //#include "cuda.h" 25 | //#include "opencl.h" 26 | //#include "cpu.h" 27 | 28 | #include 29 | 30 | using namespace std; 31 | 32 | int main(int argc, char **argv) 33 | { 34 | int r; 35 | 36 | r = autosa_main_wrap(argc, argv); 37 | 38 | return r; 39 | } 40 | -------------------------------------------------------------------------------- /src/ocl_utilities.h: -------------------------------------------------------------------------------- 1 | #ifndef OCL_UTILITIES_H 2 | #define OCL_UTILITIES_H 3 | 4 | #if defined(__APPLE__) 5 | #include 6 | #else 7 | #include 8 | #endif 9 | 10 | /* Return the OpenCL error string for a given error number. 11 | */ 12 | const char *opencl_error_string(cl_int error); 13 | 14 | /* Find a GPU or a CPU associated with the first available platform. 15 | * If use_gpu is set, then this function first tries to look for a GPU 16 | * in the first available platform. 17 | * If this fails or if use_gpu is not set, then it tries to use the CPU. 18 | */ 19 | cl_device_id opencl_create_device(int use_gpu); 20 | 21 | /* Create an OpenCL program from a string and compile it. 22 | */ 23 | cl_program opencl_build_program_from_string(cl_context ctx, cl_device_id dev, 24 | const char *program_source, size_t program_size, 25 | const char *opencl_options); 26 | 27 | /* Create an OpenCL program from a source file and compile it. 
28 | */ 29 | cl_program opencl_build_program_from_file(cl_context ctx, cl_device_id dev, 30 | const char *filename, const char *opencl_options); 31 | 32 | #endif 33 | -------------------------------------------------------------------------------- /src/ppcg_files/cuda.h: -------------------------------------------------------------------------------- 1 | #ifndef _CUDA_H 2 | #define _CUDA_H 3 | 4 | #include "ppcg_options.h" 5 | #include "ppcg.h" 6 | 7 | #ifdef __cplusplus 8 | extern "C" 9 | { 10 | #endif 11 | 12 | int generate_cuda(isl_ctx *ctx, struct ppcg_options *options, 13 | const char *input); 14 | 15 | #ifdef __cplusplus 16 | } 17 | #endif 18 | 19 | #endif 20 | -------------------------------------------------------------------------------- /src/ppcg_files/cuda_common.h: -------------------------------------------------------------------------------- 1 | #ifndef _CUDA_COMMON_H_ 2 | #define _CUDA_COMMON_H_ 3 | 4 | #include 5 | 6 | struct cuda_info 7 | { 8 | FILE *host_c; 9 | FILE *kernel_c; 10 | FILE *kernel_h; 11 | }; 12 | 13 | void cuda_open_files(struct cuda_info *info, const char *input); 14 | void cuda_close_files(struct cuda_info *info); 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /src/ppcg_files/gpu_hybrid.h: -------------------------------------------------------------------------------- 1 | #ifndef GPU_HYBRID_H 2 | #define GPU_HYBRID_H 3 | 4 | #include 5 | 6 | #include "gpu.h" 7 | #include "hybrid.h" 8 | 9 | __isl_give isl_schedule_node *gpu_hybrid_tile(struct gpu_gen *gen, 10 | __isl_take isl_schedule_node *node, __isl_take ppcg_ht_bounds *bounds, 11 | int *tile_sizes); 12 | 13 | #endif 14 | -------------------------------------------------------------------------------- /src/ppcg_files/gpu_print.h: -------------------------------------------------------------------------------- 1 | #ifndef GPU_PRINT_H 2 | #define GPU_PRINT_H 3 | 4 | #include "gpu.h" 5 | 6 | __isl_give isl_printer 
*gpu_print_local_declarations(__isl_take isl_printer *p, 7 | struct gpu_prog *prog); 8 | 9 | __isl_give isl_printer *gpu_print_types(__isl_take isl_printer *p, 10 | struct gpu_types *types, struct gpu_prog *prog); 11 | 12 | __isl_give isl_printer *gpu_print_macros(__isl_take isl_printer *p, 13 | __isl_keep isl_ast_node *node); 14 | 15 | __isl_give isl_printer *gpu_array_info_print_size(__isl_take isl_printer *prn, 16 | struct gpu_array_info *array); 17 | __isl_give isl_printer *gpu_array_info_print_declaration_argument( 18 | __isl_take isl_printer *p, struct gpu_array_info *array, 19 | const char *memory_space); 20 | __isl_give isl_printer *gpu_array_info_print_call_argument( 21 | __isl_take isl_printer *p, struct gpu_array_info *array); 22 | 23 | __isl_give isl_printer *ppcg_kernel_print_copy(__isl_take isl_printer *p, 24 | struct ppcg_kernel_stmt *stmt); 25 | __isl_give isl_printer *ppcg_kernel_print_domain(__isl_take isl_printer *p, 26 | struct ppcg_kernel_stmt *stmt); 27 | 28 | #endif 29 | -------------------------------------------------------------------------------- /src/ppcg_files/opencl.h: -------------------------------------------------------------------------------- 1 | #ifndef _OPENCL_H 2 | #define _OPENCL_H 3 | 4 | #include 5 | #include "ppcg_options.h" 6 | #include "ppcg.h" 7 | 8 | #ifdef __cplusplus 9 | extern "C" 10 | { 11 | #endif 12 | 13 | int generate_opencl(isl_ctx *ctx, struct ppcg_options *options, 14 | const char *input, const char *output); 15 | 16 | #ifdef __cplusplus 17 | } 18 | #endif 19 | 20 | #endif 21 | -------------------------------------------------------------------------------- /src/schedule.h: -------------------------------------------------------------------------------- 1 | #ifndef _SCHEDULE_H 2 | #define _SCHEDULE_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "ppcg_options.h" 10 | 11 | #ifdef __cplusplus 12 | extern "C" 13 | { 14 | #endif 15 | 16 | __isl_give isl_set *parametrization(__isl_take 
isl_space *space, 17 | int len, int first, __isl_keep isl_id_list *names); 18 | 19 | __isl_give isl_schedule *ppcg_compute_non_grouping_schedule( 20 | __isl_take isl_schedule_constraints *sc, struct ppcg_options *options); 21 | __isl_give isl_schedule *ppcg_compute_schedule( 22 | __isl_take isl_schedule_constraints *sc, 23 | __isl_keep isl_schedule *schedule, struct ppcg_options *options); 24 | 25 | __isl_give isl_schedule *ppcg_get_schedule(isl_ctx *ctx, 26 | struct ppcg_options *options, 27 | __isl_give isl_schedule *(*compute)(void *user), void *user); 28 | 29 | __isl_give isl_schedule_node *ppcg_set_schedule_node_type( 30 | __isl_take isl_schedule_node *node, enum isl_ast_loop_type type); 31 | 32 | #ifdef __cplusplus 33 | } 34 | #endif 35 | 36 | #endif 37 | -------------------------------------------------------------------------------- /src/tests/call.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | void copy_summary(int b[1000], int a[1000], int pos) 4 | { 5 | b[pos] = 0; 6 | int c = a[pos]; 7 | } 8 | 9 | #ifdef pencil_access 10 | __attribute__((pencil_access(copy_summary))) 11 | #endif 12 | void copy(int b[1000], int a[1000], int pos); 13 | 14 | int main() 15 | { 16 | int a[1000], b[1000]; 17 | 18 | for (int i = 0; i < 1000; ++i) 19 | a[i] = i; 20 | #pragma scop 21 | for (int i = 0; i < 1000; ++i) 22 | copy(b, a, i); 23 | #pragma endscop 24 | for (int i = 0; i < 1000; ++i) 25 | if (b[i] != a[i]) 26 | return EXIT_FAILURE; 27 | 28 | return EXIT_SUCCESS; 29 | } 30 | -------------------------------------------------------------------------------- /src/tests/call2.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | void copy_summary(int b[1000], int a[1000], int pos) 4 | { 5 | b[pos] = 0; 6 | int c = a[pos]; 7 | } 8 | 9 | #ifdef pencil_access 10 | __attribute__((pencil_access(copy_summary))) 11 | #endif 12 | void copy(int b[1000], int a[1000], int pos); 13 
| 14 | int main() 15 | { 16 | int a[2][1000]; 17 | 18 | for (int i = 0; i < 1000; ++i) 19 | a[0][i] = i; 20 | #pragma scop 21 | for (int i = 0; i < 1000; ++i) 22 | copy(a[1], a[0], i); 23 | #pragma endscop 24 | for (int i = 0; i < 1000; ++i) 25 | if (a[1][i] != a[0][i]) 26 | return EXIT_FAILURE; 27 | 28 | return EXIT_SUCCESS; 29 | } 30 | -------------------------------------------------------------------------------- /src/tests/call2_opencl_functions.cl: -------------------------------------------------------------------------------- 1 | void copy(__global int b[1000], __global int a[1000], int pos) 2 | { 3 | b[pos] = a[pos]; 4 | } 5 | -------------------------------------------------------------------------------- /src/tests/call3.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | void copy_summary(int b[100], int a[100]) 4 | { 5 | for (int i = 0; i < 100; ++i) { 6 | b[i] = 0; 7 | int c = a[i]; 8 | } 9 | } 10 | 11 | #ifdef pencil_access 12 | __attribute__((pencil_access(copy_summary))) 13 | #endif 14 | void copy(int b[100], int a[100]); 15 | 16 | int main() 17 | { 18 | int A[100][100], B[100]; 19 | 20 | for (int i = 0; i < 100; ++i) 21 | B[i] = i; 22 | #pragma scop 23 | for (int i = 0; i < 100; ++i) 24 | copy(A[i], B); 25 | #pragma endscop 26 | for (int i = 0; i < 100; ++i) 27 | for (int j = 0; j < 100; ++j) 28 | if (A[j][i] != B[i]) 29 | return EXIT_FAILURE; 30 | 31 | return EXIT_SUCCESS; 32 | } 33 | -------------------------------------------------------------------------------- /src/tests/call3_opencl_functions.cl: -------------------------------------------------------------------------------- 1 | void copy(__global int b[100], __global int a[100]) 2 | { 3 | for (int i = 0; i < 100; ++i) 4 | b[i] = a[i]; 5 | } 6 | -------------------------------------------------------------------------------- /src/tests/call4.c: -------------------------------------------------------------------------------- 1 | #include 2 
| 3 | int inline get(int a[1000], int pos) 4 | { 5 | int tmp = a[pos]; 6 | return tmp; 7 | } 8 | 9 | int main() 10 | { 11 | int a[1000], b[1000]; 12 | 13 | for (int i = 0; i < 1000; ++i) 14 | a[i] = i; 15 | #pragma scop 16 | for (int i = 0; i < 999; ++i) 17 | b[i] = get(a, i) + get(a, i + 1); 18 | #pragma endscop 19 | for (int i = 0; i < 999; ++i) 20 | if (b[i] != a[i] + a[i + 1]) 21 | return EXIT_FAILURE; 22 | 23 | return EXIT_SUCCESS; 24 | } 25 | -------------------------------------------------------------------------------- /src/tests/call5.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int inline add_one(int i) 4 | { 5 | return i + 1; 6 | } 7 | 8 | int main() 9 | { 10 | int a[1000], b[1000]; 11 | 12 | for (int i = 0; i < 1000; ++i) 13 | a[i] = i; 14 | #pragma scop 15 | for (int i = 0; i < 999; ++i) 16 | b[i] = add_one(add_one(a[i])); 17 | #pragma endscop 18 | for (int i = 0; i < 999; ++i) 19 | if (b[i] != a[i] + 2) 20 | return EXIT_FAILURE; 21 | 22 | return EXIT_SUCCESS; 23 | } 24 | -------------------------------------------------------------------------------- /src/tests/call_opencl_functions.cl: -------------------------------------------------------------------------------- 1 | void copy(__global int b[1000], __global int a[1000], int pos) 2 | { 3 | b[pos] = a[pos]; 4 | } 5 | -------------------------------------------------------------------------------- /src/tests/dead.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main() 4 | { 5 | int a[1000], b[1000]; 6 | 7 | for (int i = 0; i < 1000; ++i) 8 | a[i] = i; 9 | #pragma scop 10 | for (int i = 0; i < 1000; ++i) { 11 | int c; 12 | int d; 13 | c = a[i]; 14 | d = c; 15 | b[i] = c; 16 | } 17 | #pragma endscop 18 | for (int i = 0; i < 1000; ++i) 19 | if (b[i] != a[i]) 20 | return EXIT_FAILURE; 21 | 22 | return EXIT_SUCCESS; 23 | } 24 | 
-------------------------------------------------------------------------------- /src/tests/iterator.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main() 4 | { 5 | int i; 6 | int a[101]; 7 | 8 | i = 0; 9 | #pragma scop 10 | for (i = 0; i < 100; ++i) 11 | a[i] = i; 12 | a[i] = i; 13 | #pragma endscop 14 | if (a[100] != 100) 15 | return EXIT_FAILURE; 16 | 17 | return EXIT_SUCCESS; 18 | } 19 | -------------------------------------------------------------------------------- /src/tests/live_out.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | /* Check that a write access is not removed from the live-out 4 | * accesses only because a strict subset of the (potentially) 5 | * accessed elements are killed by a later write. 6 | */ 7 | int main() 8 | { 9 | int A[10]; 10 | 11 | A[1] = 0; 12 | #pragma scop 13 | int i = 1; 14 | i = i * i; 15 | A[i] = 1; 16 | A[0] = 0; 17 | #pragma endscop 18 | if (A[1] != 1) 19 | return EXIT_FAILURE; 20 | 21 | return EXIT_SUCCESS; 22 | } 23 | -------------------------------------------------------------------------------- /src/tests/local.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main() 4 | { 5 | int A[100]; 6 | 7 | #pragma scop 8 | { 9 | int B[100]; 10 | B[0] = 0; 11 | for (int i = 1; i < 100; ++i) 12 | B[i] = B[i - 1] + 1; 13 | for (int i = 0; i < 100; ++i) 14 | A[i] = B[i]; 15 | } 16 | #pragma endscop 17 | for (int i = 0; i < 100; ++i) 18 | if (A[i] != i) 19 | return EXIT_FAILURE; 20 | 21 | return EXIT_SUCCESS; 22 | } 23 | -------------------------------------------------------------------------------- /src/tests/loop.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main() 4 | { 5 | int a[1000], b[1000]; 6 | 7 | for (int i = 0; i < 1000; ++i) 8 | a[i] = i; 9 | #pragma scop 10 | for (int i = 0; i < 
1000; ++i) 11 | b[i] = a[i]; 12 | #pragma endscop 13 | for (int i = 0; i < 1000; ++i) 14 | if (b[i] != a[i]) 15 | return EXIT_FAILURE; 16 | 17 | return EXIT_SUCCESS; 18 | } 19 | -------------------------------------------------------------------------------- /src/tests/not_accessed.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | void copy_summary(int b[1000], int a[1000], int pos, int c[1000]) 4 | { 5 | b[pos] = 0; 6 | int d = a[pos]; 7 | } 8 | 9 | #ifdef pencil_access 10 | __attribute__((pencil_access(copy_summary))) 11 | #endif 12 | void copy(int b[1000], int a[1000], int pos, int c[1000]); 13 | 14 | int main() 15 | { 16 | int a[1000], b[1000], c[1000]; 17 | 18 | for (int i = 0; i < 1000; ++i) 19 | a[i] = i; 20 | #pragma scop 21 | for (int i = 0; i < 1000; ++i) 22 | copy(b, a, i, c); 23 | #pragma endscop 24 | for (int i = 0; i < 1000; ++i) 25 | if (b[i] != a[i]) 26 | return EXIT_FAILURE; 27 | 28 | return EXIT_SUCCESS; 29 | } 30 | -------------------------------------------------------------------------------- /src/tests/not_accessed_opencl_functions.cl: -------------------------------------------------------------------------------- 1 | void copy(__global int b[1000], __global int a[1000], int pos, 2 | __global int c[1000]) 3 | { 4 | b[pos] = a[pos]; 5 | } 6 | -------------------------------------------------------------------------------- /src/tests/scalar.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main() 4 | { 5 | int a; 6 | #pragma scop 7 | a = 1; 8 | #pragma endscop 9 | if (a != 1) 10 | return EXIT_FAILURE; 11 | 12 | return EXIT_SUCCESS; 13 | } 14 | -------------------------------------------------------------------------------- /src/tests/shared_sink.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | /* Check that the sources of live ranges with the same sink 4 | * are executed in 
order. 5 | */ 6 | int main() 7 | { 8 | int A[128]; 9 | int n = 128; 10 | 11 | A[0] = 0; 12 | #pragma scop 13 | for (int i = 0; i < n; ++i) { 14 | int set = 0; 15 | if (A[i] < 2) 16 | set = 1; 17 | if (set) 18 | A[i] = 2; 19 | } 20 | #pragma endscop 21 | if (A[0] != 2) 22 | return EXIT_FAILURE; 23 | 24 | return EXIT_SUCCESS; 25 | } 26 | -------------------------------------------------------------------------------- /src/tests/struct.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | struct s { 4 | int c[10][10]; 5 | }; 6 | 7 | int main() 8 | { 9 | struct s a[10][10], b[10][10]; 10 | 11 | for (int i = 0; i < 10; ++i) 12 | for (int j = 0; j < 10; ++j) 13 | for (int k = 0; k < 10; ++k) 14 | for (int l = 0; l < 10; ++l) 15 | a[i][j].c[k][l] = i + j + k + l; 16 | #pragma scop 17 | for (int i = 0; i < 10; ++i) 18 | for (int j = 0; j < 10; ++j) 19 | for (int k = 0; k < 10; ++k) 20 | for (int l = 0; l < 10; ++l) 21 | b[i][j].c[k][l] = i + j + k + l; 22 | #pragma endscop 23 | for (int i = 0; i < 10; ++i) 24 | for (int j = 0; j < 10; ++j) 25 | for (int k = 0; k < 10; ++k) 26 | for (int l = 0; l < 10; ++l) 27 | if (b[i][j].c[k][l] != a[i][j].c[k][l]) 28 | return EXIT_FAILURE; 29 | 30 | return EXIT_SUCCESS; 31 | } 32 | -------------------------------------------------------------------------------- /src/tests/struct2.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | struct s { 4 | int a; 5 | }; 6 | 7 | int main() 8 | { 9 | struct s a, b[10]; 10 | 11 | #pragma scop 12 | a.a = 42; 13 | for (int i = 0; i < 10; ++i) 14 | b[i].a = a.a; 15 | #pragma endscop 16 | for (int i = 0; i < 10; ++i) 17 | if (b[i].a != 42) 18 | return EXIT_FAILURE; 19 | 20 | return EXIT_SUCCESS; 21 | } 22 | -------------------------------------------------------------------------------- /src/tests/struct3.c: -------------------------------------------------------------------------------- 1 | #include 2 
| 3 | struct s { 4 | int a; 5 | int b; 6 | }; 7 | 8 | int main() 9 | { 10 | struct s a, b[10]; 11 | 12 | a.b = 57; 13 | #pragma scop 14 | a.a = 42; 15 | for (int i = 0; i < 10; ++i) 16 | b[i] = a; 17 | #pragma endscop 18 | for (int i = 0; i < 10; ++i) 19 | if (b[i].a != 42) 20 | return EXIT_FAILURE; 21 | if (a.b != 57) 22 | return EXIT_FAILURE; 23 | 24 | return EXIT_SUCCESS; 25 | } 26 | -------------------------------------------------------------------------------- /src/tests/struct4.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | struct s { 4 | int a; 5 | int b; 6 | }; 7 | 8 | int main() 9 | { 10 | int a[10]; 11 | 12 | for (int i = 0; i < 10; ++i) 13 | a[i] = 0; 14 | #pragma scop 15 | for (int i = 0; i < 10; ++i) { 16 | struct s b; 17 | b.a = 1; 18 | b.b = i; 19 | a[i] = b.a + b.b; 20 | } 21 | #pragma endscop 22 | for (int i = 0; i < 10; ++i) 23 | if (a[i] != 1 + i) 24 | return EXIT_FAILURE; 25 | 26 | return EXIT_SUCCESS; 27 | } 28 | -------------------------------------------------------------------------------- /src/tests/struct5.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | struct s { 4 | int a; 5 | int b; 6 | }; 7 | 8 | int main() 9 | { 10 | int a[10]; 11 | 12 | for (int i = 0; i < 10; ++i) 13 | a[i] = 0; 14 | #pragma scop 15 | for (int i = 0; i < 10; ++i) { 16 | struct s b[1]; 17 | b[0].a = 1; 18 | b[0].b = i; 19 | a[i] = b[0].a + b[0].b; 20 | } 21 | #pragma endscop 22 | for (int i = 0; i < 10; ++i) 23 | if (a[i] != 1 + i) 24 | return EXIT_FAILURE; 25 | 26 | return EXIT_SUCCESS; 27 | } 28 | -------------------------------------------------------------------------------- /src/util.h: -------------------------------------------------------------------------------- 1 | #ifndef UTIL_H 2 | #define UTIL_H 3 | 4 | #include 5 | 6 | #include 7 | #include 8 | 9 | #ifdef __cplusplus 10 | extern "C" 11 | { 12 | #endif 13 | 14 | /* Compare the prefix of "s" 
to "prefix" up to the length of "prefix". 15 | */ 16 | static inline int prefixcmp(const char *s, const char *prefix) 17 | { 18 | return strncmp(s, prefix, strlen(prefix)); 19 | } 20 | 21 | __isl_give isl_multi_val *ppcg_multi_val_from_int(__isl_take isl_space *space, 22 | int val); 23 | __isl_give isl_multi_val *ppcg_multi_val_from_int_list( 24 | __isl_take isl_space *space, int *list); 25 | __isl_give isl_multi_pw_aff *ppcg_size_from_extent(__isl_take isl_set *set); 26 | 27 | #ifdef __cplusplus 28 | } 29 | #endif 30 | 31 | #endif 32 | -------------------------------------------------------------------------------- /src/version.c: -------------------------------------------------------------------------------- 1 | #include "gitversion.h" 2 | 3 | const char *ppcg_version(void) 4 | { 5 | return GIT_HEAD_ID"\n"; 6 | } 7 | --------------------------------------------------------------------------------