├── HLS_Data_Lib ├── or │ └── note ├── and │ └── note ├── xor │ └── note ├── br │ └── br ├── ret │ └── ret ├── getelementptr │ └── getelementptr ├── store │ └── store ├── load │ └── load ├── fcmp │ ├── fcmp │ └── overview_float ├── README ├── dcmp │ └── dcmp ├── fadd │ └── fadd ├── fsub │ └── fsub ├── fmul │ └── fmul ├── dmul │ └── dmul ├── fdiv │ └── fdiv ├── ddiv │ └── ddiv ├── dsub │ └── dsub └── dadd │ └── dadd ├── Tests ├── LLVM_exp10_HI_LoopLabeler │ ├── testcase │ │ ├── aaaa.h │ │ ├── a.out │ │ └── test.c │ ├── Build.sh │ ├── LLVM_exp10_HI_LoopLabeler.h │ └── README ├── LLVM_exp10_HI_APIntegerAnalysis │ ├── testcase │ │ ├── aaaa.h │ │ ├── a.out │ │ └── test.c │ ├── Build.sh │ ├── LLVM_exp10_HI_APIntegerAnalysis.h │ └── README ├── LLVM_exp11_HI_clang_import_test │ ├── testcase │ │ ├── aaaa.h │ │ ├── a.out │ │ └── test.c │ ├── README │ ├── Build.sh │ └── LLVM_exp11_HI_clang_import_test.h ├── LLVM_exp12b_HI_ResourceEvaluation │ ├── config.txt │ ├── README │ └── Build.sh ├── LLVM_exp12c_HI_AggressiveLSR_MUL │ ├── config.txt │ ├── Build.sh │ └── README ├── LLVM_exp13_HI_ArrayAccessPattern │ ├── config.txt │ ├── Build.sh │ └── README ├── LLVM_exp13c_HI_IR2SourceCode │ ├── config.txt │ ├── README │ └── Build.sh ├── LLVM_exp12a_HI_InstructionMoveBackward │ ├── config.txt │ ├── Build.sh │ └── README ├── LLVM_exp13b_HI_ReplaceSelectAccess │ ├── config.txt │ ├── Build.sh │ └── README ├── LLVM_exp13a_HI_FunctionInstantiation │ ├── config.txt │ ├── Build.sh │ └── README ├── LLVM_exp12_HI_NoDirectiveTimingResourceEvaluation │ ├── config.txt │ ├── Build.sh │ └── README ├── LLVM_exp13d_HI_LoopUnroll │ ├── config.txt │ ├── Build.sh │ ├── README │ └── ConfigParse.h ├── README ├── LLVM_exp2_loop_processing │ ├── README │ └── Build.sh ├── Light_HLS_Top │ ├── Build.sh │ ├── format-corret │ ├── config_2mm.txt │ ├── ciinfor │ ├── config.txt │ ├── config_conv.txt │ ├── ConfigParse.h │ └── README ├── LLVM_exp14_HI_WithDirectiveTimingResourceEvaluation │ ├── config.txt │ ├── Build.sh 
│ ├── ConfigParse.h │ └── README ├── LLVM_exp4_polly_info │ ├── Build.sh │ ├── README │ └── LLVM_exp4_polly_info.h ├── LLVM_exp0_find_functions │ ├── Build.sh │ ├── README │ └── LLVM_exp0_find_functions.cc ├── LLVM_exp1_dependence_list │ ├── Build.sh │ ├── README │ └── LLVM_exp1_dependence_list.cc ├── LLVM_exp5a_InstrucitonList │ ├── Build.sh │ └── README ├── LLVM_exp6_GEP_Transformation │ ├── Build.sh │ └── README ├── LLVM_exp8_VarWidthReduce │ ├── Build.sh │ └── README ├── LLVM_exp9_HI_SepConstGEP │ ├── Build.sh │ └── README ├── LLVM_exp3_loop_info_extraction │ ├── Build.sh │ ├── README │ └── LLVM_exp3_loop_info_extraction.h ├── LLVM_exp5_SimpleTimingAnalysis │ ├── Build.sh │ └── README ├── LLVM_exp7_DuplicateInstRemove │ ├── Build.sh │ └── README ├── LLVM_expAPINT_test │ ├── arb_prec_array.cpp │ └── README └── LLVM_Learner_Libs │ └── CMakeLists.txt ├── HLS_Lib_Generator ├── Constant │ ├── br │ │ └── br │ ├── ret │ │ └── ret │ └── getelementptr │ │ └── getelementptr └── instructionList ├── App ├── bin_conv │ ├── config_bin_conv.txt │ ├── ap_int.h │ └── bin_conv.h ├── 2dloop2darray_pl │ ├── inlined.bc │ ├── 2dloop2darray_pl.1.cc │ └── 2dloop2darray_pl.cc ├── 2dloop2darray_pl_apint │ ├── test │ ├── ap_test.cc │ ├── ap_int.h │ └── 2dloop2darray_pl.cc ├── README ├── 2dloop1darray_pl │ └── 2dloop1darray.c ├── extremeLoop │ └── extremeLoop.cpp ├── 2dloop1darray_notpl │ └── 2dloop1darray_notpl.c ├── Subfunctions │ └── Subfunctions.cc ├── 2dloop2darray_pl_s │ └── 2dloop2darray_pl_s.c ├── 2dloop2darray_notpl │ └── 2dloop2darrary_notpl.c ├── 2dloop2darray_pl_multest │ ├── mulorder.cc │ └── 2dloop2darray_pl.cc ├── 2dloop2darray_ifinloop │ └── 2dloop2darray_ifinloop.c ├── 2dloop2darray_pl_brAccess │ ├── 2dloop2darray_pl_brAccess_b.cc │ └── 2dloop2darray_pl_brAccess_a.cc ├── 2dloop2darray_pl_depCheck │ └── 2dloop2darray_pl.cc ├── allocation_free │ └── allocation_free.c ├── 2dloop2darray_pl_indeploop │ └── 2dloop2darray_pl.cc ├── 2dloop2darray_struct │ └── 
2dloop2darray_struct.c ├── 2dloop2darray_pl_moreAccess │ └── 2dloop2darray_pl_moreAccess.cc ├── 2dloop2darray_pl_subf2 │ └── 2dloop2darray_pl_subf2.cc ├── 2dloop2darray_win_filter │ └── 2dloop2darray_pl.cc ├── 2mm │ ├── 2mm_tiny copy.cc │ ├── 2mm.cc │ ├── 2mm_tiny.cc │ └── 2mm_float.cc ├── 2dloop2darray_pl_subf │ └── 2dloop2darray_pl.cc ├── jacobi │ ├── jacobi-2d.c │ └── jacobi-2d.h ├── fdtd2d │ └── fdtd.cc ├── conv │ ├── conv_tiny.cc │ ├── conv.cc │ └── conv_tiny3.cc ├── spam-filter │ └── sgd.cpp ├── 2dloop2darray_pl_functionAccess │ └── 2dloop2darray_pl_functionAccess.cc └── deriche │ ├── deriche_int.cc │ └── deriche.cc ├── Implementations ├── HI_LoopUnroll │ ├── README │ └── CMakeLists.txt ├── HI_Mul2Shl │ ├── README │ └── CMakeLists.txt ├── HI_TopLoop2Func │ ├── README │ ├── CMakeLists.txt │ └── HI_TopLoop2Func.cc ├── HI_LoopLabeler │ ├── README │ ├── CMakeLists.txt │ └── HI_LoopLabeler.cc ├── HI_FunctionInterfaceInfo │ ├── README │ ├── CMakeLists.txt │ └── HI_FunctionInterfaceInfo.cc ├── README.md ├── HI_DuplicateInstRm │ ├── README │ └── CMakeLists.txt ├── HI_HLSDuplicateInstRm │ ├── README │ └── CMakeLists.txt ├── HI_ConstantDivisorOpt │ ├── README │ └── CMakeLists.txt ├── HI_MuxInsertionArrayPartition │ ├── README.md │ ├── Note │ ├── ClockInfo.h │ ├── HLS_Instructions_Info.txt │ ├── CMakeLists.txt │ └── ClockInfo.cc ├── HI_PragmaTargetExtraction │ ├── README.md │ ├── Note │ ├── ClockInfo.h │ ├── HLS_Instructions_Info.txt │ ├── CMakeLists.txt │ └── ClockInfo.cc ├── HI_ArraySensitiveToLoopLevel │ ├── Note │ ├── ClockInfo.h │ ├── README.md │ ├── HLS_Instructions_Info.txt │ ├── CMakeLists.txt │ └── ClockInfo.cc ├── HI_NoDirectiveTimingResourceEvaluation │ ├── Note │ ├── ClockInfo.h │ ├── HLS_Instructions_Info.txt │ ├── CMakeLists.txt │ ├── ClockInfo.cc │ ├── HI_InstructionFiles.h │ └── README.md ├── HI_WithDirectiveTimingResourceEvaluation │ ├── Note │ ├── ClockInfo.h │ ├── HLS_Instructions_Info.txt │ ├── CMakeLists.txt │ ├── ClockInfo.cc │ ├── 
HI_InstructionFiles.h │ └── README.md ├── HI_GEP_OffsetCombine │ ├── HLS_Instructions.txt │ ├── CMakeLists.txt │ ├── README │ └── HI_GEP_OffsetCombine.cc ├── HI_InstructionLatencyCollect │ ├── HLS_Instructions.txt │ ├── CMakeLists.txt │ └── README ├── HI_SimpleTimingEvaluation │ ├── HLS_Instructions.txt │ ├── CMakeLists.txt │ └── README ├── HI_SysExec │ ├── HI_SysExec.h │ ├── CMakeLists.txt │ └── HI_SysExec.cc ├── HI_ArrayAccessPattern │ ├── README │ └── CMakeLists.txt ├── HI_SeparateConstOffsetFromGEP │ ├── README │ └── CMakeLists.txt ├── HI_RemoveRedundantAccess │ ├── README │ └── CMakeLists.txt ├── HI_print │ ├── CMakeLists.txt │ ├── HI_print.h │ └── HI_print.cc ├── HI_LoadALAP │ ├── README │ └── CMakeLists.txt ├── HI_FindFunctions │ ├── README │ ├── CMakeLists.txt │ └── HI_FindFunction.cc ├── HI_Polly_Info │ ├── CMakeLists.txt │ ├── README │ ├── HI_Polly_Info.cc │ └── HI_Polly_Info.h ├── HI_MulOrderOpt │ ├── CMakeLists.txt │ └── README ├── HI_IR2SourceCode │ ├── CMakeLists.txt │ └── README ├── HI_ReplaceSelectAccess │ ├── README │ └── CMakeLists.txt ├── HI_StringProcess │ ├── CMakeLists.txt │ └── HI_StringProcess.h ├── HI_DependenceList │ ├── CMakeLists.txt │ └── README ├── HI_VarWidthReduce │ ├── CMakeLists.txt │ └── README ├── HI_APIntSrcAnalysis │ ├── CMakeLists.txt │ ├── README │ └── HI_APIntSrcAnalysis.cc ├── HI_AggressiveLSR_MUL │ ├── CMakeLists.txt │ └── README ├── HI_FunctionInstantiation │ ├── CMakeLists.txt │ └── README ├── HI_LoopDependenceAnalysis │ └── CMakeLists.txt ├── HI_LoopInFormationCollect │ ├── CMakeLists.txt │ └── README ├── HI_IntstructionMoveBackward │ ├── CMakeLists.txt │ └── README └── CMakeLists.txt ├── .vscode └── settings.json ├── LLVM_IR_Apps ├── 2dloop2darrary_pl │ └── top.bc └── OpticalFlow │ ├── optical_flow.bc │ └── optical_flow.g.bc ├── .github └── ISSUE_TEMPLATE │ ├── custom.md │ ├── feature_request.md │ └── bug_report.md ├── CleanBuiltFiles.sh ├── BuildLibs.sh ├── BuildAllFiles.sh ├── TestBuiltFiles.sh └── Patch_for_LLVM └── 
README /HLS_Data_Lib/or/note: -------------------------------------------------------------------------------- 1 | LUT = bit + 7 2 | -------------------------------------------------------------------------------- /HLS_Data_Lib/and/note: -------------------------------------------------------------------------------- 1 | LUT = bit + 7 2 | -------------------------------------------------------------------------------- /HLS_Data_Lib/xor/note: -------------------------------------------------------------------------------- 1 | LUT = bit + 7 2 | -------------------------------------------------------------------------------- /HLS_Data_Lib/br/br: -------------------------------------------------------------------------------- 1 | -1 -1 -1 3 0 0 0 -1 0 1 --- -------------------------------------------------------------------------------- /HLS_Data_Lib/ret/ret: -------------------------------------------------------------------------------- 1 | -1 -1 -1 3 0 0 0 -1 0 1 --- -------------------------------------------------------------------------------- /Tests/LLVM_exp10_HI_LoopLabeler/testcase/aaaa.h: -------------------------------------------------------------------------------- 1 | int a = 1; -------------------------------------------------------------------------------- /HLS_Lib_Generator/Constant/br/br: -------------------------------------------------------------------------------- 1 | -1 -1 -1 3 0 0 0 -1 0 1 --- -------------------------------------------------------------------------------- /HLS_Lib_Generator/Constant/ret/ret: -------------------------------------------------------------------------------- 1 | -1 -1 -1 3 0 0 0 -1 0 1 --- -------------------------------------------------------------------------------- /Tests/LLVM_exp10_HI_APIntegerAnalysis/testcase/aaaa.h: -------------------------------------------------------------------------------- 1 | int a = 1; -------------------------------------------------------------------------------- 
/Tests/LLVM_exp11_HI_clang_import_test/testcase/aaaa.h: -------------------------------------------------------------------------------- 1 | int a = 1; -------------------------------------------------------------------------------- /HLS_Data_Lib/getelementptr/getelementptr: -------------------------------------------------------------------------------- 1 | -1 -1 -1 3 0 0 0 0 0 1 --- -------------------------------------------------------------------------------- /App/bin_conv/config_bin_conv.txt: -------------------------------------------------------------------------------- 1 | clock = 5 2 | HLS_lib_path = ../../../HLS_Data_Lib/ -------------------------------------------------------------------------------- /HLS_Lib_Generator/Constant/getelementptr/getelementptr: -------------------------------------------------------------------------------- 1 | -1 -1 -1 3 0 0 0 0 0 1 --- -------------------------------------------------------------------------------- /Implementations/HI_LoopUnroll/README: -------------------------------------------------------------------------------- 1 | The HI_LoopUnroll pass is used to unroll specific loop -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "files.associations": { 3 | "*.inc": "cpp" 4 | } 5 | } -------------------------------------------------------------------------------- /Tests/LLVM_exp12b_HI_ResourceEvaluation/config.txt: -------------------------------------------------------------------------------- 1 | clock = 10 2 | HLS_lib_path = ../../../HLS_Data_Lib/ -------------------------------------------------------------------------------- /Tests/LLVM_exp12c_HI_AggressiveLSR_MUL/config.txt: -------------------------------------------------------------------------------- 1 | clock = 10 2 | HLS_lib_path = ../../../HLS_Data_Lib/ 
-------------------------------------------------------------------------------- /Tests/LLVM_exp13_HI_ArrayAccessPattern/config.txt: -------------------------------------------------------------------------------- 1 | clock = 10 2 | HLS_lib_path = ../../../HLS_Data_Lib/ -------------------------------------------------------------------------------- /Tests/LLVM_exp13c_HI_IR2SourceCode/config.txt: -------------------------------------------------------------------------------- 1 | clock = 10 2 | HLS_lib_path = ../../../HLS_Data_Lib/ 3 | -------------------------------------------------------------------------------- /Tests/LLVM_exp12a_HI_InstructionMoveBackward/config.txt: -------------------------------------------------------------------------------- 1 | clock = 10 2 | HLS_lib_path = ../../../HLS_Data_Lib/ -------------------------------------------------------------------------------- /Tests/LLVM_exp13b_HI_ReplaceSelectAccess/config.txt: -------------------------------------------------------------------------------- 1 | clock = 10 2 | HLS_lib_path = ../../../HLS_Data_Lib/ 3 | -------------------------------------------------------------------------------- /Implementations/HI_Mul2Shl/README: -------------------------------------------------------------------------------- 1 | HI_Mul2Shl will try to transform Multiplication with Constant into (2^x +- 2^y) -------------------------------------------------------------------------------- /Implementations/HI_TopLoop2Func/README: -------------------------------------------------------------------------------- 1 | The HI_IR2SourceCode pass is used to map the IRs back to the source code. 
-------------------------------------------------------------------------------- /Tests/LLVM_exp13a_HI_FunctionInstantiation/config.txt: -------------------------------------------------------------------------------- 1 | clock = 10 2 | HLS_lib_path = ../../../HLS_Data_Lib/ 3 | -------------------------------------------------------------------------------- /Tests/LLVM_exp13c_HI_IR2SourceCode/README: -------------------------------------------------------------------------------- 1 | The HI_IR2SourceCode pass is used to map the IRs back to the source code. -------------------------------------------------------------------------------- /Tests/LLVM_exp12_HI_NoDirectiveTimingResourceEvaluation/config.txt: -------------------------------------------------------------------------------- 1 | clock = 10.0 2 | HLS_lib_path = ../../../HLS_Data_Lib/ -------------------------------------------------------------------------------- /App/2dloop2darray_pl/inlined.bc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zslwyuan/LLVM-9.0-Learner-Tutorial/HEAD/App/2dloop2darray_pl/inlined.bc -------------------------------------------------------------------------------- /App/2dloop2darray_pl_apint/test: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zslwyuan/LLVM-9.0-Learner-Tutorial/HEAD/App/2dloop2darray_pl_apint/test -------------------------------------------------------------------------------- /Tests/LLVM_exp13d_HI_LoopUnroll/config.txt: -------------------------------------------------------------------------------- 1 | clock = 10 2 | HLS_lib_path = ../../../HLS_Data_Lib/ 3 | loop_unroll label=loop1 factor=2 -------------------------------------------------------------------------------- /Implementations/HI_LoopLabeler/README: -------------------------------------------------------------------------------- 1 | HI_LoopLabeler is used to assign 
labels to each loop so it will be easier to map pragmas 2 | -------------------------------------------------------------------------------- /App/README: -------------------------------------------------------------------------------- 1 | This directory stores all the source code that can be analyzed and processed by the passes. This code is the input of the tests. -------------------------------------------------------------------------------- /Implementations/HI_FunctionInterfaceInfo/README: -------------------------------------------------------------------------------- 1 | HI_LoopLabeler is used to assign labels to each loop so it will be easier to map pragmas 2 | -------------------------------------------------------------------------------- /LLVM_IR_Apps/2dloop2darrary_pl/top.bc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zslwyuan/LLVM-9.0-Learner-Tutorial/HEAD/LLVM_IR_Apps/2dloop2darrary_pl/top.bc -------------------------------------------------------------------------------- /Implementations/README.md: -------------------------------------------------------------------------------- 1 | This directory stores the implementations of all the Passes in the blog, each of which is located in its own directory. -------------------------------------------------------------------------------- /LLVM_IR_Apps/OpticalFlow/optical_flow.bc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zslwyuan/LLVM-9.0-Learner-Tutorial/HEAD/LLVM_IR_Apps/OpticalFlow/optical_flow.bc -------------------------------------------------------------------------------- /Implementations/HI_DuplicateInstRm/README: -------------------------------------------------------------------------------- 1 | The HI_DuplicateInstRm pass is used to remove the duplicated instruction after the SeparateConstOffsetFromGEP pass. 
-------------------------------------------------------------------------------- /LLVM_IR_Apps/OpticalFlow/optical_flow.g.bc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zslwyuan/LLVM-9.0-Learner-Tutorial/HEAD/LLVM_IR_Apps/OpticalFlow/optical_flow.g.bc -------------------------------------------------------------------------------- /Implementations/HI_HLSDuplicateInstRm/README: -------------------------------------------------------------------------------- 1 | The HI_DuplicateInstRm pass is used to remove the duplicated instruction after the SeparateConstOffsetFromGEP pass. -------------------------------------------------------------------------------- /Tests/LLVM_exp10_HI_LoopLabeler/testcase/a.out: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zslwyuan/LLVM-9.0-Learner-Tutorial/HEAD/Tests/LLVM_exp10_HI_LoopLabeler/testcase/a.out -------------------------------------------------------------------------------- /Tests/LLVM_exp11_HI_clang_import_test/README: -------------------------------------------------------------------------------- 1 | LLVM_exp11_HI_clang_import_test is used to test the official clang import-test, showing how clang interface work. 
2 | -------------------------------------------------------------------------------- /Implementations/HI_ConstantDivisorOpt/README: -------------------------------------------------------------------------------- 1 | HI_ConstantDivisorOpt will try to transform a division with a constant divisor into mul/add/shr by using a magic number 2 | 3 | -------------------------------------------------------------------------------- /Tests/LLVM_exp10_HI_APIntegerAnalysis/testcase/a.out: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zslwyuan/LLVM-9.0-Learner-Tutorial/HEAD/Tests/LLVM_exp10_HI_APIntegerAnalysis/testcase/a.out -------------------------------------------------------------------------------- /Tests/LLVM_exp11_HI_clang_import_test/testcase/a.out: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zslwyuan/LLVM-9.0-Learner-Tutorial/HEAD/Tests/LLVM_exp11_HI_clang_import_test/testcase/a.out -------------------------------------------------------------------------------- /Implementations/HI_MuxInsertionArrayPartition/README.md: -------------------------------------------------------------------------------- 1 | HI_MuxInsertionArrayPartition is used to add array mux for specific array load instructions which access multiple partitions. -------------------------------------------------------------------------------- /Implementations/HI_PragmaTargetExtraction/README.md: -------------------------------------------------------------------------------- 1 | HI_PragmaTargetExtraction is used to detect those targets, e.g. loops and arrays, to which HLS directives can be applied, and the relationships between them. 
-------------------------------------------------------------------------------- /App/2dloop1darray_pl/2dloop1darray.c: -------------------------------------------------------------------------------- 1 | void f ( int *A) { 2 | int N = 100; 3 | int M = 50; 4 | for ( int j = 0; j < N; j++ ) 5 | for ( int i = 0; i < M; i++ ) 6 | A[i + 8] = A[i] + 1; 7 | } 8 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/custom.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Custom issue template 3 | about: Describe this issue template's purpose here. 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | 11 | -------------------------------------------------------------------------------- /Implementations/HI_PragmaTargetExtraction/Note: -------------------------------------------------------------------------------- 1 | Be careful!!! 2 | There could be "llvm.xxxx" functions called in the llvm IR, which should be filtered, otherwise, some passes (like ScalarEvolutionWrapperPass) will go wrong!!! -------------------------------------------------------------------------------- /App/extremeLoop/extremeLoop.cpp: -------------------------------------------------------------------------------- 1 | 2 | 3 | void extremeLoop(int A[6], int B[8]) 4 | { 5 | int i, j, k; 6 | for (i=1;i<6;i++) 7 | A[i] = A[i-1] + 1; 8 | for (i=1;i<8;i++) 9 | B[i] = B[i-1] * 13; 10 | } 11 | -------------------------------------------------------------------------------- /Implementations/HI_ArraySensitiveToLoopLevel/Note: -------------------------------------------------------------------------------- 1 | Be careful!!! 2 | There could be "llvm.xxxx" functions called in the llvm IR, which should be filtered, otherwise, some passes (like ScalarEvolutionWrapperPass) will go wrong!!! 
-------------------------------------------------------------------------------- /Implementations/HI_MuxInsertionArrayPartition/Note: -------------------------------------------------------------------------------- 1 | Be careful!!! 2 | There could be "llvm.xxxx" functions called in the llvm IR, which should be filtered, otherwise, some passes (like ScalarEvolutionWrapperPass) will go wrong!!! -------------------------------------------------------------------------------- /App/2dloop2darray_pl_apint/ap_test.cc: -------------------------------------------------------------------------------- 1 | #include "ap_int.h" 2 | 3 | int f(ap_int<317> a, ap_int<17> b) 4 | { 5 | 6 | a = a + 1; 7 | b = b + a; 8 | a = a + partSelect(b, 4, 1) + 1; 9 | 10 | return a; 11 | } 12 | -------------------------------------------------------------------------------- /Implementations/HI_NoDirectiveTimingResourceEvaluation/Note: -------------------------------------------------------------------------------- 1 | Be careful!!! 2 | There could be "llvm.xxxx" functions called in the llvm IR, which should be filtered, otherwise, some passes (like ScalarEvolutionWrapperPass) will go wrong!!! -------------------------------------------------------------------------------- /Implementations/HI_WithDirectiveTimingResourceEvaluation/Note: -------------------------------------------------------------------------------- 1 | Be careful!!! 2 | There could be "llvm.xxxx" functions called in the llvm IR, which should be filtered, otherwise, some passes (like ScalarEvolutionWrapperPass) will go wrong!!! -------------------------------------------------------------------------------- /Tests/LLVM_exp12b_HI_ResourceEvaluation/README: -------------------------------------------------------------------------------- 1 | The LLVM_exp12b_HI_ResourceEvaluation pass is tested by LLVM_exp12b_HI_ResourceEvaluation. 2 | 3 | In the experiment, the resource costs of the design are considered. 
This is just an extension of experiment#12. -------------------------------------------------------------------------------- /HLS_Lib_Generator/instructionList: -------------------------------------------------------------------------------- 1 | add sub 2 | mul 3 | sdiv srem udiv urem 4 | and or xor 5 | ashr 6 | lshr 7 | shl 8 | 9 | dadd dsub 10 | dmul 11 | ddiv 12 | dcmp 13 | fadd fsub 14 | fmul 15 | fdiv 16 | fcmp 17 | 18 | br 19 | ret 20 | getelementptr 21 | -------------------------------------------------------------------------------- /Tests/README: -------------------------------------------------------------------------------- 1 | This directory stores all the tests, each of which has its own main function and can generate a standalone executable. 2 | 3 | Please note that the Passes in the Implementations directory will first be compiled into libraries in LLVM_Learner_Libs, for later linking. -------------------------------------------------------------------------------- /Implementations/HI_ArraySensitiveToLoopLevel/ClockInfo.h: -------------------------------------------------------------------------------- 1 | #ifndef _HI_ClockInfo 2 | #define _HI_ClockInfo 3 | 4 | #include 5 | 6 | const int clockNum = 15; 7 | 8 | extern const std::string clockStrs[100]; 9 | extern const float clockPeriod[100]; 10 | 11 | #endif -------------------------------------------------------------------------------- /Implementations/HI_MuxInsertionArrayPartition/ClockInfo.h: -------------------------------------------------------------------------------- 1 | #ifndef _HI_ClockInfo 2 | #define _HI_ClockInfo 3 | 4 | #include 5 | 6 | const int clockNum = 15; 7 | 8 | extern const std::string clockStrs[100]; 9 | extern const float clockPeriod[100]; 10 | 11 | #endif -------------------------------------------------------------------------------- /Implementations/HI_PragmaTargetExtraction/ClockInfo.h: -------------------------------------------------------------------------------- 1 | #ifndef 
_HI_ClockInfo 2 | #define _HI_ClockInfo 3 | 4 | #include 5 | 6 | const int clockNum = 15; 7 | 8 | extern const std::string clockStrs[100]; 9 | extern const float clockPeriod[100]; 10 | 11 | #endif -------------------------------------------------------------------------------- /Implementations/HI_NoDirectiveTimingResourceEvaluation/ClockInfo.h: -------------------------------------------------------------------------------- 1 | #ifndef _HI_ClockInfo 2 | #define _HI_ClockInfo 3 | 4 | #include 5 | 6 | const int clockNum = 15; 7 | 8 | extern const std::string clockStrs[100]; 9 | extern const float clockPeriod[100]; 10 | 11 | #endif -------------------------------------------------------------------------------- /Tests/LLVM_exp10_HI_LoopLabeler/testcase/test.c: -------------------------------------------------------------------------------- 1 | #include "aaaa.h" 2 | #include 3 | 4 | 5 | void do_math(int *x) { 6 | *x += 5; 7 | } 8 | 9 | int main(void) { 10 | int result = -1, val = 4; 11 | do_math(&val); 12 | printf("%d\n",a); 13 | return result; 14 | } -------------------------------------------------------------------------------- /Tests/LLVM_exp2_loop_processing/README: -------------------------------------------------------------------------------- 1 | LLVM_exp2_loop_processing is to show how to call some Passes (e.g. LoopSimplify) properly 2 | since it seems that Passes are defined in different way. 3 | 4 | The test can be run with the following command: 5 | 6 | ./LLVM_expXXXXX -------------------------------------------------------------------------------- /Tests/Light_HLS_Top/Build.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | curr_path="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 4 | cd $curr_path 5 | 6 | if [ ! -d ./build ] 7 | then 8 | mkdir build 9 | fi 10 | 11 | cd build 12 | rm -rf * 13 | cmake .. 14 | make -j4 15 | cd .. 
-------------------------------------------------------------------------------- /Implementations/HI_WithDirectiveTimingResourceEvaluation/ClockInfo.h: -------------------------------------------------------------------------------- 1 | #ifndef _HI_ClockInfo 2 | #define _HI_ClockInfo 3 | 4 | #include 5 | 6 | const int clockNum = 15; 7 | 8 | extern const std::string clockStrs[100]; 9 | extern const float clockPeriod[100]; 10 | 11 | #endif -------------------------------------------------------------------------------- /Tests/LLVM_exp10_HI_APIntegerAnalysis/testcase/test.c: -------------------------------------------------------------------------------- 1 | #include "aaaa.h" 2 | #include 3 | 4 | 5 | void do_math(int *x) { 6 | *x += 5; 7 | } 8 | 9 | int main(void) { 10 | int result = -1, val = 4; 11 | do_math(&val); 12 | printf("%d\n",a); 13 | return result; 14 | } -------------------------------------------------------------------------------- /Tests/LLVM_exp11_HI_clang_import_test/testcase/test.c: -------------------------------------------------------------------------------- 1 | #include "aaaa.h" 2 | #include 3 | 4 | 5 | void do_math(int *x) { 6 | *x += 5; 7 | } 8 | 9 | int main(void) { 10 | int result = -1, val = 4; 11 | do_math(&val); 12 | printf("%d\n",a); 13 | return result; 14 | } -------------------------------------------------------------------------------- /Implementations/HI_GEP_OffsetCombine/HLS_Instructions.txt: -------------------------------------------------------------------------------- 1 | call 2 | ret 3 | br 4 | phi 5 | getelementptr 6 | icmp 7 | alloca 8 | store 9 | load 10 | sext 11 | add 12 | sitofp 13 | fmul 14 | fadd 15 | fdiv 16 | sub 17 | bitcast 18 | zext 19 | unreachable 20 | fsub 21 | uitofp 22 | trunc 23 | -------------------------------------------------------------------------------- /Tests/LLVM_exp14_HI_WithDirectiveTimingResourceEvaluation/config.txt: -------------------------------------------------------------------------------- 
1 | clock = 10 2 | HLS_lib_path = ../../../HLS_Data_Lib/ 3 | array_partition variable=A dim=2 factor=4 scope=f cyclic 4 | loop_unroll label=Loop_f_2 factor=4 5 | loop_unroll label=Loop_subf_2 factor=4 6 | loop_pipeline label=Loop_f_2 II=1 -------------------------------------------------------------------------------- /Tests/LLVM_exp4_polly_info/Build.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | curr_path="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 4 | cd $curr_path 5 | 6 | if [ ! -d ./build ] 7 | then 8 | mkdir build 9 | fi 10 | 11 | cd build 12 | rm -rf * 13 | cmake .. 14 | make -j4 15 | cd .. -------------------------------------------------------------------------------- /Implementations/HI_InstructionLatencyCollect/HLS_Instructions.txt: -------------------------------------------------------------------------------- 1 | call 2 | ret 3 | br 4 | phi 5 | getelementptr 6 | icmp 7 | alloca 8 | store 9 | load 10 | sext 11 | add 12 | sitofp 13 | fmul 14 | fadd 15 | fdiv 16 | sub 17 | bitcast 18 | zext 19 | unreachable 20 | fsub 21 | uitofp 22 | trunc 23 | -------------------------------------------------------------------------------- /Implementations/HI_SimpleTimingEvaluation/HLS_Instructions.txt: -------------------------------------------------------------------------------- 1 | call 2 | ret 3 | br 4 | phi 5 | getelementptr 6 | icmp 7 | alloca 8 | store 9 | load 10 | sext 11 | add 12 | sitofp 13 | fmul 14 | fadd 15 | fdiv 16 | sub 17 | bitcast 18 | zext 19 | unreachable 20 | fsub 21 | uitofp 22 | trunc 23 | -------------------------------------------------------------------------------- /Tests/LLVM_exp0_find_functions/Build.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | curr_path="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 4 | cd $curr_path 5 | 6 | if [ ! 
-d ./build ] 7 | then 8 | mkdir build 9 | fi 10 | 11 | cd build 12 | rm -rf * 13 | cmake .. 14 | make -j4 15 | cd .. -------------------------------------------------------------------------------- /Tests/LLVM_exp10_HI_LoopLabeler/Build.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | curr_path="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 4 | cd $curr_path 5 | 6 | if [ ! -d ./build ] 7 | then 8 | mkdir build 9 | fi 10 | 11 | cd build 12 | rm -rf * 13 | cmake .. 14 | make -j4 15 | cd .. -------------------------------------------------------------------------------- /Tests/LLVM_exp13c_HI_IR2SourceCode/Build.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | curr_path="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 4 | cd $curr_path 5 | 6 | if [ ! -d ./build ] 7 | then 8 | mkdir build 9 | fi 10 | 11 | cd build 12 | rm -rf * 13 | cmake .. 14 | make -j4 15 | cd .. -------------------------------------------------------------------------------- /Tests/LLVM_exp13d_HI_LoopUnroll/Build.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | curr_path="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 4 | cd $curr_path 5 | 6 | if [ ! -d ./build ] 7 | then 8 | mkdir build 9 | fi 10 | 11 | cd build 12 | rm -rf * 13 | cmake .. 14 | make -j4 15 | cd .. -------------------------------------------------------------------------------- /Tests/LLVM_exp1_dependence_list/Build.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | curr_path="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 4 | cd $curr_path 5 | 6 | if [ ! -d ./build ] 7 | then 8 | mkdir build 9 | fi 10 | 11 | cd build 12 | rm -rf * 13 | cmake .. 14 | make -j4 15 | cd .. 
-------------------------------------------------------------------------------- /Tests/LLVM_exp2_loop_processing/Build.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | curr_path="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 4 | cd $curr_path 5 | 6 | if [ ! -d ./build ] 7 | then 8 | mkdir build 9 | fi 10 | 11 | cd build 12 | rm -rf * 13 | cmake .. 14 | make -j4 15 | cd .. -------------------------------------------------------------------------------- /Tests/LLVM_exp5a_InstrucitonList/Build.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | curr_path="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 4 | cd $curr_path 5 | 6 | if [ ! -d ./build ] 7 | then 8 | mkdir build 9 | fi 10 | 11 | cd build 12 | rm -rf * 13 | cmake .. 14 | make -j4 15 | cd .. -------------------------------------------------------------------------------- /Tests/LLVM_exp6_GEP_Transformation/Build.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | curr_path="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 4 | cd $curr_path 5 | 6 | if [ ! -d ./build ] 7 | then 8 | mkdir build 9 | fi 10 | 11 | cd build 12 | rm -rf * 13 | cmake .. 14 | make -j4 15 | cd .. -------------------------------------------------------------------------------- /Tests/LLVM_exp8_VarWidthReduce/Build.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | curr_path="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 4 | cd $curr_path 5 | 6 | if [ ! -d ./build ] 7 | then 8 | mkdir build 9 | fi 10 | 11 | cd build 12 | rm -rf * 13 | cmake .. 14 | make -j4 15 | cd .. 
-------------------------------------------------------------------------------- /Tests/LLVM_exp9_HI_SepConstGEP/Build.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | curr_path="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 4 | cd $curr_path 5 | 6 | if [ ! -d ./build ] 7 | then 8 | mkdir build 9 | fi 10 | 11 | cd build 12 | rm -rf * 13 | cmake .. 14 | make -j4 15 | cd .. -------------------------------------------------------------------------------- /Tests/LLVM_exp10_HI_APIntegerAnalysis/Build.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | curr_path="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 4 | cd $curr_path 5 | 6 | if [ ! -d ./build ] 7 | then 8 | mkdir build 9 | fi 10 | 11 | cd build 12 | rm -rf * 13 | cmake .. 14 | make -j4 15 | cd .. -------------------------------------------------------------------------------- /Tests/LLVM_exp11_HI_clang_import_test/Build.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | curr_path="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 4 | cd $curr_path 5 | 6 | if [ ! -d ./build ] 7 | then 8 | mkdir build 9 | fi 10 | 11 | cd build 12 | rm -rf * 13 | cmake .. 14 | make -j4 15 | cd .. -------------------------------------------------------------------------------- /Tests/LLVM_exp12b_HI_ResourceEvaluation/Build.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | curr_path="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 4 | cd $curr_path 5 | 6 | if [ ! -d ./build ] 7 | then 8 | mkdir build 9 | fi 10 | 11 | cd build 12 | rm -rf * 13 | cmake .. 14 | make -j4 15 | cd .. 
-------------------------------------------------------------------------------- /Tests/LLVM_exp12c_HI_AggressiveLSR_MUL/Build.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | curr_path="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 4 | cd $curr_path 5 | 6 | if [ ! -d ./build ] 7 | then 8 | mkdir build 9 | fi 10 | 11 | cd build 12 | rm -rf * 13 | cmake .. 14 | make -j4 15 | cd .. -------------------------------------------------------------------------------- /Tests/LLVM_exp13_HI_ArrayAccessPattern/Build.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | curr_path="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 4 | cd $curr_path 5 | 6 | if [ ! -d ./build ] 7 | then 8 | mkdir build 9 | fi 10 | 11 | cd build 12 | rm -rf * 13 | cmake .. 14 | make -j4 15 | cd .. -------------------------------------------------------------------------------- /Tests/LLVM_exp3_loop_info_extraction/Build.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | curr_path="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 4 | cd $curr_path 5 | 6 | if [ ! -d ./build ] 7 | then 8 | mkdir build 9 | fi 10 | 11 | cd build 12 | rm -rf * 13 | cmake .. 14 | make -j4 15 | cd .. -------------------------------------------------------------------------------- /Tests/LLVM_exp5_SimpleTimingAnalysis/Build.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | curr_path="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 4 | cd $curr_path 5 | 6 | if [ ! -d ./build ] 7 | then 8 | mkdir build 9 | fi 10 | 11 | cd build 12 | rm -rf * 13 | cmake .. 14 | make -j4 15 | cd .. 
-------------------------------------------------------------------------------- /Tests/LLVM_exp7_DuplicateInstRemove/Build.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | curr_path="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 4 | cd $curr_path 5 | 6 | if [ ! -d ./build ] 7 | then 8 | mkdir build 9 | fi 10 | 11 | cd build 12 | rm -rf * 13 | cmake .. 14 | make -j4 15 | cd .. -------------------------------------------------------------------------------- /Tests/LLVM_exp12a_HI_InstructionMoveBackward/Build.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | curr_path="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 4 | cd $curr_path 5 | 6 | if [ ! -d ./build ] 7 | then 8 | mkdir build 9 | fi 10 | 11 | cd build 12 | rm -rf * 13 | cmake .. 14 | make -j4 15 | cd .. -------------------------------------------------------------------------------- /Tests/LLVM_exp13a_HI_FunctionInstantiation/Build.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | curr_path="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 4 | cd $curr_path 5 | 6 | if [ ! -d ./build ] 7 | then 8 | mkdir build 9 | fi 10 | 11 | cd build 12 | rm -rf * 13 | cmake .. 14 | make -j4 15 | cd .. -------------------------------------------------------------------------------- /Tests/LLVM_exp13b_HI_ReplaceSelectAccess/Build.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | curr_path="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 4 | cd $curr_path 5 | 6 | if [ ! -d ./build ] 7 | then 8 | mkdir build 9 | fi 10 | 11 | cd build 12 | rm -rf * 13 | cmake .. 14 | make -j4 15 | cd .. 
-------------------------------------------------------------------------------- /Implementations/HI_SysExec/HI_SysExec.h: -------------------------------------------------------------------------------- 1 | #ifndef _HI_SYSEXEC 2 | #define _HI_SYSEXEC 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | bool sysexec(const char *cmd); 13 | 14 | #endif -------------------------------------------------------------------------------- /Tests/LLVM_exp13d_HI_LoopUnroll/README: -------------------------------------------------------------------------------- 1 | The HI_IR2SourceCode pass is used to map the IRs back to the source code. 2 | In the application source, you should add a label to each loop that you want to unroll 3 | and define the unrolling in the configuration file in the following format: 4 | 5 | loop_unroll label=XXXX factor=XXX -------------------------------------------------------------------------------- /Implementations/HI_ArraySensitiveToLoopLevel/README.md: -------------------------------------------------------------------------------- 1 | The HI_ArraySensitiveToLoopLevel pass is used to check whether some loops 2 | are strongly sensitive to the partitioning of some dimensions of some arrays. 3 | 4 | Once such loops are found, whenever we unroll one of them we will also partition the specific 5 | dimension of the array. -------------------------------------------------------------------------------- /Tests/LLVM_exp12_HI_NoDirectiveTimingResourceEvaluation/Build.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | curr_path="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 4 | cd $curr_path 5 | 6 | if [ ! -d ./build ] 7 | then 8 | mkdir build 9 | fi 10 | 11 | cd build 12 | rm -rf * 13 | cmake .. 14 | make -j4 15 | cd ..
-------------------------------------------------------------------------------- /Tests/LLVM_exp14_HI_WithDirectiveTimingResourceEvaluation/Build.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | curr_path="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 4 | cd $curr_path 5 | 6 | if [ ! -d ./build ] 7 | then 8 | mkdir build 9 | fi 10 | 11 | cd build 12 | rm -rf * 13 | cmake .. 14 | make -j4 15 | cd .. -------------------------------------------------------------------------------- /App/2dloop1darray_notpl/2dloop1darray_notpl.c: -------------------------------------------------------------------------------- 1 | void f ( int *A) { 2 | int N = 100; 3 | int M = 50; 4 | for ( int j = 0; j < N; j++ ) 5 | { 6 | A[3] = 6; 7 | if (A[4] ==8) 8 | { 9 | A[6] = A[9] + 3; 10 | } 11 | 12 | for ( int i = 0; i < M; i++ ) 13 | A[i + 8] = A[i] + 1; 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /Implementations/HI_NoDirectiveTimingResourceEvaluation/HLS_Instructions_Info.txt: -------------------------------------------------------------------------------- 1 | call -1 2 | ret -1 3 | br -1 4 | phi -1 5 | getelementptr 0 6 | icmp 7 | alloca 8 | store 9 | load 10 | sext 11 | add 12 | sitofp 13 | fmul 14 | fadd 15 | fdiv 16 | sub 17 | bitcast 18 | zext 19 | unreachable 20 | fsub 21 | uitofp 22 | trunc 0 23 | -------------------------------------------------------------------------------- /Tests/LLVM_exp7_DuplicateInstRemove/README: -------------------------------------------------------------------------------- 1 | LLVM_exp7_DuplicateInstRemove is used to test the pass HI_DuplicateInstRm 2 | 3 | The pass is a transformation pass and remove the duplicated instructions in a block. 
4 | 5 | 6 | The test can be run with the following command: 7 | 8 | ./LLVM_expXXXXX -------------------------------------------------------------------------------- /Tests/Light_HLS_Top/format-corret: -------------------------------------------------------------------------------- 1 | clang-format ./bin_conv.cc -style="{BreakBeforeBraces: Allman ,BinPackParameters: true,IndentWidth: 4,TabWidth: 4,ColumnLimit: 10000,AllowShortBlocksOnASingleLine: false,AllowShortFunctionsOnASingleLine: false,AllowShortIfStatementsOnASingleLine: false ,AllowShortLoopsOnASingleLine: false }" > test.cc -------------------------------------------------------------------------------- /Implementations/HI_ArrayAccessPattern/README: -------------------------------------------------------------------------------- 1 | The HI_ArrayAccessPattern pass is used to analyze the access pattern of 2 | arrays in the source code. Here, I assume that for each iteration and 3 | specific array, the access pattern is the same. 4 | 5 | Please note that this pass is not suitable for programs with multiple 6 | functions!!! However, when I merge this pass into those evaluation -------------------------------------------------------------------------------- /Tests/LLVM_exp13_HI_ArrayAccessPattern/README: -------------------------------------------------------------------------------- 1 | The HI_ArrayAccessPattern pass is used to analyze the access pattern of 2 | arrays in the source code. Here, I assume that for each iteration and 3 | specific array, the access pattern is the same. 4 | 5 | Please note that this pass is not suitable for programs with multiple 6 | functions!!!
However, when I merge this pass into those evaluation -------------------------------------------------------------------------------- /App/Subfunctions/Subfunctions.cc: -------------------------------------------------------------------------------- 1 | 2 | 3 | void f2(int A[50][100]) 4 | { 5 | int N = 100; 6 | int M = 50; 7 | for (int j = 3; j < N; j++) 8 | for (int i = 1; i < M; i++) 9 | A[i][j] = A[i - 1][j] + A[i][j - 2] + A[i][j - 3]; 10 | return; 11 | } 12 | 13 | void f1(int A[50][100], int B[50][100]) 14 | { 15 | f2(B); 16 | f2(A); 17 | return; 18 | } -------------------------------------------------------------------------------- /Implementations/HI_SeparateConstOffsetFromGEP/README: -------------------------------------------------------------------------------- 1 | The HI_SeparateConstOffsetFromGEP pass is tested by LLVM_exp9_HI_SepConstGEP 2 | 3 | The pass is modified from the SeparateConstOffsetFromGEP pass, but in this pass, we lower the GEP considering that in HLS, arrays are aligned by elements, not by bytes (as in DDR). 4 | 5 | Therefore, we do not need to consider how wide (how many bytes/bits) an element is. -------------------------------------------------------------------------------- /CleanBuiltFiles.sh: -------------------------------------------------------------------------------- 1 | #!
/bin/bash 2 | 3 | curr_path="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 4 | cd $curr_path 5 | 6 | for buildfile in `find -name build` 7 | do 8 | echo "removing files in $buildfile/* " 9 | rm $buildfile/* -rf 10 | done 11 | 12 | for archivefile in `find -name *.a` 13 | do 14 | echo "removing file $archivefile" 15 | rm $archivefile 16 | done 17 | -------------------------------------------------------------------------------- /Implementations/HI_RemoveRedundantAccess/README: -------------------------------------------------------------------------------- 1 | The HI_RemoveRedundantAccess pass is used to remove redundant load by checking RAW and forwarding the data from previous store. 2 | 3 | for (i=1;i<101;i++) 4 | A[i][j] = A[i-1][j] + 1; 1 R + 1 W 5 | 6 | unroll 2 ===> 7 | 8 | A[i][j] = A[i-1][j] + 1; 9 | A[i+1][j] = A[i][j] + 1; 10 | 11 | the stored data A[i][j] can be forwarded and a redundant load can be removed. -------------------------------------------------------------------------------- /App/2dloop2darray_pl_s/2dloop2darray_pl_s.c: -------------------------------------------------------------------------------- 1 | void f ( int A[5000]) { 2 | int N = 100; 3 | int M = 50; 4 | for ( int j = 1; j < N; j++ ) 5 | for ( int i = 0; i < M; i++ ) 6 | A[i*100+j] = A[(i-1)*100+j-1] + A[i*100+j-1] + A[(i-1)*100+j] + 1; 7 | for ( int j = 1; j < N; j++ ) 8 | for ( int i = 0; i < M; i++ ) 9 | A[i*100+j] = A[(i-1)*100+j-1] + A[i*100+j-1] + A[(i-1)*100+j] + 1; 10 | return; 11 | } 12 | -------------------------------------------------------------------------------- /Implementations/HI_PragmaTargetExtraction/HLS_Instructions_Info.txt: -------------------------------------------------------------------------------- 1 | call -1 2 | ret -1 3 | br -1 4 | phi -1 5 | getelementptr 0 6 | icmp 7 | alloca 8 | store 9 | load 10 | sext 11 | add 12 | sitofp 13 | fmul 14 | fadd 15 | fdiv 16 | sub 17 | bitcast 18 | zext 19 | unreachable 20 | fsub 21 | uitofp 22 | 
trunc 0 23 | 24 | 25 | consider partition mux 26 | // 2 27 | // 4 1.95 28 | // 8 2 29 | // 16 2.19 30 | // 32 2.73 31 | // 64 3.35 32 | -------------------------------------------------------------------------------- /Implementations/HI_ArraySensitiveToLoopLevel/HLS_Instructions_Info.txt: -------------------------------------------------------------------------------- 1 | call -1 2 | ret -1 3 | br -1 4 | phi -1 5 | getelementptr 0 6 | icmp 7 | alloca 8 | store 9 | load 10 | sext 11 | add 12 | sitofp 13 | fmul 14 | fadd 15 | fdiv 16 | sub 17 | bitcast 18 | zext 19 | unreachable 20 | fsub 21 | uitofp 22 | trunc 0 23 | 24 | 25 | consider partition mux 26 | // 2 27 | // 4 1.95 28 | // 8 2 29 | // 16 2.19 30 | // 32 2.73 31 | // 64 3.35 32 | -------------------------------------------------------------------------------- /Implementations/HI_MuxInsertionArrayPartition/HLS_Instructions_Info.txt: -------------------------------------------------------------------------------- 1 | call -1 2 | ret -1 3 | br -1 4 | phi -1 5 | getelementptr 0 6 | icmp 7 | alloca 8 | store 9 | load 10 | sext 11 | add 12 | sitofp 13 | fmul 14 | fadd 15 | fdiv 16 | sub 17 | bitcast 18 | zext 19 | unreachable 20 | fsub 21 | uitofp 22 | trunc 0 23 | 24 | 25 | consider partition mux 26 | // 2 27 | // 4 1.95 28 | // 8 2 29 | // 16 2.19 30 | // 32 2.73 31 | // 64 3.35 32 | -------------------------------------------------------------------------------- /Implementations/HI_print/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | cmake_minimum_required(VERSION 3.10) 3 | 4 | find_package(LLVM REQUIRED CONFIG) 5 | # Needed to use support library 6 | 7 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti -std=c++14") 8 | 9 | include_directories(${LLVM_INCLUDE_DIRS}) 10 | add_definitions(${LLVM_DEFINITIONS}) 11 | 12 | aux_source_directory(. 
DIR_HI_PRINT_SRCS) 13 | add_library(HI_print ${DIR_HI_PRINT_SRCS}) 14 | 15 | 16 | -------------------------------------------------------------------------------- /Implementations/HI_LoadALAP/README: -------------------------------------------------------------------------------- 1 | HI_LoadALAP will try to reorganize the calculation order of a sequence of additions or multiplications 2 | so that a load can occur as late as possible. 3 | 4 | example: 5 | 6 | a = load 7 | b1 = a + b0 8 | b2 = c1 + b1 9 | 10 | 11 | 12 | after transform: 13 | (condition: b1 and b2 each have only one user) 14 | (condition: c1 is not a load instruction) 15 | 16 | a = load 17 | b1 = c1 + b0 18 | b2 = a + b1 19 | 20 | -------------------------------------------------------------------------------- /Implementations/HI_WithDirectiveTimingResourceEvaluation/HLS_Instructions_Info.txt: -------------------------------------------------------------------------------- 1 | call -1 2 | ret -1 3 | br -1 4 | phi -1 5 | getelementptr 0 6 | icmp 7 | alloca 8 | store 9 | load 10 | sext 11 | add 12 | sitofp 13 | fmul 14 | fadd 15 | fdiv 16 | sub 17 | bitcast 18 | zext 19 | unreachable 20 | fsub 21 | uitofp 22 | trunc 0 23 | 24 | 25 | consider partition mux 26 | // 2 27 | // 4 1.95 28 | // 8 2 29 | // 16 2.19 30 | // 32 2.73 31 | // 64 3.35 32 | -------------------------------------------------------------------------------- /App/bin_conv/ap_int.h: -------------------------------------------------------------------------------- 1 | #ifndef _APINT 2 | #define _APINT 3 | #define NULL __null 4 | 5 | template using ap_uint = __attribute__((__ap_int(Bits))) unsigned; 6 | 7 | template using ap_int = __attribute__((__ap_int(Bits))) int; 8 | 9 | template 10 | inline __attribute__((always_inline)) F partSelect(F input, const int l, const int r) 11 | { 12 | return ((input) & ((1 << (l + 1)) - 1)) >> r; 13 | } 14 | 15 | #endif 16 | --------------------------------------------------------------------------------
/App/2dloop2darray_notpl/2dloop2darrary_notpl.c: -------------------------------------------------------------------------------- 1 | void f ( int A[50][100]) { 2 | int N = 100; 3 | int M = 50; 4 | for ( int j = 1; j < N; j++ ) 5 | { 6 | if (j%2) 7 | { 8 | for ( int i = 0; i < M; i++ ) 9 | A[i][j] = A[i-1][j-1] + A[i][j-1] + A[i-1][j] + 1; 10 | } 11 | else 12 | { 13 | for ( int i = 0; i < M; i++ ) 14 | A[i][j] = A[i-1][j-1] + A[i][j-1] + A[i-1][j] + 2; 15 | } 16 | 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /App/2dloop2darray_pl_apint/ap_int.h: -------------------------------------------------------------------------------- 1 | #ifndef _APINT 2 | #define _APINT 3 | #define NULL __null 4 | 5 | template using ap_uint = __attribute__((__ap_int(Bits))) unsigned; 6 | 7 | template using ap_int = __attribute__((__ap_int(Bits))) int; 8 | 9 | template 10 | inline __attribute__((always_inline)) F partSelect(F input, const int l, const int r) 11 | { 12 | return ((input) & ((1 << (l + 1)) - 1)) >> r; 13 | } 14 | 15 | #endif -------------------------------------------------------------------------------- /App/2dloop2darray_pl_multest/mulorder.cc: -------------------------------------------------------------------------------- 1 | void f(int A[56][100]) 2 | { 3 | int N = 100; 4 | int M = 56; 5 | for (int j = 1; j < N; j++) 6 | { 7 | for (int i = 1; i < M + 1; i++) 8 | { 9 | A[i][j] = (A[i - 1][j - 1] * A[i - 1][j - 1]) * 10 | (A[i - 1][j] * A[i - 1][j] * A[i - 1][j] * A[i - 1][j] * A[i - 1][j]) + 11 | 1; 12 | } 13 | } 14 | return; 15 | } 16 | -------------------------------------------------------------------------------- /App/2dloop2darray_ifinloop/2dloop2darray_ifinloop.c: -------------------------------------------------------------------------------- 1 | void f ( int A[50][100]) { 2 | int N = 100; 3 | int M = 50; 4 | for ( int j = 1; j < N; j++ ) 5 | { 6 | for ( int i = 0; i < M; i++ ) 7 | { 8 | if (i%2) 9 | { 10 | 
A[i][j] = A[i-1][j-1] + A[i][j-1] + A[i-1][j] + 2; 11 | } 12 | else 13 | { 14 | A[i][j] = A[i-1][j-1] + 1; 15 | } 16 | 17 | } 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /App/2dloop2darray_pl_multest/2dloop2darray_pl.cc: -------------------------------------------------------------------------------- 1 | void f(int A[56][100]) 2 | { 3 | int N = 100; 4 | int M = 56; 5 | for (int j = 1; j < N; j++) 6 | { 7 | for (int i = 1; i < M + 1; i++) 8 | { 9 | // A[i][j] = (A[i-1][j-1] * A[i-1][j-1]) * (A[i-1][j] * A[i-1][j] * A[i-1][j] * 10 | // A[i-1][j]* A[i-1][j]) + 1; 11 | A[i][j] = A[i - 1][j - 1] * (132); 12 | } 13 | } 14 | return; 15 | } 16 | -------------------------------------------------------------------------------- /App/2dloop2darray_pl_brAccess/2dloop2darray_pl_brAccess_b.cc: -------------------------------------------------------------------------------- 1 | void f(int A[100][50][321][456]) 2 | { 3 | int N = 50; 4 | int M = 100; 5 | int L = 321; 6 | int P = 456; 7 | for (int i = 1; i < M; i++) 8 | for (int j = 1; j < N; j++) 9 | for (int k = 1; k < L; k++) 10 | for (int q = 1; q < P; q++) 11 | { 12 | 13 | A[i][j][k][q] = A[i][j][k][q] + 1; 14 | } 15 | return; 16 | } 17 | -------------------------------------------------------------------------------- /BuildLibs.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | curr_path="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 4 | cd $curr_path 5 | 6 | cd Tests 7 | 8 | 9 | for folder in `ls` 10 | do 11 | if [ -d ./$folder ] 12 | then 13 | cd $folder 14 | if [ ! -d ./build ] 15 | then 16 | mkdir build 17 | fi 18 | cd .. 19 | fi 20 | done 21 | 22 | cd LLVM_Learner_Libs/build 23 | cmake .. 
24 | make -j4 25 | cp `find -name *.a` ../ 26 | -------------------------------------------------------------------------------- /Implementations/HI_LoadALAP/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | cmake_minimum_required(VERSION 3.10) 3 | 4 | 5 | find_package(LLVM REQUIRED CONFIG) 6 | # Needed to use support library 7 | 8 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti -std=c++14") 9 | 10 | include_directories(${LLVM_INCLUDE_DIRS}) 11 | add_definitions(${LLVM_DEFINITIONS}) 12 | 13 | aux_source_directory(. DIR_HI_LoadALAP_SRCS) 14 | add_library(HI_LoadALAP ${DIR_HI_LoadALAP_SRCS}) 15 | 16 | 17 | # set(llvm_libs "") 18 | 19 | -------------------------------------------------------------------------------- /Implementations/HI_Mul2Shl/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | cmake_minimum_required(VERSION 3.10) 3 | 4 | 5 | find_package(LLVM REQUIRED CONFIG) 6 | # Needed to use support library 7 | 8 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti -std=c++14") 9 | 10 | include_directories(${LLVM_INCLUDE_DIRS}) 11 | add_definitions(${LLVM_DEFINITIONS}) 12 | 13 | aux_source_directory(. DIR_HI_Mul2Shl_SRCS) 14 | add_library(HI_Mul2Shl ${DIR_HI_Mul2Shl_SRCS}) 15 | 16 | 17 | # set(llvm_libs "") 18 | 19 | -------------------------------------------------------------------------------- /Implementations/HI_SysExec/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | cmake_minimum_required(VERSION 3.10) 3 | 4 | 5 | find_package(LLVM REQUIRED CONFIG) 6 | # Needed to use support library 7 | 8 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti -std=c++14") 9 | 10 | include_directories(${LLVM_INCLUDE_DIRS}) 11 | add_definitions(${LLVM_DEFINITIONS}) 12 | 13 | aux_source_directory(. 
DIR_HI_SysExec_SRCS) 14 | add_library(HI_SysExec ${DIR_HI_SysExec_SRCS}) 15 | 16 | 17 | # set(llvm_libs "") 18 | 19 | -------------------------------------------------------------------------------- /Implementations/HI_FindFunctions/README: -------------------------------------------------------------------------------- 1 | The HI_FindFunction pass is mainly tested by LLVM_exp0_find_functions 2 | 3 | The HI_FindFunction pass is just a HelloWorld-level pass, showing the basic template for constructing a pass in LLVM. A detailed explanation can be found in the source code's comments. 4 | 5 | 6 | The following line is used to demangle a mangled function name in IR: e.g. _ZN6ap_intILi271EEC2Ei ==> ap_int<271>::ap_int(int) 7 | 8 | (abi::__cxa_demangle(functioname.c_str(), NULL, &size, &status)); -------------------------------------------------------------------------------- /Implementations/HI_LoopUnroll/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | cmake_minimum_required(VERSION 3.10) 3 | 4 | 5 | find_package(LLVM REQUIRED CONFIG) 6 | # Needed to use support library 7 | 8 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti -std=c++14") 9 | 10 | include_directories(${LLVM_INCLUDE_DIRS}) 11 | add_definitions(${LLVM_DEFINITIONS}) 12 | 13 | aux_source_directory(. DIR_HI_LoopUnroll_SRCS) 14 | add_library(HI_LoopUnroll ${DIR_HI_LoopUnroll_SRCS}) 15 | 16 | 17 | # set(llvm_libs "") 18 | 19 | -------------------------------------------------------------------------------- /Implementations/HI_Polly_Info/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | cmake_minimum_required(VERSION 3.10) 3 | 4 | 5 | find_package(LLVM REQUIRED CONFIG) 6 | # Needed to use support library 7 | 8 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti -std=c++14") 9 | 10 | include_directories(${LLVM_INCLUDE_DIRS}) 11 | add_definitions(${LLVM_DEFINITIONS}) 12 | 13 | aux_source_directory(.
DIR_HI_Polly_Info_SRCS) 14 | add_library(HI_Polly_Info ${DIR_HI_Polly_Info_SRCS}) 15 | 16 | 17 | # set(llvm_libs "") 18 | 19 | -------------------------------------------------------------------------------- /App/2dloop2darray_pl_depCheck/2dloop2darray_pl.cc: -------------------------------------------------------------------------------- 1 | void f(int A[56][100]) 2 | { 3 | int N = 100; 4 | int M = 56; 5 | for (int j = 1; j < N; j++) 6 | { 7 | for (int i = 1; i < M + 1; i++) 8 | { 9 | // A[i][j] = (A[i-1][j-1] * A[i-1][j-1]) * (A[i-1][j] * A[i-1][j] * A[i-1][j] * 10 | // A[i-1][j]* A[i-1][j]) + 1; 11 | A[i][j] = (A[i - 1][j - 1] << 1) + (A[i - 1][j] << 2) + A[i - 1][j] + 1; 12 | } 13 | } 14 | return; 15 | } 16 | -------------------------------------------------------------------------------- /HLS_Data_Lib/store/store: -------------------------------------------------------------------------------- 1 | -1 -1 -1 3 0 0 0 0 3.25 1 --- 2 | -1 -1 -1 4 0 0 0 0 3.25 1 --- 3 | -1 -1 -1 5 0 0 0 0 3.25 1 --- 4 | -1 -1 -1 6 0 0 0 0 3.25 1 --- 5 | -1 -1 -1 7 0 0 0 0 3.25 1 --- 6 | -1 -1 -1 8 0 0 0 0 3.25 1 --- 7 | -1 -1 -1 10 0 0 0 0 3.25 1 --- 8 | -1 -1 -1 12.5 0 0 0 0 3.25 1 --- 9 | -1 -1 -1 15 0 0 0 0 3.25 1 --- 10 | -1 -1 -1 16 0 0 0 0 3.25 1 --- 11 | -1 -1 -1 17.5 0 0 0 0 3.25 1 --- 12 | -1 -1 -1 20 0 0 0 0 3.25 1 --- 13 | -1 -1 -1 25 0 0 0 0 3.25 1 --- 14 | -1 -1 -1 30 0 0 0 0 3.25 1 --- -------------------------------------------------------------------------------- /Implementations/HI_LoopLabeler/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | cmake_minimum_required(VERSION 3.10) 3 | 4 | 5 | find_package(LLVM REQUIRED CONFIG) 6 | # Needed to use support library 7 | 8 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti -std=c++14") 9 | 10 | include_directories(${LLVM_INCLUDE_DIRS}) 11 | add_definitions(${LLVM_DEFINITIONS}) 12 | 13 | aux_source_directory(. 
DIR_HI_LoopLabeler_SRCS) 14 | add_library(HI_LoopLabeler ${DIR_HI_LoopLabeler_SRCS}) 15 | 16 | 17 | # set(llvm_libs "") 18 | 19 | -------------------------------------------------------------------------------- /Implementations/HI_MulOrderOpt/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | cmake_minimum_required(VERSION 3.10) 3 | 4 | 5 | find_package(LLVM REQUIRED CONFIG) 6 | # Needed to use support library 7 | 8 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti -std=c++14") 9 | 10 | include_directories(${LLVM_INCLUDE_DIRS}) 11 | add_definitions(${LLVM_DEFINITIONS}) 12 | 13 | aux_source_directory(. DIR_HI_MulOrderOpt_SRCS) 14 | add_library(HI_MulOrderOpt ${DIR_HI_MulOrderOpt_SRCS}) 15 | 16 | 17 | # set(llvm_libs "") 18 | 19 | -------------------------------------------------------------------------------- /Implementations/HI_FindFunctions/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | cmake_minimum_required(VERSION 3.10) 3 | 4 | 5 | find_package(LLVM REQUIRED CONFIG) 6 | # Needed to use support library 7 | 8 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti -std=c++14") 9 | 10 | include_directories(${LLVM_INCLUDE_DIRS}) 11 | add_definitions(${LLVM_DEFINITIONS}) 12 | 13 | aux_source_directory(. 
DIR_HI_FINDFUNCTIONS_SRCS) 14 | add_library(HI_FindFunctions ${DIR_HI_FINDFUNCTIONS_SRCS}) 15 | 16 | 17 | # set(llvm_libs "") 18 | 19 | -------------------------------------------------------------------------------- /Implementations/HI_IR2SourceCode/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | cmake_minimum_required(VERSION 3.10) 3 | 4 | 5 | find_package(LLVM REQUIRED CONFIG) 6 | # Needed to use support library 7 | 8 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti -std=c++14") 9 | 10 | include_directories(${LLVM_INCLUDE_DIRS}) 11 | add_definitions(${LLVM_DEFINITIONS}) 12 | 13 | aux_source_directory(. DIR_HI_IR2SourceCode_SRCS) 14 | add_library(HI_IR2SourceCode ${DIR_HI_IR2SourceCode_SRCS}) 15 | 16 | 17 | # set(llvm_libs "") 18 | 19 | -------------------------------------------------------------------------------- /Implementations/HI_ReplaceSelectAccess/README: -------------------------------------------------------------------------------- 1 | The HI_ReplaceSelectAccess pass is used to handle those accesses which have more than one access target. 2 | 3 | This pass transform the pattern like: 4 | 5 | %4 = xxxxx (Pointer-Type) 6 | %5 = xxxxx (Pointer-Type) 7 | %6 = select i1 %sw, i32* %4, i32* %5 8 | %7 = load i32* %6 ... 9 | 10 | into: 11 | 12 | %4 = xxxxx (Pointer-Type) 13 | %5 = xxxxx (Pointer-Type) 14 | %44 = load i32* %4 ... 15 | %55 = load i32* %5 ... 
16 | %7 = select i1 %sw, i32 %44, i32 %55 17 | -------------------------------------------------------------------------------- /Implementations/HI_StringProcess/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | cmake_minimum_required(VERSION 3.10) 3 | 4 | 5 | find_package(LLVM REQUIRED CONFIG) 6 | # Needed to use support library 7 | 8 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti -std=c++14") 9 | 10 | include_directories(${LLVM_INCLUDE_DIRS}) 11 | add_definitions(${LLVM_DEFINITIONS}) 12 | 13 | aux_source_directory(. DIR_HI_StringProcess_SRCS) 14 | add_library(HI_StringProcess ${DIR_HI_StringProcess_SRCS}) 15 | 16 | 17 | # set(llvm_libs "") 18 | 19 | -------------------------------------------------------------------------------- /Implementations/HI_TopLoop2Func/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | cmake_minimum_required(VERSION 3.10) 3 | 4 | 5 | find_package(LLVM REQUIRED CONFIG) 6 | # Needed to use support library 7 | 8 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti -std=c++14") 9 | 10 | include_directories(${LLVM_INCLUDE_DIRS}) 11 | add_definitions(${LLVM_DEFINITIONS}) 12 | 13 | aux_source_directory(. DIR_HI_TopLoop2Func_SRCS) 14 | add_library(HI_TopLoop2Func ${DIR_HI_TopLoop2Func_SRCS}) 15 | 16 | 17 | # set(llvm_libs "") 18 | 19 | -------------------------------------------------------------------------------- /Tests/LLVM_exp13b_HI_ReplaceSelectAccess/README: -------------------------------------------------------------------------------- 1 | The HI_ReplaceSelectAccess pass is used to handle those accesses which have more than one access target. 2 | 3 | This pass transform the pattern like: 4 | 5 | %4 = xxxxx (Pointer-Type) 6 | %5 = xxxxx (Pointer-Type) 7 | %6 = select i1 %sw, i32* %4, i32* %5 8 | %7 = load i32* %6 ... 9 | 10 | into: 11 | 12 | %4 = xxxxx (Pointer-Type) 13 | %5 = xxxxx (Pointer-Type) 14 | %44 = load i32* %4 ... 
15 | %55 = load i32* %5 ... 16 | %7 = select i1 %sw, i32 %44, i32 %55 17 | -------------------------------------------------------------------------------- /Implementations/HI_DependenceList/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | cmake_minimum_required(VERSION 3.10) 3 | 4 | 5 | find_package(LLVM REQUIRED CONFIG) 6 | # Needed to use support library 7 | 8 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti -std=c++14") 9 | 10 | include_directories(${LLVM_INCLUDE_DIRS}) 11 | add_definitions(${LLVM_DEFINITIONS}) 12 | 13 | aux_source_directory(. DIR_HI_DEPENDENCELIST_SRCS) 14 | add_library(HI_DependenceList ${DIR_HI_DEPENDENCELIST_SRCS}) 15 | 16 | 17 | # set(llvm_libs "") 18 | 19 | -------------------------------------------------------------------------------- /Implementations/HI_VarWidthReduce/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | cmake_minimum_required(VERSION 3.10) 3 | 4 | 5 | find_package(LLVM REQUIRED CONFIG) 6 | # Needed to use support library 7 | 8 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti -std=c++14") 9 | 10 | include_directories(${LLVM_INCLUDE_DIRS}) 11 | add_definitions(${LLVM_DEFINITIONS}) 12 | 13 | aux_source_directory(. DIR_HI_VarWidthReduce_SRCS) 14 | add_library(HI_VarWidthReduce ${DIR_HI_VarWidthReduce_SRCS}) 15 | 16 | 17 | # set(llvm_libs "") 18 | 19 | -------------------------------------------------------------------------------- /Implementations/HI_DuplicateInstRm/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | cmake_minimum_required(VERSION 3.10) 3 | 4 | 5 | find_package(LLVM REQUIRED CONFIG) 6 | # Needed to use support library 7 | 8 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti -std=c++14") 9 | 10 | include_directories(${LLVM_INCLUDE_DIRS}) 11 | add_definitions(${LLVM_DEFINITIONS}) 12 | 13 | aux_source_directory(. 
DIR_HI_DuplicateInstRm_SRCS) 14 | add_library(HI_DuplicateInstRm ${DIR_HI_DuplicateInstRm_SRCS}) 15 | 16 | 17 | # set(llvm_libs "") 18 | 19 | -------------------------------------------------------------------------------- /Tests/Light_HLS_Top/config_2mm.txt: -------------------------------------------------------------------------------- 1 | clock = 5 2 | HLS_lib_path = ../../../HLS_Data_Lib/ 3 | loop_unroll label=Loop_kernel_2mm_3 factor=8 4 | loop_unroll label=Loop_kernel_2mm_6 factor=4 5 | loop_pipeline label=Loop_kernel_2mm_3 II=3 6 | array_partition variable=A scope=kernel_2mm dim=1 factor=8 7 | array_partition variable=B scope=kernel_2mm dim=2 factor=8 8 | array_partition variable=C scope=kernel_2mm dim=2 factor=4 9 | array_partition variable=tmp scope=kernel_2mm dim=1 factor=4 10 | func_dataflow scope=kernel_2mm enable -------------------------------------------------------------------------------- /HLS_Data_Lib/load/load: -------------------------------------------------------------------------------- 1 | -1 -1 -1 3 0 0 0 -2 3.25 1 --- 2 | -1 -1 -1 4 0 0 0 -2 3.25 1 --- 3 | -1 -1 -1 5 0 0 0 -2 3.25 1 --- 4 | -1 -1 -1 6 0 0 0 -2 3.25 1 --- 5 | -1 -1 -1 7 0 0 0 -2 3.25 1 --- 6 | -1 -1 -1 8 0 0 0 -2 3.25 1 --- 7 | -1 -1 -1 10 0 0 0 -2 3.25 1 --- 8 | -1 -1 -1 12.5 0 0 0 -2 3.25 1 --- 9 | -1 -1 -1 15 0 0 0 -2 3.25 1 --- 10 | -1 -1 -1 16 0 0 0 -2 3.25 1 --- 11 | -1 -1 -1 17.5 0 0 0 -2 3.25 1 --- 12 | -1 -1 -1 20 0 0 0 -2 3.25 1 --- 13 | -1 -1 -1 25 0 0 0 -2 3.25 1 --- 14 | -1 -1 -1 30 0 0 0 -2 3.25 1 --- 15 | -------------------------------------------------------------------------------- /Implementations/HI_APIntSrcAnalysis/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | cmake_minimum_required(VERSION 3.10) 3 | 4 | 5 | find_package(LLVM REQUIRED CONFIG) 6 | # Needed to use support library 7 | 8 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti -std=c++14") 9 | 10 | 
include_directories(${LLVM_INCLUDE_DIRS}) 11 | add_definitions(${LLVM_DEFINITIONS}) 12 | 13 | aux_source_directory(. DIR_HI_APIntSrcAnalysis_SRCS) 14 | add_library(HI_APIntSrcAnalysis ${DIR_HI_APIntSrcAnalysis_SRCS}) 15 | 16 | 17 | # set(llvm_libs "") 18 | 19 | -------------------------------------------------------------------------------- /App/2dloop2darray_pl/2dloop2darray_pl.1.cc: -------------------------------------------------------------------------------- 1 | void f(int A[50][100], int C[50], int rc) 2 | { 3 | int N = 100; 4 | int M = 50; 5 | for (int j = 1; j < N; j++) 6 | { 7 | // #pragma clang loop unroll_count(4) 8 | for (int i = 1; i < M + 1; i++) 9 | C[rc] += A[i][j] + 1; 10 | } 11 | // A[i][j] = A[i-1][j] + 1; 12 | // for ( int j = 1; j < N; j++ ) 13 | // for ( int i = 1; i < M; i++ ) 14 | // A[i][j] = A[i-1][j-1] + A[i][j-1] + A[i-1][j] + 1; 15 | return; 16 | } 17 | -------------------------------------------------------------------------------- /Implementations/HI_AggressiveLSR_MUL/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | cmake_minimum_required(VERSION 3.10) 3 | 4 | 5 | find_package(LLVM REQUIRED CONFIG) 6 | # Needed to use support library 7 | 8 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti -std=c++14") 9 | 10 | include_directories(${LLVM_INCLUDE_DIRS}) 11 | add_definitions(${LLVM_DEFINITIONS}) 12 | 13 | aux_source_directory(. 
DIR_HI_AggressiveLSR_MUL_SRCS) 14 | add_library(HI_AggressiveLSR_MUL ${DIR_HI_AggressiveLSR_MUL_SRCS}) 15 | 16 | 17 | # set(llvm_libs "") 18 | 19 | -------------------------------------------------------------------------------- /Implementations/HI_ArrayAccessPattern/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | cmake_minimum_required(VERSION 3.10) 3 | 4 | 5 | find_package(LLVM REQUIRED CONFIG) 6 | # Needed to use support library 7 | 8 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti -std=c++14") 9 | 10 | include_directories(${LLVM_INCLUDE_DIRS}) 11 | add_definitions(${LLVM_DEFINITIONS}) 12 | 13 | aux_source_directory(. DIR_HI_ArrayAccessPattern_SRCS) 14 | add_library(HI_ArrayAccessPattern ${DIR_HI_ArrayAccessPattern_SRCS}) 15 | 16 | 17 | # set(llvm_libs "") 18 | 19 | -------------------------------------------------------------------------------- /Implementations/HI_ConstantDivisorOpt/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | cmake_minimum_required(VERSION 3.10) 3 | 4 | 5 | find_package(LLVM REQUIRED CONFIG) 6 | # Needed to use support library 7 | 8 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti -std=c++14") 9 | 10 | include_directories(${LLVM_INCLUDE_DIRS}) 11 | add_definitions(${LLVM_DEFINITIONS}) 12 | 13 | aux_source_directory(. 
DIR_HI_ConstantDivisorOpt_SRCS) 14 | add_library(HI_ConstantDivisorOpt ${DIR_HI_ConstantDivisorOpt_SRCS}) 15 | 16 | 17 | # set(llvm_libs "") 18 | 19 | -------------------------------------------------------------------------------- /Implementations/HI_GEP_OffsetCombine/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | cmake_minimum_required(VERSION 3.10) 3 | 4 | 5 | find_package(LLVM REQUIRED CONFIG) 6 | # Needed to use support library 7 | 8 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti -std=c++14") 9 | 10 | include_directories(${LLVM_INCLUDE_DIRS}) 11 | add_definitions(${LLVM_DEFINITIONS}) 12 | 13 | aux_source_directory(. DIR_HI_GEP_OffsetCombine_SRCS) 14 | add_library(HI_GEP_OffsetCombine ${DIR_HI_GEP_OffsetCombine_SRCS}) 15 | 16 | 17 | # set(llvm_libs "") 18 | 19 | -------------------------------------------------------------------------------- /Implementations/HI_HLSDuplicateInstRm/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | cmake_minimum_required(VERSION 3.10) 3 | 4 | 5 | find_package(LLVM REQUIRED CONFIG) 6 | # Needed to use support library 7 | 8 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti -std=c++14") 9 | 10 | include_directories(${LLVM_INCLUDE_DIRS}) 11 | add_definitions(${LLVM_DEFINITIONS}) 12 | 13 | aux_source_directory(. 
DIR_HI_HLSDuplicateInstRm_SRCS) 14 | add_library(HI_HLSDuplicateInstRm ${DIR_HI_HLSDuplicateInstRm_SRCS}) 15 | 16 | 17 | # set(llvm_libs "") 18 | 19 | -------------------------------------------------------------------------------- /App/2dloop2darray_pl_brAccess/2dloop2darray_pl_brAccess_a.cc: -------------------------------------------------------------------------------- 1 | void f(int A[50][100]) 2 | { 3 | int N = 100; 4 | int M = 50; 5 | int B[50][100]; 6 | for (int j = 0; j < N; j++) 7 | for (int i = 0; i < M; i++) 8 | B[i][j] = i; 9 | for (int j = 0; j < N; j++) 10 | for (int i = 2; i < M; i++) 11 | { 12 | 13 | A[i][j] = A[i - 1][j] * A[i - 1][j] + A[i - 2][j] + B[i][j]; 14 | // A[i][j] = i%2? A[i-1][j]:i;//+A[i-2][j]; 15 | } 16 | return; 17 | } 18 | -------------------------------------------------------------------------------- /Implementations/HI_ReplaceSelectAccess/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | cmake_minimum_required(VERSION 3.10) 3 | 4 | 5 | find_package(LLVM REQUIRED CONFIG) 6 | # Needed to use support library 7 | 8 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti -std=c++14") 9 | 10 | include_directories(${LLVM_INCLUDE_DIRS}) 11 | add_definitions(${LLVM_DEFINITIONS}) 12 | 13 | aux_source_directory(. 
DIR_HI_ReplaceSelectAccess_SRCS) 14 | add_library(HI_ReplaceSelectAccess ${DIR_HI_ReplaceSelectAccess_SRCS}) 15 | 16 | 17 | # set(llvm_libs "") 18 | 19 | -------------------------------------------------------------------------------- /Implementations/HI_FunctionInstantiation/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | cmake_minimum_required(VERSION 3.10) 3 | 4 | 5 | find_package(LLVM REQUIRED CONFIG) 6 | # Needed to use support library 7 | 8 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti -std=c++14") 9 | 10 | include_directories(${LLVM_INCLUDE_DIRS}) 11 | add_definitions(${LLVM_DEFINITIONS}) 12 | 13 | aux_source_directory(. DIR_HI_FunctionInstantiation_SRCS) 14 | add_library(HI_FunctionInstantiation ${DIR_HI_FunctionInstantiation_SRCS}) 15 | 16 | 17 | # set(llvm_libs "") 18 | 19 | -------------------------------------------------------------------------------- /Implementations/HI_FunctionInterfaceInfo/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | cmake_minimum_required(VERSION 3.10) 3 | 4 | 5 | find_package(LLVM REQUIRED CONFIG) 6 | # Needed to use support library 7 | 8 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti -std=c++14") 9 | 10 | include_directories(${LLVM_INCLUDE_DIRS}) 11 | add_definitions(${LLVM_DEFINITIONS}) 12 | 13 | aux_source_directory(. 
DIR_HI_FunctionInterfaceInfo_SRCS) 14 | add_library(HI_FunctionInterfaceInfo ${DIR_HI_FunctionInterfaceInfo_SRCS}) 15 | 16 | 17 | # set(llvm_libs "") 18 | 19 | -------------------------------------------------------------------------------- /Implementations/HI_RemoveRedundantAccess/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | cmake_minimum_required(VERSION 3.10) 3 | 4 | 5 | find_package(LLVM REQUIRED CONFIG) 6 | # Needed to use support library 7 | 8 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti -std=c++14") 9 | 10 | include_directories(${LLVM_INCLUDE_DIRS}) 11 | add_definitions(${LLVM_DEFINITIONS}) 12 | 13 | aux_source_directory(. DIR_HI_RemoveRedundantAccess_SRCS) 14 | add_library(HI_RemoveRedundantAccess ${DIR_HI_RemoveRedundantAccess_SRCS}) 15 | 16 | 17 | # set(llvm_libs "") 18 | 19 | -------------------------------------------------------------------------------- /Implementations/HI_LoopDependenceAnalysis/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | cmake_minimum_required(VERSION 3.10) 3 | 4 | 5 | find_package(LLVM REQUIRED CONFIG) 6 | # Needed to use support library 7 | 8 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti -std=c++14") 9 | 10 | include_directories(${LLVM_INCLUDE_DIRS}) 11 | add_definitions(${LLVM_DEFINITIONS}) 12 | 13 | aux_source_directory(. 
DIR_HI_LoopDependenceAnalysis_SRCS) 14 | add_library(HI_LoopDependenceAnalysis ${DIR_HI_LoopDependenceAnalysis_SRCS}) 15 | 16 | 17 | # set(llvm_libs "") 18 | 19 | -------------------------------------------------------------------------------- /Implementations/HI_LoopInFormationCollect/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | cmake_minimum_required(VERSION 3.10) 3 | 4 | 5 | find_package(LLVM REQUIRED CONFIG) 6 | # Needed to use support library 7 | 8 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti -std=c++14") 9 | 10 | include_directories(${LLVM_INCLUDE_DIRS}) 11 | add_definitions(${LLVM_DEFINITIONS}) 12 | 13 | aux_source_directory(. DIR_HI_LoopInFormationCollect_SRCS) 14 | add_library(HI_LoopInFormationCollect ${DIR_HI_LoopInFormationCollect_SRCS}) 15 | 16 | 17 | # set(llvm_libs "") 18 | 19 | -------------------------------------------------------------------------------- /Implementations/HI_PragmaTargetExtraction/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | cmake_minimum_required(VERSION 3.10) 3 | 4 | 5 | find_package(LLVM REQUIRED CONFIG) 6 | # Needed to use support library 7 | 8 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti -std=c++14") 9 | 10 | include_directories(${LLVM_INCLUDE_DIRS}) 11 | add_definitions(${LLVM_DEFINITIONS}) 12 | 13 | aux_source_directory(. 
DIR_HI_PragmaTargetExtraction_SRCS) 14 | add_library(HI_PragmaTargetExtraction ${DIR_HI_PragmaTargetExtraction_SRCS}) 15 | 16 | 17 | # set(llvm_libs "") 18 | 19 | -------------------------------------------------------------------------------- /Implementations/HI_SimpleTimingEvaluation/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | cmake_minimum_required(VERSION 3.10) 3 | 4 | 5 | find_package(LLVM REQUIRED CONFIG) 6 | # Needed to use support library 7 | 8 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti -std=c++14") 9 | 10 | include_directories(${LLVM_INCLUDE_DIRS}) 11 | add_definitions(${LLVM_DEFINITIONS}) 12 | 13 | aux_source_directory(. DIR_HI_SimpleTimingEvaluation_SRCS) 14 | add_library(HI_SimpleTimingEvaluation ${DIR_HI_SimpleTimingEvaluation_SRCS}) 15 | 16 | 17 | # set(llvm_libs "") 18 | 19 | -------------------------------------------------------------------------------- /Implementations/HI_IntstructionMoveBackward/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | cmake_minimum_required(VERSION 3.10) 3 | 4 | 5 | find_package(LLVM REQUIRED CONFIG) 6 | # Needed to use support library 7 | 8 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti -std=c++14") 9 | 10 | include_directories(${LLVM_INCLUDE_DIRS}) 11 | add_definitions(${LLVM_DEFINITIONS}) 12 | 13 | aux_source_directory(. 
DIR_HI_IntstructionMoveBackward_SRCS) 14 | add_library(HI_IntstructionMoveBackward ${DIR_HI_IntstructionMoveBackward_SRCS}) 15 | 16 | 17 | # set(llvm_libs "") 18 | 19 | -------------------------------------------------------------------------------- /Implementations/HI_ArraySensitiveToLoopLevel/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | cmake_minimum_required(VERSION 3.10) 3 | 4 | 5 | find_package(LLVM REQUIRED CONFIG) 6 | # Needed to use support library 7 | 8 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti -std=c++14") 9 | 10 | include_directories(${LLVM_INCLUDE_DIRS}) 11 | add_definitions(${LLVM_DEFINITIONS}) 12 | 13 | aux_source_directory(. DIR_HI_ArraySensitiveToLoopLevel_SRCS) 14 | add_library(HI_ArraySensitiveToLoopLevel ${DIR_HI_ArraySensitiveToLoopLevel_SRCS}) 15 | 16 | 17 | # set(llvm_libs "") 18 | 19 | -------------------------------------------------------------------------------- /Implementations/HI_IR2SourceCode/README: -------------------------------------------------------------------------------- 1 | The HI_IR2SourceCode pass is used to map the IR back to the source code 2 | based on the debug information (DWARF) in the IR code. 3 | 4 | Here, the pass tries to map each IR instruction to the specific line in the source code, 5 | and to map each basic block / loop to a specific range in the source code. 6 | 7 | The major challenge is that when a function is inlined, it can be hard to do the mapping, 8 | since the debug information might be mixed up. By iterating the search over the different 9 | subprograms, this challenge appears to be solved.
-------------------------------------------------------------------------------- /Implementations/HI_InstructionLatencyCollect/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | cmake_minimum_required(VERSION 3.10) 3 | 4 | 5 | find_package(LLVM REQUIRED CONFIG) 6 | # Needed to use support library 7 | 8 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti -std=c++14") 9 | 10 | include_directories(${LLVM_INCLUDE_DIRS}) 11 | add_definitions(${LLVM_DEFINITIONS}) 12 | 13 | aux_source_directory(. DIR_HI_InstructionLatencyCollect_SRCS) 14 | add_library(HI_InstructionLatencyCollect ${DIR_HI_InstructionLatencyCollect_SRCS}) 15 | 16 | 17 | # set(llvm_libs "") 18 | 19 | -------------------------------------------------------------------------------- /App/allocation_free/allocation_free.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | void f ( int *A) { 5 | int N = 100; 6 | int M = 50; 7 | int *aa = (int*)malloc(sizeof(int)*50); 8 | for ( int j = 0; j < N; j++ ) 9 | { 10 | 11 | aa[j] = j; 12 | for ( int i = 0; i < M; i++ ) 13 | A[i + 8] = A[i] + 1 + aa[i]; 14 | 15 | } 16 | for ( int j = 0; j < N; j++ ) 17 | { 18 | 19 | aa[j] = aa[j] + j; 20 | for ( int i = 0; i < M; i++ ) 21 | A[i + 8] = A[i] + 1 + aa[i]; 22 | 23 | } 24 | free(aa); 25 | } 26 | -------------------------------------------------------------------------------- /Implementations/HI_MuxInsertionArrayPartition/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | cmake_minimum_required(VERSION 3.10) 3 | 4 | 5 | find_package(LLVM REQUIRED CONFIG) 6 | # Needed to use support library 7 | 8 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti -std=c++14") 9 | 10 | include_directories(${LLVM_INCLUDE_DIRS}) 11 | add_definitions(${LLVM_DEFINITIONS}) 12 | 13 | aux_source_directory(. 
DIR_HI_MuxInsertionArrayPartition_SRCS) 14 | add_library(HI_MuxInsertionArrayPartition ${DIR_HI_MuxInsertionArrayPartition_SRCS}) 15 | 16 | 17 | # set(llvm_libs "") 18 | 19 | -------------------------------------------------------------------------------- /Implementations/HI_SeparateConstOffsetFromGEP/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | cmake_minimum_required(VERSION 3.10) 3 | 4 | 5 | find_package(LLVM REQUIRED CONFIG) 6 | # Needed to use support library 7 | 8 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti -std=c++14") 9 | 10 | include_directories(${LLVM_INCLUDE_DIRS}) 11 | add_definitions(${LLVM_DEFINITIONS}) 12 | 13 | aux_source_directory(. DIR_HI_SeparateConstOffsetFromGEP_SRCS) 14 | add_library(HI_SeparateConstOffsetFromGEP ${DIR_HI_SeparateConstOffsetFromGEP_SRCS}) 15 | 16 | 17 | # set(llvm_libs "") 18 | 19 | -------------------------------------------------------------------------------- /Implementations/HI_LoopInFormationCollect/README: -------------------------------------------------------------------------------- 1 | The HI_LoopInFormationCollect pass is mainly tested by LLVM_exp3_loop_info_extraction 2 | 3 | The HI_LoopInFormationCollect pass tries to extract and print out different Loop information by involving LoopInfoWrapperPass, e.g. Trip Count, Depth, LoopReport, Sub-Loops, Blocks in Loops 4 | 5 | A detailed explanation can be found in the source code's comments. 6 | 7 | It shows how to use the Loop class and LoopInfo class for processing and analysis. 8 | It also shows how to use the function getAnalysisUsage to define the dependence between Passes 9 | -------------------------------------------------------------------------------- /BuildAllFiles.sh: -------------------------------------------------------------------------------- 1 | #!
/bin/bash 2 | 3 | curr_path="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 4 | cd $curr_path 5 | 6 | 7 | cd Tests/LLVM_Learner_Libs/build 8 | cmake .. 9 | make -j4 10 | cp `find -name *.a` ../ 11 | 12 | cd $curr_path/Tests 13 | 14 | for folder in `ls` 15 | do 16 | if [ -d ./$folder ] 17 | then 18 | cd $folder 19 | if [ ! -d ./build ] 20 | then 21 | mkdir build 22 | fi 23 | cd build 24 | cmake .. 25 | make -j4 26 | cd ../../ 27 | fi 28 | done 29 | 30 | -------------------------------------------------------------------------------- /Implementations/HI_GEP_OffsetCombine/README: -------------------------------------------------------------------------------- 1 | HI_GEP_OffsetCombine pass is tested by LLVM_expXXXX 2 | 3 | The pass will transform those GEP operation with multiple offset operand into the one with single offset 4 | 5 | This transformation pass include the following step: 6 | 7 | 1. find GEP operation 8 | 2. extract GEP offset operands 9 | 3. do some multiplications and additions to obtain the exact offset, by generating some related instruction 10 | 4. 
generate the GEP with single offset 11 | 12 | for example: 13 | 14 | int A[50][100] -> int A[5000] 15 | A[i][j] -> A+i*100+j -------------------------------------------------------------------------------- /App/2dloop2darray_pl_indeploop/2dloop2darray_pl.cc: -------------------------------------------------------------------------------- 1 | 2 | void f(int A[56][100], int B[56][100], int *C) 3 | { 4 | int N = 100; 5 | int M = 56; 6 | for (int j = 1; j < N; j++) 7 | for (int i = 1; i < M + 1; i++) 8 | { 9 | A[i][j] = A[i - 1][j - 1] + A[i][j - 1] + A[i - 1][j] + 1; 10 | } 11 | 12 | { 13 | for (int j = 1; j < N; j++) 14 | for (int i = 1; i < M + 1; i++) 15 | { 16 | B[i][j] = B[i - 1][j - 1] + B[i][j - 1] + B[i - 1][j] + 1; 17 | } 18 | } 19 | return; 20 | } 21 | -------------------------------------------------------------------------------- /Tests/LLVM_exp9_HI_SepConstGEP/README: -------------------------------------------------------------------------------- 1 | HI_SeparateConstOffsetFromGEP pass is tested by LLVM_exp9_HI_SepConstGEP 2 | 3 | The pass is modified from the SeparateConstOffsetFromGEP pass, but in this pass, we lower the GEP considering that in HLS, arrays are not aligned by bytes (like DDR), but by elements. 4 | 5 | Therefore, we do not need to consider how wide (how many bytes/bits) an element is. You can trace the modifications by the flag "HI-MODIFICATION" in comments.
6 | 7 | 8 | The test can be run with the following command: 9 | 10 | ./LLVM_expXXXXX -------------------------------------------------------------------------------- /Implementations/HI_NoDirectiveTimingResourceEvaluation/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | cmake_minimum_required(VERSION 3.10) 3 | 4 | 5 | find_package(LLVM REQUIRED CONFIG) 6 | # Needed to use support library 7 | 8 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti -std=c++14") 9 | 10 | include_directories(${LLVM_INCLUDE_DIRS}) 11 | add_definitions(${LLVM_DEFINITIONS}) 12 | 13 | aux_source_directory(. DIR_HI_NoDirectiveTimingResourceEvaluation_SRCS) 14 | add_library(HI_NoDirectiveTimingResourceEvaluation ${DIR_HI_NoDirectiveTimingResourceEvaluation_SRCS}) 15 | 16 | 17 | # set(llvm_libs "") 18 | 19 | -------------------------------------------------------------------------------- /Tests/LLVM_exp3_loop_info_extraction/README: -------------------------------------------------------------------------------- 1 | The HI_LoopInFormationCollect pass is mainly tested by LLVM_exp3_loop_info_extraction 2 | 3 | The HI_LoopInFormationCollect pass tries to extract and print out different Loop information by involving LoopInfoWrapperPass, e.g. Trip Count, Depth, LoopReport, Sub-Loops, Blocks in Loops 4 | 5 | It shows how to use the Loop class and LoopInfo class for processing and analysis.
6 | It also shows how to use function getAnalysisUsage to define the dependence between Passes 7 | 8 | The test can be run with the following command: 9 | 10 | ./LLVM_expXXXXX -------------------------------------------------------------------------------- /Implementations/HI_WithDirectiveTimingResourceEvaluation/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | cmake_minimum_required(VERSION 3.10) 3 | 4 | 5 | find_package(LLVM REQUIRED CONFIG) 6 | # Needed to use support library 7 | 8 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti -std=c++14") 9 | 10 | include_directories(${LLVM_INCLUDE_DIRS}) 11 | add_definitions(${LLVM_DEFINITIONS}) 12 | 13 | aux_source_directory(. DIR_HI_WithDirectiveTimingResourceEvaluation_SRCS) 14 | add_library(HI_WithDirectiveTimingResourceEvaluation ${DIR_HI_WithDirectiveTimingResourceEvaluation_SRCS}) 15 | 16 | 17 | # set(llvm_libs "") 18 | 19 | -------------------------------------------------------------------------------- /TestBuiltFiles.sh: -------------------------------------------------------------------------------- 1 | #! 
/bin/bash 2 | 3 | curr_path="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 4 | cd $curr_path 5 | 6 | rm ./TestLog/* 7 | 8 | for buildfile in `find -name build` 9 | do 10 | tmparray=(${buildfile//// }) 11 | echo "Testing file $buildfile/${tmparray[2]} " 12 | cd $buildfile 13 | 14 | if [ ${tmparray[2]} == "LLVM_Learner_Libs" ] 15 | then 16 | cd $curr_path 17 | continue 18 | fi 19 | 20 | for testobj in `find ../../../App/ -name *.c` 21 | do 22 | ./LLVM* $testobj f >> ../../../TestLog/testlog_${tmparray[2]} 23 | done 24 | cd $curr_path 25 | done 26 | -------------------------------------------------------------------------------- /Tests/LLVM_exp13d_HI_LoopUnroll/ConfigParse.h: -------------------------------------------------------------------------------- 1 | #ifndef _Gobal_Confic_Parse 2 | #define _Gobal_Confic_Parse 3 | 4 | #include "HI_StringProcess.h" 5 | #include "HI_SysExec.h" 6 | #include "HI_print.h" 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | extern std::string clock_period_str; 13 | extern std::string HLS_lib_path; 14 | 15 | void Parse_Config(const char *config_file_name, std::map &LoopLabel2UnrollFactor); 16 | 17 | void parseArrayPartition(std::stringstream &iss); 18 | 19 | void parseLoopUnroll(std::stringstream &iss, std::map &LoopLabel2UnrollFactor); 20 | 21 | #endif -------------------------------------------------------------------------------- /Tests/LLVM_exp1_dependence_list/README: -------------------------------------------------------------------------------- 1 | The HI_DependenceList pass is mainly tested by LLVM_exp1_dependence_list 2 | 3 | The test can be run with the following command: 4 | 5 | ./LLVM_expXXXXX 6 | 7 | The HI_DependenceList pass extracts the dependence between Instructions. A detailed explanation can be found in the source code's comments. 8 | 9 | To implement this pass, different iterators are used to iterate over functions, blocks and instructions (and their successors (users)) 10 | 11 | e.g.
12 | 13 | 14 | for (User *U : (I)->users()) 15 | 16 | for (auto Succ_it : successors(B)) 17 | 18 | for (Instruction &I: B) 19 | 20 | for (BasicBlock &B : F) -------------------------------------------------------------------------------- /App/2dloop2darray_struct/2dloop2darray_struct.c: -------------------------------------------------------------------------------- 1 | typedef struct 2 | { 3 | int a; 4 | char b; 5 | double c; 6 | } Simple2; 7 | 8 | void f ( Simple2 A[50][100]) { 9 | int N = 100; 10 | int M = 50; 11 | for ( int j = 1; j < N; j++ ) 12 | for ( int i = 1; i < M; i++ ) 13 | A[i][j].a = A[i-1][j-1].b + A[i][j-1].b + A[i-1][j].b + 1; 14 | // for ( int j = 1; j < N; j++ ) 15 | // for ( int i = 1; i < M; i++ ) 16 | // A[i][j].c = A[i-1][j-1].a + A[i][j-1].a + A[i-1][j].a + 1.0/3.0; 17 | for ( int j = 1; j < N; j++ ) 18 | for ( int i = 1; i < M; i++ ) 19 | A[i][j].a = A[i-1][j-1].a + A[i][j-1].a + A[i-1][j].a + 1; 20 | return; 21 | } 22 | -------------------------------------------------------------------------------- /App/2dloop2darray_pl_moreAccess/2dloop2darray_pl_moreAccess.cc: -------------------------------------------------------------------------------- 1 | // void f ( int A[50][100]) { 2 | // int N = 100; 3 | // int M = 50; 4 | // for ( int j = 3; j < N; j++ ) 5 | // for ( int i = 1; i < M; i++ ) 6 | // A[i][j] = A[i-1][j-1] + A[i][j-1] + A[i-1][j] + A[i-1][j-2] + A[i-1][j-3] + 1; 7 | // return; 8 | // } 9 | 10 | void f(int A[50][100]) 11 | { 12 | int N = 100; 13 | int M = 50; 14 | for (int i = 1; i < M; i++) 15 | for (int j = 3; j < N; j++) 16 | { 17 | int a = A[i][j - 3]; 18 | int b = A[i - 1][j - 1]; 19 | int c = A[i][j - 2]; 20 | A[i][j] = a + b + c; 21 | } 22 | return; 23 | } -------------------------------------------------------------------------------- /Patch_for_LLVM/README: -------------------------------------------------------------------------------- 1 | The D59105 patch to make Clang compatible to arbitrary precision integer is shared 
by: 2 | 3 | Erich Keane 4 | erich.keane@intel.com 5 | Compiler Engineer 6 | Intel Corporation 7 | 8 | Thanks to his kind sharing of this patch, I can work around a lot of tough work. This 9 | patch is under review: https://reviews.llvm.org/D59105. Therefore, people who also 10 | want to make use of the patch should obey the related license of LLVM and make a proper 11 | citation/reference. Here, I want to mention that the community of LLVM is so active and 12 | I am convinced that it is those active and generous developers who make LLVM a great 13 | tool. 14 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 
21 | -------------------------------------------------------------------------------- /Tests/LLVM_exp10_HI_LoopLabeler/LLVM_exp10_HI_LoopLabeler.h: -------------------------------------------------------------------------------- 1 | #include "HI_LoopLabeler.h" 2 | 3 | #include "HI_SysExec.h" 4 | #include "HI_print.h" 5 | #include "clang/AST/AST.h" 6 | #include "clang/AST/ASTConsumer.h" 7 | #include "clang/AST/ASTContext.h" 8 | #include "clang/AST/RecursiveASTVisitor.h" 9 | #include "clang/AST/Type.h" 10 | #include "clang/Driver/Options.h" 11 | #include "clang/Frontend/ASTConsumers.h" 12 | #include "clang/Frontend/CompilerInstance.h" 13 | #include "clang/Frontend/FrontendActions.h" 14 | #include "clang/Tooling/CommonOptionsParser.h" 15 | #include "clang/Tooling/Tooling.h" 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include -------------------------------------------------------------------------------- /Implementations/HI_IntstructionMoveBackward/README: -------------------------------------------------------------------------------- 1 | The HI_IntstructionMoveBackward pass is used to move the instructions from the original block to the dominator block if possible, so in HLS, the parallelism can be improved. 2 | 3 | This pass is based on the DominatorTreeWrapperPass in LLVM. For an explanation of what a dominator tree is, please refer to https://en.wikipedia.org/wiki/Dominator_(graph_theory). 4 | 5 | 6 | The rough implementation idea is shown below: 7 | 8 | 1. loop to find the lowest node in the Dominator Tree that has not been processed previously, and process it 9 | 2. mark the block processed 10 | 3. obtain the instructions independent of those PHI nodes 11 | 4. 
move those specific instructions to dominator block -------------------------------------------------------------------------------- /Implementations/HI_print/HI_print.h: -------------------------------------------------------------------------------- 1 | #ifndef _HI_PRINT 2 | #define _HI_PRINT 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | // Helper method for converting the name of a LLVM type to a string 11 | 12 | void print_cmd(const char *tmp); 13 | void print_info(const char *tmp); 14 | void print_status(const char *tmp); 15 | void print_error(const char *tmp); 16 | void print_warning(const char *tmp); 17 | 18 | void print_cmd(std::string tmp_string); 19 | void print_info(std::string tmp_string); 20 | void print_status(std::string tmp_string); 21 | void print_error(std::string tmp_string); 22 | void print_warning(std::string tmp_string); 23 | 24 | #endif 25 | -------------------------------------------------------------------------------- /Tests/LLVM_exp10_HI_APIntegerAnalysis/LLVM_exp10_HI_APIntegerAnalysis.h: -------------------------------------------------------------------------------- 1 | #include "HI_APIntSrcAnalysis.h" 2 | 3 | #include "HI_SysExec.h" 4 | #include "HI_print.h" 5 | #include "clang/AST/AST.h" 6 | #include "clang/AST/ASTConsumer.h" 7 | #include "clang/AST/ASTContext.h" 8 | #include "clang/AST/RecursiveASTVisitor.h" 9 | #include "clang/AST/Type.h" 10 | #include "clang/Driver/Options.h" 11 | #include "clang/Frontend/ASTConsumers.h" 12 | #include "clang/Frontend/CompilerInstance.h" 13 | #include "clang/Frontend/FrontendActions.h" 14 | #include "clang/Tooling/CommonOptionsParser.h" 15 | #include "clang/Tooling/Tooling.h" 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include -------------------------------------------------------------------------------- /Tests/LLVM_exp11_HI_clang_import_test/LLVM_exp11_HI_clang_import_test.h: 
-------------------------------------------------------------------------------- 1 | #include "HI_APIntSrcAnalysis.h" 2 | 3 | #include "HI_SysExec.h" 4 | #include "HI_print.h" 5 | #include "clang/AST/AST.h" 6 | #include "clang/AST/ASTConsumer.h" 7 | #include "clang/AST/ASTContext.h" 8 | #include "clang/AST/RecursiveASTVisitor.h" 9 | #include "clang/AST/Type.h" 10 | #include "clang/Driver/Options.h" 11 | #include "clang/Frontend/ASTConsumers.h" 12 | #include "clang/Frontend/CompilerInstance.h" 13 | #include "clang/Frontend/FrontendActions.h" 14 | #include "clang/Tooling/CommonOptionsParser.h" 15 | #include "clang/Tooling/Tooling.h" 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include -------------------------------------------------------------------------------- /Implementations/HI_MulOrderOpt/README: -------------------------------------------------------------------------------- 1 | HI_MulOrderOpt will try to transform a series of multiplications (e.g. A*A*A*A*A*B*B) into a parallelism-friendly IR code sequence 2 | 3 | possible computation sequence: 4 | stage0: a0 = A * A 5 | stage1: a1 = A * a0; 6 | stage2: a2 = A * a1; 7 | stage3: a3 = A * a2; 8 | stage4: o0 = a3 * B; 9 | stage5: o1 = o0 * B 10 | 11 | which is not parallelized and cannot take advantage of the FPGA 12 | 13 | Then re-generate the multiplications to reduce computation dependence and maximize parallelism 14 | 15 | stage0: a0 = A * A a1 = A * A b = B * B 16 | stage1: a2 = a0 * a1; c0 = A * b 17 | stage2: c1 = a2 * c0 18 | 19 | (a0 and a1 are duplicates, which will be removed by later passes.) -------------------------------------------------------------------------------- /Tests/LLVM_exp13a_HI_FunctionInstantiation/README: -------------------------------------------------------------------------------- 1 | The HI_FunctionInstantiation pass is used to instantiate the called sub-functions. 
2 | 3 | Consider the following code: 4 | 5 | void f0(int A[100],int B[100]) 6 | { 7 | #pragma HLS array_partition variable=A factor=2 8 | #pragma HLS array_partition variable=B factor=4 9 | f1(A); 10 | f1(B); 11 | } 12 | 13 | Although the arrays A and B are both processed by function f1(), the function with 14 | different input structures will end up with different latency. This will confuse the 15 | process of timing evaluation. To overcome this issue, we can just generate two separate 16 | functions f1_0() and f1_1(), for arrays A and B respectively, and forward the pragma information 17 | to the subfunctions. -------------------------------------------------------------------------------- /App/2dloop2darray_pl_subf2/2dloop2darray_pl_subf2.cc: -------------------------------------------------------------------------------- 1 | void subf2(int A[50][100], int B[50][100], int i, int j) 2 | { 3 | A[i][j] = A[i - 1][j] * A[i - 1][j] + A[i - 2][j] + B[i][j]; 4 | } 5 | 6 | void f(int A[50][100]) 7 | { 8 | int N = 100; 9 | int M = 50; 10 | int B[50][100]; 11 | for (int j = 0; j < N; j++) 12 | for (int i = 0; i < M; i++) 13 | B[i][j] = i; 14 | for (int j = 1; j < N; j++) 15 | for (int i = 1; i < M; i++) 16 | { 17 | 18 | A[i][j] = A[i - 1][j] * A[i - 1][j] + A[i - 2][j] + B[i][j]; 19 | } 20 | for (int j = 1; j < N; j++) 21 | for (int i = 1; i < M; i++) 22 | { 23 | subf2(A, B, i, j); 24 | } 25 | return; 26 | } 27 | -------------------------------------------------------------------------------- /App/2dloop2darray_win_filter/2dloop2darray_pl.cc: -------------------------------------------------------------------------------- 1 | #include "hls_video.h" 2 | void f(int A[50][100]) 3 | { 4 | hls::Window<500, 500, int> window; 5 | int B[50][100]; 6 | int N = 100; 7 | int M = 50; 8 | for (int j = 1; j < N; j++) 9 | for (int i = 1; i < M; i++) 10 | { 11 | window.insert_pixel(i + j, i, j); 12 | B[i][j] = 1 + i + j; 13 | A[i][j] = A[i - 1][j - 1] + A[i][j - 1] + A[i - 1][j] + 1; 14 
| } 15 | for (int j = 1; j < N; j++) 16 | for (int i = 1; i < M; i++) 17 | { 18 | window.shift_left(); 19 | 20 | A[i][j] = A[i - 1][j - 1] + A[i][j - 1] + A[i - 1][j] + window.getval(i, j) + B[i][j]; 21 | } 22 | return; 23 | } 24 | -------------------------------------------------------------------------------- /Tests/Light_HLS_Top/ciinfor: -------------------------------------------------------------------------------- 1 | clang version 9.0.0 (a7c2c8ff4eb589c59ad9ff6e80fa50edf5b97a46) 2 | Target: x86_64-unknown-linux-gnu 3 | Thread model: posix 4 | InstalledDir: /usr/local/bin 5 | Found candidate GCC installation: /usr/lib/gcc/i686-linux-gnu/8 6 | Found candidate GCC installation: /usr/lib/gcc/x86_64-linux-gnu/7 7 | Found candidate GCC installation: /usr/lib/gcc/x86_64-linux-gnu/7.4.0 8 | Found candidate GCC installation: /usr/lib/gcc/x86_64-linux-gnu/8 9 | Selected GCC installation: /usr/lib/gcc/x86_64-linux-gnu/7.4.0 10 | Candidate multilib: .;@m64 11 | Selected multilib: .;@m64 12 | clang-9: warning: treating 'c' input as 'c++' when in C++ mode, this behavior is deprecated [-Wdeprecated] 13 | clang-9: error: no such file or directory: '../testcase/test.c' 14 | -------------------------------------------------------------------------------- /App/2dloop2darray_pl_apint/2dloop2darray_pl.cc: -------------------------------------------------------------------------------- 1 | #include "ap_int.h" 2 | 3 | void f(int A[50][100], ap_int<271> *result0, ap_int<13> *result1, ap_int<13> *result2, 4 | ap_int<271> *result3, ap_int<13> *result4) 5 | { 6 | int N = 100; 7 | int M = 50; 8 | ap_int<271> a, b, c, tmp0; 9 | ap_int<13> a1, b1, c1, tmp1; 10 | ap_int<13> BB[5][5]; 11 | a = 1; 12 | b = M; 13 | 14 | b = a + b; 15 | 16 | c = 4; 17 | a = b + c; 18 | 19 | a1 = 1; 20 | b1 = M; 21 | c1 = 4; 22 | a1 = b1 + c1; 23 | 24 | *result0 = a; 25 | *result1 = a1; 26 | 27 | BB[1][1] = 2; 28 | BB[2][2] = BB[1][1] + 4; 29 | 30 | *result2 = a + b1; 31 | *result3 = a1 + b; 32 | 33 | *result4 
= BB[2][2]; 34 | 35 | int pp = M + N; 36 | } -------------------------------------------------------------------------------- /HLS_Data_Lib/fcmp/fcmp: -------------------------------------------------------------------------------- 1 | -1 -1 -1 3 0 75 248 3 2.63 1 FCmp 2 | -1 -1 -1 3 0 75 248 3 2.63 1 FCmp 3 | -1 -1 -1 4 0 75 248 3 2.63 1 FCmp 4 | -1 -1 -1 4 0 75 248 3 2.63 1 FCmp 5 | -1 -1 -1 5 0 75 248 3 2.63 1 FCmp 6 | -1 -1 -1 5 0 75 248 3 2.63 1 FCmp 7 | -1 -1 -1 6 0 66 239 2 4.41 1 FCmp 8 | -1 -1 -1 6 0 66 239 2 4.41 1 FCmp 9 | -1 -1 -1 7 0 66 239 1 5.29 1 FCmp 10 | -1 -1 -1 7 0 66 239 1 5.29 1 FCmp 11 | -1 -1 -1 8 0 66 239 0 6.61 1 FCmp 12 | -1 -1 -1 8 0 66 239 0 6.61 1 FCmp 13 | -1 -1 -1 9 0 66 239 0 6.61 1 FCmp 14 | -1 -1 -1 9 0 66 239 0 6.61 1 FCmp 15 | -1 -1 -1 10 0 66 239 0 6.61 1 FCmp 16 | -1 -1 -1 10 0 66 239 0 6.61 1 FCmp 17 | -1 -1 -1 12.5 0 66 239 0 6.61 1 FCmp 18 | -1 -1 -1 12.5 0 66 239 0 6.61 1 FCmp 19 | -1 -1 -1 15 0 66 239 0 6.61 1 FCmp 20 | -1 -1 -1 15 0 66 239 0 6.61 1 FCmp 21 | -------------------------------------------------------------------------------- /Implementations/HI_DependenceList/README: -------------------------------------------------------------------------------- 1 | The HI_DependenceList pass is mainly tested by LLVM_exp1_dependence_list 2 | 3 | The HI_DependenceList pass extracts the dependences between Instructions. A detailed explanation can be found in the source code's comments. 4 | 5 | To implement this pass, different iterators are used to iterate over functions, blocks and instructions (and their successors(users)) 6 | 7 | Please note that the pass is implemented with doInitialization() and doFinalization() functions, since a pass may sometimes be called more than once, which makes doInitialization() and doFinalization() important. 8 | 9 | e.g. 
10 | 11 | 12 | for (User *U : (I)->users()) 13 | 14 | for (auto Succ_it : successors(B)) 15 | 16 | for (Instruction &I: B) 17 | 18 | for (BasicBlock &B : F) -------------------------------------------------------------------------------- /HLS_Data_Lib/fcmp/overview_float: -------------------------------------------------------------------------------- 1 | -1 -1 -1 3 0 75 248 3 2.63 1 FCmp 2 | -1 -1 -1 3 0 75 248 3 2.63 1 FCmp 3 | -1 -1 -1 4 0 75 248 3 2.63 1 FCmp 4 | -1 -1 -1 4 0 75 248 3 2.63 1 FCmp 5 | -1 -1 -1 5 0 75 248 3 2.63 1 FCmp 6 | -1 -1 -1 5 0 75 248 3 2.63 1 FCmp 7 | -1 -1 -1 6 0 66 239 2 4.41 1 FCmp 8 | -1 -1 -1 6 0 66 239 2 4.41 1 FCmp 9 | -1 -1 -1 7 0 66 239 1 5.29 1 FCmp 10 | -1 -1 -1 7 0 66 239 1 5.29 1 FCmp 11 | -1 -1 -1 8 0 66 239 0 6.61 1 FCmp 12 | -1 -1 -1 8 0 66 239 0 6.61 1 FCmp 13 | -1 -1 -1 9 0 66 239 0 6.61 1 FCmp 14 | -1 -1 -1 9 0 66 239 0 6.61 1 FCmp 15 | -1 -1 -1 10 0 66 239 0 6.61 1 FCmp 16 | -1 -1 -1 10 0 66 239 0 6.61 1 FCmp 17 | -1 -1 -1 12.5 0 66 239 0 6.61 1 FCmp 18 | -1 -1 -1 12.5 0 66 239 0 6.61 1 FCmp 19 | -1 -1 -1 15 0 66 239 0 6.61 1 FCmp 20 | -1 -1 -1 15 0 66 239 0 6.61 1 FCmp 21 | -------------------------------------------------------------------------------- /Implementations/HI_PragmaTargetExtraction/ClockInfo.cc: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | 4 | std::string clockStrs[100] = { 5 | "3", // 1 6 | "4", // 2 7 | "5", // 3 8 | "6", // 4 9 | "7", // 5 10 | "8", // 6 11 | "9", // 7 12 | "10", // 8 13 | "12.5", // 9 14 | "15", // 10 15 | "16", // 11 16 | "17.5", // 12 17 | "20", // 13 18 | "25", // 14 19 | "30" // 15 20 | }; 21 | 22 | const float clockPeriod[100] = { 23 | 3, // 1 24 | 4, // 2 25 | 5, // 3 26 | 6, // 4 27 | 7, // 5 28 | 8, // 6 29 | 9, // 7 30 | 10, // 8 31 | 12.5, // 9 32 | 15, // 10 33 | 16, // 11 34 | 17.5, // 12 35 | 20, // 13 36 | 25, // 14 37 | 30 // 15 38 | }; 
-------------------------------------------------------------------------------- /Tests/LLVM_expAPINT_test/arb_prec_array.cpp: -------------------------------------------------------------------------------- 1 | // RUN: %clang -cc1 %s -triple x86_64-linux-pc -emit-llvm -o - | FileCheck %s -check-prefix LIN 2 | // RUN: %clang -cc1 %s -triple x86_64-windows-pc -emit-llvm -o - | FileCheck %s -check-prefix WIN 3 | 4 | // CHECK: @[[VAR:[a-zA-Z0-9_]+]] = internal constant { i65 } { i65 17 }, align 8 5 | // CHECK: @[[ARRAY:[a-zA-Za-zA-Z0-9_]+]] = internal constant [3 x i65] [i65 1, i65 2, i65 3], align 16 6 | 7 | // template 8 | // using ap_int = __attribute__((__ap_int(Bits))) unsigned; 9 | 10 | template 11 | using ap_int = __attribute__((__ap_int(Bits))) int; 12 | 13 | void f(ap_int<13> a[15][15]) 14 | { 15 | int i,j; 16 | for (i=0;i<15;i++) 17 | for (j=0;j<15;j++) 18 | { 19 | a[i][j]++; 20 | } 21 | } -------------------------------------------------------------------------------- /Tests/Light_HLS_Top/config.txt: -------------------------------------------------------------------------------- 1 | clock = 20 2 | HLS_lib_path = ../../../HLS_Data_Lib/ 3 | #array_partition variable=A dim=2 factor=4 scope=f block 4 | #loop_unroll label=Loop_f_2 factor=4 5 | #loop_unroll label=Loop_subf_2 factor=4 6 | #loop_pipeline label=Loop_f_2 II=1 7 | #loop_pipeline label=Loop_kernel_2mm_6 II=1 8 | #loop_unroll label=Loop_kernel_2mm_6 factor=8 9 | #array_partition variable=tmp dim=1 factor=8 scope=kernel_2mm cyclic 10 | #array_partition variable=D dim=1 factor=8 scope=kernel_2mm cyclic 11 | #array_partition variable=C dim=2 factor=8 scope=kernel_2mm cyclic 12 | #loop_pipeline label=Loop_kernel_2mm_3 II=1 13 | loop_pipeline label=Loop_kernel_fdtd_2d_2 II=2 14 | loop_pipeline label=Loop_kernel_fdtd_2d_4 II=2 15 | loop_pipeline label=Loop_kernel_fdtd_2d_6 II=2 -------------------------------------------------------------------------------- 
/Implementations/HI_ArraySensitiveToLoopLevel/ClockInfo.cc: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | 4 | std::string clockStrs[100] = { 5 | "3", // 1 6 | "4", // 2 7 | "5", // 3 8 | "6", // 4 9 | "7", // 5 10 | "8", // 6 11 | "9", // 7 12 | "10", // 8 13 | "12.5", // 9 14 | "15", // 10 15 | "16", // 11 16 | "17.5", // 12 17 | "20", // 13 18 | "25", // 14 19 | "30" // 15 20 | }; 21 | 22 | const float clockPeriod[100] = { 23 | 3, // 1 24 | 4, // 2 25 | 5, // 3 26 | 6, // 4 27 | 7, // 5 28 | 8, // 6 29 | 9, // 7 30 | 10, // 8 31 | 12.5, // 9 32 | 15, // 10 33 | 16, // 11 34 | 17.5, // 12 35 | 20, // 13 36 | 25, // 14 37 | 30 // 15 38 | }; -------------------------------------------------------------------------------- /Implementations/HI_MuxInsertionArrayPartition/ClockInfo.cc: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | 4 | std::string clockStrs[100] = { 5 | "3", // 1 6 | "4", // 2 7 | "5", // 3 8 | "6", // 4 9 | "7", // 5 10 | "8", // 6 11 | "9", // 7 12 | "10", // 8 13 | "12.5", // 9 14 | "15", // 10 15 | "16", // 11 16 | "17.5", // 12 17 | "20", // 13 18 | "25", // 14 19 | "30" // 15 20 | }; 21 | 22 | const float clockPeriod[100] = { 23 | 3, // 1 24 | 4, // 2 25 | 5, // 3 26 | 6, // 4 27 | 7, // 5 28 | 8, // 6 29 | 9, // 7 30 | 10, // 8 31 | 12.5, // 9 32 | 15, // 10 33 | 16, // 11 34 | 17.5, // 12 35 | 20, // 13 36 | 25, // 14 37 | 30 // 15 38 | }; -------------------------------------------------------------------------------- /Implementations/HI_FunctionInstantiation/README: -------------------------------------------------------------------------------- 1 | The HI_FunctionInstantiation pass is used to instantiate the called sub-functions. 
2 | 3 | Consider the following code: 4 | 5 | void f0(int A[100],int B[100]) 6 | { 7 | #pragma HLS array_partition variable=A factor=2 8 | #pragma HLS array_partition variable=B factor=4 9 | f1(A); 10 | f1(B); 11 | } 12 | 13 | Although the arrays A and B are both processed by function f1(), the function with 14 | different input structures will end up with different latency. This will confuse the 15 | process of timing evaluation. To overcome this issue, we can just generate two separate 16 | functions f1_0() and f1_1(), for arrays A and B respectively, and forward the pragma information 17 | to the subfunctions. 18 | 19 | BE CAREFUL OF THOSE "llvm.xxx" functions!!!!!!!!!!!!! -------------------------------------------------------------------------------- /Implementations/HI_NoDirectiveTimingResourceEvaluation/ClockInfo.cc: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | 4 | std::string clockStrs[100] = { 5 | "3", // 1 6 | "4", // 2 7 | "5", // 3 8 | "6", // 4 9 | "7", // 5 10 | "8", // 6 11 | "9", // 7 12 | "10", // 8 13 | "12.5", // 9 14 | "15", // 10 15 | "16", // 11 16 | "17.5", // 12 17 | "20", // 13 18 | "25", // 14 19 | "30" // 15 20 | }; 21 | 22 | const float clockPeriod[100] = { 23 | 3, // 1 24 | 4, // 2 25 | 5, // 3 26 | 6, // 4 27 | 7, // 5 28 | 8, // 6 29 | 9, // 7 30 | 10, // 8 31 | 12.5, // 9 32 | 15, // 10 33 | 16, // 11 34 | 17.5, // 12 35 | 20, // 13 36 | 25, // 14 37 | 30 // 15 38 | }; -------------------------------------------------------------------------------- /Implementations/HI_WithDirectiveTimingResourceEvaluation/ClockInfo.cc: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | 4 | std::string clockStrs[100] = { 5 | "3", // 1 6 | "4", // 2 7 | "5", // 3 8 | "6", // 4 9 | "7", // 5 10 | "8", // 6 11 | "9", // 7 12 | "10", // 8 13 | "12.5", // 9 14 | "15", // 10 15 | "16", // 11 16 | "17.5", // 12 17 | "20", // 13 18 | "25", // 14 19 | 
"30" // 15 20 | }; 21 | 22 | const float clockPeriod[100] = { 23 | 3, // 1 24 | 4, // 2 25 | 5, // 3 26 | 6, // 4 27 | 7, // 5 28 | 8, // 6 29 | 9, // 7 30 | 10, // 8 31 | 12.5, // 9 32 | 15, // 10 33 | 16, // 11 34 | 17.5, // 12 35 | 20, // 13 36 | 25, // 14 37 | 30 // 15 38 | }; -------------------------------------------------------------------------------- /Implementations/HI_NoDirectiveTimingResourceEvaluation/HI_InstructionFiles.h: -------------------------------------------------------------------------------- 1 | #ifndef _HI_InstructionFiles_HI_NoDirectiveTimingResourceEvaluation 2 | #define _HI_InstructionFiles_HI_NoDirectiveTimingResourceEvaluation 3 | 4 | #include 5 | 6 | // WARNING!!!!!!! 7 | // WARNING!!!!!!! 8 | // when you modify this file, remember that there is another file you need to modify for 9 | // HI_WithDirectiveTimingResourceEvaluation 10 | 11 | // WARNING!!!!!!! 12 | // WARNING!!!!!!! 13 | 14 | const int instructionInfoNum = 35; // should refer to Instruction_list.h 15 | 16 | // The instruction opcodes which are included in the path specified 17 | extern std::string instructionNames[100]; 18 | 19 | // Is the instruction opcode included in the path specified or not 20 | extern bool instructionHasMappingFile[100]; 21 | 22 | #endif -------------------------------------------------------------------------------- /Implementations/HI_WithDirectiveTimingResourceEvaluation/HI_InstructionFiles.h: -------------------------------------------------------------------------------- 1 | #ifndef _HI_InstructionFiles_HI_WithDirectiveTimingResourceEvaluation 2 | #define _HI_InstructionFiles_HI_WithDirectiveTimingResourceEvaluation 3 | 4 | #include 5 | 6 | // WARNING!!!!!!! 7 | // WARNING!!!!!!! 8 | // when you modify this file, remember that there is another file you need to modify for 9 | // HI_NoDirectiveTimingResourceEvaluation 10 | 11 | // WARNING!!!!!!! 12 | // WARNING!!!!!!! 
13 | 14 | const int instructionInfoNum = 35; // should refer to Instruction_list.h 15 | 16 | // The instruction opcodes which are included in the path specified 17 | extern std::string instructionNames[100]; 18 | 19 | // Is the instruction opcode included in the path specified or not 20 | extern bool instructionHasMappingFile[100]; 21 | 22 | #endif -------------------------------------------------------------------------------- /App/2mm/2mm_tiny copy.cc: -------------------------------------------------------------------------------- 1 | 2 | #define NI 16 3 | #define NJ 18 4 | #define NK 22 5 | #define NL 24 6 | 7 | void kernel_2mm(int ni, int nj, int nk, int nl, 8 | int alpha, 9 | int beta, 10 | int tmp[NI][NJ], 11 | int A[NI][NK], 12 | int B[NK][NJ], 13 | int C[NJ][NL], 14 | int D_input[NI][NL], 15 | int D_output[NI][NL]) 16 | { 17 | int i,j,k; 18 | 19 | for (i = 0; i < NI; i++) 20 | for (j = 0; j < NJ; j++) 21 | { 22 | int res = 0; 23 | for (k = 0; k < NK; ++k) 24 | res += alpha * A[i][k] * B[k][j]; 25 | tmp[i][j] = res; 26 | } 27 | 28 | for (i = 0; i < NI; i++) 29 | for (j = 0; j < NL; j++) 30 | { 31 | int sum = D_input[i][j] * beta; 32 | for (k = 0; k < NJ; ++k) 33 | sum+= tmp[i][k] * C[k][j]; 34 | D_output[i][j] = sum; 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /App/2mm/2mm.cc: -------------------------------------------------------------------------------- 1 | 2 | #define NI 16 3 | #define NJ 18 4 | #define NK 22 5 | #define NL 24 6 | 7 | void kernel_2mm(int ni, int nj, int nk, int nl, int alpha, int beta, int tmp[NI][NJ], int A[NI][NK], 8 | int B[NK][NJ], int C[NJ][NL], int D_input[NI][NL], int D_output[NI][NL]) 9 | { 10 | int i, j, k; 11 | 12 | for (i = 0; i < NI; i++) 13 | for (j = 0; j < NJ; j++) 14 | { 15 | tmp[i][j] = 0; 16 | for (k = 0; k < NK; ++k) 17 | tmp[i][j] += alpha * A[i][k] * B[k][j]; 18 | } 19 | 20 | for (i = 0; i < NI; i++) 21 | for (j = 0; j < NL; j++) 22 | { 23 | int sum = 
D_input[i][j] * beta; 24 | for (k = 0; k < NJ; ++k) 25 | sum += tmp[i][k] * C[k][j]; 26 | D_output[i][j] = sum; 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /Tests/LLVM_exp14_HI_WithDirectiveTimingResourceEvaluation/ConfigParse.h: -------------------------------------------------------------------------------- 1 | #ifndef _Gobal_Confic_Parse 2 | #define _Gobal_Confic_Parse 3 | 4 | #include "HI_StringProcess.h" 5 | #include "HI_SysExec.h" 6 | #include "HI_print.h" 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | extern std::string clock_period_str; 13 | extern std::string HLS_lib_path; 14 | 15 | void Parse_Config(const char *config_file_name, std::map &LoopLabel2UnrollFactor, 16 | std::map &LoopLabel2II); 17 | 18 | void parseArrayPartition(std::stringstream &iss); 19 | 20 | void parseLoopUnroll(std::stringstream &iss, std::map &LoopLabel2UnrollFactor); 21 | 22 | void parseLoopPipeline(std::stringstream &iss, std::map &LoopLabel2II); 23 | 24 | #endif -------------------------------------------------------------------------------- /App/2dloop2darray_pl_subf/2dloop2darray_pl.cc: -------------------------------------------------------------------------------- 1 | __attribute__((always_inline)) int subFunc1(int i, int j) 2 | { 3 | int ii = i + 1; 4 | int jj = j * 2; 5 | for (int k = 0; k < i; k++) 6 | if (k % 2) 7 | ii += k; 8 | else 9 | jj += k; 10 | 11 | return ii + jj; 12 | } 13 | 14 | int subFunc2(int i, int j) 15 | { 16 | int ii = i + 1; 17 | int jj = j * 2; 18 | for (int k = 0; k < i; k++) 19 | if (k % 2) 20 | ii += k; 21 | 22 | return ii + jj; 23 | } 24 | 25 | void f(int A[50][100]) 26 | { 27 | int N = 100; 28 | int M = 50; 29 | for (int j = 1; j < N; j++) 30 | for (int i = 1; i < M; i++) 31 | { 32 | A[i][j] = A[i - 1][j - 1] + A[i][j - 1] + A[i - 1][j] + subFunc1(i, j) + subFunc2(i, j); 33 | } 34 | 35 | return; 36 | } 37 | 
-------------------------------------------------------------------------------- /App/jacobi/jacobi-2d.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #define MINI_DATASET 4 | # ifdef MINI_DATASET 5 | # define TSTEPS 20 6 | # define N 30 7 | # endif 8 | 9 | #define TSTEPS 20 10 | #define DATA_TYPE float 11 | /* Main computational kernel. The whole function will be timed, 12 | including the call and return. */ 13 | 14 | void kernel_jacobi_2d(int tsteps, 15 | int n, 16 | DATA_TYPE A[N][N], 17 | DATA_TYPE B[N][N]) 18 | { 19 | int t, i, j; 20 | 21 | for (i = 1; i < N - 1; i++) 22 | for (j = 1; j < N - 1; j++) 23 | B[i][j] = (DATA_TYPE)(0.2) * (A[i][j] + A[i][j-1] + A[i][1+j] + A[1+i][j] + A[i-1][j]); 24 | for (i = 1; i < N - 1; i++) 25 | for (j = 1; j < N - 1; j++) 26 | A[i][j] = (DATA_TYPE)(0.2) * (B[i][j] + B[i][j-1] + B[i][1+j] + B[1+i][j] + B[i-1][j]); 27 | 28 | } 29 | 30 | -------------------------------------------------------------------------------- /App/2mm/2mm_tiny.cc: -------------------------------------------------------------------------------- 1 | 2 | #define NI 16 3 | #define NJ 18 4 | #define NK 22 5 | #define NL 24 6 | 7 | void kernel_2mm(int ni, int nj, int nk, int nl, int alpha, int beta, int tmp[NI][NJ], int A[NI][NK], 8 | int B[NK][NJ], int C[NJ][NL], int D_input[NI][NL], int D_output[NI][NL]) 9 | { 10 | int i, j, k; 11 | 12 | for (i = 0; i < NI; i++) 13 | for (j = 0; j < NJ; j++) 14 | { 15 | int res = 0; 16 | for (k = 0; k < NK; ++k) 17 | res += alpha * A[i][k] * B[k][j]; 18 | tmp[i][j] = res; 19 | } 20 | 21 | for (i = 0; i < NI; i++) 22 | for (j = 0; j < NL; j++) 23 | { 24 | int sum = D_input[i][j] * beta; 25 | for (k = 0; k < NJ; ++k) 26 | sum += tmp[i][k] * C[k][j]; 27 | D_output[i][j] = sum; 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /HLS_Data_Lib/README: 
-------------------------------------------------------------------------------- 1 | Here, the format of each instruction's information record is explained. 2 | 3 | Take the instruction "mul" as an example: 4 | 5 | 34 58 48 7 9 328 82 4 5.6 1 MulnS 6 | 7 | These parameters are: 8 | the bitwidth of input operandA (-1 means unrelated to bitwidth, e.g. double-precision floating point) 9 | the bitwidth of input operandB 10 | the bitwidth of output operandC 11 | the clock period 12 | the DSP cost of the instruction at the specific clock 13 | the FF cost of the instruction at the specific clock 14 | the LUT cost of the instruction at the specific clock 15 | the latency (X cycles) of the instruction at the specific clock 16 | the extra delay (Y ns) of the instruction at the specific clock (The total timing cost of the instruction = X cycles + Y ns) 17 | the number of initiation interval (II) cycles 18 | the IP core used for the instruction 19 | -------------------------------------------------------------------------------- /Tests/LLVM_exp6_GEP_Transformation/README: -------------------------------------------------------------------------------- 1 | LLVM_exp6_GEP_Transformation 2 | 3 | 4 | In HLS, the GEP instruction is actually implemented by multiplications and additions, which are used to get the 1-D offset of an element in a multi-dimensional array. 5 | As I found, the SeparateConstOffsetFromGEP pass, with the LowerGEP flag set to true, can actually achieve this. 6 | However, the major problem is that the pass handles the memory space byte by byte, but in an FPGA the memory space is much more flexible. 7 | Therefore, we need to improve the pass to adapt it to the HLS design. 8 | 9 | Note that: 10 | 11 | addRequiredTransitive means that one pass relies on the other one during its lifetime. It is a very strict constraint and we should use it carefully. 
12 | 13 | 14 | The test can be run with the following command: 15 | 16 | ./LLVM_expXXXXX -------------------------------------------------------------------------------- /HLS_Data_Lib/dcmp/dcmp: -------------------------------------------------------------------------------- 1 | -1 -1 -1 3 0 272 612 4 3.226 1 --- 2 | -1 -1 -1 3 0 272 612 4 3.226 1 --- 3 | -1 -1 -1 4 0 272 612 4 3.226 1 --- 4 | -1 -1 -1 4 0 272 612 4 3.226 1 --- 5 | -1 -1 -1 5 0 271 612 3 4.168 1 --- 6 | -1 -1 -1 5 0 271 612 3 4.168 1 --- 7 | -1 -1 -1 6 0 263 603 3 4.582 1 --- 8 | -1 -1 -1 6 0 263 603 3 4.582 1 --- 9 | -1 -1 -1 7 0 263 603 2 5.498 1 --- 10 | -1 -1 -1 7 0 263 603 2 5.498 1 --- 11 | -1 -1 -1 8 0 135 585 1 6.873 1 --- 12 | -1 -1 -1 8 0 135 585 1 6.873 1 --- 13 | -1 -1 -1 9 0 130 585 0 7.815 1 --- 14 | -1 -1 -1 9 0 130 585 0 7.815 1 --- 15 | -1 -1 -1 10 0 130 585 0 7.815 1 --- 16 | -1 -1 -1 10 0 130 585 0 7.815 1 --- 17 | -1 -1 -1 12.5 0 130 585 0 7.815 1 --- 18 | -1 -1 -1 12.5 0 130 585 0 7.815 1 --- 19 | -1 -1 -1 15 0 130 585 0 7.815 1 --- 20 | -1 -1 -1 15 0 130 585 0 7.815 1 --- 21 | -1 -1 -1 16 0 130 585 0 7.815 1 --- 22 | -1 -1 -1 16 0 130 585 0 7.815 1 --- 23 | -------------------------------------------------------------------------------- /Implementations/HI_Polly_Info/README: -------------------------------------------------------------------------------- 1 | The Polly_Info pass is mainly tested by LLVM_exp4_polly_info 2 | 3 | Polly_Info itself does nothing, but it shows how to involve the Polly-series passes in our own pass. Please note that the passes which Polly_Info and PolyhedralInfo depend on should be added beforehand in the application (PM.add(XXX)) 4 | 5 | A detailed explanation can be found in the source code's comments. 
6 | 7 | Please note that the compilation relies on some parameters, arguments and variables from PollyConfig.cmake. 8 | Please refer to the changes in CMakeLists.txt, compared to the previous experiment. 9 | You can just search for "Polly" in the CMakeLists.txt and notice the modifications. 10 | 11 | In the source code, you need to add all the dependency passes to the pass manager; otherwise you might get a runtime error such as "Pass 'Unnamed pass: implement Pass::getPassName()' is not initialized" -------------------------------------------------------------------------------- /Tests/LLVM_expAPINT_test/README: -------------------------------------------------------------------------------- 1 | A test for a developed patch for ap_int 2 | 3 | The patch to make Clang compatible with arbitrary precision integers is shared by: 4 | 5 | Erich Keane 6 | erich.keane@intel.com 7 | Compiler Engineer 8 | Intel Corporation 9 | 10 | Thanks to his kind sharing of this patch, I can work around a lot of tough work. This 11 | patch is under review: https://reviews.llvm.org/D59105. Therefore, people who also 12 | want to make use of the patch should obey the related license of LLVM and make a proper 13 | citation/reference. Here, I want to mention that the community of LLVM is so active and 14 | I am convinced that it is those active and generous developers who make LLVM a great 15 | tool. 
16 | 17 | 18 | 19 | 20 | 21 | What is metadata in LLVM: 22 | 23 | http://jiten-thakkar.com/posts/how-to-read-and-write-metadata-in-llvm -------------------------------------------------------------------------------- /Tests/Light_HLS_Top/config_conv.txt: -------------------------------------------------------------------------------- 1 | clock = 5 2 | HLS_lib_path = ../../../HLS_Data_Lib/ 3 | loop_unroll label=Loop_convs_4 factor=5 4 | loop_unroll label=Loop_convs_12 factor=3 5 | loop_unroll label=Loop_convs_8 factor=5 6 | loop_unroll label=Loop_convs_3 factor=2 7 | loop_pipeline label=Loop_convs_11 II=4 8 | loop_pipeline label=Loop_convs_7 II=7 9 | loop_pipeline label=Loop_convs_3 II=14 10 | array_partition variable=filter0 scope=convs dim=1 factor=5 cyclic 11 | array_partition variable=filter2 scope=convs dim=1 factor=3 cyclic 12 | array_partition variable=filter1 scope=convs dim=1 factor=5 cyclic 13 | array_partition variable=filter0 scope=convs dim=2 factor=2 cyclic 14 | #array_port variable=input scope=convs port_num=2 15 | func_dataflow scope=convs enable 16 | local_array scope=convs variable=filter0 enable 17 | local_array scope=convs variable=filter1 enable 18 | local_array scope=convs variable=filter2 enable -------------------------------------------------------------------------------- /Tests/LLVM_exp4_polly_info/README: -------------------------------------------------------------------------------- 1 | The Polly_Info pass is mainly tested by LLVM_exp4_polly_info 2 | 3 | Polly_Info itself does nothing, but it shows how to involve the Polly-series passes in our own pass. 
Please note that the passes which Polly_Info and PolyhedralInfo depend on should be added beforehand in the application (PM.add(XXX)). 4 | 5 | Please note that the compilation relies on some parameters, arguments and variables from PollyConfig.cmake. 6 | Please refer to the changes in CMakeLists.txt compared to the previous experiment. 7 | You can simply search for "Polly" in the CMakeLists.txt to see the modifications. 8 | 9 | In the source code, you need to add all the passes that the pass depends on to the pass manager, otherwise you might get a runtime error such as "Pass 'Unnamed pass: implement Pass::getPassName()' is not initialized". 10 | 11 | The test can be run with the following command: 12 | 13 | ./LLVM_expXXXXX -------------------------------------------------------------------------------- /Tests/LLVM_exp12a_HI_InstructionMoveBackward/README: -------------------------------------------------------------------------------- 1 | The HI_IntstructionMoveBackward pass is tested by LLVM_exp12a_HI_InstructionMoveBackward. 2 | 3 | The pass is used to move instructions from their original block to the dominator block if possible, so that the parallelism in HLS can be improved. 4 | 5 | This pass is based on the DominatorTreeWrapperPass in LLVM. For an explanation of what a dominator tree is, please refer to https://en.wikipedia.org/wiki/Dominator_(graph_theory). 6 | 7 | The test can be run with the following command: 8 | 9 | ./LLVM_expxxxxx 10 | 11 | The rough implementation idea is shown below: 12 | 13 | 1. loop to find the lowest node in the dominator tree that has not been processed yet, and process it 14 | 2. mark the block as processed 15 | 3. obtain the instructions that are independent of the PHI nodes 16 | 4. 
move those specific instructions to dominator block -------------------------------------------------------------------------------- /Implementations/HI_SysExec/HI_SysExec.cc: -------------------------------------------------------------------------------- 1 | 2 | 3 | #include "HI_print.h" 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | bool sysexec(const char *cmd) 12 | { 13 | std::array buffer; 14 | std::string result; 15 | std::unique_ptr pipe(popen(cmd, "r"), pclose); 16 | if (!pipe) 17 | { 18 | throw std::runtime_error("popen() failed!"); 19 | } 20 | while (fgets(buffer.data(), buffer.size(), pipe.get()) != nullptr) 21 | { 22 | result += buffer.data(); 23 | } 24 | std::string tmp(result); 25 | if (tmp.find("error") != -1 || tmp.find("ERROR") != -1 || tmp.find("Error") != -1) 26 | { 27 | std::string tmp0 = cmd; 28 | tmp0 = "command failure: " + tmp0; 29 | print_error(tmp0.c_str()); 30 | return 0; 31 | } 32 | return 1; 33 | } -------------------------------------------------------------------------------- /App/fdtd2d/fdtd.cc: -------------------------------------------------------------------------------- 1 | #define TMAX 40 2 | #define NX 15 3 | #define NY 80 4 | 5 | void kernel_fdtd_2d(int tmax, int nx, int ny, float ex_input[NX][NY], float ex_output[NX][NY], 6 | float ey_input[NX][NY], float ey_output[NX][NY], float hz_input[NX][NY], 7 | float hz_output[NX][NY]) 8 | { 9 | int t, i, j; 10 | 11 | for (i = 1; i < NX; i++) 12 | for (j = 0; j < NY; j++) 13 | ey_output[i][j] = ey_input[i][j] - (float)(0.5) * (hz_input[i][j] - hz_input[i - 1][j]); 14 | 15 | for (i = 0; i < NX; i++) 16 | for (j = 1; j < NY; j++) 17 | ex_output[i][j] = ex_input[i][j] - (float)(0.5) * (hz_input[i][j] - hz_input[i][j - 1]); 18 | 19 | for (i = 0; i < NX - 1; i++) 20 | for (j = 0; j < NY - 1; j++) 21 | hz_output[i][j] = 22 | hz_input[i][j] - (float)(0.7) * (ex_output[i][j + 1] - ex_output[i][j] + 23 | ey_output[i + 1][j] - ey_output[i][j]); 24 | } 
25 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Screenshots** 24 | If applicable, add screenshots to help explain your problem. 25 | 26 | **Desktop (please complete the following information):** 27 | - OS: [e.g. iOS] 28 | - Browser [e.g. chrome, safari] 29 | - Version [e.g. 22] 30 | 31 | **Smartphone (please complete the following information):** 32 | - Device: [e.g. iPhone6] 33 | - OS: [e.g. iOS8.1] 34 | - Browser [e.g. stock browser, safari] 35 | - Version [e.g. 22] 36 | 37 | **Additional context** 38 | Add any other context about the problem here. 
39 | -------------------------------------------------------------------------------- /Tests/Light_HLS_Top/ConfigParse.h: -------------------------------------------------------------------------------- 1 | #ifndef _Gobal_Confic_Parse 2 | #define _Gobal_Confic_Parse 3 | 4 | #include "HI_StringProcess.h" 5 | #include "HI_SysExec.h" 6 | #include "HI_print.h" 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | extern std::string clock_period_str; 13 | extern std::string HLS_lib_path; 14 | 15 | void Parse_Config(const char *config_file_name, std::map &LoopLabel2UnrollFactor, 16 | std::map &LoopLabel2II); 17 | 18 | void parseArrayPartition(std::stringstream &iss); 19 | 20 | void parseLoopUnroll(std::stringstream &iss, std::map &LoopLabel2UnrollFactor); 21 | 22 | void parseLoopPipeline(std::stringstream &iss, std::map &LoopLabel2II); 23 | 24 | void parseArrayPortNum(std::stringstream &iss); 25 | 26 | void parseFuncDataflow(std::stringstream &iss); 27 | 28 | void parseLocalArray(std::stringstream &iss); 29 | 30 | void parseResourceLimit(std::stringstream &iss); 31 | 32 | #endif -------------------------------------------------------------------------------- /App/conv/conv_tiny.cc: -------------------------------------------------------------------------------- 1 | 2 | void convs(int input[8][8], int output[8][8], int filter2[3][3], int filter0[5][5]) 3 | { 4 | int mid0[8][8]; 5 | for (int i = 2; i < 6; i++) 6 | { 7 | for (int j = 2; j < 6; j++) 8 | { 9 | int tmp = 0; 10 | for (int k = -2; k <= 2; k++) 11 | { 12 | for (int l = -2; l <= 2; l++) 13 | { 14 | tmp += filter0[2 + k][2 + l] * input[i + k][j + l] * 123; 15 | } 16 | } 17 | mid0[i][j] = tmp > 0 ? 
tmp * 23 : tmp; 18 | } 19 | } 20 | 21 | for (int i = 1; i < 7; i += 1) 22 | { 23 | for (int j = 1; j < 7; j += 1) 24 | { 25 | int tmp = 0; 26 | for (int k = -1; k <= 1; k++) 27 | { 28 | for (int l = -1; l <= 1; l++) 29 | { 30 | tmp += filter2[1 + k][1 + l] * mid0[i + k][j + l]; 31 | } 32 | } 33 | output[i][j] = tmp; 34 | } 35 | } 36 | } -------------------------------------------------------------------------------- /App/spam-filter/sgd.cpp: -------------------------------------------------------------------------------- 1 | #define NUM_FEATURES 800 2 | 3 | // void computeGradient( int grad[NUM_FEATURES], int feature[NUM_FEATURES], int scale) 4 | // { 5 | 6 | // } 7 | 8 | // // Update the parameter vector 9 | // void updateParameter( int param[NUM_FEATURES], int grad[NUM_FEATURES], int scale) 10 | // { 11 | // for (int i = 0; i < NUM_FEATURES ; i++) 12 | // { 13 | // param[i] += scale * grad[i]; 14 | // } 15 | // } 16 | 17 | 18 | void compute(int theta_local[NUM_FEATURES], int training_instance[NUM_FEATURES], int gradient[NUM_FEATURES], int prob, int step ) 19 | { 20 | 21 | // compute gradient 22 | // computeGradient(gradient, training_instance, (prob)); 23 | // update the param vector 24 | // updateParameter(theta_local, gradient, step); 25 | 26 | for (int i = 0; i < NUM_FEATURES ; i++) 27 | { 28 | gradient[i] = (prob * training_instance[i]); 29 | } 30 | for (int i = 0; i < NUM_FEATURES ; i++) 31 | { 32 | theta_local[i] += step * gradient[i]; 33 | } 34 | } 35 | 36 | -------------------------------------------------------------------------------- /Implementations/HI_VarWidthReduce/README: -------------------------------------------------------------------------------- 1 | LLVM_exp8_VarWidthReduce is used to test the pass HI_VarWidthReduce 2 | 3 | The HI_VarWidthReduce pass is used to shrink the bitwidth of the varaibles which have predictale range. This can reduce the hardware cost in HLS. 
4 | 5 | T T now you can see that implementing an HLS pass is not as easy as you thought before... it could be... a lot of dirty work. 6 | 7 | 0. Using IRBuilder, you can generate and insert IR instructions much more easily: 8 | IRBuilder<> Builder(XXXX); // tell LLVM insert instruction before XXXX 9 | XXXXInst * tmp_XXXXInst = Builder.createXXXInst(.....) 10 | 11 | 1. When handling bitwidth in LLVM, you need to be very very very very very careful to ensure those operands have the same bit-width, otherwise you may get strange errors during compilation or at run time. 12 | 13 | 2. When you try to remove an instruction from a BasicBlock by using ->eraseFromParent in for (instruction &I : B), you need to be careful, because sometimes when you are removing an instruction, you are actually destroying the list of instructions of the block. That could lead to a segmentation fault. -------------------------------------------------------------------------------- /App/2dloop2darray_pl/2dloop2darray_pl.cc: -------------------------------------------------------------------------------- 1 | 2 | void subf(int A[56][100]) 3 | { 4 | int N = 100; 5 | int M = 50; 6 | for (int j = 1; j < N; j++) 7 | for (int i = 1; i < M + 1; i++) 8 | A[i][j] = A[i - 1][j - 1] + A[i][j - 1] + A[i - 1][j] + 1; 9 | for (int j = 1; j < N; j++) 10 | for (int i = 1; i < M + 1; i++) 11 | A[i][j] = A[i - 1][j - 1] + A[i][j - 1] + A[i - 1][j] + 1; 12 | return; 13 | } 14 | 15 | void f(int A[56][100]) 16 | { 17 | int N = 100; 18 | int M = 56; 19 | for (int j = 1; j < N; j++) 20 | for (int i = 1; i < M + 1; i++) 21 | { 22 | A[i][j] = 23 | A[i - 1][j - 1] + A[i][j - 1] + A[i - 1][j] + 24 | 1; // load 3 store 1 25 | // A[i+1][j] = A[i][j-1] + A[i+1][j-1] + A[i][j] + 1; // load 1 store 1 26 | // A[i+2][j] = A[i+1][j-1] + A[i+2][j-1] + A[i+1][j] + 1; // load 1 store 1 27 | // A[i+3][j] = A[i+2][j-1] + A[i+3][j-1] + A[i+2][j] + 1; // load 1 store 1 28 | } 29 | subf(A); 30 | subf(A); 31 | return; 32 | } 33 | 
-------------------------------------------------------------------------------- /HLS_Data_Lib/fadd/fadd: -------------------------------------------------------------------------------- 1 | -1 -1 -1 3 0 636 595 13 3.38 1 FAddSub_nodsp 2 | -1 -1 -1 3 0 636 595 13 3.38 1 FAddSub_nodsp 3 | -1 -1 -1 4 2 365 421 9 3.35 1 FAddSub 4 | -1 -1 -1 4 2 365 421 9 3.35 1 FAddSub 5 | -1 -1 -1 5 2 324 424 8 4.34 1 FAddSub 6 | -1 -1 -1 5 2 324 424 8 4.34 1 FAddSub 7 | -1 -1 -1 6 2 324 424 8 4.34 1 FAddSub 8 | -1 -1 -1 6 2 324 424 8 4.34 1 FAddSub 9 | -1 -1 -1 7 2 296 438 7 5.39 1 FAddSub 10 | -1 -1 -1 7 2 296 438 7 5.39 1 FAddSub 11 | -1 -1 -1 8 2 306 418 6 6.44 1 FAddSub 12 | -1 -1 -1 8 2 306 418 6 6.44 1 FAddSub 13 | -1 -1 -1 9 2 205 390 4 7.25 1 FAddSub 14 | -1 -1 -1 9 2 205 390 4 7.25 1 FAddSub 15 | -1 -1 -1 10 2 205 390 4 7.25 1 FAddSub 16 | -1 -1 -1 10 2 205 390 4 7.25 1 FAddSub 17 | -1 -1 -1 12.5 2 227 403 3 10.5 1 FAddSub 18 | -1 -1 -1 12.5 2 227 403 3 10.5 1 FAddSub 19 | -1 -1 -1 15 2 227 403 3 10.5 1 FAddSub 20 | -1 -1 -1 15 2 227 403 3 10.5 1 FAddSub 21 | -1 -1 -1 16 2 227 403 3 10.5 1 FAddSub 22 | -1 -1 -1 16 2 227 403 3 10.5 1 FAddSub 23 | -1 -1 -1 17.5 2 227 403 3 10.5 1 FAddSub 24 | -1 -1 -1 17.5 2 227 403 3 10.5 1 FAddSub 25 | -1 -1 -1 20 2 227 403 3 10.5 1 FAddSub 26 | -1 -1 -1 20 2 227 403 3 10.5 1 FAddSub -------------------------------------------------------------------------------- /HLS_Data_Lib/fsub/fsub: -------------------------------------------------------------------------------- 1 | -1 -1 -1 3 0 634 596 13 2.97 1 FAddSub_nodsp 2 | -1 -1 -1 3 0 634 596 13 2.97 1 FAddSub_nodsp 3 | -1 -1 -1 4 2 365 419 9 3.21 1 FAddSub 4 | -1 -1 -1 4 2 365 419 9 3.21 1 FAddSub 5 | -1 -1 -1 5 2 365 419 9 3.21 1 FAddSub 6 | -1 -1 -1 5 2 365 419 9 3.21 1 FAddSub 7 | -1 -1 -1 6 2 296 430 7 5.07 1 FAddSub 8 | -1 -1 -1 6 2 296 430 7 5.07 1 FAddSub 9 | -1 -1 -1 7 2 306 416 6 5.97 1 FAddSub 10 | -1 -1 -1 7 2 306 416 6 5.97 1 FAddSub 11 | -1 -1 -1 8 2 306 416 6 5.97 1 FAddSub 12 | 
-1 -1 -1 8 2 306 416 6 5.97 1 FAddSub 13 | -1 -1 -1 9 2 205 390 4 7.17 1 FAddSub 14 | -1 -1 -1 9 2 205 390 4 7.17 1 FAddSub 15 | -1 -1 -1 10 2 205 390 4 7.17 1 FAddSub 16 | -1 -1 -1 10 2 205 390 4 7.17 1 FAddSub 17 | -1 -1 -1 12.5 2 227 405 3 10.9 1 FAddSub 18 | -1 -1 -1 12.5 2 227 405 3 10.9 1 FAddSub 19 | -1 -1 -1 15 2 227 405 3 10.9 1 FAddSub 20 | -1 -1 -1 15 2 227 405 3 10.9 1 FAddSub 21 | -1 -1 -1 16 2 227 405 3 10.9 1 FAddSub 22 | -1 -1 -1 16 2 227 405 3 10.9 1 FAddSub 23 | -1 -1 -1 17.5 2 227 405 3 10.9 1 FAddSub 24 | -1 -1 -1 17.5 2 227 405 3 10.9 1 FAddSub 25 | -1 -1 -1 20 2 227 405 3 10.9 1 FAddSub 26 | -1 -1 -1 20 2 227 405 3 10.9 1 FAddSub 27 | -------------------------------------------------------------------------------- /HLS_Data_Lib/fmul/fmul: -------------------------------------------------------------------------------- 1 | -1 -1 -1 3 3 199 317 7 2.38 1 FMul 2 | -1 -1 -1 3 3 199 317 7 2.38 1 FMul 3 | -1 -1 -1 4 3 199 317 7 2.38 1 FMul 4 | -1 -1 -1 4 3 199 317 7 2.38 1 FMul 5 | -1 -1 -1 5 3 151 326 4 4.35 1 FMul 6 | -1 -1 -1 5 3 151 326 4 4.35 1 FMul 7 | -1 -1 -1 6 3 151 326 4 4.35 1 FMul 8 | -1 -1 -1 6 3 151 326 4 4.35 1 FMul 9 | -1 -1 -1 7 3 143 322 3 5.64 1 FMul 10 | -1 -1 -1 7 3 143 322 3 5.64 1 FMul 11 | -1 -1 -1 8 3 143 322 3 5.64 1 FMul 12 | -1 -1 -1 8 3 143 322 3 5.64 1 FMul 13 | -1 -1 -1 9 3 143 322 3 5.64 1 FMul 14 | -1 -1 -1 9 3 143 322 3 5.64 1 FMul 15 | -1 -1 -1 10 3 143 322 3 5.64 1 FMul 16 | -1 -1 -1 10 3 143 322 3 5.64 1 FMul 17 | -1 -1 -1 12.5 3 128 320 2 10.2 1 FMul 18 | -1 -1 -1 12.5 3 128 320 2 10.2 1 FMul 19 | -1 -1 -1 15 3 128 320 1 12.3 1 FMul 20 | -1 -1 -1 15 3 128 320 1 12.3 1 FMul 21 | -1 -1 -1 16 3 128 320 1 12.3 1 FMul 22 | -1 -1 -1 16 3 128 320 1 12.3 1 FMul 23 | -1 -1 -1 17.5 3 128 320 1 12.3 1 FMul 24 | -1 -1 -1 17.5 3 128 320 1 12.3 1 FMul 25 | -1 -1 -1 20 3 128 320 0 15.4 1 FMul 26 | -1 -1 -1 20 3 128 320 0 15.4 1 FMul 27 | -1 -1 -1 25 3 128 320 0 15.4 1 FMul 28 | -1 -1 -1 25 3 128 320 0 15.4 1 FMul 29 | -1 -1 -1 
30 3 128 320 0 15.4 1 FMul 30 | -1 -1 -1 30 3 128 320 0 15.4 1 FMul 31 | -------------------------------------------------------------------------------- /HLS_Data_Lib/dmul/dmul: -------------------------------------------------------------------------------- 1 | -1 -1 -1 3 11 2252 1697 17 2.75 1 DMul 2 | -1 -1 -1 3 11 2252 1697 17 2.75 1 DMul 3 | -1 -1 -1 4 11 2202 1697 16 3.38 1 DMul 4 | -1 -1 -1 4 11 2202 1697 16 3.38 1 DMul 5 | -1 -1 -1 5 11 660 997 9 4.35 1 DMul 6 | -1 -1 -1 5 11 660 997 9 4.35 1 DMul 7 | -1 -1 -1 6 11 660 997 9 4.35 1 DMul 8 | -1 -1 -1 6 11 660 997 9 4.35 1 DMul 9 | -1 -1 -1 7 11 601 980 7 5.67 1 DMul 10 | -1 -1 -1 7 11 601 980 7 5.67 1 DMul 11 | -1 -1 -1 8 11 546 968 6 6.67 1 DMul 12 | -1 -1 -1 8 11 546 968 6 6.67 1 DMul 13 | -1 -1 -1 9 11 508 967 5 7.79 1 DMul 14 | -1 -1 -1 9 11 508 967 5 7.79 1 DMul 15 | -1 -1 -1 10 11 508 967 5 7.79 1 DMul 16 | -1 -1 -1 10 11 508 967 5 7.79 1 DMul 17 | -1 -1 -1 12.5 11 490 958 4 9.9 1 DMul 18 | -1 -1 -1 12.5 11 490 958 4 9.9 1 DMul 19 | -1 -1 -1 15 11 490 958 4 9.9 1 DMul 20 | -1 -1 -1 15 11 490 958 4 9.9 1 DMul 21 | -1 -1 -1 16 11 466 958 3 13.8 1 DMul 22 | -1 -1 -1 16 11 466 958 3 13.8 1 DMul 23 | -1 -1 -1 17.5 11 466 958 3 13.8 1 DMul 24 | -1 -1 -1 17.5 11 466 958 3 13.8 1 DMul 25 | -1 -1 -1 20 11 466 958 3 13.8 1 DMul 26 | -1 -1 -1 20 11 466 958 3 13.8 1 DMul 27 | -1 -1 -1 25 11 403 958 3 13.8 1 DMul 28 | -1 -1 -1 25 11 403 958 3 13.8 1 DMul 29 | -1 -1 -1 30 11 403 958 3 13.8 1 DMul 30 | -1 -1 -1 30 11 403 958 3 13.8 1 DMul -------------------------------------------------------------------------------- /HLS_Data_Lib/fdiv/fdiv: -------------------------------------------------------------------------------- 1 | -1 -1 -1 3 0 1439 1038 29 3.59 1 FDiv 2 | -1 -1 -1 3 0 1439 1038 29 3.59 1 FDiv 3 | -1 -1 -1 4 0 1439 1038 29 3.59 1 FDiv 4 | -1 -1 -1 4 0 1439 1038 29 3.59 1 FDiv 5 | -1 -1 -1 5 0 1439 1038 29 3.59 1 FDiv 6 | -1 -1 -1 5 0 1439 1038 29 3.59 1 FDiv 7 | -1 -1 -1 6 0 1439 1038 29 3.59 1 FDiv 8 | 
-1 -1 -1 6 0 1439 1038 29 3.59 1 FDiv 9 | -1 -1 -1 7 0 763 997 16 5.83 1 FDiv 10 | -1 -1 -1 7 0 763 997 16 5.83 1 FDiv 11 | -1 -1 -1 8 0 762 990 15 6.18 1 FDiv 12 | -1 -1 -1 8 0 762 990 15 6.18 1 FDiv 13 | -1 -1 -1 9 0 762 990 15 6.18 1 FDiv 14 | -1 -1 -1 9 0 762 990 15 6.18 1 FDiv 15 | -1 -1 -1 10 0 564 991 11 8.38 1 FDiv 16 | -1 -1 -1 10 0 564 991 11 8.38 1 FDiv 17 | -1 -1 -1 12.5 0 460 989 10 10.7 1 FDiv 18 | -1 -1 -1 12.5 0 460 989 10 10.7 1 FDiv 19 | -1 -1 -1 15 0 460 987 9 11.0 1 FDiv 20 | -1 -1 -1 15 0 460 987 9 11.0 1 FDiv 21 | -1 -1 -1 16 0 412 987 8 13.6 1 FDiv 22 | -1 -1 -1 16 0 412 987 8 13.6 1 FDiv 23 | -1 -1 -1 17.5 0 412 987 8 13.6 1 FDiv 24 | -1 -1 -1 17.5 0 412 987 8 13.6 1 FDiv 25 | -1 -1 -1 20 0 364 985 7 15.8 1 FDiv 26 | -1 -1 -1 20 0 364 985 7 15.8 1 FDiv 27 | -1 -1 -1 25 0 316 989 6 18.1 1 FDiv 28 | -1 -1 -1 25 0 316 989 6 18.1 1 FDiv 29 | -1 -1 -1 30 0 269 978 5 23.4 1 FDiv 30 | -1 -1 -1 30 0 269 978 5 23.4 1 FDiv 31 | -------------------------------------------------------------------------------- /Tests/LLVM_exp8_VarWidthReduce/README: -------------------------------------------------------------------------------- 1 | LLVM_exp8_VarWidthReduce is used to test the pass HI_VarWidthReduce 2 | 3 | The HI_VarWidthReduce pass is used to shrink the bitwidth of the variables which have a predictable range. This can reduce the hardware cost in HLS. 4 | 5 | T T now you can see that implementing an HLS pass is not as easy as you thought before... it could be... a lot of dirty work. 6 | 7 | 0. Using IRBuilder, you can generate and insert IR instructions much more easily: 8 | IRBuilder<> Builder(XXXX); // tell LLVM insert instruction before XXXX 9 | XXXXInst * tmp_XXXXInst = Builder.createXXXInst(.....) 10 | 11 | 1. When handling bitwidth in LLVM, you need to be very very very very very careful to ensure those operands have the same bit-width, otherwise you may get strange errors during compilation or at run time. 12 | 13 | 2. 
When you try to remove an instruction from a BasicBlock by using ->eraseFromParent in for (instruction &I : B), you need to be careful, because sometimes when you are removing an instruction, you are actually destroying the list of instructions of the block. That could lead to a segmentation fault. 14 | 15 | 16 | The test can be run with the following command: 17 | 18 | ./LLVM_expXXXXX -------------------------------------------------------------------------------- /Tests/LLVM_exp0_find_functions/README: -------------------------------------------------------------------------------- 1 | The HI_FindFunction pass is mainly tested by LLVM_exp0_find_functions. 2 | 3 | The HI_FindFunction pass is just a HelloWorld-level pass, showing the template for constructing a pass in LLVM. 4 | A detailed explanation can be found in the comments in the source code. 5 | The test code will replace the function names in the IR code with the demangled ones. 6 | 7 | The test can be run with the following command: 8 | 9 | ./LLVM_expXXXXX 10 | 11 | 12 | 13 | LLVM installation hint: 14 | 15 | 1. I build the following directory hierarchy: 16 | ./LLVM 17 | ./LLVM/LLVM_src 18 | ./LLVM/LLVM_build 19 | 20 | 2. Enter the LLVM_src directory and download LLVM-9.0 with the following command: 21 | git clone https://github.com/llvm/llvm-project.git 22 | 23 | 3. Enter the LLVM_build directory and first run CMake with the following command: 24 | cmake -G "Unix Makefiles" -DLLVM_ENABLE_PROJECTS="clang;lldb;compiler-rt;lld;polly;debuginfo-tests" ../LLVM_src/llvm/ 25 | 26 | 4. Then build LLVM by: 27 | make -j8 28 | 29 | 5. Then install LLVM by: 30 | sudo make install 31 | 32 | ps: There could be some dependencies you need to resolve, but fortunately LLVM will give you the information during installation. 
33 | -------------------------------------------------------------------------------- /HLS_Data_Lib/ddiv/ddiv: -------------------------------------------------------------------------------- 1 | -1 -1 -1 3 0 6160 3870 58 4.61 1 DDiv 2 | -1 -1 -1 3 0 6160 3870 58 4.61 1 DDiv 3 | -1 -1 -1 4 0 6160 3870 58 4.61 1 DDiv 4 | -1 -1 -1 4 0 6160 3870 58 4.61 1 DDiv 5 | -1 -1 -1 5 0 6160 3870 58 4.61 1 DDiv 6 | -1 -1 -1 5 0 6160 3870 58 4.61 1 DDiv 7 | -1 -1 -1 6 0 6160 3870 58 4.61 1 DDiv 8 | -1 -1 -1 6 0 6160 3870 58 4.61 1 DDiv 9 | -1 -1 -1 7 0 6160 3870 58 4.61 1 DDiv 10 | -1 -1 -1 7 0 6160 3870 58 4.61 1 DDiv 11 | -1 -1 -1 8 0 6160 3870 58 4.61 1 DDiv 12 | -1 -1 -1 8 0 6160 3870 58 4.61 1 DDiv 13 | -1 -1 -1 9 0 3217 3698 50 7.83 1 DDiv 14 | -1 -1 -1 9 0 3217 3698 50 7.83 1 DDiv 15 | -1 -1 -1 10 0 3211 3644 30 8.45 1 DDiv 16 | -1 -1 -1 10 0 3211 3644 30 8.45 1 DDiv 17 | -1 -1 -1 12.5 0 3211 3644 30 8.45 1 DDiv 18 | -1 -1 -1 12.5 0 3211 3644 30 8.45 1 DDiv 19 | -1 -1 -1 15 0 2246 3619 21 11.2 1 DDiv 20 | -1 -1 -1 15 0 2246 3619 21 11.2 1 DDiv 21 | -1 -1 -1 16 0 2246 3619 21 11.2 1 DDiv 22 | -1 -1 -1 16 0 2246 3619 21 11.2 1 DDiv 23 | -1 -1 -1 17.5 0 1718 3625 20 15.0 1 DDiv 24 | -1 -1 -1 17.5 0 1718 3625 20 15.0 1 DDiv 25 | -1 -1 -1 20 0 1710 3628 16 15.5 1 DDiv 26 | -1 -1 -1 20 0 1710 3628 16 15.5 1 DDiv 27 | -1 -1 -1 25 0 1392 3614 13 18.9 1 DDiv 28 | -1 -1 -1 25 0 1392 3614 13 18.9 1 DDiv 29 | -1 -1 -1 30 0 1072 3607 10 26.0 1 DDiv 30 | -1 -1 -1 30 0 1072 3607 10 26.0 1 DDiv 31 | -------------------------------------------------------------------------------- /Implementations/HI_NoDirectiveTimingResourceEvaluation/README.md: -------------------------------------------------------------------------------- 1 | HI_NoDirectiveTimingResourceEvaluation pass is tested by LLVM_exp12_HI_NoDirectiveTimingResourceEvaluation 2 | 3 | Compared to HI_SimpleTimingEvaluation, HI_NoDirectiveTimingResourceEvaluation will access 4 | the database to get the accurate information of the 
instruction, including timing and 5 | resource, to achieve accurate timing evaluation. 6 | 7 | However, in this pass, the HLS directives still do not take effect; they will be enabled in 8 | the next step. 9 | 10 | In the implementation, the high-level idea is similar to HI_SimpleTimingEvaluation. 11 | However, the classes inst_timing_resource_info and timingBase are used to handle the detailed 12 | information for the timing and resource of instructions in a nicer way. 13 | 14 | Related operators are overloaded, e.g. +, * and >, for the calculation of the critical path and 15 | the latency of loop/block/function. For each instruction, HI_NoDirectiveTimingResourceEvaluation 16 | will query the map variables to get the result of timing and resource. 17 | 18 | Besides, the chaining of operations for DSP utilization is considered. 19 | 20 | The memory hazard is not considered in this pass. The hazard is considered in HI_WithDirectiveTimingResourceEvaluation. 21 | This detection relies on SCEV. -------------------------------------------------------------------------------- /Implementations/HI_StringProcess/HI_StringProcess.h: -------------------------------------------------------------------------------- 1 | #ifndef _HI_STRINGPROCESS 2 | #define _HI_STRINGPROCESS 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | typedef std::uint64_t hash_t; 22 | 23 | constexpr hash_t prime = 0x100000001B3ull; 24 | constexpr hash_t basis = 0xCBF29CE484222325ull; 25 | 26 | inline bool exists_test(const std::string &name) 27 | { 28 | struct stat buffer; 29 | return (stat(name.c_str(), &buffer) == 0); 30 | } 31 | 32 | hash_t hash_(char const *str); 33 | 34 | void findAndReplaceAll(std::string &data, std::string toSearch, std::string replaceStr); 35 | 36 | std::string removeExtraSpace(std::string tmp_s); 37 | 38 | void 
consumeEqual(std::stringstream &iss); 39 | 40 | constexpr hash_t hash_compile_time(char const *str, hash_t last_value = basis) 41 | { 42 | return *str ? hash_compile_time(str + 1, (*str ^ last_value) * prime) : last_value; 43 | } 44 | 45 | std::string demangleFunctionName(std::string mangled_name); 46 | 47 | #endif -------------------------------------------------------------------------------- /HLS_Data_Lib/dsub/dsub: -------------------------------------------------------------------------------- 1 | -1 -1 -1 3 3 1126 1104 15 3.69 1 DAddSub 2 | -1 -1 -1 3 3 1126 1104 15 3.69 1 DAddSub 3 | -1 -1 -1 4 3 1126 1104 15 3.69 1 DAddSub 4 | -1 -1 -1 4 3 1126 1104 15 3.69 1 DAddSub 5 | -1 -1 -1 5 3 979 1169 12 4.24 1 DAddSub 6 | -1 -1 -1 5 3 979 1169 12 4.24 1 DAddSub 7 | -1 -1 -1 6 3 754 1084 9 5.19 1 DAddSub 8 | -1 -1 -1 6 3 754 1084 9 5.19 1 DAddSub 9 | -1 -1 -1 7 3 686 1091 7 5.52 1 DAddSub 10 | -1 -1 -1 7 3 686 1091 7 5.52 1 DAddSub 11 | -1 -1 -1 8 3 686 1091 7 5.52 1 DAddSub 12 | -1 -1 -1 8 3 686 1091 7 5.52 1 DAddSub 13 | -1 -1 -1 9 3 686 1091 7 5.52 1 DAddSub 14 | -1 -1 -1 9 3 686 1091 7 5.52 1 DAddSub 15 | -1 -1 -1 10 3 509 1165 5 8.37 1 DAddSub 16 | -1 -1 -1 10 3 509 1165 5 8.37 1 DAddSub 17 | -1 -1 -1 12.5 3 445 1144 4 9.54 1 DAddSub 18 | -1 -1 -1 12.5 3 445 1144 4 9.54 1 DAddSub 19 | -1 -1 -1 15 3 445 1144 4 9.54 1 DAddSub 20 | -1 -1 -1 15 3 445 1144 4 9.54 1 DAddSub 21 | -1 -1 -1 16 3 432 1125 3 13.6 1 DAddSub 22 | -1 -1 -1 16 3 432 1125 3 13.6 1 DAddSub 23 | -1 -1 -1 17.5 3 432 1125 3 13.6 1 DAddSub 24 | -1 -1 -1 17.5 3 432 1125 3 13.6 1 DAddSub 25 | -1 -1 -1 20 3 432 1125 3 13.6 1 DAddSub 26 | -1 -1 -1 20 3 432 1125 3 13.6 1 DAddSub 27 | -1 -1 -1 25 3 432 1125 3 13.6 1 DAddSub 28 | -1 -1 -1 25 3 432 1125 3 13.6 1 DAddSub 29 | -1 -1 -1 30 3 342 1065 2 24.8 1 DAddSub 30 | -1 -1 -1 30 3 342 1065 2 24.8 1 DAddSub 31 | -------------------------------------------------------------------------------- /Implementations/HI_InstructionLatencyCollect/README: 
-------------------------------------------------------------------------------- 1 | The HI_SimpleTimingEvaluation pass is tested by LLVM_exp5_LoopSimpleAnalysis 2 | 3 | The pass traverses the top_function, including the blocks, the subfunctions and the loops, to get the total latency according to the CFG (predecessor-successor). 4 | A detailed explanation can be found in the comments in the source code. 5 | 6 | The latency is obtained by ASAP scheduling according to the dependence between blocks. (The detailed implementation and explanation can be found in the source code.) 7 | 8 | The pass handles the module block by block, with the help of many subfunctions of different LLVM pre-defined classes. 9 | 10 | 11 | 12 | The implementation idea is shown below: 13 | 14 | If the block is in a loop, the latency of the entire loop will be calculated. After that, all the blocks of the loop will be merged into a latency node for later processing. 15 | For example, if some blocks are succeeded by the loop, when the pass traverses to a block in the loop, it will directly jump to the exiting blocks and accumulate the loop latency. 16 | 17 | For subfunctions, if the pass traverses to a CallInstruction in the blocks, it will first get the latency of the subfunction and assign that latency to the CallInstruction. 
18 | 19 | Related LLVM Passes: 20 | LoopSimplify 21 | IndVarSimplifyPass --- createIndVarSimplifyPass 22 | LoopStrengthReducePass --- createLoopStrengthReducePass -------------------------------------------------------------------------------- /Tests/LLVM_exp12_HI_NoDirectiveTimingResourceEvaluation/README: -------------------------------------------------------------------------------- 1 | HI_NoDirectiveTimingResourceEvaluation pass is tested by LLVM_exp12_HI_NoDirectiveTimingResourceEvaluation 2 | 3 | Compared to HI_SimpleTimingEvaluation, HI_NoDirectiveTimingResourceEvaluation will access 4 | the database to get the accurate information of the instruction, including timing and 5 | resource, to achieve accurate timing evaluation. However, as an initial test, HI_NoDirectiveTimingResourceEvaluation may 6 | not be accurate enough. 7 | 8 | However, in this pass, the HLS directives still do not take effect; they will be enabled in 9 | the next step. 10 | 11 | In the implementation, the high-level idea is similar to HI_SimpleTimingEvaluation. 12 | However, the classes inst_timing_resource_info and timingBase are used to handle the detailed 13 | information for the timing and resource of instructions in a nicer way. 14 | 15 | Related operators are overloaded, e.g. +, * and >, for the calculation of the critical path and 16 | the latency of loop/block/function. For each instruction, HI_NoDirectiveTimingResourceEvaluation 17 | will query the map variables to get the result of timing and resource. 18 | 19 | Besides, the chaining of operations for DSP utilization is considered. 
20 | 21 | The test can be run with the following command: 22 | 23 | ./LLVM_expxxxxx -------------------------------------------------------------------------------- /Tests/LLVM_exp5a_InstrucitonList/README: -------------------------------------------------------------------------------- 1 | Mainly tests the HI_SimpleTimingEvaluation pass 2 | 3 | 4 | The pass traverses the top_function, including the blocks, the subfunctions and the loops, to get the total latency according to the CFG (predecessor-successor). 5 | The latency is obtained by ASAP scheduling according to the dependence between blocks. (The detailed implementation and explanation can be found in the source code.) 6 | 7 | The pass handles the module block by block, with the help of many subfunctions of different LLVM pre-defined classes. 8 | 9 | 10 | The test can be run with the following command: 11 | 12 | ./LLVM_expXXXXX 13 | 14 | 15 | The implementation idea is shown below: 16 | 17 | If the block is in a loop, the latency of the entire loop will be calculated. After that, all the blocks of the loop will be merged into a latency node for later processing. 18 | For example, if some blocks are succeeded by the loop, when the pass traverses to a block in the loop, it will directly jump to the exiting blocks and accumulate the loop latency. 19 | 20 | For subfunctions, if the pass traverses to a CallInstruction in the blocks, it will first get the latency of the subfunction and assign that latency to the CallInstruction. 
21 | 22 | 23 | Related LLVM Passes: 24 | LoopSimplify 25 | IndVarSimplifyPass --- createIndVarSimplifyPass 26 | LoopStrengthReducePass --- createLoopStrengthReducePass -------------------------------------------------------------------------------- /Tests/LLVM_exp5_SimpleTimingAnalysis/README: -------------------------------------------------------------------------------- 1 | Mainly tests the HI_SimpleTimingEvaluation pass. 2 | 3 | 4 | The pass traverses the top_function, including the blocks, the subfunctions and the loops, to get the total latency according to the CFG (predecessor-successor). 5 | The latency is obtained by ASAP scheduling according to the dependence between blocks. (Detailed implementation and explanation can be found in the source code.) 6 | 7 | The pass handles the module block by block, with the help of many subfunctions of different LLVM pre-defined classes. 8 | 9 | 10 | The test can be run with the following command: 11 | 12 | ./LLVM_expXXXXX 13 | 14 | 15 | The implementation idea is shown below: 16 | 17 | If the block is in a loop, the latency of the entire loop will be calculated. After that, all the blocks of the loop will be merged into a latency node for later processing. 18 | For example, if some block is succeeded by the loop, when the pass traverses to a block in the loop, it will directly jump to the exiting blocks and accumulate the loop latency. 19 | 20 | For subfunctions, if the pass traverses to a CallInstruction in the blocks, it will first get the latency of the subfunction and assign that latency to the CallInstruction. 
21 | 22 | 23 | Related LLVM Passes: 24 | LoopSimplify 25 | IndVarSimplifyPass --- createIndVarSimplifyPass 26 | LoopStrengthReducePass --- createLoopStrengthReducePass -------------------------------------------------------------------------------- /App/2dloop2darray_pl_functionAccess/2dloop2darray_pl_functionAccess.cc: -------------------------------------------------------------------------------- 1 | // void f1 ( int A[50][100]) { 2 | // int N = 100; 3 | // int M = 50; 4 | // int B[50][100]; 5 | // for ( int j = 0; j < N; j++ ) 6 | // for ( int i = 0; i < M; i++ ) 7 | // B[i][j]=i; 8 | // for ( int j = 1; j < N; j++ ) 9 | // for ( int i = 1; i < M; i++ ) 10 | // A[i][j] = i%2? A[i-1][j]:B[i][j]; 11 | // return; 12 | // } 13 | 14 | // void f3 ( int A[50][100]) { 15 | // int N = 100; 16 | // int M = 50; 17 | // int B[50][100]; 18 | // for ( int j = 0; j < N; j++ ) 19 | // for ( int i = 0; i < M; i++ ) 20 | // B[i][j]=i; 21 | // for ( int j = 1; j < N; j++ ) 22 | // for ( int i = 1; i < M; i++ ) 23 | // A[i][j] = i%2? 
/**
 * Three-stage integer convolution pipeline over a 32x32 image.
 *
 *   stage 1: mid0[i][j] = sum of filter0 (5x5) * input window,        i, j in 2..29
 *   stage 2: t = sum of filter1 (5x5) * mid0 window * 321,            i, j in 2..29
 *            mid1[i][j] = t > 0 ? t * 23 : t
 *   stage 3: output[i][j] = sum of filter2 (3x3) * mid1 window,       i, j in 1..30
 *
 * Stages 2 and 3 slide their windows over cells of mid0/mid1 that the previous stage never
 * writes (rows/columns 0, 1, 30 and 31). Both intermediate buffers are therefore
 * zero-initialised so that those cells contribute 0 instead of whatever happened to be on the
 * stack; without this the border results are indeterminate.
 *
 * @param input   source image; all 32x32 cells are read
 * @param output  destination image; only output[1..30][1..30] is written, the outermost ring
 *                is left untouched
 * @param filter2 3x3 kernel of stage 3
 * @param filter1 5x5 kernel of stage 2
 * @param filter0 5x5 kernel of stage 1
 */
void convs(int input[32][32], int output[32][32], int filter2[3][3], int filter1[5][5],
           int filter0[5][5])
{
    /* Zero-filled: the two outer rings are read by stage 2 but never written by stage 1. */
    int mid0[32][32] = {0};
    for (int i = 2; i < 30; i++)
    {
        for (int j = 2; j < 30; j++)
        {
            int tmp = 0;
            for (int k = -2; k <= 2; k++)
            {
                for (int l = -2; l <= 2; l++)
                {
                    tmp += filter0[2 + k][2 + l] * input[i + k][j + l];
                }
            }
            mid0[i][j] = tmp;
        }
    }

    /* Zero-filled: the two outer rings are read by stage 3 but never written by stage 2. */
    int mid1[32][32] = {0};
    for (int i = 2; i < 30; i += 1)
    {
        for (int j = 2; j < 30; j += 1)
        {
            int tmp = 0;
            for (int k = -2; k <= 2; k++)
            {
                for (int l = -2; l <= 2; l++)
                {
                    tmp += filter1[2 + k][2 + l] * mid0[i + k][j + l] * 321;
                }
            }
            /* Positive sums are scaled by 23, non-positive sums pass through unchanged. */
            mid1[i][j] = tmp > 0 ? tmp * 23 : tmp;
        }
    }

    for (int i = 1; i < 31; i += 1)
    {
        for (int j = 1; j < 31; j += 1)
        {
            int tmp = 0;
            for (int k = -1; k <= 1; k++)
            {
                for (int l = -1; l <= 1; l++)
                {
                    tmp += filter2[1 + k][1 + l] * mid1[i + k][j + l];
                }
            }
            output[i][j] = tmp;
        }
    }
}
/* Problem sizes. MINI_DATASET is selected unconditionally; the trailing unconditional
 * definitions repeat the MINI values. */
#define MINI_DATASET

#ifdef MINI_DATASET
#define NI 16
#define NJ 18
#define NK 22
#define NL 24
#endif

#ifdef SMALL_DATASET
#define NI 40
#define NJ 50
#define NK 70
#define NL 80
#endif

#ifdef MEDIUM_DATASET
#define NI 180
#define NJ 190
#define NK 210
#define NL 220
#endif

#ifdef LARGE_DATASET
#define NI 800
#define NJ 900
#define NK 1100
#define NL 1200
#endif

#ifdef EXTRALARGE_DATASET
#define NI 1600
#define NJ 1800
#define NK 2200
#define NL 2400
#endif

#define NI 16
#define NJ 18
#define NK 22
#define NL 24

/*
%0 = ptrtoint [18 x i32]* %tmp to i64
%1 = ptrtoint [22 x i32]* %A to i64
%2 = ptrtoint [18 x i32]* %B to i64
%3 = ptrtoint [24 x i32]* %D to i64
%4 = ptrtoint [24 x i32]* %C to i64
*/

/**
 * Two chained matrix multiplications: D := alpha*A*B*C + beta*D.
 *
 * First tmp := alpha*A*B is formed, then D is scaled by beta and tmp*C is accumulated into it.
 * The loop bounds are the compile-time sizes NI/NJ/NK/NL; the run-time arguments ni, nj, nk and
 * nl are accepted for interface compatibility but are not read.
 *
 * @param ni,nj,nk,nl  unused
 * @param alpha        scale applied to A*B
 * @param beta         scale applied to the incoming D
 * @param tmp          NI x NJ scratch matrix, overwritten with alpha*A*B
 * @param A            NI x NK input
 * @param B            NK x NJ input
 * @param C            NJ x NL input
 * @param D            NI x NL in/out matrix
 */
void kernel_2mm(int ni, int nj, int nk, int nl, double alpha, double beta, double tmp[NI][NJ],
                double A[NI][NK], double B[NK][NJ], double C[NJ][NL], double D[NI][NL])
{
    int i, j, k;

    /* tmp := alpha*A*B */
    for (i = 0; i < NI; i++)
        for (j = 0; j < NJ; j++)
        {
            tmp[i][j] = 0;
            for (k = 0; k < NK; ++k)
                tmp[i][j] += alpha * A[i][k] * B[k][j];
        }

    /* D := tmp*C + beta*D */
    for (i = 0; i < NI; i++)
        for (j = 0; j < NL; j++)
        {
            D[i][j] *= beta;
            for (k = 0; k < NJ; ++k)
                /* Accumulate into element (i, j); the k index only walks the dot product. */
                D[i][j] += tmp[i][k] * C[k][j];
        }
}
50 | 51 | // introduce the dependence of Pass 52 | void HI_GEP_OffsetCombine::getAnalysisUsage(AnalysisUsage &AU) const 53 | { 54 | AU.setPreservesCFG(); 55 | } 56 | -------------------------------------------------------------------------------- /Implementations/HI_SimpleTimingEvaluation/README: -------------------------------------------------------------------------------- 1 | HI_SimpleTimingEvaluation pass is tested by LLVM_exp5_SimpleTimingEvaluation 2 | 3 | The pass traverses the top_function, including the blocks, the subfunctions and the loops, to get the total latency according to the CFG (predecessor-successor). 4 | Detailed explanation can be found in the source code's comments. 5 | 6 | The latency is obtained by ASAP scheduling according to the dependence between blocks. (Detailed implementation and explanation can be found in the source code.) 7 | 8 | The pass handles the module block by block, with the help of many subfunctions of different LLVM pre-defined classes. 9 | 10 | 11 | 12 | A rough implementation idea is shown below: 13 | 14 | If the block is in a loop, the latency of the entire loop will be calculated. After that, all the blocks of the loop will be merged into a latency node for later processing. 15 | For example, if some block is succeeded by the loop, when the pass traverses to a block in the loop, it will directly jump to the exiting blocks and accumulate the loop latency. 16 | 17 | For subfunctions, if the pass traverses to a CallInstruction in the blocks, it will first get the latency of the subfunction and assign that latency to the CallInstruction. 
/*
 * Writes one tagged, colourised line to stdout.
 *
 * The tag is wrapped in an ANSI SGR sequence built from the given background and foreground
 * codes, the attributes are reset, and the message follows on the same line.
 */
static void hi_print_tagged(int background, int foreground, const char *tag, const char *message)
{
    printf("\x1b[%d;%dm%s\x1b[%dm \x1b[0;0m\x1b[0m %s\n", background, foreground, tag, 0, message);
}

/* Prints a command line that is about to be executed (tag "Hi-LLVM CMD: "). */
void print_cmd(const char *tmp)
{
    hi_print_tagged(40, 33, "Hi-LLVM CMD: ", tmp);
}

/* Prints an informational message (tag "Hi-LLVM INFO: "). */
void print_info(const char *tmp)
{
    hi_print_tagged(40, 34, "Hi-LLVM INFO: ", tmp);
}

/* Prints a progress/status message (tag "Hi-LLVM STATUS: "). */
void print_status(const char *tmp)
{
    hi_print_tagged(40, 32, "Hi-LLVM STATUS: ", tmp);
}

/* Prints an error message (tag "Hi-LLVM ERROR: "). */
void print_error(const char *tmp)
{
    hi_print_tagged(43, 31, "Hi-LLVM ERROR: ", tmp);
}

/* Prints a warning message (tag "Hi-LLVM WARNING: "); uses the same colour codes as errors. */
void print_warning(const char *tmp)
{
    hi_print_tagged(43, 31, "Hi-LLVM WARNING: ", tmp);
}
%s\n", 40, 34, "Hi-LLVM INFO: ", 0, 44 | tmp_string.c_str()); 45 | } 46 | 47 | void print_status(std::string tmp_string) 48 | { 49 | printf("\x1b[%d;%dm%s\x1b[%dm \x1b[0;0m\x1b[0m %s\n", 40, 32, "Hi-LLVM STATUS: ", 0, 50 | tmp_string.c_str()); 51 | } 52 | 53 | void print_error(std::string tmp_string) 54 | { 55 | printf("\x1b[%d;%dm%s\x1b[%dm \x1b[0;0m\x1b[0m %s\n", 43, 31, "Hi-LLVM ERROR: ", 0, 56 | tmp_string.c_str()); 57 | } 58 | 59 | void print_warning(std::string tmp_string) 60 | { 61 | printf("\x1b[%d;%dm%s\x1b[%dm \x1b[0;0m\x1b[0m %s\n", 43, 31, "Hi-LLVM WARNING: ", 0, 62 | tmp_string.c_str()); 63 | } 64 | 65 | #endif 66 | -------------------------------------------------------------------------------- /Tests/LLVM_exp1_dependence_list/LLVM_exp1_dependence_list.cc: -------------------------------------------------------------------------------- 1 | #include "HI_DependenceList.h" 2 | #include "HI_FindFunction.h" 3 | #include "HI_SysExec.h" 4 | #include "HI_print.h" 5 | #include "llvm/IR/LegacyPassManager.h" 6 | #include "llvm/IR/Module.h" 7 | #include "llvm/IRReader/IRReader.h" 8 | #include "llvm/Pass.h" 9 | #include "llvm/Support/SourceMgr.h" 10 | #include "llvm/Support/raw_ostream.h" 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | using namespace llvm; 17 | 18 | int main(int argc, char **argv) 19 | { 20 | if (argc < 2) 21 | { 22 | errs() << "Usage: " << argv[0] << " \n"; 23 | return 1; 24 | } 25 | 26 | // Compile the source code into IR and Parse the input LLVM IR file into a module 27 | SMDiagnostic Err; 28 | LLVMContext Context; 29 | std::string cmd_str = "clang -O1 -emit-llvm -S " + std::string(argv[1]) + " -o top.bc 2>&1"; 30 | print_cmd(cmd_str.c_str()); 31 | bool result = sysexec(cmd_str.c_str()); 32 | assert(result); // ensure the cmd is executed successfully 33 | // system(cmd_str.c_str()); 34 | 35 | std::unique_ptr Mod(parseIRFile("top.bc", Err, Context)); 36 | if (!Mod) 37 | { 38 | Err.print(argv[0], errs()); 39 | return 1; 40 | } 
41 | 42 | // Create a pass manager and fill it with the passes we want to run. 43 | legacy::PassManager PM; 44 | 45 | auto dominatortreewrapperpass = new DominatorTreeWrapperPass(); 46 | PM.add(dominatortreewrapperpass); 47 | print_info("Enable DominatorTreeWrapperPass Pass"); 48 | 49 | print_info("Enable HI_FindFunctions Pass"); 50 | print_info("Enable HI_DependenceList Pass"); 51 | PM.add(new HI_FindFunctions()); 52 | PM.add(new HI_DependenceList("Instructions", "Instruction_Dep")); 53 | 54 | print_status("Start LLVM processing"); 55 | PM.run(*Mod); 56 | print_status("Accomplished LLVM processing"); 57 | return 0; 58 | } 59 | -------------------------------------------------------------------------------- /Implementations/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | cmake_minimum_required(VERSION 3.10) 3 | 4 | 5 | find_package(LLVM REQUIRED CONFIG) 6 | # Needed to use support library 7 | 8 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti -std=c++14") 9 | 10 | include_directories(${LLVM_INCLUDE_DIRS}) 11 | add_definitions(${LLVM_DEFINITIONS}) 12 | 13 | # aux_source_directory(. 
DIR_IMPL_SRCS) 14 | # add_library(hi_findfunctions ${DIR_IMPL_SRCS}) 15 | 16 | # include_directories(./ ./HI_FindFunctions ./HI_print) 17 | add_subdirectory(HI_FindFunctions) 18 | add_subdirectory(HI_print) 19 | add_subdirectory(HI_DependenceList) 20 | add_subdirectory(HI_SysExec) 21 | add_subdirectory(HI_LoopInFormationCollect) 22 | add_subdirectory(HI_Polly_Info) 23 | add_subdirectory(HI_LoopDependenceAnalysis) 24 | add_subdirectory(HI_SimpleTimingEvaluation) 25 | add_subdirectory(HI_GEP_OffsetCombine) 26 | add_subdirectory(HI_DuplicateInstRm) 27 | add_subdirectory(HI_VarWidthReduce) 28 | add_subdirectory(HI_SeparateConstOffsetFromGEP) 29 | add_subdirectory(HI_APIntSrcAnalysis) 30 | add_subdirectory(HI_NoDirectiveTimingResourceEvaluation) 31 | add_subdirectory(HI_IntstructionMoveBackward) 32 | add_subdirectory(HI_HLSDuplicateInstRm) 33 | add_subdirectory(HI_AggressiveLSR_MUL) 34 | add_subdirectory(HI_ArrayAccessPattern) 35 | add_subdirectory(HI_WithDirectiveTimingResourceEvaluation) 36 | add_subdirectory(HI_StringProcess) 37 | add_subdirectory(HI_FunctionInstantiation) 38 | add_subdirectory(HI_ReplaceSelectAccess) 39 | add_subdirectory(HI_IR2SourceCode) 40 | add_subdirectory(HI_LoopUnroll) 41 | add_subdirectory(HI_RemoveRedundantAccess) 42 | add_subdirectory(HI_LoopLabeler) 43 | add_subdirectory(HI_FunctionInterfaceInfo) 44 | add_subdirectory(HI_Mul2Shl) 45 | add_subdirectory(HI_MulOrderOpt) 46 | add_subdirectory(HI_MuxInsertionArrayPartition) 47 | add_subdirectory(HI_LoadALAP) 48 | add_subdirectory(HI_PragmaTargetExtraction) 49 | add_subdirectory(HI_ArraySensitiveToLoopLevel) 50 | add_subdirectory(HI_TopLoop2Func) 51 | add_subdirectory(HI_ConstantDivisorOpt) -------------------------------------------------------------------------------- /Implementations/HI_APIntSrcAnalysis/README: -------------------------------------------------------------------------------- 1 | LLVM_exp10_HI_APIntegerAnalysis is used to test the the frontend action HI_APIntegerAnalysis 2 | 3 
| HI_APIntegerAnalysis is based on Clang LibTooling 4 | 5 | It implement a frontend action to find the declartions of arbitrary precision integer and add comments in the source code to mark those declartions. 6 | 7 | The rewriter shoud be passed as reference but not pointer, otherwise errors will come out. 8 | 9 | Such implementation is based on the following hierarchy: 10 | 11 | 12 | // According the official template of Clang, this is a frontend factory with function createASTConsumer(), which 13 | // will generator a AST consumer. We can first create a rewriter and pass the reference of the 14 | // rewriter to the factory. Finally, we can pass the rewriter reference to the inner visitor. 15 | // rewriter -> factory -> frontend-action -> ASTconsumer -> Visitor 16 | 17 | // declare a rewriter 18 | // | pass the pointer to 19 | // create V 20 | // frontend Factory -----> FrontEnd Action 21 | // | | create / pass the rewriter 22 | // | Src Code V 23 | // -------------> AST consumer 24 | // | 25 | // | generate AST 26 | // V 27 | // Visitor (visit the nodes in AST and do the rewritting) 28 | 29 | IMPORTANT: Compare the differences between the following two implementation 30 | 31 | // virtual void HandleAnalysislationUnit(ASTContext &Context) 32 | // { 33 | // visitor.TraverseDecl(Context.getTranslationUnitDecl()); 34 | // } 35 | 36 | bool HandleTopLevelDecl(DeclGroupRef DR) override 37 | { 38 | for (DeclGroupRef::iterator b = DR.begin(), e = DR.end(); b != e; ++b) 39 | { 40 | visitor.TraverseDecl(*b); //(*b)->dump(); 41 | } 42 | return true; 43 | } -------------------------------------------------------------------------------- /App/bin_conv/bin_conv.h: -------------------------------------------------------------------------------- 1 | #include "ap_int.h" 2 | //------------------------------------------------------------------- 3 | // Constants 4 | //------------------------------------------------------------------- 5 | const unsigned CONVOLVERS = 2; 6 | 7 | const 
unsigned WORD_SIZE = 64; 8 | const unsigned WT_SIZE = 9; 9 | const unsigned CONV_W_PER_WORD = 7; 10 | const unsigned CONV1_W_PER_WORD = 4; 11 | const unsigned KH_PER_WORD = 4; 12 | const unsigned BYTE_SIZE = 8; 13 | const unsigned K = 3; 14 | const unsigned WT_L = 16 * 4 * 512; // parameter to control wt mem size 15 | const unsigned C_WT_WORDS = ((WT_L + CONV_W_PER_WORD - 1) / CONV_W_PER_WORD + CONVOLVERS - 1) / 16 | CONVOLVERS; // wt words per convolver 17 | const unsigned WT_WORDS = C_WT_WORDS * CONVOLVERS; 18 | const unsigned KH_WORDS = WT_L / 128 * 16 / WORD_SIZE; 19 | 20 | const unsigned DMEM_WORDS = 128 * 32 * 32 / WORD_SIZE; 21 | const unsigned C_DMEM_WORDS = DMEM_WORDS / CONVOLVERS; 22 | const unsigned DMEM_O_WORDS = 512 * 4 * 4 / WORD_SIZE; 23 | const unsigned DB_MEM_WORDS = 32 * 32; 24 | 25 | const unsigned PIX_PER_PHASE = 2 * 32 * 32; 26 | 27 | const unsigned MAX_WIDTH = WORD_SIZE; 28 | const unsigned BANK_WIDTH = 8; 29 | const unsigned LOG_BANK_WIDTH = 3; 30 | 31 | const unsigned CONV_ROWS = 3; 32 | const unsigned CONV_COLS = BANK_WIDTH + 2; 33 | const unsigned CONV_BANKS = WORD_SIZE / BANK_WIDTH; 34 | 35 | //------------------------------------------------------------------- 36 | // Typedefs 37 | //------------------------------------------------------------------- 38 | enum LayerTypeEnum 39 | { 40 | LAYER_CONV1, 41 | LAYER_CONV, 42 | LAYER_DENSE, 43 | LAYER_LAST 44 | }; 45 | 46 | typedef ap_int Word; 47 | typedef ap_int WtType; 48 | typedef ap_uint<16> Address; 49 | typedef ap_int<12> ConvSum; 50 | typedef ap_int<5> ConvOut; 51 | typedef ap_uint<10> IdxType; 52 | typedef ap_int<16> NormComp; 53 | typedef ap_int<16> DenseSum; 54 | typedef ap_uint<1> Bit; 55 | typedef ap_uint<2> TwoBit; 56 | -------------------------------------------------------------------------------- /App/jacobi/jacobi-2d.h: -------------------------------------------------------------------------------- 1 | /** 2 | * This version is stamped on May 10, 2016 3 | * 4 | * Contact: 5 | 
* Louis-Noel Pouchet 6 | * Tomofumi Yuki 7 | * 8 | * Web address: http://polybench.sourceforge.net 9 | */ 10 | #ifndef _JACOBI_2D_H 11 | #define _JACOBI_2D_H 12 | 13 | /* Default to LARGE_DATASET. */ 14 | #if !defined(MINI_DATASET) && !defined(SMALL_DATASET) && !defined(MEDIUM_DATASET) && \ 15 | !defined(LARGE_DATASET) && !defined(EXTRALARGE_DATASET) 16 | #define LARGE_DATASET 17 | #endif 18 | 19 | #if !defined(TSTEPS) && !defined(N) 20 | /* Define sample dataset sizes. */ 21 | #ifdef MINI_DATASET 22 | #define TSTEPS 20 23 | #define N 30 24 | #endif 25 | 26 | #ifdef SMALL_DATASET 27 | #define TSTEPS 40 28 | #define N 90 29 | #endif 30 | 31 | #ifdef MEDIUM_DATASET 32 | #define TSTEPS 100 33 | #define N 250 34 | #endif 35 | 36 | #ifdef LARGE_DATASET 37 | #define TSTEPS 500 38 | #define N 1300 39 | #endif 40 | 41 | #ifdef EXTRALARGE_DATASET 42 | #define TSTEPS 1000 43 | #define N 2800 44 | #endif 45 | 46 | #endif /* !(TSTEPS N) */ 47 | 48 | #define _PB_TSTEPS POLYBENCH_LOOP_BOUND(TSTEPS, tsteps) 49 | #define _PB_N POLYBENCH_LOOP_BOUND(N, n) 50 | 51 | /* Default data type */ 52 | #if !defined(DATA_TYPE_IS_INT) && !defined(DATA_TYPE_IS_FLOAT) && !defined(DATA_TYPE_IS_DOUBLE) 53 | #define DATA_TYPE_IS_DOUBLE 54 | #endif 55 | 56 | #ifdef DATA_TYPE_IS_INT 57 | #define DATA_TYPE int 58 | #define DATA_PRINTF_MODIFIER "%d " 59 | #endif 60 | 61 | #ifdef DATA_TYPE_IS_FLOAT 62 | #define DATA_TYPE float 63 | #define DATA_PRINTF_MODIFIER "%0.2f " 64 | #define SCALAR_VAL(x) x##f 65 | #define SQRT_FUN(x) sqrtf(x) 66 | #define EXP_FUN(x) expf(x) 67 | #define POW_FUN(x, y) powf(x, y) 68 | #endif 69 | 70 | #ifdef DATA_TYPE_IS_DOUBLE 71 | #define DATA_TYPE double 72 | #define DATA_PRINTF_MODIFIER "%0.2lf " 73 | #define SCALAR_VAL(x) x 74 | #define SQRT_FUN(x) sqrt(x) 75 | #define EXP_FUN(x) exp(x) 76 | #define POW_FUN(x, y) pow(x, y) 77 | #endif 78 | 79 | #endif /* !_JACOBI_2D_H */ 80 | -------------------------------------------------------------------------------- 
// Per-function hook of the HI_Polly_Info pass (F itself is not referenced in the body).
// The body only requests four analysis results through getAnalysis(), binds them to locals that
// are never read afterwards, and returns false.
// NOTE(review): the template arguments of the getAnalysis() calls are not visible in this copy
// of the source, so the analyses cannot be named with certainty; judging by the accessors, the
// first two are presumably the scalar-evolution and loop-info wrapper passes -- confirm against
// HI_Polly_Info.h and getAnalysisUsage().
bool HI_Polly_Info::runOnFunction(
    Function &F) // This defines runOnFunction (not runOnModule); presumably it overrides the
                 // virtual hook of the pass base class -- confirm in HI_Polly_Info.h.
{

    auto &SE = getAnalysis().getSE();       // SE is not used below
    auto &LI = getAnalysis().getLoopInfo(); // LI is not used below
    auto *LAA = &getAnalysis();             // LAA is not used below
    auto *PI = &getAnalysis();              // PI is not used below
    return false;                           // constant result; nothing in the body touches F
}

char HI_Polly_Info::ID =
    0; // the ID for pass should be initialized but the value does not matter, since LLVM uses the
       // address of this variable as label instead of its value.
33 | 34 | void HI_Polly_Info::getAnalysisUsage(AnalysisUsage &AU) const 35 | { 36 | AU.setPreservesAll(); 37 | AU.addRequired(); 38 | AU.addRequired(); 39 | 40 | // AU.addRequired(); 41 | // AU.addRequired(); 42 | // AU.addPreserved(); 43 | AU.addRequired(); 44 | AU.addRequired(); 45 | // AU.addPreserved(); 46 | AU.addRequired(); 47 | AU.addRequiredTransitive(); 48 | AU.addRequired(); 49 | AU.addRequiredTransitive(); 50 | AU.addRequired(); 51 | // AU.addPreserved(); 52 | } 53 | -------------------------------------------------------------------------------- /Tests/LLVM_exp10_HI_APIntegerAnalysis/README: -------------------------------------------------------------------------------- 1 | LLVM_exp10_HI_APIntegerAnalysis is used to test the frontend action HI_APIntegerAnalysis 2 | 3 | HI_APIntegerAnalysis is based on Clang LibTooling 4 | 5 | 6 | The test can be run with the following command: 7 | 8 | ./LLVM_expXXXXX 9 | 10 | It implements a frontend action to find the declarations of arbitrary-precision integers and add comments in the source code to mark those declarations. 11 | 12 | The rewriter should be passed by reference rather than by pointer; otherwise errors will occur. 13 | 14 | Such implementation is based on the following hierarchy: 15 | 16 | 17 | // According to the official template of Clang, this is a frontend factory with function createASTConsumer(), which 18 | // will generate an AST consumer. We can first create a rewriter and pass the reference of the 19 | // rewriter to the factory. Finally, we can pass the rewriter reference to the inner visitor. 
20 | // rewriter -> factory -> frontend-action -> ASTconsumer -> Visitor 21 | 22 | // declare a rewriter 23 | // | pass the reference to 24 | // create V 25 | // frontend Factory -----> FrontEnd Action 26 | // | | create / pass the rewriter 27 | // | Src Code V 28 | // -------------> AST consumer 29 | // | 30 | // | generate AST 31 | // V 32 | // Visitor (visit the nodes in AST and do the rewriting) 33 | 34 | IMPORTANT: Compare the differences between the following two implementations 35 | 36 | // virtual void HandleTranslationUnit(ASTContext &Context) 37 | // { 38 | // visitor.TraverseDecl(Context.getTranslationUnitDecl()); 39 | // } 40 | 41 | bool HandleTopLevelDecl(DeclGroupRef DR) override 42 | { 43 | for (DeclGroupRef::iterator b = DR.begin(), e = DR.end(); b != e; ++b) 44 | { 45 | visitor.TraverseDecl(*b); //(*b)->dump(); 46 | } 47 | return true; 48 | } -------------------------------------------------------------------------------- /Tests/LLVM_exp14_HI_WithDirectiveTimingResourceEvaluation/README: -------------------------------------------------------------------------------- 1 | HI_WithDirectiveTimingResourceEvaluation pass is used to evaluate the timing and resource of the application, considering the effect of HLS directives. 2 | 3 | Here, I mainly consider array partitioning, loop pipelining and loop unrolling. 4 | 5 | Basic implementation idea: 6 | 7 | 0. Map some information between the IR and the source code, so that we can determine the outermost size of the arrays in the function parameters and set the directives of loops. 8 | 9 | 1. Array partitioning: Using SCEV, the pass can get the pattern of array accesses. In the analysis of array partitioning, we emulate the loop and find which partitions each access will touch during the entire execution of the loop. Based on the access pattern, the map from accesses to partitions can be determined. According to the map and the port limitation of BRAMs, we can schedule the array accesses. 
Please note that we do not split arrays in the IR but just record the accesses and their corresponding partitions for scheduling. We suppose that the array partitioning should be done at the back-end (CodeGen). 10 | 11 | 2. Loop unrolling: we mimic the procedure of the loop unrolling passes from LLVM and we transform the IR before we do the timing/resource evaluation. By doing so, we can exactly check which components among the unrolled iterations can be reused, e.g. address calculation. Moreover, some BRAM access optimizations can be applied to remove the redundant accesses to BRAM. Finally, after the IR transformation, we can exactly check the access patterns. All these situations cannot be analyzed by using mathematical models like the one proposed by COMBA. 12 | 13 | 3. Loop pipelining: since we cannot implement pipelining at the IR level, we use loop labels to mark which loops should be pipelined. During the evaluation, the loops requested to be pipelined will be checked and the proper initiation interval will be found. 14 | 15 | 4. Muxes will be inserted for array partitions. 
16 | 17 | 18 | Moreover, many other optimizations are involved for this test: 19 | a) Mul2Shl 20 | b) MulOrderOpt 21 | c) RemoveRedundantAccess 22 | d) SwitchLower -------------------------------------------------------------------------------- /Implementations/HI_FindFunctions/HI_FindFunction.cc: -------------------------------------------------------------------------------- 1 | #include "HI_FindFunction.h" 2 | #include "HI_print.h" 3 | #include "llvm/IR/LegacyPassManager.h" 4 | #include "llvm/IR/Module.h" 5 | #include "llvm/IRReader/IRReader.h" 6 | #include "llvm/Pass.h" 7 | #include "llvm/Support/SourceMgr.h" 8 | #include "llvm/Support/raw_ostream.h" 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | using namespace llvm; 17 | 18 | bool HI_FindFunctions::runOnModule( 19 | Module &M) // The runOnModule declaration will overide the virtual one in ModulePass, which will 20 | // be executed for each Module. 21 | { 22 | for (Module::iterator GI = M.begin(), GE = M.end(); GI != GE; 23 | ++GI) // Module iterator is used to iterate the functions in the module, ++GI is better 24 | // than GI ++, for the sake of stability 25 | { 26 | *Function_Demangle << "Found function Definition named == " << GI->getName() 27 | << "\n"; // a Function class is inherited from Value class, which has a 28 | // function to get the name of the value (function). 
29 | std::string fname(GI->getName()); 30 | if (fname[0] == '_') 31 | { 32 | *Function_Demangle << " its demangled name == " 33 | << DemangleFunctionName(GI->getName()) << "\n"; 34 | Function_Demangle_Map[GI->getName()] = DemangleFunctionName(fname); 35 | } 36 | } 37 | 38 | *Function_Demangle 39 | << "===============printing the module ====================================\n"; 40 | *Function_Demangle << M << "\n"; 41 | Function_Demangle->flush(); 42 | return false; 43 | } 44 | 45 | char HI_FindFunctions::ID = 46 | 0; // the ID for pass should be initialized but the value does not matter, since LLVM uses the 47 | // address of this variable as label instead of its value. 48 | 49 | void HI_FindFunctions::getAnalysisUsage(AnalysisUsage &AU) const 50 | { 51 | AU.setPreservesAll(); 52 | } 53 | -------------------------------------------------------------------------------- /Tests/LLVM_exp10_HI_LoopLabeler/README: -------------------------------------------------------------------------------- 1 | LLVM_exp10_HI_LoopLabeler is used to test the frontend action HI_LoopLabeler, which will set label for loops in the source code. 2 | 3 | HI_LoopLabeler is based on Clang LibTooling 4 | 5 | 6 | The test can be run with the following command: 7 | 8 | ./LLVM_expXXXXX 9 | 10 | 11 | It implement a frontend action to find the declartions of arbitrary precision integer and add comments in the source code to mark those declartions. 12 | 13 | The rewriter shoud be passed as reference but not pointer, otherwise errors will come out. 14 | 15 | Such implementation is based on the following hierarchy: 16 | 17 | 18 | // According the official template of Clang, this is a frontend factory with function createASTConsumer(), which 19 | // will generator a AST consumer. We can first create a rewriter and pass the reference of the 20 | // rewriter to the factory. Finally, we can pass the rewriter reference to the inner visitor. 
21 | // rewriter -> factory -> frontend-action -> ASTconsumer -> Visitor 22 | 23 | // declare a rewriter 24 | // | pass the reference to 25 | // create V 26 | // frontend Factory -----> FrontEnd Action 27 | // | | create / pass the rewriter 28 | // | Src Code V 29 | // -------------> AST consumer 30 | // | 31 | // | generate AST 32 | // V 33 | // Visitor (visit the nodes in AST and do the rewriting) 34 | 35 | IMPORTANT: Compare the differences between the following two implementations 36 | 37 | // virtual void HandleTranslationUnit(ASTContext &Context) 38 | // { 39 | // visitor.TraverseDecl(Context.getTranslationUnitDecl()); 40 | // } 41 | 42 | bool HandleTopLevelDecl(DeclGroupRef DR) override 43 | { 44 | for (DeclGroupRef::iterator b = DR.begin(), e = DR.end(); b != e; ++b) 45 | { 46 | visitor.TraverseDecl(*b); //(*b)->dump(); 47 | } 48 | return true; 49 | } -------------------------------------------------------------------------------- /Implementations/HI_WithDirectiveTimingResourceEvaluation/README.md: -------------------------------------------------------------------------------- 1 | HI_WithDirectiveTimingResourceEvaluation pass is used to evaluate the timing and resource of the application, considering the effect of HLS directives. 2 | 3 | Here, I mainly consider array partitioning (cyclic), loop pipelining and loop unrolling. 4 | 5 | Basic implementation idea: 6 | 0. Mapping some information between IR and source code, so we can determine the outermost size of the array in the parameters of function and set the directives of loops. 7 | 8 | 1. Array partitioning: Using SCEV, the pass can get the pattern of array accesses. In the analysis of array partitioning, we emulate the loop, and find which partitions the access will touch during the entire execution of the loop. Based on the access pattern, the map from accesses to partitions can be determined. According to the map and the port limitation of BRAMs, we can schedule the array accesses. 
Please note that we do not split arrays in IR but just record the accesses and their corresponding partitions for scheduling. We suppose that the array partitioning should be done in the back-end (CodeGen). 9 | 10 | 2. Loop unrolling: we mimic the procedure of loop unrolling passes from LLVM and we transform the IR before we do the timing/resource evaluation. By doing so, we can exactly check which components among the unrolled iterations can be reused, e.g. address calculation. Moreover, some BRAM access optimizations can be applied to remove the redundant accesses to BRAM. Finally, after IR transformation, we can exactly check access patterns. All these situations cannot be analyzed by using mathematical models like the one proposed by COMBA. 11 | 12 | 3. Loop pipelining: since we cannot implement pipelining at the IR level, we use loop labels to mark which loops should be pipelined. During the evaluation, the loop requested to be pipelined will be checked and the proper initiation interval (II) will be found. 13 | 14 | 4. Muxes will be inserted for array partitions. 
15 | 16 | 17 | Moreover, many other optimizations are involved in this test: 18 | a) Mul2Shl 19 | b) MulOrderOpt 20 | c) RemoveRedundantAccess 21 | d) SwitchLower 22 | e) The program will be entirely flattened and re-grouped 23 | -------------------------------------------------------------------------------- /Implementations/HI_TopLoop2Func/HI_TopLoop2Func.cc: -------------------------------------------------------------------------------- 1 | #include "HI_TopLoop2Func.h" 2 | #include "HI_StringProcess.h" 3 | #include "HI_print.h" 4 | #include "llvm/Analysis/LoopInfo.h" 5 | #include "llvm/IR/Function.h" 6 | #include "llvm/IR/Instructions.h" 7 | #include "llvm/IR/LegacyPassManager.h" 8 | #include "llvm/IR/Module.h" 9 | #include "llvm/IRReader/IRReader.h" 10 | #include "llvm/Pass.h" 11 | #include "llvm/Support/SourceMgr.h" 12 | #include "llvm/Support/raw_ostream.h" 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | using namespace llvm; 19 | 20 | bool HI_TopLoop2Func::runOnModule( 21 | Module &M) // The runOnModule declaration will overide the virtual one in ModulePass, which will 22 | // be executed for each Module. 
23 | { 24 | print_status("Running HI_TopLoop2Func pass."); 25 | FuncNames.clear(); 26 | for (auto &F : M) 27 | { 28 | if (F.getName().find("llvm.") != std::string::npos) 29 | continue; 30 | 31 | if (demangleFunctionName(F.getName()) != top_str || 32 | F.getName().find(".") != std::string::npos) 33 | FuncNames.push_back(F.getName()); 34 | 35 | auto &LI = getAnalysis(F).getLoopInfo(); 36 | auto &SE = getAnalysis(F).getSE(); 37 | for (auto Loop : LI.getLoopsInPreorder()) 38 | { 39 | if (Loop->getLoopDepth() == 1) 40 | { 41 | std::string tmp_loop_name = Loop->getHeader()->getParent()->getName(); 42 | tmp_loop_name += "-"; 43 | tmp_loop_name += Loop->getHeader()->getName(); 44 | TopLoopIR2FuncName[tmp_loop_name] = Loop->getHeader()->getParent()->getName(); 45 | } 46 | } 47 | } 48 | 49 | return false; 50 | } 51 | 52 | char HI_TopLoop2Func::ID = 53 | 0; // the ID for pass should be initialized but the value does not matter, since LLVM uses the 54 | // address of this variable as label instead of its value. 55 | 56 | void HI_TopLoop2Func::getAnalysisUsage(AnalysisUsage &AU) const 57 | { 58 | AU.addRequired(); 59 | AU.addRequired(); 60 | AU.setPreservesCFG(); 61 | } -------------------------------------------------------------------------------- /Implementations/HI_AggressiveLSR_MUL/README: -------------------------------------------------------------------------------- 1 | The HI_AggressiveLSR_MUL pass is used to remove unnecessary multiplications 2 | which can be transformed into addition operations if they are actually related 3 | to loop induction variables. These multiplications are usually the array index 4 | variables. 5 | 6 | 7 | 1. find the mul for array offset calculation 8 | 2. get the incremental value by finding the induction variable (addition) and checking its step 9 | 3. insert a new PHI (carefully select the initial constant) 10 | 4. 
replace multiplication with addition 11 | 12 | 13 | example: 14 | 15 | -------------------------------------------------------------- 16 | from 17 | -------------------------------------------------------------- 18 | for.cond31.preheader: ; preds = %for.inc62, %for.cond31.preheader.preheader 19 | %HI.lsr.iv1new26 = phi i9 [ 0, %for.cond31.preheader.preheader ], [ %HI.lsr.iv.next2new49, %for.inc62 ] 20 | %HI.indvars.iv105new27 = phi i5 [ %HI.indvars.iv.next106new48, %for.inc62 ], [ 0, %for.cond31.preheader.preheader ] 21 | %bcast28 = zext i5 %HI.indvars.iv105new27 to i9 22 | %HI.new29 = mul i9 %bcast28, 24 23 | %HI.indvars.iv.next106new48 = add i5 %HI.indvars.iv105new27, 1 24 | %HI.lsr.iv.next2new49 = add i9 %HI.lsr.iv1new26, 18 25 | %HI.exitcond107new50 = icmp eq i5 %HI.indvars.iv.next106new48, -16 26 | br label %for.body33 27 | 28 | -------------------------------------------------------------- 29 | to 30 | -------------------------------------------------------------- 31 | for.cond31.preheader: ; preds = %for.inc62, %for.cond31.preheader.preheader 32 | %HI.lsr.iv1new26 = phi i9 [ 0, %for.cond31.preheader.preheader ], [ %HI.lsr.iv.next2new49, %for.inc62 ] 33 | %HI.indvars.iv105new27 = phi i5 [ %HI.indvars.iv.next106new48, %for.inc62 ], [ 0, %for.cond31.preheader.preheader ] 34 | %bcast28 = zext i5 %HI.indvars.iv105new27 to i9 35 | %HI.new29.PHI = phi i9 [ 0, %for.cond31.preheader.preheader ], [ %HI.new29.Add, %for.inc62 ] 36 | %HI.new29.Add = add i9 %HI.new29.PHI, 24 37 | %HI.indvars.iv.next106new48 = add i5 %HI.indvars.iv105new27, 1 38 | %HI.lsr.iv.next2new49 = add i9 %HI.lsr.iv1new26, 18 39 | %HI.exitcond107new50 = icmp eq i5 %HI.indvars.iv.next106new48, -16 40 | br label %for.body33 41 | 42 | --------------------------------------------------- 43 | -------------------------------------------------------------------------------- /Tests/LLVM_exp0_find_functions/LLVM_exp0_find_functions.cc: 
-------------------------------------------------------------------------------- 1 | #include "HI_FindFunction.h" 2 | #include "HI_print.h" 3 | #include "llvm/IR/LegacyPassManager.h" 4 | #include "llvm/IR/Module.h" 5 | #include "llvm/IRReader/IRReader.h" 6 | #include "llvm/Pass.h" 7 | #include "llvm/Support/SourceMgr.h" 8 | #include "llvm/Support/raw_ostream.h" 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | using namespace llvm; 17 | 18 | void ReplaceAll(std::string &strSource, const std::string &strOld, const std::string &strNew) 19 | { 20 | int nPos = 0; 21 | while ((nPos = strSource.find(strOld, nPos)) != strSource.npos) 22 | { 23 | strSource.replace(nPos, strOld.length(), strNew); 24 | nPos += strNew.length(); 25 | } 26 | } 27 | 28 | int main(int argc, char **argv) 29 | { 30 | if (argc < 2) 31 | { 32 | errs() << "Usage: " << argv[0] << " \n"; 33 | return 1; 34 | } 35 | 36 | // Compile the source code into IR and Parse the input LLVM IR file into a module 37 | SMDiagnostic Err; 38 | LLVMContext Context; 39 | std::string cmd_str = "clang -O1 -emit-llvm -S " + std::string(argv[1]) + " -o top.bc"; 40 | print_cmd(cmd_str.c_str()); 41 | system(cmd_str.c_str()); 42 | 43 | std::unique_ptr Mod(parseIRFile("top.bc", Err, Context)); 44 | if (!Mod) 45 | { 46 | Err.print(argv[0], errs()); 47 | return 1; 48 | } 49 | 50 | // Create a pass manager and fill it with the passes we want to run. 
51 | legacy::PassManager PM; 52 | HI_FindFunctions *hi_findfunctions = new HI_FindFunctions(); 53 | PM.add(hi_findfunctions); 54 | PM.run(*Mod); 55 | 56 | std::ifstream infile("top.bc"); 57 | std::string line; 58 | std::ofstream outfile("ttoopp.ll"); 59 | 60 | while (std::getline(infile, line)) 61 | { 62 | 63 | for (auto it = hi_findfunctions->Function_Demangle_Map.begin(); 64 | it != hi_findfunctions->Function_Demangle_Map.end(); it++) 65 | { 66 | if (line.find(it->first) != std::string::npos) 67 | { 68 | ReplaceAll(line, it->first, it->second); 69 | break; 70 | } 71 | } 72 | outfile << line << std::endl; 73 | } 74 | outfile.close(); 75 | infile.close(); 76 | 77 | return 0; 78 | } 79 | -------------------------------------------------------------------------------- /Tests/LLVM_exp12c_HI_AggressiveLSR_MUL/README: -------------------------------------------------------------------------------- 1 | The HI_AggressiveLSR_MUL pass is used to remove unnecessary multiplications 2 | which can be transformed into addition operations if they are actually related 3 | to loop induction variables. These multiplications are usually the array index 4 | variables. 5 | 6 | The test can be run with the following command: 7 | 8 | ./LLVM_expxxxxx 9 | 10 | 1. find the mul for array offset calculation 11 | 2. get the incremental value by using SCEV 12 | 3. insert a new PHI (carefully select the initial constant) 13 | 4. 
replace multiplication with addition 14 | 15 | 16 | example: 17 | 18 | -------------------------------------------------------------- 19 | from 20 | -------------------------------------------------------------- 21 | for.cond31.preheader: ; preds = %for.inc62, %for.cond31.preheader.preheader 22 | %HI.lsr.iv1new26 = phi i9 [ 0, %for.cond31.preheader.preheader ], [ %HI.lsr.iv.next2new49, %for.inc62 ] 23 | %HI.indvars.iv105new27 = phi i5 [ %HI.indvars.iv.next106new48, %for.inc62 ], [ 0, %for.cond31.preheader.preheader ] 24 | %bcast28 = zext i5 %HI.indvars.iv105new27 to i9 25 | %HI.new29 = mul i9 %bcast28, 24 26 | %HI.indvars.iv.next106new48 = add i5 %HI.indvars.iv105new27, 1 27 | %HI.lsr.iv.next2new49 = add i9 %HI.lsr.iv1new26, 18 28 | %HI.exitcond107new50 = icmp eq i5 %HI.indvars.iv.next106new48, -16 29 | br label %for.body33 30 | 31 | -------------------------------------------------------------- 32 | to 33 | -------------------------------------------------------------- 34 | for.cond31.preheader: ; preds = %for.inc62, %for.cond31.preheader.preheader 35 | %HI.lsr.iv1new26 = phi i9 [ 0, %for.cond31.preheader.preheader ], [ %HI.lsr.iv.next2new49, %for.inc62 ] 36 | %HI.indvars.iv105new27 = phi i5 [ %HI.indvars.iv.next106new48, %for.inc62 ], [ 0, %for.cond31.preheader.preheader ] 37 | %bcast28 = zext i5 %HI.indvars.iv105new27 to i9 38 | %HI.new29.PHI = phi i9 [ 0, %for.cond31.preheader.preheader ], [ %HI.new29.Add, %for.inc62 ] 39 | %HI.new29.Add = add i9 %HI.new29.PHI, 24 40 | %HI.indvars.iv.next106new48 = add i5 %HI.indvars.iv105new27, 1 41 | %HI.lsr.iv.next2new49 = add i9 %HI.lsr.iv1new26, 18 42 | %HI.exitcond107new50 = icmp eq i5 %HI.indvars.iv.next106new48, -16 43 | br label %for.body33 44 | 45 | --------------------------------------------------- 46 | -------------------------------------------------------------------------------- /Implementations/HI_LoopLabeler/HI_LoopLabeler.cc: -------------------------------------------------------------------------------- 1 | 
2 | #include "HI_LoopLabeler.h" 3 | 4 | // print the detailed information of the type 5 | void HI_LoopLabeler_Visitor::printTypeInfo(const clang::Type *T) 6 | { 7 | 8 | *parseLog << " TypeClassName: " << T->getTypeClassName() << " "; 9 | if (const BuiltinType *BiT = dyn_cast(T)) 10 | { 11 | *parseLog << " BuiltinType : " << BiT->getNameAsCString(PP()) << "\n"; 12 | } 13 | else if (const RecordType *RT = dyn_cast(T)) 14 | { 15 | // if (ST->isStructureType()) 16 | *parseLog << " RecordType : " << RT->getDecl()->getKindName() << "\n"; 17 | } 18 | else if (const TagType *TT = dyn_cast(T)) 19 | { 20 | // if (ST->isStructureType()) 21 | *parseLog << " TagType : " << TT->getDecl()->getKindName() << " at " 22 | << TT->getDecl()->getBeginLoc().printToString(CI.getSourceManager()) << " \n"; 23 | } 24 | else if (const TemplateSpecializationType *TST = dyn_cast(T)) 25 | { 26 | // if (ST->isStructureType()) 27 | *parseLog << " TemplateSpecializationType : "; 28 | TemplateName TN = TST->getTemplateName(); 29 | TN.print(*parseLog, PP()); 30 | *parseLog << " with args: "; 31 | auto Args = llvm::makeArrayRef(TST->getArgs(), TST->getNumArgs()); 32 | for (auto arg : Args) 33 | { 34 | arg.print(PP(), *parseLog); 35 | *parseLog << " "; 36 | } 37 | *parseLog << " \n"; 38 | } 39 | parseLog->flush(); 40 | } 41 | 42 | // check whether it is a template structure like XXXX 43 | bool HI_LoopLabeler_Visitor::isAPInt(VarDecl *VD) 44 | { 45 | const Type *T = VD->getType().getTypePtr(); 46 | if (const TemplateSpecializationType *TST = dyn_cast(T)) 47 | { 48 | return true; 49 | } 50 | return false; 51 | } 52 | 53 | // get tht template name 54 | std::string HI_LoopLabeler_Visitor::getAPIntName(VarDecl *VD) 55 | { 56 | const Type *T = VD->getType().getTypePtr(); 57 | if (const TemplateSpecializationType *TST = dyn_cast(T)) 58 | { 59 | std::string tmp = ""; 60 | llvm::raw_string_ostream APIntName(tmp); 61 | TST->getTemplateName().print(APIntName, PP()); 62 | return APIntName.str(); 63 | } 64 | return 
"NULL"; 65 | } -------------------------------------------------------------------------------- /Implementations/HI_APIntSrcAnalysis/HI_APIntSrcAnalysis.cc: -------------------------------------------------------------------------------- 1 | 2 | #include "HI_APIntSrcAnalysis.h" 3 | 4 | // print the detailed information of the type 5 | void HI_APIntSrcAnalysis_Visitor::printTypeInfo(const Type *T) 6 | { 7 | 8 | *parseLog << " TypeClassName: " << T->getTypeClassName() << " "; 9 | if (const BuiltinType *BiT = dyn_cast(T)) 10 | { 11 | *parseLog << " BuiltinType : " << BiT->getNameAsCString(PP()) << "\n"; 12 | } 13 | else if (const RecordType *RT = dyn_cast(T)) 14 | { 15 | // if (ST->isStructureType()) 16 | *parseLog << " RecordType : " << RT->getDecl()->getKindName() << "\n"; 17 | } 18 | else if (const TagType *TT = dyn_cast(T)) 19 | { 20 | // if (ST->isStructureType()) 21 | *parseLog << " TagType : " << TT->getDecl()->getKindName() << " at " 22 | << TT->getDecl()->getBeginLoc().printToString(CI.getSourceManager()) << " \n"; 23 | } 24 | else if (const TemplateSpecializationType *TST = dyn_cast(T)) 25 | { 26 | // if (ST->isStructureType()) 27 | *parseLog << " TemplateSpecializationType : "; 28 | TemplateName TN = TST->getTemplateName(); 29 | TN.print(*parseLog, PP()); 30 | *parseLog << " with args: "; 31 | auto Args = llvm::makeArrayRef(TST->getArgs(), TST->getNumArgs()); 32 | for (auto arg : Args) 33 | { 34 | arg.print(PP(), *parseLog); 35 | *parseLog << " "; 36 | } 37 | *parseLog << " \n"; 38 | } 39 | parseLog->flush(); 40 | } 41 | 42 | // check whether it is a template structure like XXXX 43 | bool HI_APIntSrcAnalysis_Visitor::isAPInt(VarDecl *VD) 44 | { 45 | const Type *T = VD->getType().getTypePtr(); 46 | if (const TemplateSpecializationType *TST = dyn_cast(T)) 47 | { 48 | return true; 49 | } 50 | return false; 51 | } 52 | 53 | // get tht template name 54 | std::string HI_APIntSrcAnalysis_Visitor::getAPIntName(VarDecl *VD) 55 | { 56 | const Type *T = 
VD->getType().getTypePtr(); 57 | if (const TemplateSpecializationType *TST = dyn_cast(T)) 58 | { 59 | std::string tmp = ""; 60 | llvm::raw_string_ostream APIntName(tmp); 61 | TST->getTemplateName().print(APIntName, PP()); 62 | return APIntName.str(); 63 | } 64 | return "NULL"; 65 | } -------------------------------------------------------------------------------- /Implementations/HI_FunctionInterfaceInfo/HI_FunctionInterfaceInfo.cc: -------------------------------------------------------------------------------- 1 | 2 | #include "HI_FunctionInterfaceInfo.h" 3 | 4 | // print the detailed information of the type 5 | void HI_FunctionInterfaceInfo_Visitor::printTypeInfo(const clang::Type *T) 6 | { 7 | 8 | *parseLog << " TypeClassName: " << T->getTypeClassName() << " "; 9 | if (const BuiltinType *BiT = dyn_cast(T)) 10 | { 11 | *parseLog << " BuiltinType : " << BiT->getNameAsCString(PP()) << "\n"; 12 | } 13 | else if (const RecordType *RT = dyn_cast(T)) 14 | { 15 | // if (ST->isStructureType()) 16 | *parseLog << " RecordType : " << RT->getDecl()->getKindName() << "\n"; 17 | } 18 | else if (const TagType *TT = dyn_cast(T)) 19 | { 20 | // if (ST->isStructureType()) 21 | *parseLog << " TagType : " << TT->getDecl()->getKindName() << " at " 22 | << TT->getDecl()->getBeginLoc().printToString(CI.getSourceManager()) << " \n"; 23 | } 24 | else if (const TemplateSpecializationType *TST = dyn_cast(T)) 25 | { 26 | // if (ST->isStructureType()) 27 | *parseLog << " TemplateSpecializationType : "; 28 | TemplateName TN = TST->getTemplateName(); 29 | TN.print(*parseLog, PP()); 30 | *parseLog << " with args: "; 31 | auto Args = llvm::makeArrayRef(TST->getArgs(), TST->getNumArgs()); 32 | for (auto arg : Args) 33 | { 34 | arg.print(PP(), *parseLog); 35 | *parseLog << " "; 36 | } 37 | *parseLog << " \n"; 38 | } 39 | parseLog->flush(); 40 | } 41 | 42 | // check whether it is a template structure like XXXX 43 | bool HI_FunctionInterfaceInfo_Visitor::isAPInt(VarDecl *VD) 44 | { 45 | const Type 
*T = VD->getType().getTypePtr(); 46 | if (const TemplateSpecializationType *TST = dyn_cast(T)) 47 | { 48 | return true; 49 | } 50 | return false; 51 | } 52 | 53 | // get tht template name 54 | std::string HI_FunctionInterfaceInfo_Visitor::getAPIntName(VarDecl *VD) 55 | { 56 | const Type *T = VD->getType().getTypePtr(); 57 | if (const TemplateSpecializationType *TST = dyn_cast(T)) 58 | { 59 | std::string tmp = ""; 60 | llvm::raw_string_ostream APIntName(tmp); 61 | TST->getTemplateName().print(APIntName, PP()); 62 | return APIntName.str(); 63 | } 64 | return "NULL"; 65 | } -------------------------------------------------------------------------------- /App/deriche/deriche_int.cc: -------------------------------------------------------------------------------- 1 | #define W 64 2 | #define H 64 3 | 4 | /* Main computational kernel. The whole function will be timed, 5 | including the call and return. */ 6 | /* Original code provided by Gael Deest */ 7 | 8 | void kernel_deriche(int w, int h, int alpha, int imgIn[W][H], int imgOut[W][H], int y1[W][H], 9 | int y2[W][H], int xm1, int tm1, int ym1, int ym2, int xp1, int xp2, int tp1, 10 | int tp2, int yp1, int yp2, 11 | 12 | int k, int a1, int a2, int a3, int a4, int a5, int a6, int a7, int a8, int b1, 13 | int b2, int c1, int c2) 14 | { 15 | int i, j; 16 | 17 | for (i = 0; i < W; i++) 18 | { 19 | ym1 = (int)(0); 20 | ym2 = (int)(0); 21 | xm1 = (int)(0); 22 | for (j = 0; j < H; j++) 23 | { 24 | y1[i][j] = a1 * imgIn[i][j] + a2 * xm1 + b1 * ym1 + b2 * ym2; 25 | xm1 = imgIn[i][j]; 26 | ym2 = ym1; 27 | ym1 = y1[i][j]; 28 | } 29 | } 30 | 31 | for (i = 0; i < W; i++) 32 | { 33 | yp1 = (int)(0); 34 | yp2 = (int)(0); 35 | xp1 = (int)(0); 36 | xp2 = (int)(0); 37 | for (j = H - 1; j >= 0; j--) 38 | { 39 | y2[i][j] = a3 * xp1 + a4 * xp2 + b1 * yp1 + b2 * yp2; 40 | xp2 = xp1; 41 | xp1 = imgIn[i][j]; 42 | yp2 = yp1; 43 | yp1 = y2[i][j]; 44 | } 45 | } 46 | 47 | for (i = 0; i < W; i++) 48 | for (j = 0; j < H; j++) 49 | { 50 | 
imgOut[i][j] = c1 * (y1[i][j] + y2[i][j]); 51 | } 52 | 53 | for (j = 0; j < H; j++) 54 | { 55 | tm1 = (int)(0); 56 | ym1 = (int)(0); 57 | ym2 = (int)(0); 58 | for (i = 0; i < W; i++) 59 | { 60 | y1[i][j] = a5 * imgOut[i][j] + a6 * tm1 + b1 * ym1 + b2 * ym2; 61 | tm1 = imgOut[i][j]; 62 | ym2 = ym1; 63 | ym1 = y1[i][j]; 64 | } 65 | } 66 | 67 | for (j = 0; j < H; j++) 68 | { 69 | tp1 = (int)(0); 70 | tp2 = (int)(0); 71 | yp1 = (int)(0); 72 | yp2 = (int)(0); 73 | for (i = W - 1; i >= 0; i--) 74 | { 75 | y2[i][j] = a7 * tp1 + a8 * tp2 + b1 * yp1 + b2 * yp2; 76 | tp2 = tp1; 77 | tp1 = imgOut[i][j]; 78 | yp2 = yp1; 79 | yp1 = y2[i][j]; 80 | } 81 | } 82 | 83 | for (i = 0; i < W; i++) 84 | for (j = 0; j < H; j++) 85 | imgOut[i][j] = c2 * (y1[i][j] + y2[i][j]); 86 | } 87 | -------------------------------------------------------------------------------- /Tests/Light_HLS_Top/README: -------------------------------------------------------------------------------- 1 | Light-HLS is implemented as a light-weight HLS framework for academic exploration, proposed in the following paper: 2 | 3 | Hi-ClockFlow: Multi-Clock Dataflow Automation and Throughput Optimization in High-Level Synthesis. IEEE/ACM 2019 International Conference On Computer Aided Design (ICCAD) 4 | 5 | 6 | 7 | This example of Light-HLS is used to evaluate the timing and resource of the application, considering the effect of HLS directives. 8 | 9 | run with the command: 10 | 11 | ./Light_HLS_Top [DEBUG] 12 | 13 | 14 | Here, We mainly consider dataflow, array partitioning, loop pipelining and loop unrolling. 15 | 16 | Basic implementation idea: 17 | 18 | 0. Mapping some information between IR and source code, so we can determine the outermost size of the array in the parameters of function and set the directives of loops. 19 | 20 | 1. Array partitioning: Using SCEV, the pass can get the pattern of array accesses. 
In the analysis of array partitioning, we emulate the loop, and find which partitions the access will touch during the entire execution of the loop. Based on the access pattern, the map from accesses to partitions can be determined. According to the map and the port limitation of BRAMs, we can schedule the array accesses. Please note that we do not split arrays in IR but just record the accesses and their corresponding partitions for scheduling. We suppose that the array partitioning should be done in the back-end (CodeGen). 21 | 22 | 2. Loop unrolling: we mimic the procedure of loop unrolling passes from LLVM and we transform the IR before we do the timing/resource evaluation. By doing so, we can exactly check which components among the unrolled iterations can be reused, e.g. address calculation. Moreover, some BRAM access optimizations can be applied to remove the redundant accesses to BRAM. Finally, after IR transformation, we can exactly check access patterns. All these situations cannot be analyzed by using mathematical models like the one proposed by COMBA. 23 | 24 | 3. Loop pipelining: since we cannot implement pipelining at the IR level, we use loop labels to mark which loops should be pipelined. During the evaluation, the loop requested to be pipelined will be checked and the proper initiation interval (II) will be found. 25 | 26 | 4. Muxes will be inserted for array partitions. 
27 | 28 | 29 | Moreover, many other optimizations are involved in this test: 30 | a) Mul2Shl 31 | b) MulOrderOpt 32 | c) RemoveRedundantAccess 33 | d) SwitchLower 34 | e) The program will be entirely flattened and re-grouped 35 | -------------------------------------------------------------------------------- /Tests/LLVM_Learner_Libs/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | project(LLVM_Learner_Libs) 2 | 3 | set(Outputname "LLVM_Learner_Libs") 4 | 5 | cmake_minimum_required(VERSION 3.10) 6 | 7 | find_package(LLVM REQUIRED CONFIG) 8 | find_package(Polly) 9 | find_package(Clang) 10 | 11 | message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}") 12 | message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}") 13 | 14 | message(STATUS "Using PollyConfig.cmake in: ${Polly_CMAKE_DIR}") 15 | message(STATUS "Using ClangConfig.cmake in: ${CLANG_CMAKE_DIR}") 16 | # Needed to use support library 17 | 18 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti -std=c++14 -g") 19 | 20 | include_directories(${Clang_INCLUDE_DIRS}) 21 | include_directories(${LLVM_INCLUDE_DIRS}) 22 | include_directories(${Polly_INCLUDE_DIRS}) 23 | 24 | include_directories(./ ../../Implementations/ 25 | ../../Implementations/HI_FindFunctions 26 | ../../Implementations/HI_print 27 | ../../Implementations/HI_LoopInFormationCollect 28 | ../../Implementations/HI_DependenceList 29 | ../../Implementations/HI_SysExec 30 | ../../Implementations/HI_Polly_Info 31 | ../../Implementations/HI_LoopDependenceAnalysis 32 | ../../Implementations/HI_SimpleTimingEvaluation 33 | ../../Implementations/HI_GEP_OffsetCombine 34 | ../../Implementations/HI_DuplicateInstRm 35 | ../../Implementations/HI_VarWidthReduce 36 | ../../Implementations/HI_SeparateConstOffsetFromGEP 37 | ../../Implementations/HI_APIntSrcAnalysis 38 | ../../Implementations/HI_NoDirectiveTimingResourceEvaluation 39 | ../../Implementations/HI_IntstructionMoveBackward 40 | 
../../Implementations/HI_HLSDuplicateInstRm 41 | ../../Implementations/HI_AggressiveLSR_MUL 42 | ../../Implementations/HI_ArrayAccessPattern 43 | ../../Implementations/HI_WithDirectiveTimingResourceEvaluation 44 | ../../Implementations/HI_StringProcess 45 | ../../Implementations/HI_FunctionInstantiation 46 | ../../Implementations/HI_ReplaceSelectAccess 47 | ../../Implementations/HI_IR2SourceCode 48 | ../../Implementations/HI_LoopUnroll 49 | ../../Implementations/HI_RemoveRedundantAccess 50 | ../../Implementations/HI_LoopLabeler 51 | ../../Implementations/HI_FunctionInterfaceInfo 52 | ../../Implementations/HI_Mul2Shl 53 | ../../Implementations/HI_MulOrderOpt 54 | ../../Implementations/HI_MuxInsertionArrayPartition 55 | ../../Implementations/HI_DSE_WithDirectiveTimingResourceEvaluation 56 | ../../Implementations/HI_LoadALAP 57 | ../../Implementations/HI_PragmaTargetExtraction 58 | ../../Implementations/HI_ArraySensitiveToLoopLevel 59 | ../../Implementations/HI_TopLoop2Func 60 | ) 61 | 62 | add_subdirectory(../../Implementations ./building) 63 | 64 | -------------------------------------------------------------------------------- /Implementations/HI_Polly_Info/HI_Polly_Info.h: -------------------------------------------------------------------------------- 1 | #ifndef _HI_HI_POLLY_INFO 2 | #define _HI_HI_POLLY_INFO 3 | // related headers should be included. 
4 | #include "HI_print.h" 5 | #include "polly/DependenceInfo.h" 6 | #include "polly/LinkAllPasses.h" 7 | #include "polly/Options.h" 8 | #include "polly/PolyhedralInfo.h" 9 | #include "polly/ScopInfo.h" 10 | #include "llvm/ADT/Statistic.h" 11 | #include "llvm/Analysis/LoopAccessAnalysis.h" 12 | #include "llvm/Analysis/LoopInfo.h" 13 | #include "llvm/Analysis/LoopPass.h" 14 | #include "llvm/Analysis/ScalarEvolution.h" 15 | #include "llvm/IR/Constants.h" 16 | #include "llvm/IR/Function.h" 17 | #include "llvm/IR/InstrTypes.h" 18 | #include "llvm/IR/Instructions.h" 19 | #include "llvm/IR/Intrinsics.h" 20 | #include "llvm/IR/LLVMContext.h" 21 | #include "llvm/IR/LegacyPassManager.h" 22 | #include "llvm/IR/Module.h" 23 | #include "llvm/IRReader/IRReader.h" 24 | #include "llvm/Pass.h" 25 | #include "llvm/PassAnalysisSupport.h" 26 | #include "llvm/Support/GraphWriter.h" 27 | #include "llvm/Support/SourceMgr.h" 28 | #include "llvm/Support/raw_ostream.h" 29 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" 30 | #include "llvm/Transforms/Utils/Cloning.h" 31 | #include "llvm/Transforms/Utils/LoopUtils.h" 32 | #include "llvm/Transforms/Utils/LoopVersioning.h" 33 | #include "llvm/Transforms/Utils/ValueMapper.h" 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | 41 | using namespace llvm; 42 | 43 | class HI_Polly_Info : public FunctionPass 44 | { 45 | public: 46 | HI_Polly_Info(const char *Loop_out_file) : FunctionPass(ID) 47 | { 48 | Loop_Counter = 0; 49 | Loop_out = new raw_fd_ostream(Loop_out_file, ErrInfo, sys::fs::F_None); 50 | } // define a pass, which can be inherited from ModulePass, LoopPass, FunctionPass and etc. 
51 | ~HI_Polly_Info() 52 | { 53 | Loop_out->flush(); 54 | delete Loop_out; 55 | } 56 | virtual bool doInitialization(Module &M) 57 | { 58 | print_status("Initilizing HI_Polly_Info pass."); 59 | return false; 60 | } 61 | void getAnalysisUsage(AnalysisUsage &AU) const; 62 | virtual bool runOnFunction(Function &F); 63 | 64 | static char ID; 65 | 66 | int Loop_Counter; 67 | 68 | std::map Loop_id; 69 | std::map *> Loop2Blocks; 70 | std::error_code ErrInfo; 71 | raw_ostream *Loop_out; 72 | 73 | /// Timer 74 | 75 | struct timeval tv_begin; 76 | struct timeval tv_end; 77 | }; 78 | #endif 79 | -------------------------------------------------------------------------------- /Tests/LLVM_exp3_loop_info_extraction/LLVM_exp3_loop_info_extraction.h: -------------------------------------------------------------------------------- 1 | #include "HI_DependenceList.h" 2 | #include "HI_FindFunction.h" 3 | #include "HI_LoopInFormationCollect.h" 4 | #include "HI_SysExec.h" 5 | #include "HI_print.h" 6 | #include "llvm/ADT/DepthFirstIterator.h" 7 | #include "llvm/ADT/ScopeExit.h" 8 | #include "llvm/ADT/SetOperations.h" 9 | #include "llvm/ADT/SetVector.h" 10 | #include "llvm/ADT/SmallVector.h" 11 | #include "llvm/ADT/Statistic.h" 12 | #include "llvm/Analysis/AliasAnalysis.h" 13 | #include "llvm/Analysis/AssumptionCache.h" 14 | #include "llvm/Analysis/BasicAliasAnalysis.h" 15 | #include "llvm/Analysis/DependenceAnalysis.h" 16 | #include "llvm/Analysis/GlobalsModRef.h" 17 | #include "llvm/Analysis/InstructionSimplify.h" 18 | #include "llvm/Analysis/LoopAnalysisManager.h" 19 | #include "llvm/Analysis/LoopInfo.h" 20 | #include "llvm/Analysis/LoopPass.h" 21 | #include "llvm/Analysis/MustExecute.h" 22 | #include "llvm/Analysis/ScalarEvolution.h" 23 | #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" 24 | #include "llvm/Analysis/ScalarEvolutionExpander.h" 25 | #include "llvm/Analysis/ScalarEvolutionExpressions.h" 26 | #include "llvm/Analysis/TargetTransformInfo.h" 27 | #include 
"llvm/Analysis/ValueTracking.h" 28 | #include "llvm/Bitcode/BitcodeWriter.h" 29 | #include "llvm/IR/CFG.h" 30 | #include "llvm/IR/Constants.h" 31 | #include "llvm/IR/DIBuilder.h" 32 | #include "llvm/IR/DataLayout.h" 33 | #include "llvm/IR/DomTreeUpdater.h" 34 | #include "llvm/IR/Dominators.h" 35 | #include "llvm/IR/Function.h" 36 | #include "llvm/IR/IRPrintingPasses.h" 37 | #include "llvm/IR/Instructions.h" 38 | #include "llvm/IR/IntrinsicInst.h" 39 | #include "llvm/IR/LLVMContext.h" 40 | #include "llvm/IR/LegacyPassManager.h" 41 | #include "llvm/IR/Module.h" 42 | #include "llvm/IR/PatternMatch.h" 43 | #include "llvm/IR/Type.h" 44 | #include "llvm/IR/ValueHandle.h" 45 | #include "llvm/IRReader/IRReader.h" 46 | #include "llvm/Pass.h" 47 | #include "llvm/Support/Debug.h" 48 | #include "llvm/Support/SourceMgr.h" 49 | #include "llvm/Support/raw_ostream.h" 50 | #include "llvm/Transforms/Scalar.h" 51 | #include "llvm/Transforms/Utils.h" 52 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" 53 | #include "llvm/Transforms/Utils/Local.h" 54 | #include "llvm/Transforms/Utils/LoopSimplify.h" 55 | #include "llvm/Transforms/Utils/LoopUtils.h" 56 | #include 57 | #include 58 | #include 59 | #include 60 | #include 61 | #include 62 | #include 63 | #include 64 | #include -------------------------------------------------------------------------------- /HLS_Data_Lib/dadd/dadd: -------------------------------------------------------------------------------- 1 | -1 -1 -1 3 3 1126 1104 15 3.69 1 DAddSub 2 | -1 -1 -1 3 3 1126 1104 15 3.69 1 DAddSub 3 | -1 -1 -1 4 3 1126 1104 15 3.69 1 DAddSub 4 | -1 -1 -1 4 3 1126 1104 15 3.69 1 DAddSub 5 | -1 -1 -1 5 3 979 1169 12 4.24 1 DAddSub 6 | -1 -1 -1 5 3 979 1169 12 4.24 1 DAddSub 7 | -1 -1 -1 6 3 754 1084 9 5.19 1 DAddSub 8 | -1 -1 -1 6 3 754 1084 9 5.19 1 DAddSub 9 | -1 -1 -1 7 3 686 1091 7 5.52 1 DAddSub 10 | -1 -1 -1 7 3 686 1091 7 5.52 1 DAddSub 11 | -1 -1 -1 8 3 686 1091 7 5.52 1 DAddSub 12 | -1 -1 -1 8 3 686 1091 7 5.52 1 DAddSub 13 | -1 
-1 -1 9 3 686 1091 7 5.52 1 DAddSub 14 | -1 -1 -1 9 3 686 1091 7 5.52 1 DAddSub 15 | -1 -1 -1 10 3 509 1165 5 8.37 1 DAddSub 16 | -1 -1 -1 10 3 509 1165 5 8.37 1 DAddSub 17 | -1 -1 -1 12.5 3 445 1144 4 9.54 1 DAddSub 18 | -1 -1 -1 12.5 3 445 1144 4 9.54 1 DAddSub 19 | -1 -1 -1 15 3 445 1144 4 9.54 1 DAddSub 20 | -1 -1 -1 15 3 445 1144 4 9.54 1 DAddSub 21 | -1 -1 -1 16 3 432 1125 3 13.6 1 DAddSub 22 | -1 -1 -1 16 3 432 1125 3 13.6 1 DAddSub 23 | -1 -1 -1 17.5 3 432 1125 3 13.6 1 DAddSub 24 | -1 -1 -1 17.5 3 432 1125 3 13.6 1 DAddSub 25 | -1 -1 -1 20 3 432 1125 3 13.6 1 DAddSub 26 | -1 -1 -1 20 3 432 1125 3 13.6 1 DAddSub 27 | -1 -1 -1 25 3 432 1125 3 13.6 1 DAddSub 28 | -1 -1 -1 25 3 432 1125 3 13.6 1 DAddSub 29 | -1 -1 -1 30 3 342 1065 2 24.8 1 DAddSub 30 | -1 -1 -1 30 3 342 1065 2 24.8 1 DAddSub 31 | -1 -1 -1 3 3 2674 2119 15 3.64 1 DAddSub 32 | -1 -1 -1 3 3 2674 2119 15 3.64 1 DAddSub 33 | -1 -1 -1 4 3 2674 2119 15 3.64 1 DAddSub 34 | -1 -1 -1 4 3 2674 2119 15 3.64 1 DAddSub 35 | -1 -1 -1 5 3 1961 1637 13 4.36 1 DAddSub 36 | -1 -1 -1 5 3 1961 1637 13 4.36 1 DAddSub 37 | -1 -1 -1 6 3 904 1476 8 5.07 1 DAddSub 38 | -1 -1 -1 6 3 904 1476 8 5.07 1 DAddSub 39 | -1 -1 -1 7 3 891 1453 7 5.76 1 DAddSub 40 | -1 -1 -1 7 3 891 1453 7 5.76 1 DAddSub 41 | -1 -1 -1 8 3 878 1453 7 5.76 1 DAddSub 42 | -1 -1 -1 8 3 878 1453 7 5.76 1 DAddSub 43 | -1 -1 -1 9 3 821 1523 6 7.29 1 DAddSub 44 | -1 -1 -1 9 3 821 1523 6 7.29 1 DAddSub 45 | -1 -1 -1 10 3 636 1531 4 8.23 1 DAddSub 46 | -1 -1 -1 10 3 636 1531 4 8.23 1 DAddSub 47 | -1 -1 -1 12.5 3 636 1531 4 8.23 1 DAddSub 48 | -1 -1 -1 12.5 3 636 1531 4 8.23 1 DAddSub 49 | -1 -1 -1 15 3 636 1531 4 8.23 1 DAddSub 50 | -1 -1 -1 15 3 636 1531 4 8.23 1 DAddSub 51 | -1 -1 -1 16 3 636 1531 4 8.23 1 DAddSub 52 | -1 -1 -1 16 3 636 1531 4 8.23 1 DAddSub 53 | -1 -1 -1 17.5 3 621 1512 3 14.7 1 DAddSub 54 | -1 -1 -1 17.5 3 621 1512 3 14.7 1 DAddSub 55 | -1 -1 -1 20 3 621 1512 3 14.7 1 DAddSub 56 | -1 -1 -1 20 3 621 1512 3 14.7 1 DAddSub 57 | -1 -1 -1 
25 3 558 1512 3 14.7 1 DAddSub 58 | -1 -1 -1 25 3 558 1512 3 14.7 1 DAddSub 59 | -1 -1 -1 30 3 533 1447 2 25.8 1 DAddSub 60 | -1 -1 -1 30 3 533 1447 2 25.8 1 DAddSub -------------------------------------------------------------------------------- /Tests/LLVM_exp4_polly_info/LLVM_exp4_polly_info.h: -------------------------------------------------------------------------------- 1 | #include "HI_DependenceList.h" 2 | #include "HI_FindFunction.h" 3 | #include "HI_LoopInFormationCollect.h" 4 | #include "HI_SysExec.h" 5 | #include "HI_print.h" 6 | #include "llvm/ADT/DepthFirstIterator.h" 7 | #include "llvm/ADT/ScopeExit.h" 8 | #include "llvm/ADT/SetOperations.h" 9 | #include "llvm/ADT/SetVector.h" 10 | #include "llvm/ADT/SmallVector.h" 11 | #include "llvm/ADT/Statistic.h" 12 | #include "llvm/Analysis/AliasAnalysis.h" 13 | #include "llvm/Analysis/AssumptionCache.h" 14 | #include "llvm/Analysis/BasicAliasAnalysis.h" 15 | #include "llvm/Analysis/DependenceAnalysis.h" 16 | #include "llvm/Analysis/GlobalsModRef.h" 17 | #include "llvm/Analysis/InstructionSimplify.h" 18 | #include "llvm/Analysis/LoopAnalysisManager.h" 19 | #include "llvm/Analysis/LoopInfo.h" 20 | #include "llvm/Analysis/LoopPass.h" 21 | #include "llvm/Analysis/MustExecute.h" 22 | #include "llvm/Analysis/ScalarEvolution.h" 23 | #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" 24 | #include "llvm/Analysis/ScalarEvolutionExpander.h" 25 | #include "llvm/Analysis/ScalarEvolutionExpressions.h" 26 | #include "llvm/Analysis/TargetTransformInfo.h" 27 | #include "llvm/Analysis/ValueTracking.h" 28 | #include "llvm/Bitcode/BitcodeWriter.h" 29 | #include "llvm/IR/CFG.h" 30 | #include "llvm/IR/Constants.h" 31 | #include "llvm/IR/DIBuilder.h" 32 | #include "llvm/IR/DataLayout.h" 33 | #include "llvm/IR/DomTreeUpdater.h" 34 | #include "llvm/IR/Dominators.h" 35 | #include "llvm/IR/Function.h" 36 | #include "llvm/IR/IRPrintingPasses.h" 37 | #include "llvm/IR/Instructions.h" 38 | #include "llvm/IR/IntrinsicInst.h" 39 | 
#include "llvm/IR/LLVMContext.h" 40 | #include "llvm/IR/LegacyPassManager.h" 41 | #include "llvm/IR/Module.h" 42 | #include "llvm/IR/PatternMatch.h" 43 | #include "llvm/IR/Type.h" 44 | #include "llvm/IR/ValueHandle.h" 45 | #include "llvm/IRReader/IRReader.h" 46 | #include "llvm/Pass.h" 47 | #include "llvm/Support/Debug.h" 48 | #include "llvm/Support/SourceMgr.h" 49 | #include "llvm/Support/raw_ostream.h" 50 | #include "llvm/Transforms/Scalar.h" 51 | #include "llvm/Transforms/Utils.h" 52 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" 53 | #include "llvm/Transforms/Utils/Local.h" 54 | #include "llvm/Transforms/Utils/LoopSimplify.h" 55 | #include "llvm/Transforms/Utils/LoopUtils.h" 56 | #include 57 | #include 58 | #include 59 | #include 60 | #include 61 | #include 62 | #include 63 | #include 64 | #include 65 | // #include "polly/LinkAllPasses.h" 66 | #include "HI_Polly_Info.h" 67 | #include "polly/PolyhedralInfo.h" -------------------------------------------------------------------------------- /App/deriche/deriche.cc: -------------------------------------------------------------------------------- 1 | #define W 64 2 | #define H 64 3 | 4 | /* Main computational kernel. The whole function will be timed, 5 | including the call and return. 
*/ 6 | /* Original code provided by Gael Deest */ 7 | 8 | void kernel_deriche(int w, int h, float alpha, float imgIn[W][H], float imgOut[W][H], 9 | float y1[W][H], float y2[W][H], float xm1, float tm1, float ym1, float ym2, 10 | float xp1, float xp2, float tp1, float tp2, float yp1, float yp2, 11 | 12 | float k, float a1, float a2, float a3, float a4, float a5, float a6, float a7, 13 | float a8, float b1, float b2, float c1, float c2) 14 | { 15 | int i, j; 16 | 17 | for (i = 0; i < W; i++) 18 | { 19 | ym1 = (float)(0.0); 20 | ym2 = (float)(0.0); 21 | xm1 = (float)(0.0); 22 | for (j = 0; j < H; j++) 23 | { 24 | y1[i][j] = a1 * imgIn[i][j] + a2 * xm1 + b1 * ym1 + b2 * ym2; 25 | xm1 = imgIn[i][j]; 26 | ym2 = ym1; 27 | ym1 = y1[i][j]; 28 | } 29 | } 30 | 31 | for (i = 0; i < W; i++) 32 | { 33 | yp1 = (float)(0.0); 34 | yp2 = (float)(0.0); 35 | xp1 = (float)(0.0); 36 | xp2 = (float)(0.0); 37 | for (j = H - 1; j >= 0; j--) 38 | { 39 | y2[i][j] = a3 * xp1 + a4 * xp2 + b1 * yp1 + b2 * yp2; 40 | xp2 = xp1; 41 | xp1 = imgIn[i][j]; 42 | yp2 = yp1; 43 | yp1 = y2[i][j]; 44 | } 45 | } 46 | 47 | for (i = 0; i < W; i++) 48 | for (j = 0; j < H; j++) 49 | { 50 | imgOut[i][j] = c1 * (y1[i][j] + y2[i][j]); 51 | } 52 | 53 | for (j = 0; j < H; j++) 54 | { 55 | tm1 = (float)(0.0); 56 | ym1 = (float)(0.0); 57 | ym2 = (float)(0.0); 58 | for (i = 0; i < W; i++) 59 | { 60 | y1[i][j] = a5 * imgOut[i][j] + a6 * tm1 + b1 * ym1 + b2 * ym2; 61 | tm1 = imgOut[i][j]; 62 | ym2 = ym1; 63 | ym1 = y1[i][j]; 64 | } 65 | } 66 | 67 | for (j = 0; j < H; j++) 68 | { 69 | tp1 = (float)(0.0); 70 | tp2 = (float)(0.0); 71 | yp1 = (float)(0.0); 72 | yp2 = (float)(0.0); 73 | for (i = W - 1; i >= 0; i--) 74 | { 75 | y2[i][j] = a7 * tp1 + a8 * tp2 + b1 * yp1 + b2 * yp2; 76 | tp2 = tp1; 77 | tp1 = imgOut[i][j]; 78 | yp2 = yp1; 79 | yp1 = y2[i][j]; 80 | } 81 | } 82 | 83 | for (i = 0; i < W; i++) 84 | for (j = 0; j < H; j++) 85 | imgOut[i][j] = c2 * (y1[i][j] + y2[i][j]); 86 | } 87 | 
--------------------------------------------------------------------------------