├── .circleci └── config.yml ├── .clang-format ├── CHANGELOG ├── LICENSE.txt ├── MachSuite ├── LICENSE ├── Makefile ├── README.md ├── aes │ └── aes │ │ ├── Makefile │ │ ├── aes.c │ │ ├── aes.h │ │ ├── aes_dir │ │ ├── aes_test.c │ │ ├── check.data │ │ ├── generate.c │ │ ├── hls.tcl │ │ ├── input.data │ │ └── local_support.c ├── backprop │ └── backprop │ │ ├── Makefile │ │ ├── backprop.c │ │ ├── backprop.h │ │ ├── backprop_dir │ │ ├── check.data │ │ ├── generate.c │ │ ├── hls.tcl │ │ ├── input.data │ │ └── local_support.c ├── bfs │ ├── bulk │ │ ├── Makefile │ │ ├── bfs.c │ │ ├── bfs.h │ │ ├── bfs_dir │ │ ├── check.data │ │ ├── generate.c │ │ ├── hls.tcl │ │ ├── input.data │ │ ├── local_support.c │ │ └── matspy.py │ └── queue │ │ ├── Makefile │ │ ├── bfs.c │ │ ├── bfs.h │ │ ├── bfs_dir │ │ ├── check.data │ │ ├── generate.c │ │ ├── hls.tcl │ │ ├── input.data │ │ └── local_support.c ├── common │ ├── Makefile.gem5 │ ├── Makefile.tracer │ ├── data_transfer_utils.h │ ├── harness.c │ ├── support.c │ ├── support.h │ └── test │ │ ├── .gitignore │ │ ├── Makefile │ │ ├── input_sections │ │ └── test_support.c ├── fft │ ├── strided │ │ ├── Makefile │ │ ├── check.data │ │ ├── fft.c │ │ ├── fft.h │ │ ├── fft_dir │ │ ├── fft_test.c │ │ ├── generate.c │ │ ├── hls.tcl │ │ ├── input.data │ │ └── local_support.c │ └── transpose │ │ ├── Makefile │ │ ├── check.data │ │ ├── fft.c │ │ ├── fft.h │ │ ├── fft_dir │ │ ├── fft_test.c │ │ ├── generate.c │ │ ├── hls.tcl │ │ ├── input.data │ │ └── local_support.c ├── gemm │ ├── blocked │ │ ├── Makefile │ │ ├── check.data │ │ ├── gemm.c │ │ ├── gemm.h │ │ ├── gemm_dir │ │ ├── generate.c │ │ ├── hls.tcl │ │ ├── input.data │ │ └── local_support.c │ └── ncubed │ │ ├── Makefile │ │ ├── check.data │ │ ├── gemm.c │ │ ├── gemm.h │ │ ├── gemm_dir │ │ ├── generate.c │ │ ├── hls.tcl │ │ ├── input.data │ │ └── local_support.c ├── kmp │ └── kmp │ │ ├── Makefile │ │ ├── TR.h │ │ ├── TR.txt │ │ ├── check.data │ │ ├── generate.c │ │ ├── hls.tcl │ │ ├── input.data 
│ │ ├── kmp.c │ │ ├── kmp.h │ │ ├── kmp_dir │ │ ├── kmp_test.c │ │ └── local_support.c ├── md │ ├── grid │ │ ├── Makefile │ │ ├── check.data │ │ ├── generate.c │ │ ├── hls.tcl │ │ ├── input.data │ │ ├── local_support.c │ │ ├── md.c │ │ └── md.h │ └── knn │ │ ├── Makefile │ │ ├── check.data │ │ ├── generate.c │ │ ├── hls.tcl │ │ ├── input.data │ │ ├── knn_dir │ │ ├── local_support.c │ │ ├── md.c │ │ ├── md.h │ │ ├── md_kernel_test.c │ │ └── point_stats.py ├── nw │ └── nw │ │ ├── Makefile │ │ ├── check.data │ │ ├── generate.c │ │ ├── hls.tcl │ │ ├── input.data │ │ ├── local_support.c │ │ ├── nw.c │ │ ├── nw.h │ │ ├── nw_dir │ │ └── nw_test.c ├── script │ ├── config.py │ ├── llvm_compile.py │ └── run_aladdin.py ├── sort │ ├── merge │ │ ├── Makefile │ │ ├── check.data │ │ ├── generate.c │ │ ├── hls.tcl │ │ ├── input.data │ │ ├── local_support.c │ │ ├── sort.c │ │ ├── sort.h │ │ └── sort_dir │ └── radix │ │ ├── Makefile │ │ ├── check.data │ │ ├── generate.c │ │ ├── hls.tcl │ │ ├── inline_dir │ │ ├── input.data │ │ ├── local_support.c │ │ ├── sort.c │ │ ├── sort.h │ │ └── sort_dir ├── spmv │ ├── crs │ │ ├── 494_bus.mtx │ │ ├── 494_bus_full.mtx │ │ ├── Makefile │ │ ├── check.data │ │ ├── generate.c │ │ ├── hls.tcl │ │ ├── input.data │ │ ├── local_support.c │ │ ├── spmv.c │ │ ├── spmv.h │ │ ├── spmv_dir │ │ ├── spmv_test.c │ │ └── symmetry.py │ └── ellpack │ │ ├── 494_bus.mtx │ │ ├── 494_bus_full.mtx │ │ ├── Makefile │ │ ├── check.data │ │ ├── generate.c │ │ ├── hls.tcl │ │ ├── input.data │ │ ├── local_support.c │ │ ├── spmv.c │ │ ├── spmv.h │ │ ├── spmv_dir │ │ └── spmv_test.c ├── stencil │ ├── stencil2d │ │ ├── Makefile │ │ ├── check.data │ │ ├── generate.c │ │ ├── hls.tcl │ │ ├── input.data │ │ ├── local_support.c │ │ ├── stencil.c │ │ ├── stencil.h │ │ └── stencil_dir │ └── stencil3d │ │ ├── Makefile │ │ ├── check.data │ │ ├── generate.c │ │ ├── hls.tcl │ │ ├── input.data │ │ ├── local_support.c │ │ ├── stencil.c │ │ ├── stencil.h │ │ └── stencil_dir ├── templates │ 
├── Makefile │ ├── Makefile_template │ ├── generate_template.c │ ├── harness_interface_template.h │ └── support.c ├── v2-status.md └── viterbi │ └── viterbi │ ├── Makefile │ ├── check.data │ ├── generate.c │ ├── hls.tcl │ ├── input.data │ ├── local_support.c │ ├── viterbi.c │ ├── viterbi.h │ ├── viterbi_dir │ └── viterbi_test.c ├── README.md ├── SHOC ├── Makefile ├── bb_gemm │ ├── Makefile │ ├── bb_gemm.c │ └── bb_gemm.h ├── common │ ├── Makefile.common │ ├── Makefile.gem5 │ └── Makefile.tracer ├── fft │ ├── Makefile │ ├── fft.c │ └── fft.h ├── md │ ├── Makefile │ ├── md.c │ └── md.h ├── pp_scan │ ├── Makefile │ ├── pp_scan.c │ └── pp_scan.h ├── reduction │ ├── Makefile │ ├── reduction.c │ └── reduction.h ├── ss_sort │ ├── Makefile │ ├── ss_sort.c │ └── ss_sort.h ├── stencil │ ├── Makefile │ ├── stencil.c │ └── stencil.h └── triad │ ├── Makefile │ ├── example │ └── config_example │ ├── triad.c │ └── triad.h ├── common ├── AladdinExceptions.cpp ├── AladdinExceptions.h ├── BaseDatapath.cpp ├── BaseDatapath.h ├── DDDG.cpp ├── DDDG.h ├── DatabaseConfig.cpp ├── DatabaseConfig.h ├── DatabaseDeps.h ├── DynamicEntity.h ├── ExecNode.h ├── LogicalArray.cpp ├── LogicalArray.h ├── LoopInfo.cpp ├── LoopInfo.h ├── Makefile ├── MemoryType.h ├── Partition.cpp ├── Partition.h ├── Program.cpp ├── Program.h ├── ProgressTracker.h ├── ReadyPartition.cpp ├── ReadyPartition.h ├── Registers.cpp ├── Registers.h ├── Scratchpad.cpp ├── Scratchpad.h ├── ScratchpadDatapath.cpp ├── ScratchpadDatapath.h ├── SourceEntity.h ├── SourceManager.cpp ├── SourceManager.h ├── aladdin.cpp ├── cacti-p │ ├── README │ ├── Ucache.cc │ ├── Ucache.h │ ├── arbiter.cc │ ├── arbiter.h │ ├── area.cc │ ├── area.h │ ├── bank.cc │ ├── bank.h │ ├── basic_circuit.cc │ ├── basic_circuit.h │ ├── cache.cfg │ ├── cacti.mk │ ├── cacti_interface.cc │ ├── cacti_interface.h │ ├── component.cc │ ├── component.h │ ├── const.h │ ├── crossbar.cc │ ├── crossbar.h │ ├── decoder.cc │ ├── decoder.h │ ├── htree2.cc │ ├── htree2.h │ ├── 
io.cc │ ├── io.h │ ├── main.cc │ ├── makefile │ ├── mat.cc │ ├── mat.h │ ├── nuca.cc │ ├── nuca.h │ ├── parameter.cc │ ├── parameter.h │ ├── powergating.cc │ ├── powergating.h │ ├── router.cc │ ├── router.h │ ├── subarray.cc │ ├── subarray.h │ ├── technology.cc │ ├── uca.cc │ ├── uca.h │ ├── version_cacti.h │ ├── wire.cc │ └── wire.h ├── debugger │ ├── debugger.cpp │ ├── debugger_commands.cpp │ ├── debugger_commands.h │ ├── debugger_graph.h │ ├── debugger_print.cpp │ ├── debugger_print.h │ ├── debugger_prompt.cpp │ └── debugger_prompt.h ├── file_func.cpp ├── file_func.h ├── graph_opts │ ├── all_graph_opts.h │ ├── base_address_init.cpp │ ├── base_address_init.h │ ├── base_opt.cpp │ ├── base_opt.h │ ├── consecutive_branch_fusion.cpp │ ├── consecutive_branch_fusion.h │ ├── dma_base_address_init.cpp │ ├── dma_base_address_init.h │ ├── global_loop_pipelining.cpp │ ├── global_loop_pipelining.h │ ├── induction_dependence_removal.cpp │ ├── induction_dependence_removal.h │ ├── load_buffering.cpp │ ├── load_buffering.h │ ├── loop_unrolling.cpp │ ├── loop_unrolling.h │ ├── memory_ambiguation.cpp │ ├── memory_ambiguation.h │ ├── per_loop_pipelining.cpp │ ├── per_loop_pipelining.h │ ├── phi_node_removal.cpp │ ├── phi_node_removal.h │ ├── reg_load_store_fusion.cpp │ ├── reg_load_store_fusion.h │ ├── repeated_store_removal.cpp │ ├── repeated_store_removal.h │ ├── store_buffering.cpp │ ├── store_buffering.h │ ├── tree_height_reduction.cpp │ └── tree_height_reduction.h ├── opcode_func.cpp ├── opcode_func.h ├── opcode_func_llvm34.h ├── opcode_func_llvm60.h ├── power_func.cpp ├── power_func.h ├── typedefs.h └── user_config.h ├── gem5 ├── Gem5Datapath.h ├── HybridDatapath.cpp ├── HybridDatapath.h ├── HybridDatapath.py ├── Makefile ├── MemoryQueue.h ├── SConscript ├── aladdin_sys_connection.cpp ├── aladdin_sys_connection.h ├── aladdin_sys_constants.cpp ├── aladdin_sys_constants.h ├── aladdin_tlb.cc ├── aladdin_tlb.hh ├── dma_interface.c ├── dma_interface.h ├── gem5_harness.h ├── 
invoke_aladdin.c ├── pybind_aladdin.cpp ├── sampling_interface.c └── sampling_interface.h ├── integration-test ├── common │ ├── Makefile.gem5 │ ├── Makefile.tracer │ ├── conftest.py │ ├── gem5_aladdin_test.py │ ├── machsuite_cache.xe │ ├── machsuite_dma.xe │ ├── run_cpu_tests.py │ ├── run_ruby_tests.py │ ├── run_sweep_tests.py │ ├── test_cacti_cache.cfg │ ├── test_cacti_lq.cfg │ ├── test_cacti_sq.cfg │ └── test_cacti_tlb.cfg └── with-cpu │ ├── test_acp │ ├── Makefile │ ├── dynamic_trace.gz │ ├── gem5.cfg │ ├── run.sh │ ├── test_acp │ ├── test_acp.c │ └── test_acp.cfg │ ├── test_aes │ ├── Makefile │ ├── aes │ ├── check.data │ ├── dynamic_trace.gz │ ├── gem5.cfg │ ├── input.data │ ├── run.sh │ └── test_aes.cfg │ ├── test_array_func_arg │ ├── Makefile │ ├── dynamic_trace.gz │ ├── gem5.cfg │ ├── run.sh │ ├── test_array_func_arg │ ├── test_array_func_arg.c │ └── test_array_func_arg.cfg │ ├── test_array_indexing │ ├── Makefile │ ├── dynamic_trace.gz │ ├── gem5.cfg │ ├── run.sh │ ├── test_array_indexing │ ├── test_array_indexing.c │ └── test_array_indexing.cfg │ ├── test_command_queue │ ├── Makefile │ ├── dynamic_trace.gz │ ├── gem5.cfg │ ├── run.sh │ ├── test_command_queue │ ├── test_command_queue.c │ └── test_command_queue.cfg │ ├── test_dma_load_store │ ├── Makefile │ ├── dynamic_trace.gz │ ├── gem5.cfg │ ├── run.sh │ ├── test_dma_load_store │ ├── test_dma_load_store.c │ └── test_dma_load_store.cfg │ ├── test_dma_store_order │ ├── Makefile │ ├── dynamic_trace.gz │ ├── gem5.cfg │ ├── run.sh │ ├── test_dma_store_order │ ├── test_dma_store_order.c │ └── test_dma_store_order.cfg │ ├── test_double_buffering │ ├── Makefile │ ├── dynamic_trace.gz │ ├── gem5.cfg │ ├── run.sh │ ├── test_double_buffering │ ├── test_double_buffering.c │ └── test_double_buffering.cfg │ ├── test_host_load_store │ ├── Makefile │ ├── dynamic_trace.gz │ ├── gem5.cfg │ ├── run.sh │ ├── test_host_load_store │ ├── test_host_load_store.c │ └── test_host_load_store.cfg │ ├── test_hybrid │ ├── Makefile │ 
├── dynamic_trace.gz │ ├── gem5.cfg │ ├── run.sh │ ├── test_aes_hybrid.cfg │ ├── test_hybrid │ ├── test_hybrid.c │ └── test_hybrid.cfg │ ├── test_hybrid_simd │ ├── Makefile │ ├── dynamic_trace.gz │ ├── gem5.cfg │ ├── run.sh │ ├── test_hybrid_simd │ ├── test_hybrid_simd.c │ └── test_hybrid_simd.cfg │ ├── test_load_store │ ├── Makefile │ ├── dynamic_trace.gz │ ├── gem5.cfg │ ├── run.sh │ ├── test_load_store │ ├── test_load_store.c │ └── test_load_store.cfg │ ├── test_loop_sampling │ ├── Makefile │ ├── dynamic_trace.gz │ ├── gem5.cfg │ ├── run.sh │ ├── test_loop_sampling │ ├── test_loop_sampling.c │ └── test_loop_sampling.cfg │ ├── test_mmap │ ├── Makefile │ ├── run.sh │ ├── test_mmap │ └── test_mmap.c │ ├── test_multiple_accelerators │ ├── Makefile │ ├── dynamic_trace_acc0.gz │ ├── dynamic_trace_acc1.gz │ ├── dynamic_trace_acc2.gz │ ├── dynamic_trace_acc3.gz │ ├── gem5.cfg │ ├── run.sh │ ├── test_multiple_accelerators │ ├── test_multiple_accelerators.c │ └── test_multiple_accelerators.cfg │ ├── test_multiple_invocations │ ├── Makefile │ ├── dynamic_trace.gz │ ├── gem5.cfg │ ├── run.sh │ ├── test_multiple_invocations │ ├── test_multiple_invocations.c │ └── test_multiple_invocations.cfg │ └── test_streaming_dma │ ├── Makefile │ ├── dynamic_trace.gz │ ├── gem5.cfg │ ├── run.sh │ ├── test_streaming_dma │ ├── test_streaming_dma.c │ └── test_streaming_dma.cfg └── unit-test ├── Makefile ├── catch.hpp ├── catch_common.cpp ├── inputs ├── aes-aes-trace.gz ├── config-aes-aes ├── config-loop-sampling ├── config-loop-sampling-inner ├── config-loop-sampling-inner-pipelining ├── config-loop-sampling-pipelined ├── config-loop-sampling-unrolling ├── config-memory-ambiguation ├── config-pp_scan-p4-u4-P1 ├── config-reduction-p4-u4-P1 ├── config-reg-ls-fusion ├── config-sort-radix ├── config-special-math-op ├── config-store-buffer ├── config-triad-dma-p2-u2-P1 ├── config-triad-initbase-p1-u1-P1 ├── config-triad-p2-u2-P1 ├── double_buffering.cfg ├── double_buffering_trace.gz ├── 
loop-sampling-inner-loops-ref-trace.gz ├── loop-sampling-inner-loops-trace.gz ├── loop-sampling-inner-pipelined-ref-trace.gz ├── loop-sampling-inner-pipelined-trace.gz ├── loop-sampling-multiple-invoc-ref-trace.gz ├── loop-sampling-multiple-invoc-trace.gz ├── loop-sampling-multiple-loops-ref-trace.gz ├── loop-sampling-multiple-loops-trace.gz ├── loop-sampling-nested-ref-trace.gz ├── loop-sampling-nested-trace.gz ├── loop-sampling-pipelined-flattened-ref-trace.gz ├── loop-sampling-pipelined-flattened-trace.gz ├── loop-sampling-single-loop-ref-trace.gz ├── loop-sampling-single-loop-trace.gz ├── loop-sampling-single-pipelined-ref-trace.gz ├── loop-sampling-single-pipelined-trace.gz ├── loop-sampling-unrolling-trace.gz ├── memory_ambiguation_trace.gz ├── pp_scan-128-trace.gz ├── reduction-128-trace.gz ├── reg-ls-fusion-trace.gz ├── sort-radix-trace.gz ├── special-math-op-trace.gz ├── store_buffer.gz ├── triad-128-trace.gz ├── triad-dma-trace.gz └── triad-initbase-trace.gz ├── node_matchers.h ├── test_dddg_generation.cpp ├── test_dma.cpp ├── test_dynamic_method_name.cpp ├── test_init_base_address.cpp ├── test_loop_flatten.cpp ├── test_loop_pipelining.cpp ├── test_loop_sampling.cpp ├── test_loop_unrolling.cpp ├── test_memory_ambiguation.cpp ├── test_performance.cpp ├── test_reg_load_store_fusion.cpp ├── test_rm_induction_var.cpp ├── test_rm_phi.cpp ├── test_special_math_op.cpp ├── test_spm_part.cpp ├── test_srcs ├── Makefile.tracer ├── aes ├── double_buffering ├── memory_ambiguation │ ├── Makefile │ └── memory_ambiguation.c ├── pp_scan ├── reduction ├── reg_load_store_fusion │ ├── Makefile │ └── reg_load_store_fusion.c ├── store_buffer │ ├── Makefile │ └── store_buffer.c └── triad ├── test_store_buffer.cpp └── test_tree_height_reduction.cpp /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2.1 2 | orbs: 3 | docker: circleci/docker@1.3.0 4 | jobs: 5 | build: 6 | docker: 7 | - image: xyzsam/smaug:latest 8 | 
environment: 9 | ALADDIN_HOME: /root/project 10 | steps: 11 | - checkout 12 | - run: 13 | name: Build 14 | command: | 15 | cd common 16 | make all -j2 17 | cd ../unit-test 18 | make all -j4 19 | - run: 20 | name: Run unit tests 21 | environment: 22 | TEST_REPORT_DIR: ~/aladdin-junit-report-dir 23 | command: | 24 | cd unit-test 25 | make junit_test 26 | - store_test_results: 27 | path: ~/aladdin-junit-report-dir 28 | -------------------------------------------------------------------------------- /.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | # BasedOnStyle: LLVM 3 | AccessModifierOffset: -1 4 | ConstructorInitializerIndentWidth: 4 5 | AlignEscapedNewlinesLeft: false 6 | AlignTrailingComments: true 7 | AllowAllParametersOfDeclarationOnNextLine: true 8 | AllowShortIfStatementsOnASingleLine: false 9 | AllowShortLoopsOnASingleLine: false 10 | AlwaysBreakTemplateDeclarations: false 11 | AlwaysBreakBeforeMultilineStrings: false 12 | BreakBeforeBinaryOperators: false 13 | BreakBeforeTernaryOperators: true 14 | BreakConstructorInitializersBeforeComma: false 15 | BinPackParameters: false 16 | ColumnLimit: 80 17 | ConstructorInitializerAllOnOneLineOrOnePerLine: false 18 | DerivePointerBinding: false 19 | ExperimentalAutoDetectBinPacking: true 20 | IndentCaseLabels: true 21 | MaxEmptyLinesToKeep: 1 22 | NamespaceIndentation: None 23 | ObjCSpaceBeforeProtocolList: true 24 | PenaltyBreakBeforeFirstCallParameter: 0 25 | PenaltyBreakComment: 1000 26 | PenaltyBreakString: 1000 27 | PenaltyBreakFirstLessLess: 120 28 | PenaltyExcessCharacter: 1000000 29 | PenaltyReturnTypeOnItsOwnLine: 10000 30 | PointerBindsToType: true 31 | SpacesBeforeTrailingComments: 2 32 | Cpp11BracedListStyle: false 33 | Standard: Cpp11 34 | IndentWidth: 2 35 | TabWidth: 2 36 | UseTab: Never 37 | BreakBeforeBraces: Attach 38 | IndentFunctionDeclarationAfterType: true 39 | SpacesInParentheses: false 40 | SpacesInAngles: false 41 | 
SpaceInEmptyParentheses: false 42 | SpacesInCStyleCastParentheses: false 43 | SpaceAfterControlStatementKeyword: true 44 | SpaceBeforeAssignmentOperators: true 45 | ContinuationIndentWidth: 4 46 | ... 47 | -------------------------------------------------------------------------------- /MachSuite/README.md: -------------------------------------------------------------------------------- 1 | # MachSuite 2 | 3 | [![build status](https://travis-ci.org/breagen/MachSuite.svg?branch=master)](https://travis-ci.org/breagen/MachSuite) 4 | 5 | MachSuite is a benchmark suite intended for accelerator-centric research. 6 | 7 | There is a Makefile in the top directory as well as one within each benchmark 8 | subdirectory. 9 | 10 | We suggest running the benchmarks locally (from their own directory) for now. 11 | 12 | Also, our validation approach is not portable across machines. 13 | For now, the final check to see if the output is correct is not performed. 14 | We are working on fixing it. However, this should not change the computation 15 | or behavior of the benchmarks at all. 16 | 17 | 18 | ## Licensing 19 | 20 | All code is open source (BSD-compatible) and free to use and distribute. Please 21 | look in the LICENSE file for details. 22 | 23 | ## Citing 24 | 25 | If you use the code, we would appreciate it if you cite the following paper: 26 | 27 | > Brandon Reagen, Robert Adolf, Sophia Yakun Shao, Gu-Yeon Wei, and David Brooks. 28 | > *"MachSuite: Benchmarks for Accelerator Design and Customized Architectures."* 29 | 2014 IEEE International Symposium on Workload Characterization. 30 | 31 | For any questions/concerns, please email [reagen@fas.harvard.edu](mailto:reagen@fas.harvard.edu) 32 | 33 | Enjoy!! 
34 | -------------------------------------------------------------------------------- /MachSuite/aes/aes/Makefile: -------------------------------------------------------------------------------- 1 | KERN=aes 2 | ALG=aes 3 | 4 | CFLAGS?=-O3 -Wall -Wno-unused-label 5 | 6 | SRCS=$(KERN).c local_support.c ../../common/support.c 7 | FILES=$(SRCS) $(KERN).h ../../common/support.h 8 | 9 | $(KERN): $(FILES) ../../common/harness.c 10 | $(CC) $(CFLAGS) -I../../common -o $(KERN) $(SRCS) ../../common/harness.c 11 | 12 | run: $(KERN) input.data check.data 13 | ./$(KERN) input.data check.data 14 | 15 | generate: $(FILES) generate.c 16 | $(CC) $(CFLAGS) -I../../common -o generate $(SRCS) generate.c 17 | ./generate 18 | 19 | hls: $(KERN).c $(KERN).h 20 | vivado_hls hls.tcl 21 | 22 | clean: 23 | rm -f $(KERN) generate output.data 24 | 25 | # For the tracer makefile 26 | ACCEL_NAME = aes_aes 27 | TEST_BIN = $(ACCEL_NAME) 28 | export TRACE_OUTPUT_DIR=$(ACCEL_NAME) 29 | ifndef WORKLOAD 30 | export WORKLOAD=aes256_encrypt_ecb 31 | endif 32 | include ../../common/Makefile.tracer 33 | include ../../common/Makefile.gem5 34 | -------------------------------------------------------------------------------- /MachSuite/aes/aes/aes.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Byte-oriented AES-256 implementation. 3 | * All lookup tables replaced with 'on the fly' calculations. 4 | */ 5 | #include "support.h" 6 | 7 | typedef struct { 8 | uint8_t key[32]; 9 | uint8_t enckey[32]; 10 | uint8_t deckey[32]; 11 | } aes256_context; 12 | 13 | void aes256_encrypt_ecb( 14 | aes256_context *host_ctx, uint8_t* host_k, uint8_t* host_buf, 15 | aes256_context *ctx, uint8_t* k, uint8_t* buf); 16 | 17 | //////////////////////////////////////////////////////////////////////////////// 18 | // Test harness interface code. 
19 | 20 | struct bench_args_t { 21 | aes256_context ctx; 22 | uint8_t k[32]; 23 | uint8_t buf[16]; 24 | }; 25 | 26 | -------------------------------------------------------------------------------- /MachSuite/aes/aes/check.data: -------------------------------------------------------------------------------- 1 | %% 2 | 142 3 | 162 4 | 183 5 | 202 6 | 81 7 | 103 8 | 69 9 | 191 10 | 234 11 | 252 12 | 73 13 | 144 14 | 75 15 | 73 16 | 96 17 | 137 18 | -------------------------------------------------------------------------------- /MachSuite/aes/aes/generate.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "aes.h" 11 | 12 | int main(int argc, char **argv) { 13 | struct bench_args_t data; 14 | uint8_t initial_contents[16] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff}; 15 | int i, fd; 16 | 17 | // Fill data structure 18 | for(i=0; i<32; i++) 19 | data.k[i] = i; 20 | memcpy(data.buf, initial_contents, 16); 21 | 22 | // Open and write 23 | fd = open("input.data", O_WRONLY|O_CREAT|O_TRUNC, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH); 24 | assert( fd>0 && "Couldn't open input data file" ); 25 | data_to_input(fd, &data); 26 | 27 | return 0; 28 | } 29 | -------------------------------------------------------------------------------- /MachSuite/aes/aes/hls.tcl: -------------------------------------------------------------------------------- 1 | open_project aes_syn 2 | 3 | add_files aes.c 4 | add_files input.data 5 | add_files check.data 6 | add_files -tb ../../common/harness.c 7 | 8 | #add_files -tb aes_test.c 9 | 10 | set_top aes256_encrypt_ecb 11 | 12 | open_solution -reset solution 13 | set_part virtex7 14 | create_clock -period 10 15 | source ./aes_dir 16 | #config_rtl -reset all -reset_level low 17 | csynth_design 18 | cosim_design -rtl verilog -tool modelsim 
-trace_level all 19 | 20 | exit 21 | -------------------------------------------------------------------------------- /MachSuite/aes/aes/input.data: -------------------------------------------------------------------------------- 1 | %% 2 | 0 3 | 1 4 | 2 5 | 3 6 | 4 7 | 5 8 | 6 9 | 7 10 | 8 11 | 9 12 | 10 13 | 11 14 | 12 15 | 13 16 | 14 17 | 15 18 | 16 19 | 17 20 | 18 21 | 19 22 | 20 23 | 21 24 | 22 25 | 23 26 | 24 27 | 25 28 | 26 29 | 27 30 | 28 31 | 29 32 | 30 33 | 31 34 | %% 35 | 0 36 | 17 37 | 34 38 | 51 39 | 68 40 | 85 41 | 102 42 | 119 43 | 136 44 | 153 45 | 170 46 | 187 47 | 204 48 | 221 49 | 238 50 | 255 51 | -------------------------------------------------------------------------------- /MachSuite/backprop/backprop/Makefile: -------------------------------------------------------------------------------- 1 | KERN=backprop 2 | ALG=backprop 3 | 4 | CFLAGS?=-O3 -Wall -Wno-unused-label 5 | 6 | SRCS=$(KERN).c local_support.c ../../common/support.c 7 | FILES=$(SRCS) $(KERN).h ../../common/support.h 8 | 9 | $(KERN): $(FILES) ../../common/harness.c 10 | $(CC) $(CFLAGS) -I../../common -o $(KERN) $(SRCS) ../../common/harness.c -lm 11 | 12 | run: $(KERN) input.data check.data 13 | ./$(KERN) input.data check.data 14 | 15 | generate: $(FILES) generate.c 16 | $(CC) $(CFLAGS) -I../../common -o generate $(SRCS) generate.c -lm 17 | ./generate 18 | 19 | hls: $(KERN).c $(KERN).h 20 | vivado_hls hls.tcl 21 | 22 | clean: 23 | rm -f $(KERN) generate output.data 24 | -------------------------------------------------------------------------------- /MachSuite/backprop/backprop/backprop.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "../../common/support.h" 5 | 6 | // Fixed parameters 7 | #define input_dimension 13 8 | #define possible_outputs 3 9 | #define training_sets 163 10 | #define nodes_per_layer 64 11 | #define layers 2 12 | #define learning_rate 0.01 13 | #define epochs 1 14 | #define 
test_sets 15 15 | #define norm_param 0.005 16 | 17 | #define max 1.0 18 | #define offset 0.5 19 | 20 | //Data Bounds 21 | #define TYPE double 22 | #define MAX 1000 23 | #define MIN 1 24 | 25 | void backprop( 26 | TYPE weights1[input_dimension*nodes_per_layer], 27 | TYPE weights2[nodes_per_layer*nodes_per_layer], 28 | TYPE weights3[nodes_per_layer*possible_outputs], 29 | TYPE biases1[nodes_per_layer], 30 | TYPE biases2[nodes_per_layer], 31 | TYPE biases3[possible_outputs], 32 | TYPE training_data[training_sets*input_dimension], 33 | TYPE training_targets[training_sets*possible_outputs]); 34 | //////////////////////////////////////////////////////////////////////////////// 35 | // Test harness interface code. 36 | 37 | struct bench_args_t { 38 | TYPE weights1[input_dimension*nodes_per_layer]; 39 | TYPE weights2[nodes_per_layer*nodes_per_layer]; 40 | TYPE weights3[nodes_per_layer*possible_outputs]; 41 | TYPE biases1[nodes_per_layer]; 42 | TYPE biases2[nodes_per_layer]; 43 | TYPE biases3[possible_outputs]; 44 | TYPE training_data[training_sets*input_dimension]; 45 | TYPE training_targets[training_sets*possible_outputs]; 46 | }; 47 | -------------------------------------------------------------------------------- /MachSuite/backprop/backprop/backprop_dir: -------------------------------------------------------------------------------- 1 | #mem 2 | #set_directive_resource -core RAM_1P_BRAM "backprop" weights 3 | #set_directive_resource -core RAM_1P_BRAM "backprop" inputs 4 | #set_directive_resource -core RAM_1P_BRAM "backprop" targets 5 | 6 | #partitioning 7 | #set_directive_array_partition -factor 64 -type cyclic backprop weights 8 | #set_directive_array_partition -factor 64 -type cyclic backprop inputs 9 | #set_directive_array_partition -factor 64 -type cyclic backprop targets 10 | 11 | #unrolling 12 | #set_directive_unroll -factor 8 update_layer/ul_1 13 | #set_directive_unroll -factor 8 propagate_error_layer/pel_1 14 | 15 | #pipeline 16 | #set_directive_pipeline 
update_layer/ul_1 17 | #set_directive_pipeline propagate_error_layer/pel_1 18 | 19 | #resources 20 | #set_directive_resource -core Mul "update_weights" change 21 | -------------------------------------------------------------------------------- /MachSuite/backprop/backprop/hls.tcl: -------------------------------------------------------------------------------- 1 | open_project backprop_syn 2 | 3 | add_files backprop.c 4 | add_files input.data 5 | add_files check.data 6 | add_files local_support.c 7 | 8 | add_files -tb ../../common/support.c 9 | add_files -tb ../../common/support.h 10 | add_files -tb ../../common/harness.c 11 | 12 | 13 | set_top backprop 14 | open_solution -reset solution 15 | 16 | set_part virtex7 17 | create_clock -period 10 18 | #source ./stencil_dir 19 | 20 | csim_design 21 | 22 | csynth_design 23 | cosim_design -rtl verilog -tool modelsim -trace_level all 24 | 25 | exit 26 | -------------------------------------------------------------------------------- /MachSuite/bfs/bulk/Makefile: -------------------------------------------------------------------------------- 1 | KERN=bfs 2 | ALG=bulk 3 | 4 | CFLAGS?=-O3 -Wall -Wno-unused-label 5 | 6 | SRCS=$(KERN).c local_support.c ../../common/support.c 7 | FILES=$(SRCS) $(KERN).h ../../common/support.h 8 | 9 | $(KERN): $(FILES) ../../common/harness.c 10 | $(CC) $(CFLAGS) -I../../common -o $(KERN) $(SRCS) ../../common/harness.c 11 | 12 | run: $(KERN) input.data check.data 13 | ./$(KERN) input.data check.data 14 | 15 | generate: $(FILES) generate.c 16 | $(CC) $(CFLAGS) -I../../common -o generate $(SRCS) generate.c 17 | ./generate 18 | 19 | hls: $(KERN).c $(KERN).h 20 | vivado_hls hls.tcl 21 | 22 | clean: 23 | rm -f $(KERN) generate output.data 24 | 25 | ACCEL_NAME = bfs_bulk 26 | TEST_BIN = $(ACCEL_NAME) 27 | export TRACE_OUTPUT_DIR=$(ACCEL_NAME) 28 | ifndef WORKLOAD 29 | export WORKLOAD=bfs 30 | endif 31 | include ../../common/Makefile.tracer 32 | include ../../common/Makefile.gem5 33 | 
-------------------------------------------------------------------------------- /MachSuite/bfs/bulk/bfs_dir: -------------------------------------------------------------------------------- 1 | #select functional units you want 2 | #none.. 3 | 4 | #select memory resources 5 | set_directive_resource -core RAM_1P_BRAM "bfs" nodes 6 | set_directive_resource -core RAM_1P_BRAM "bfs" edges 7 | set_directive_resource -core RAM_1P_BRAM "bfs" level 8 | set_directive_resource -core RAM_1P_BRAM "bfs" level_counts 9 | 10 | #loop pipelining factors 11 | #set_directive_pipeline bfs/init_horizions 12 | #set_directive_pipeline bfs/init_levels 13 | #set_directive_pipeline bfs/loop_horizons 14 | #set_directive_pipeline bfs/loop_nodes 15 | set_directive_pipeline bfs/loop_neighbors 16 | 17 | #set_directive_unroll -factor 2 bfs/init_horizions 18 | #set_directive_unroll -factor 2 bfs/init_levels 19 | #set_directive_unroll -factor 2 bfs/loop_horizons 20 | #set_directive_unroll -factor 2 bfs/loop_nodes 21 | #set_directive_unroll -factor 2 bfs/loop_neighbors 22 | 23 | #Array partitioning 24 | #set_directive_array_partition -factor 2 -type cyclic "bfs" nodes 25 | #set_directive_array_partition -factor 2 -type cyclic "bfs" edges 26 | #set_directive_array_partition -factor 2 -type cyclic "bfs" levels 27 | #set_directive_array_partition -factor 2 -type cyclic "bfs" level_counts 28 | -------------------------------------------------------------------------------- /MachSuite/bfs/bulk/check.data: -------------------------------------------------------------------------------- 1 | %% 2 | 1 3 | 26 4 | 184 5 | 22 6 | 0 7 | 0 8 | 0 9 | 0 10 | 0 11 | 0 12 | -------------------------------------------------------------------------------- /MachSuite/bfs/bulk/hls.tcl: -------------------------------------------------------------------------------- 1 | open_project bfs_syn 2 | 3 | add_files bfs.c 4 | add_files input.data 5 | add_files check.data 6 | add_files -tb ../../common/harness.c 7 | 8 | set_top 
bfs 9 | 10 | open_solution -reset solution 11 | set_part virtex7 12 | create_clock -period 10 13 | #source ./bfs_dir 14 | #config_rtl -reset all -reset_level low 15 | csynth_design 16 | cosim_design -rtl verilog -tool modelsim -trace_level all 17 | 18 | exit 19 | -------------------------------------------------------------------------------- /MachSuite/bfs/bulk/matspy.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | from mat import mat, mat2 6 | 7 | #(fig,ax)=plt.subplots() 8 | plt.imshow(mat); 9 | plt.savefig('mat.png') 10 | plt.imshow(mat2); 11 | plt.savefig('mat2.png') 12 | 13 | print sum(map(sum,mat)) 14 | print sum(map(sum,mat2)) 15 | 16 | #d=dict(zip(list('ABCD'),[0,0,0,0])) 17 | #for c in mat: 18 | # d[c] += 1 19 | #s=sum(d.values()) 20 | #for k in d: 21 | # print 100*d[k]/s 22 | -------------------------------------------------------------------------------- /MachSuite/bfs/queue/Makefile: -------------------------------------------------------------------------------- 1 | KERN=bfs 2 | ALG=queue 3 | 4 | CFLAGS?=-O3 -Wall -Wno-unused-label 5 | 6 | SRCS=$(KERN).c local_support.c ../../common/support.c 7 | FILES=$(SRCS) $(KERN).h ../../common/support.h 8 | 9 | $(KERN): $(FILES) ../../common/harness.c 10 | $(CC) $(CFLAGS) -I../../common -o $(KERN) $(SRCS) ../../common/harness.c 11 | 12 | run: $(KERN) input.data check.data 13 | ./$(KERN) input.data check.data 14 | 15 | generate: $(FILES) generate.c 16 | $(CC) $(CFLAGS) -I../../common -o generate $(SRCS) generate.c 17 | ./generate 18 | 19 | hls: $(KERN).c $(KERN).h 20 | vivado_hls hls.tcl 21 | 22 | clean: 23 | rm -f $(KERN) generate output.data 24 | 25 | ACCEL_NAME = bfs_queue 26 | TEST_BIN = $(ACCEL_NAME) 27 | export TRACE_OUTPUT_DIR=$(ACCEL_NAME) 28 | ifndef WORKLOAD 29 | export WORKLOAD=bfs 30 | endif 31 | include ../../common/Makefile.tracer 32 | include ../../common/Makefile.gem5 
33 | -------------------------------------------------------------------------------- /MachSuite/bfs/queue/bfs_dir: -------------------------------------------------------------------------------- 1 | #select functional units you want 2 | #none.. 3 | 4 | #select memory resources 5 | set_directive_resource -core RAM_1P_BRAM "bfs" nodes 6 | set_directive_resource -core RAM_1P_BRAM "bfs" edges 7 | set_directive_resource -core RAM_1P_BRAM "bfs" level 8 | set_directive_resource -core RAM_1P_BRAM "bfs" level_counts 9 | 10 | #loop pipelining factors 11 | #set_directive_pipeline bfs/init_horizions 12 | #set_directive_pipeline bfs/init_levels 13 | #set_directive_pipeline bfs/loop_queue 14 | set_directive_pipeline bfs/loop_neighbors 15 | 16 | #set_directive_unroll -factor 2 bfs/init_horizions 17 | #set_directive_unroll -factor 2 bfs/init_levels 18 | #set_directive_unroll -factor 2 bfs/loop_queue 19 | #set_directive_unroll -factor 2 bfs/loop_neighbors 20 | 21 | #Array partitioning 22 | #set_directive_array_partition -factor 2 -type cyclic "bfs" nodes 23 | #set_directive_array_partition -factor 2 -type cyclic "bfs" edges 24 | #set_directive_array_partition -factor 2 -type cyclic "bfs" levels 25 | #set_directive_array_partition -factor 2 -type cyclic "bfs" level_counts 26 | -------------------------------------------------------------------------------- /MachSuite/bfs/queue/check.data: -------------------------------------------------------------------------------- 1 | %% 2 | 1 3 | 26 4 | 184 5 | 22 6 | 0 7 | 0 8 | 0 9 | 0 10 | 0 11 | 0 12 | -------------------------------------------------------------------------------- /MachSuite/bfs/queue/hls.tcl: -------------------------------------------------------------------------------- 1 | open_project bfs_syn 2 | 3 | add_files bfs.c 4 | add_files input.data 5 | add_files check.data 6 | add_files -tb ../../common/harness.c 7 | 8 | set_top bfs 9 | 10 | open_solution -reset solution 11 | set_part virtex7 12 | create_clock -period 10 13 
# Builds and runs the unit test for the shared support code (../support.c).
CFLAGS=-Wall -Werror

# `test` and `clean` name actions, not files. Declaring them phony keeps a
# stray file called "test" or "clean" from silently disabling the rule.
.PHONY: test clean

# Run the test binary; it depends on the input_* fixture files in this directory.
test: input_* test_support
	./test_support

# Use $(CC) (defaults to cc) so the compiler can be overridden from the
# command line, as in the per-kernel Makefiles.
test_support: test_support.c ../support.h ../support.c
	$(CC) -o test_support $(CFLAGS) -I.. test_support.c ../support.c

# Remove the binary, its macOS debug bundle, and the scratch file listed in .gitignore.
clean:
	rm -f test_support
	rm -rf test_support.dSYM
	rm -f testfile
// Number of entries in the real/img sample arrays (see bench_args_t below).
#define FFT_SIZE 1024
// 2*pi. generate.c builds the twiddle tables from it as
// cos(twoPI*n/FFT_SIZE) and -sin(twoPI*n/FFT_SIZE).
// Because this is a macro, no variable may be named twoPI in files that
// include this header.
#define twoPI 6.28318530717959

// Kernel entry point (also the HLS top function, see hls.tcl: `set_top fft`).
// NOTE(review): the host_* pointers are presumably the host-side buffers and
// the unprefixed pointers the accelerator-local copies, following the same
// convention as the gemm kernels -- confirm against fft.c.
void fft(double* host_real,
         double* host_img,
         double* host_real_twid,
         double* host_img_twid,
         double* real,
         double* img,
         double* real_twid,
         double* img_twid);

////////////////////////////////////////////////////////////////////////////////
// Test harness interface code.

// Flat argument block exchanged with the harness and filled by generate.c.
// The field order and sizes define the layout of input.data; do not reorder.
struct bench_args_t {
  double real[FFT_SIZE];         // sample data, real part
  double img[FFT_SIZE];          // sample data, imaginary part
  double real_twid[FFT_SIZE/2];  // twiddle factors, cos term
  double img_twid[FFT_SIZE/2];   // twiddle factors, -sin term
};
#set_directive_resource -core Mul "fft" T1 25 | #set_directive_resource -core Mul "fft" T2 26 | #set_directive_resource -core Mul "fft" T3 27 | #set_directive_resource -core Mul "fft" T4 28 | #set_directive_resource -core Mul "fft" T5 29 | #set_directive_resource -core Mul "fft" T6 30 | #set_directive_resource -core Mul "fft" T7 31 | #set_directive_resource -core Mul "fft" T8 32 | -------------------------------------------------------------------------------- /MachSuite/fft/strided/fft_test.c: -------------------------------------------------------------------------------- 1 | #include "fft.h" 2 | 3 | int main(){ 4 | double data_x[size]; 5 | double data_y[size]; 6 | double img[size]; 7 | double real[size]; 8 | int i; 9 | 10 | //set up twiddles... 11 | double twoPI = 6.28318530717959; 12 | double typed; 13 | int n, N; 14 | N = size; 15 | 16 | //Pre-calc twiddles 17 | for(n=0; n<(N>>1); n++){ 18 | typed = (double)(twoPI*n/N); 19 | real[n] = cos(typed); 20 | img[n] = (-1.0)*sin(typed); 21 | } 22 | 23 | //Init data 24 | for(i=0; i < size; i++) { 25 | data_x[i] = (double)(i); 26 | data_y[i] = (double)(i); 27 | } 28 | 29 | /* 30 | printf("Input:\n"); 31 | for(i=0; i < size; i++) { 32 | printf("%f : %f\n", data_x[i], data_y[i]); 33 | } 34 | 35 | printf("fft:\n"); 36 | */ 37 | 38 | fft(data_x, data_y, real, img); 39 | 40 | printf("Output:\n"); 41 | 42 | 43 | for(i=0; i < size; i++) { 44 | printf("%.2f : %.2f\n", data_x[i], data_y[i]); 45 | } 46 | 47 | return 0; 48 | } 49 | -------------------------------------------------------------------------------- /MachSuite/fft/strided/generate.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include "fft.h" 12 | 13 | int main(int argc, char **argv) 14 | { 15 | struct bench_args_t data; 16 | int i, n, fd; 17 | double typed; 18 | struct prng_rand_t state; 19 | 20 | // Fill 
# Build rules for the transpose FFT benchmark (kernel function fft1D_512).
KERN=fft
ALG=transpose

CFLAGS?=-O3 -Wall -Wno-unused-label

SRCS=$(KERN).c local_support.c ../../common/support.c
FILES=$(SRCS) $(KERN).h ../../common/support.h

# Benchmark binary: kernel + support code linked with the common harness.
$(KERN): $(FILES) ../../common/harness.c
	$(CC) $(CFLAGS) -I../../common -o $(KERN) $(SRCS) ../../common/harness.c -lm

# These targets name actions, not files.
.PHONY: run hls clean

run: $(KERN) input.data check.data
	./$(KERN) input.data check.data

# Build the input generator and run it so that input.data is actually
# (re)written, matching the generate rule of the other kernels.
generate: $(FILES) generate.c
	$(CC) $(CFLAGS) -I../../common -o generate $(SRCS) generate.c -lm
	./generate

hls: $(KERN).c $(KERN).h
	vivado_hls hls.tcl

clean:
	rm -f $(KERN) generate output.data

# Settings consumed by the shared tracer / gem5 rules included below
# (those rule files live in ../../common).
ACCEL_NAME = fft_transpose
TEST_BIN = $(ACCEL_NAME)
export TRACE_OUTPUT_DIR=$(ACCEL_NAME)
ifndef WORKLOAD
export WORKLOAD=fft1D_512
endif
include ../../common/Makefile.tracer
include ../../common/Makefile.gem5
for( i = 0; i < 512; i++){ 32 | printf("x = %f y = %f \n", a_x[i]/512, a_y[i]/512); 33 | } 34 | 35 | return 0; 36 | } 37 | -------------------------------------------------------------------------------- /MachSuite/fft/transpose/generate.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "fft.h" 11 | 12 | int main(int argc, char **argv) 13 | { 14 | struct bench_args_t data; 15 | int i, fd; 16 | struct prng_rand_t state; 17 | 18 | // Fill data structure 19 | prng_srand(1,&state); 20 | for(i=0; i<512; i++){ 21 | data.work_x[i] = ((TYPE)prng_rand(&state))/((TYPE)PRNG_RAND_MAX); 22 | data.work_y[i] = ((TYPE)prng_rand(&state))/((TYPE)PRNG_RAND_MAX); 23 | } 24 | 25 | // Open and write 26 | fd = open("input.data", O_WRONLY|O_CREAT|O_TRUNC, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH); 27 | assert( fd>0 && "Couldn't open input data file" ); 28 | data_to_input(fd, (void *)(&data)); 29 | 30 | return 0; 31 | } 32 | -------------------------------------------------------------------------------- /MachSuite/fft/transpose/hls.tcl: -------------------------------------------------------------------------------- 1 | open_project fft_syn 2 | 3 | set_top fft1D_512 4 | 5 | add_files fft.c 6 | add_files input.data 7 | add_files check.data 8 | add_files -tb ../../common/harness.c 9 | 10 | set clock 10 11 | set part virtex7 12 | 13 | open_solution fft 14 | set_part $part 15 | create_clock -period $clock 16 | set_clock_uncertainty 0 17 | #source ./fft_dir 18 | config_rtl -reset all -reset_level low 19 | csynth_design 20 | cosim_design -tool modelsim -rtl verilog -trace_level all 21 | exit 22 | -------------------------------------------------------------------------------- /MachSuite/gemm/blocked/Makefile: -------------------------------------------------------------------------------- 1 | KERN=gemm 2 | ALG=blocked 3 | 4 | 
CFLAGS?=-O3 -Wall -Wno-unused-label

SRCS=$(KERN).c local_support.c ../../common/support.c
FILES=$(SRCS) $(KERN).h ../../common/support.h

# Benchmark binary: kernel + support code linked with the common harness.
$(KERN): $(FILES) ../../common/harness.c
	$(CC) $(CFLAGS) -I../../common -o $(KERN) $(SRCS) ../../common/harness.c

# These targets name actions, not files; declaring them phony keeps a stray
# file called "run", "hls" or "clean" from disabling the rule.
.PHONY: run hls clean

run: $(KERN) input.data check.data
	./$(KERN) input.data check.data

# Build the input generator and run it to (re)write input.data.
generate: $(FILES) generate.c
	$(CC) $(CFLAGS) -I../../common -o generate $(SRCS) generate.c
	./generate

hls: $(KERN).c $(KERN).h
	vivado_hls hls.tcl

clean:
	rm -f $(KERN) generate output.data

# Settings consumed by the shared tracer / gem5 rules included below
# (those rule files live in ../../common).
ACCEL_NAME = gemm_blocked
TEST_BIN = $(ACCEL_NAME)
export TRACE_OUTPUT_DIR=$(ACCEL_NAME)
ifndef WORKLOAD
export WORKLOAD=bbgemm
endif
include ../../common/Makefile.tracer
include ../../common/Makefile.gem5
/*
 * Blocked matrix multiply: accumulates m1 * m2 into prod.
 *
 * All three matrices are row_size x row_size, stored row-major in flat arrays
 * (element (r, c) lives at r * row_size + c). The jj/kk loops walk
 * block_size-wide tiles of the column and inner dimensions.
 *
 * host_m1, host_m2, host_prod: buffers supplied by the caller.
 * m1, m2, prod: working buffers. Without DMA_MODE they are simply re-pointed
 *   at the host buffers; with DMA_MODE the inputs are transferred with
 *   dmaLoad() and the result is written back with dmaStore().
 *
 * The result is accumulated with +=, so prod must hold the desired initial
 * values (normally zeros) on entry.
 * NOTE(review): in DMA_MODE prod is neither loaded from host_prod nor cleared
 * before the accumulation -- confirm the local buffer is zeroed elsewhere.
 *
 * The loop labels (loopjj, loopkk, loopi, loopk, loopj) and the local `mul`
 * are referenced by name from gemm_dir; keep them in sync when editing.
 */
void bbgemm(TYPE* host_m1, TYPE* host_m2, TYPE* host_prod,
            TYPE* m1, TYPE* m2, TYPE* prod) {
    int i, k, j, jj, kk;
    int i_row, k_row;
    TYPE temp_x, mul;

#ifdef DMA_MODE
    // Bring both input matrices (N elements each) into the working buffers.
    dmaLoad(m1, host_m1, N * sizeof(TYPE));
    dmaLoad(m2, host_m2, N * sizeof(TYPE));
#else
    // No DMA: operate directly on the caller's buffers.
    m1 = host_m1;
    m2 = host_m2;
    prod = host_prod;
#endif

    loopjj:for (jj = 0; jj < row_size; jj += block_size){         // column tile of m2/prod
        loopkk:for (kk = 0; kk < row_size; kk += block_size){     // tile of the inner dimension
            loopi:for ( i = 0; i < row_size; ++i){                // row of m1/prod
                loopk:for (k = 0; k < block_size; ++k){           // position inside the kk tile
                    i_row = i * row_size;          // start of row i in m1/prod
                    k_row = (k + kk) * row_size;   // start of row (kk + k) in m2
                    temp_x = m1[i_row + k + kk];   // m1(i, kk + k), reused for the whole j loop
                    loopj:for (j = 0; j < block_size; ++j){       // position inside the jj tile
                        mul = temp_x * m2[k_row + j + jj];
                        prod[i_row + j + jj] += mul;
                    }
                }
            }
        }
    }
#ifdef DMA_MODE
    // Copy the finished product back to the caller's buffer.
    dmaStore(host_prod, prod, N * sizeof(TYPE));
#endif
}
# Vivado HLS directives for the blocked GEMM kernel (top function "bbgemm").
# This file is pulled in by hls.tcl via `source ./gemm_dir`.
# Every directive except the final resource binding is disabled; remove the
# leading '#' to enable one. Loop labels and variable names must match gemm.c.

#mem
#set_directive_resource -core RAM_1P_BRAM "bbgemm" m1
#set_directive_resource -core RAM_1P_BRAM "bbgemm" m2
#set_directive_resource -core RAM_1P_BRAM "bbgemm" prod

#partitioning
#set_directive_array_partition -factor 64 -type cyclic bbgemm m1
#set_directive_array_partition -factor 64 -type cyclic bbgemm m2
#set_directive_array_partition -factor 64 -type cyclic bbgemm prod

#unrolling
#set_directive_unroll -factor 8 bbgemm/loopjj
#set_directive_unroll -factor 8 bbgemm/loopkk
#set_directive_unroll -factor 8 bbgemm/loopi
#set_directive_unroll -factor 8 bbgemm/loopk
#set_directive_unroll -factor 8 bbgemm/loopj

#pipeline
#set_directive_pipeline bbgemm/loopjj
#set_directive_pipeline bbgemm/loopkk
#set_directive_pipeline bbgemm/loopi
#set_directive_pipeline bbgemm/loopk
#set_directive_pipeline bbgemm/loopj

#resources
# Bind the local variable `mul` of bbgemm (the product temp_x * m2[...]) to the Mul core.
set_directive_resource -core Mul "bbgemm" mul
# Build rules for the n-cubed GEMM benchmark (kernel function gemm).
KERN=gemm
ALG=ncubed

CFLAGS?=-O3 -Wall -Wno-unused-label

SRCS=$(KERN).c local_support.c ../../common/support.c
FILES=$(SRCS) $(KERN).h ../../common/support.h

# Benchmark binary: kernel + support code linked with the common harness.
$(KERN): $(FILES) ../../common/harness.c
	$(CC) $(CFLAGS) -I../../common -o $(KERN) $(SRCS) ../../common/harness.c

# These targets name actions, not files; declaring them phony keeps a stray
# file called "run", "hls" or "clean" from disabling the rule.
.PHONY: run hls clean

run: $(KERN) input.data check.data
	./$(KERN) input.data check.data

# Build the input generator and run it to (re)write input.data.
generate: $(FILES) generate.c
	$(CC) $(CFLAGS) -I../../common -o generate $(SRCS) generate.c
	./generate

hls: $(KERN).c $(KERN).h
	vivado_hls hls.tcl

clean:
	rm -f $(KERN) generate output.data

# Settings consumed by the shared tracer / gem5 rules included below
# (those rule files live in ../../common).
ACCEL_NAME = gemm_ncubed
TEST_BIN = $(ACCEL_NAME)
export TRACE_OUTPUT_DIR=$(ACCEL_NAME)
ifndef WORKLOAD
export WORKLOAD=gemm
endif
include ../../common/Makefile.tracer
include ../../common/Makefile.gem5
//Define compute data type
#define TYPE double

//Specify row/column sizes
#define row_size 64
#define col_size 64
// Total number of matrix elements. Parenthesized so that N behaves as a single
// value inside larger expressions (e.g. `x / N` or `x % N`); the bare
// `row_size*col_size` expansion would bind to neighbouring operators instead.
#define N (row_size*col_size)

//Define the input range to operate over
#define MIN 0.
#define MAX 1.0

//Set number of iterations to execute
#define MAX_ITERATION 1

// Kernel entry point: multiplies the N-element matrices m1 and m2 into prod.
// Without DMA_MODE the m1/m2/prod arguments are overwritten with the host_*
// pointers inside the kernel; with DMA_MODE data is moved between them.
void gemm(TYPE* host_m1, TYPE* host_m2, TYPE* host_prod,
          TYPE* m1, TYPE* m2, TYPE* prod);
# Vivado HLS directives for the n-cubed GEMM kernel (top function "gemm").
# This file is pulled in by hls.tcl via `source ./gemm_dir`.
# Loop labels and variable names used here must match gemm.c exactly.

#mem
#set_directive_resource -core RAM_1P_BRAM "gemm" m1
#set_directive_resource -core RAM_1P_BRAM "gemm" m2
#set_directive_resource -core RAM_1P_BRAM "gemm" prod

#partitioning
#set_directive_array_partition -factor 64 -type cyclic gemm m1
#set_directive_array_partition -factor 64 -type cyclic gemm m2
#set_directive_array_partition -factor 64 -type cyclic gemm prod

#unrolling
#set_directive_unroll -factor 8 gemm/inner
#set_directive_unroll -factor 8 gemm/middle
#set_directive_unroll -factor 8 gemm/outer

#pipeline
set_directive_pipeline gemm/inner
set_directive_pipeline gemm/middle
# The outermost loop in gemm.c is labelled "outer". The previous spelling
# ("gemm/outter") named a label that does not exist, so the directive could
# never bind to a loop. The name is corrected here but the directive is left
# disabled so the effective configuration stays what it has been; note that
# pipelining a loop makes the tool unroll every loop nested inside it, so
# enable this one deliberately.
#set_directive_pipeline gemm/outer

#resources
set_directive_resource -core Mul "gemm" mult
# Build rules for the KMP string-matching benchmark (kernel function kmp).
KERN=kmp
ALG=kmp

CFLAGS?=-O3 -Wall -Wno-unused-label

SRCS=$(KERN).c local_support.c ../../common/support.c
FILES=$(SRCS) $(KERN).h ../../common/support.h

# Benchmark binary: kernel + support code linked with the common harness.
$(KERN): $(FILES) ../../common/harness.c
	$(CC) $(CFLAGS) -I../../common -o $(KERN) $(SRCS) ../../common/harness.c

# These targets name actions, not files; declaring them phony keeps a stray
# file called "run", "hls" or "clean" from disabling the rule.
.PHONY: run hls clean

run: $(KERN) input.data check.data
	./$(KERN) input.data check.data

# Build the input generator and run it to (re)write input.data.
generate: $(FILES) generate.c
	$(CC) $(CFLAGS) -I../../common -o generate $(SRCS) generate.c
	./generate

hls: $(KERN).c $(KERN).h
	vivado_hls hls.tcl

clean:
	rm -f $(KERN) generate output.data

# Settings consumed by the shared tracer / gem5 rules included below
# (those rule files live in ../../common).
ACCEL_NAME = kmp_kmp
TEST_BIN = $(ACCEL_NAME)
export TRACE_OUTPUT_DIR=$(ACCEL_NAME)
ifndef WORKLOAD
export WORKLOAD=kmp
endif
include ../../common/Makefile.tracer
include ../../common/Makefile.gem5
// Length of the search pattern in bytes (sizes bench_args_t::pattern and kmpNext).
#define PATTERN_SIZE 4
// Length of the text that is searched, in bytes; generate.c reads this many
// bytes from TR.txt into bench_args_t::input.
#define STRING_SIZE (32411)

// Kernel entry point (also the HLS top function, see hls.tcl: `set_top kmp`).
// NOTE(review): host_input / host_n_matches are presumably the host-side
// counterparts of input / n_matches, following the host_* convention of the
// other kernels -- confirm against kmp.c. The meaning of the int return value
// is not visible from this header.
int kmp(char* host_input,
        int32_t* host_n_matches,
        char* pattern,
        char* input,
        int32_t* kmpNext,
        int32_t* n_matches);
# Vivado HLS directives for the KMP kernel (functions "kmp" and "CPF").
# NOTE: hls.tcl currently has `#source ./kmp_dir` commented out, so none of
# these directives are applied unless that line is re-enabled.
# Lines starting with '#' followed by a directive are disabled alternatives.

#select functional units you want

#select memory resources
set_directive_resource -core RAM_1P_BRAM "CPF" pattern
set_directive_resource -core RAM_1P_BRAM "CPF" kmpNext
set_directive_resource -core RAM_1P_BRAM "kmp" pattern
set_directive_resource -core RAM_1P_BRAM "kmp" input

#loop pipelining factors
#set_directive_pipeline kmp/init
#set_directive_pipeline kmp/k1
#set_directive_pipeline kmp/k2
#set_directive_pipeline CPF/c1
#set_directive_pipeline CPF/c2

#loop unrolling
# (function name is CPF, as in the pipeline and resource directives above)
#set_directive_unroll -factor 2 kmp/init
#set_directive_unroll -factor 2 kmp/k1
#set_directive_unroll -factor 2 kmp/k2
#set_directive_unroll -factor 2 CPF/c1
#set_directive_unroll -factor 2 CPF/c2

#Array partitioning
set_directive_array_partition -factor 2 -type cyclic kmp pattern
#set_directive_array_partition -factor 2 -type cyclic kmp input
# NOTE(review): the two CPF lines below combine -factor with -type complete and
# name an "input" array for CPF, while the resource directives above only list
# pattern and kmpNext for CPF -- verify before enabling.
#set_directive_array_partition -factor 2 -type complete CPF pattern
#set_directive_array_partition -factor 2 -type complete CPF input
# Build rules for the grid-based molecular dynamics benchmark (kernel function md).
KERN=md
ALG=grid

CFLAGS?=-O3 -Wall -Wno-unused-label

SRCS=$(KERN).c local_support.c ../../common/support.c
FILES=$(SRCS) $(KERN).h ../../common/support.h

# Benchmark binary: kernel + support code linked with the common harness.
$(KERN): $(FILES) ../../common/harness.c
	$(CC) $(CFLAGS) -I../../common -o $(KERN) $(SRCS) ../../common/harness.c

# These targets name actions, not files; declaring them phony keeps a stray
# file called "run", "hls" or "clean" from disabling the rule.
.PHONY: run hls clean

run: $(KERN) input.data check.data
	./$(KERN) input.data check.data

# Build the input generator and run it to (re)write input.data.
generate: $(FILES) generate.c
	$(CC) $(CFLAGS) -I../../common -o generate $(SRCS) generate.c
	./generate

hls: $(KERN).c $(KERN).h
	vivado_hls hls.tcl

clean:
	rm -f $(KERN) generate output.data

# Settings consumed by the shared tracer / gem5 rules included below
# (those rule files live in ../../common).
ACCEL_NAME = md_grid
TEST_BIN = $(ACCEL_NAME)
export TRACE_OUTPUT_DIR=$(ACCEL_NAME)
ifndef WORKLOAD
export WORKLOAD=md
endif
include ../../common/Makefile.tracer
include ../../common/Makefile.gem5
# Build rules for the k-nearest-neighbour molecular dynamics benchmark
# (kernel function md_kernel).
KERN=md
ALG=knn

CFLAGS?=-O3 -Wall -Wno-unused-label

SRCS=$(KERN).c local_support.c ../../common/support.c
FILES=$(SRCS) $(KERN).h ../../common/support.h

# Benchmark binary: kernel + support code linked with the common harness.
$(KERN): $(FILES) ../../common/harness.c
	$(CC) $(CFLAGS) -I../../common -o $(KERN) $(SRCS) ../../common/harness.c

# These targets name actions, not files; declaring them phony keeps a stray
# file called "run", "hls" or "clean" from disabling the rule.
.PHONY: run hls clean

run: $(KERN) input.data check.data
	./$(KERN) input.data check.data

# Build the input generator and run it to (re)write input.data.
generate: $(FILES) generate.c
	$(CC) $(CFLAGS) -I../../common -o generate $(SRCS) generate.c
	./generate

hls: $(KERN).c $(KERN).h
	vivado_hls hls.tcl

clean:
	rm -f $(KERN) generate output.data

# Settings consumed by the shared tracer / gem5 rules included below
# (those rule files live in ../../common).
ACCEL_NAME = md_knn
TEST_BIN = $(ACCEL_NAME)
export TRACE_OUTPUT_DIR=$(ACCEL_NAME)
ifndef WORKLOAD
export WORKLOAD=md_kernel
endif
include ../../common/Makefile.tracer
include ../../common/Makefile.gem5
#set_directive_resource -core RAM_1P_BRAM "md_kernel" force_y 6 | #set_directive_resource -core RAM_1P_BRAM "md_kernel" force_z 7 | #set_directive_resource -core RAM_1P_BRAM "md_kernel" position_x 8 | #set_directive_resource -core RAM_1P_BRAM "md_kernel" position_y 9 | #set_directive_resource -core RAM_1P_BRAM "md_kernel" position_z 10 | #set_directive_resource -core RAM_1P_BRAM "md_kernel" NL 11 | 12 | #set_directive_array_partition -factor 64 -type cyclic md_kernel force_x 13 | #set_directive_array_partition -factor 64 -type cyclic md_kernel force_y 14 | #set_directive_array_partition -factor 64 -type cyclic md_kernel force_z 15 | #set_directive_array_partition -factor 64 -type cyclic md_kernel position_x 16 | #set_directive_array_partition -factor 64 -type cyclic md_kernel position_y 17 | #set_directive_array_partition -factor 64 -type cyclic md_kernel position_z 18 | #set_directive_array_partition -factor 64 -type cyclic md_kernel NL 19 | 20 | #set_directive_unroll -factor 8 md_kernel/loopi 21 | #set_directive_unroll -factor 8 md_kernel/loopj 22 | 23 | #set_directive_loop_flatten md_kernel/loopj 24 | #set_directive_loop_flatten md_kernel/loopj 25 | 26 | #set_directive_pipeline md_kernel/loopi 27 | #set_directive_pipeline md_kernel/loopj 28 | 29 | set_directive_resource -core Mul "md_kernel" mult 30 | -------------------------------------------------------------------------------- /MachSuite/md/knn/md.h: -------------------------------------------------------------------------------- 1 | /* 2 | Implementation based on: 3 | A. Danalis, G. Marin, C. McCurdy, J. S. Meredith, P. C. Roth, K. Spafford, V. Tipparaju, and J. S. Vetter. 4 | The scalable heterogeneous computing (shoc) benchmark suite. 5 | In Proceedings of the 3rd Workshop on General-Purpose Computation on Graphics Processing Units, 2010.
6 | */ 7 | 8 | #include 9 | #include 10 | #include "support.h" 11 | 12 | #define TYPE double 13 | 14 | // Problem Constants 15 | #define nAtoms 256 16 | #define maxNeighbors 16 17 | // LJ coefficients 18 | #define lj1 1.5 19 | #define lj2 2.0 20 | 21 | void md_kernel(TYPE* host_force_x, 22 | TYPE* host_force_y, 23 | TYPE* host_force_z, 24 | TYPE* host_position_x, 25 | TYPE* host_position_y, 26 | TYPE* host_position_z, 27 | int32_t* host_NL, 28 | TYPE* force_x, 29 | TYPE* force_y, 30 | TYPE* force_z, 31 | TYPE* position_x, 32 | TYPE* position_y, 33 | TYPE* position_z, 34 | int32_t* NL); 35 | //////////////////////////////////////////////////////////////////////////////// 36 | // Test harness interface code. 37 | 38 | struct bench_args_t { 39 | TYPE force_x[nAtoms]; 40 | TYPE force_y[nAtoms]; 41 | TYPE force_z[nAtoms]; 42 | TYPE position_x[nAtoms]; 43 | TYPE position_y[nAtoms]; 44 | TYPE position_z[nAtoms]; 45 | int32_t NL[nAtoms*maxNeighbors]; 46 | }; 47 | -------------------------------------------------------------------------------- /MachSuite/md/knn/point_stats.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import sys 4 | import numpy as np 5 | 6 | points = np.array( [(float(x),float(y),float(z)) for (x,y,z) in map(lambda L: L.strip().split(), sys.stdin.readlines())] ) 7 | 8 | N = points.shape[0] 9 | 10 | # Johan Philip. 
"The Probability Distribution of the Distance between two Random Points in a Box", numerically estimated with Mathematica 11 | # an underestimate, actually, because we set a lower bound at the vdW threshold 12 | expected_dist = 12.69 13 | 14 | dists = np.zeros((N,N)) 15 | for (i,A) in enumerate(points): 16 | for (j,B) in enumerate(points): 17 | if i!=j: 18 | dists[i,j] = np.linalg.norm(A-B) 19 | else: 20 | dists[i,j] = expected_dist; 21 | 22 | print 'Distance mean:',np.mean(dists) 23 | print 'Distance variance:',np.var(dists) 24 | minpair = np.unravel_index(np.argmin(dists), (N,N)) 25 | print 'Closest pair:',minpair, np.min(dists) 26 | print ' p0',points[minpair[0]] 27 | print ' p1',points[minpair[1]] 28 | maxpair = np.unravel_index(np.argmax(dists), (N,N)) 29 | print 'Furthest pair:',maxpair, np.max(dists),'( max',20.0*np.sqrt(3.),')' 30 | print ' p0',points[maxpair[0]] 31 | print ' p1',points[maxpair[1]] 32 | -------------------------------------------------------------------------------- /MachSuite/nw/nw/Makefile: -------------------------------------------------------------------------------- 1 | KERN=nw 2 | ALG=nw 3 | 4 | CFLAGS?=-O3 -Wall -Wno-unused-label 5 | 6 | SRCS=$(KERN).c local_support.c ../../common/support.c 7 | FILES=$(SRCS) $(KERN).h ../../common/support.h 8 | 9 | $(KERN): $(FILES) ../../common/harness.c 10 | $(CC) $(CFLAGS) -I../../common -o $(KERN) $(SRCS) ../../common/harness.c 11 | 12 | run: $(KERN) input.data check.data 13 | ./$(KERN) input.data check.data 14 | 15 | generate: $(FILES) generate.c 16 | $(CC) $(CFLAGS) -I../../common -o generate $(SRCS) generate.c 17 | ./generate 18 | 19 | hls: $(KERN).c $(KERN).h 20 | vivado_hls hls.tcl 21 | 22 | clean: 23 | rm -f $(KERN) generate output.data 24 | 25 | ACCEL_NAME = nw_nw 26 | TEST_BIN = $(ACCEL_NAME) 27 | export TRACE_OUTPUT_DIR=$(ACCEL_NAME) 28 | ifndef WORKLOAD 29 | export WORKLOAD=needwun 30 | endif 31 | include ../../common/Makefile.tracer 32 | include ../../common/Makefile.gem5 33 | 
-------------------------------------------------------------------------------- /MachSuite/nw/nw/check.data: -------------------------------------------------------------------------------- 1 | %% 2 | cggccgcttag-tgggtgcggtgctaagggggctagagggcttg-tc-gcggggcacgggacatgcg--gcg-t--cgtaaaccaaacat-g-gcgccgggag-attatgctcttgcacg-acag-ta----g-gat-aaagc---agc-t_________________________________________________________________________________________________________ 3 | %% 4 | --------tagct-ggtaccgt-ctaa-gtggc--ccggg-ttgagcggctgggca--gg-c-tg-gaag-gttagcgt-aaggagatatagtccg-cgggtgcagggtg-gctggcccgtacagctacctggcgctgtgcgcgggagctt_________________________________________________________________________________________________________ 5 | %% 6 | -------------------------------------------------------------------------------- /MachSuite/nw/nw/generate.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "nw.h" 11 | int main(int argc, char **argv) 12 | { 13 | struct bench_args_t data; 14 | // Must be exact length 15 | char seqA[ALEN+1] = "tcgacgaaataggatgacagcacgttctcgtattagagggccgcggtacaaaccaaatgctgcggcgtacagggcacggggcgctgttcgggagatcgggggaatcgtggcgtgggtgattcgccggc"; 16 | char seqB[BLEN+1] = "ttcgagggcgcgtgtcgcggtccatcgacatgcccggtcggtgggacgtgggcgcctgatatagaggaatgcgattggaaggtcggacgggtcggcgagttgggcccggtgaatctgccatggtcgat"; 17 | int fd; 18 | 19 | assert( ALEN==strlen(seqA) && "String initializers must be exact length"); 20 | assert( BLEN==strlen(seqB) && "String initializers must be exact length"); 21 | 22 | // Fill data structure 23 | memcpy(data.seqA, seqA, ALEN); 24 | memcpy(data.seqB, seqB, BLEN); 25 | 26 | // Open and write 27 | fd = open("input.data", O_WRONLY|O_CREAT|O_TRUNC, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH); 28 | assert( fd>0 && "Couldn't open input data file" ); 29 | data_to_input(fd, (void *)(&data)); 30 | 31 | 
return 0; 32 | } 33 | -------------------------------------------------------------------------------- /MachSuite/nw/nw/hls.tcl: -------------------------------------------------------------------------------- 1 | open_project nw_syn 2 | 3 | add_files nw.c 4 | add_files input.data 5 | add_files check.data 6 | add_files -tb ../../common/harness.c 7 | 8 | #add_files -tb nw_test.c 9 | 10 | set_top needwun 11 | 12 | open_solution -reset solution 13 | set_part virtex7 14 | create_clock -period 10 15 | #source ./nw_dir 16 | #config_rtl -reset all -reset_level low 17 | csynth_design 18 | cosim_design -rtl verilog -tool modelsim 19 | #-trace_level all 20 | 21 | exit 22 | -------------------------------------------------------------------------------- /MachSuite/nw/nw/input.data: -------------------------------------------------------------------------------- 1 | %% 2 | tcgacgaaataggatgacagcacgttctcgtattagagggccgcggtacaaaccaaatgctgcggcgtacagggcacggggcgctgttcgggagatcgggggaatcgtggcgtgggtgattcgccggc 3 | %% 4 | ttcgagggcgcgtgtcgcggtccatcgacatgcccggtcggtgggacgtgggcgcctgatatagaggaatgcgattggaaggtcggacgggtcggcgagttgggcccggtgaatctgccatggtcgat 5 | %% 6 | -------------------------------------------------------------------------------- /MachSuite/nw/nw/nw.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "support.h" 4 | 5 | #define ALEN 128 6 | #define BLEN 128 7 | 8 | void needwun(char* host_SEQA, 9 | char* host_SEQB, 10 | char* host_alignedA, 11 | char* host_alignedB, 12 | char* SEQA, 13 | char* SEQB, 14 | char* alignedA, 15 | char* alignedB, 16 | int* M, 17 | char* ptr); 18 | //////////////////////////////////////////////////////////////////////////////// 19 | // Test harness interface code. 
20 | 21 | struct bench_args_t { 22 | char seqA[ALEN]; 23 | char seqB[BLEN]; 24 | char alignedA[ALEN+BLEN]; 25 | char alignedB[ALEN+BLEN]; 26 | int M[(ALEN+1)*(BLEN+1)]; 27 | char ptr[(ALEN+1)*(BLEN+1)]; 28 | }; 29 | -------------------------------------------------------------------------------- /MachSuite/nw/nw/nw_dir: -------------------------------------------------------------------------------- 1 | #select functional units you want 2 | set_directive_resource -core Mul "needwun" row 3 | set_directive_resource -core Mul "needwun" row_up 4 | set_directive_resource -core Mul "needwun" r 5 | 6 | #select memory resources (names must match the array arguments of needwun) 7 | set_directive_resource -core RAM_1P_BRAM "needwun" SEQA 8 | set_directive_resource -core RAM_1P_BRAM "needwun" SEQB 9 | set_directive_resource -core RAM_1P_BRAM "needwun" alignedA 10 | set_directive_resource -core RAM_1P_BRAM "needwun" alignedB 11 | 12 | #loop pipelining factors 13 | #set_directive_pipeline needwun/init_row 14 | #set_directive_pipeline needwun/init_col 15 | #set_directive_pipeline needwun/fill_out 16 | #set_directive_pipeline needwun/fill_in 17 | #set_directive_pipeline needwun/trace 18 | #set_directive_pipeline needwun/pad_a 19 | #set_directive_pipeline needwun/pad_b 20 | 21 | #loop unrolling 22 | #set_directive_unroll -factor 2 needwun/init 23 | #set_directive_unroll -factor 2 needwun/init_row 24 | #set_directive_unroll -factor 2 needwun/init_col 25 | #set_directive_unroll -factor 2 needwun/fill_out 26 | #set_directive_unroll -factor 2 needwun/fill_in 27 | #set_directive_unroll -factor 2 needwun/trace 28 | #set_directive_unroll -factor 2 needwun/pad_a 29 | #set_directive_unroll -factor 2 needwun/pad_b 30 | 31 | #Array partitioning 32 | #set_directive_array_partition -factor 2 -type cyclic needwun SEQA 33 | #set_directive_array_partition -factor 2 -type cyclic needwun SEQB 34 | #set_directive_array_partition -factor 2 -type cyclic needwun alignedA 35 | #set_directive_array_partition -factor 2 -type cyclic needwun alignedB 36
| -------------------------------------------------------------------------------- /MachSuite/nw/nw/nw_test.c: -------------------------------------------------------------------------------- 1 | #include "nw.h" 2 | //#include "seq.h" 3 | 4 | int main(){ 5 | int i; 6 | char allignedA[N+M]; 7 | char allignedB[M+M]; 8 | 9 | char seqA[N] = "tcgacgaaataggatgacagcacgttctcgtattagagggccgcggtacaaaccaaatgctgcggcgtacagggcacggggcgctgttcgggagatcgggggaatcgtggcgtgggtgattcgccggc"; 10 | 11 | char seqB[M] = "ttcgagggcgcgtgtcgcggtccatcgacatgcccggtcggtgggacgtgggcgcctgatatagaggaatgcgattggaaggtcggacgggtcggcgagttgggcccggtgaatctgccatggtcgat"; 12 | char sA[N]; 13 | char sB[M]; 14 | for(i=0;i 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "sort.h" 11 | 12 | int main(int argc, char **argv) 13 | { 14 | struct bench_args_t data; 15 | int i, fd; 16 | struct prng_rand_t state; 17 | 18 | // Fill data structure 19 | prng_srand(1,&state); 20 | for(i=0; i0 && "Couldn't open input data file" ); 27 | data_to_input(fd, (void *)(&data)); 28 | 29 | return 0; 30 | } 31 | -------------------------------------------------------------------------------- /MachSuite/sort/merge/hls.tcl: -------------------------------------------------------------------------------- 1 | open_project sort_syn 2 | 3 | add_files sort.c 4 | add_files input.data 5 | add_files check.data 6 | add_files -tb ../../common/harness.c 7 | 8 | set_top ms_mergesort 9 | 10 | open_solution -reset solution 11 | set_part virtex7 12 | create_clock -period 10 13 | source ./sort_dir 14 | csynth_design 15 | cosim_design -rtl verilog -tool modelsim -trace_level all 16 | 17 | exit 18 | -------------------------------------------------------------------------------- /MachSuite/sort/merge/sort.c: -------------------------------------------------------------------------------- 1 | #include "sort.h" 2 | 3 | #ifdef DMA_MODE 4 | #include "gem5/dma_interface.h" 5 | #endif 6 | 7 | void merge(TYPE 
a[SIZE], int start, int m, int stop){ 8 | TYPE temp[SIZE]; 9 | int i, j, k; 10 | 11 | merge_label1 : for(i=start; i<=m; i++){ 12 | temp[i] = a[i]; 13 | } 14 | 15 | merge_label2 : for(j=m+1; j<=stop; j++){ 16 | temp[m+1+stop-j] = a[j]; 17 | } 18 | 19 | i = start; 20 | j = stop; 21 | 22 | merge_label3 : for(k=start; k<=stop; k++){ 23 | TYPE tmp_j = temp[j]; 24 | TYPE tmp_i = temp[i]; 25 | if(tmp_j < tmp_i) { 26 | a[k] = tmp_j; 27 | j--; 28 | } else { 29 | a[k] = tmp_i; 30 | i++; 31 | } 32 | } 33 | } 34 | 35 | void ms_mergesort(TYPE* host_a, TYPE* a) { 36 | int start, stop; 37 | int i, m, from, mid, to; 38 | 39 | #ifdef DMA_MODE 40 | dmaLoad(a, host_a, SIZE * sizeof(TYPE)); 41 | #else 42 | a = host_a; 43 | #endif 44 | 45 | start = 0; 46 | stop = SIZE; 47 | 48 | mergesort_label1 : for(m=1; m 2 | #include 3 | #include 4 | #include "support.h" 5 | 6 | #define SIZE 2048 7 | #define TYPE int32_t 8 | #define TYPE_MAX INT32_MAX 9 | 10 | void ms_mergesort(TYPE* host_a, TYPE* a); 11 | 12 | //////////////////////////////////////////////////////////////////////////////// 13 | // Test harness interface code. 
14 | 15 | struct bench_args_t { 16 | TYPE a[SIZE]; 17 | }; 18 | -------------------------------------------------------------------------------- /MachSuite/sort/merge/sort_dir: -------------------------------------------------------------------------------- 1 | #select memory resources 2 | set_directive_resource -core RAM_1P_BRAM "ms_mergesort" a 3 | set_directive_resource -core RAM_1P_BRAM "merge" a 4 | 5 | #loop pipelining factors 6 | #set_directive_pipeline ms_mergesort/mergesort_label1 7 | set_directive_pipeline ms_mergesort/mergesort_label2 8 | 9 | set_directive_pipeline merge/merge_label1 10 | set_directive_pipeline merge/merge_label2 11 | set_directive_pipeline merge/merge_label3 12 | 13 | #loop unrolling 14 | #set_directive_unroll -factor 2 ms_mergesort/mergesort_label1 15 | #set_directive_unroll -factor 2 ms_mergesort/mergesort_label2 16 | 17 | #set_directive_unroll -factor 2 merge/merge_label1 18 | #set_directive_unroll -factor 2 merge/merge_label2 19 | #set_directive_unroll -factor 2 merge/merge_label3 20 | 21 | #Array partitioning 22 | #set_directive_array_partition -factor 2 -type cyclic ms_mergesort a 23 | #set_directive_array_partition -factor 2 -type cyclic merge a 24 | -------------------------------------------------------------------------------- /MachSuite/sort/radix/Makefile: -------------------------------------------------------------------------------- 1 | KERN=sort 2 | ALG=radix 3 | 4 | CFLAGS?=-O3 -Wall -Wno-unused-label 5 | 6 | SRCS=$(KERN).c local_support.c ../../common/support.c 7 | FILES=$(SRCS) $(KERN).h ../../common/support.h 8 | 9 | $(KERN): $(FILES) ../../common/harness.c 10 | $(CC) $(CFLAGS) -I../../common -o $(KERN) $(SRCS) ../../common/harness.c 11 | 12 | run: $(KERN) input.data check.data 13 | ./$(KERN) input.data check.data 14 | 15 | generate: $(FILES) generate.c 16 | $(CC) $(CFLAGS) -I../../common -o generate $(SRCS) generate.c 17 | ./generate 18 | 19 | hls: $(KERN).c $(KERN).h 20 | vivado_hls hls.tcl 21 | 22 | clean: 23 | rm 
-f $(KERN) generate output.data 24 | 25 | ACCEL_NAME = sort_radix 26 | TEST_BIN = $(ACCEL_NAME) 27 | export TRACE_OUTPUT_DIR=$(ACCEL_NAME) 28 | ifndef WORKLOAD 29 | export WORKLOAD=ss_sort 30 | endif 31 | include ../../common/Makefile.tracer 32 | include ../../common/Makefile.gem5 33 | -------------------------------------------------------------------------------- /MachSuite/sort/radix/generate.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "sort.h" 11 | 12 | int main(int argc, char **argv) 13 | { 14 | struct bench_args_t data; 15 | int i, fd; 16 | struct prng_rand_t state; 17 | 18 | // Fill data structure 19 | prng_srand(1,&state); 20 | for(i=0; i0 && "Couldn't open input data file" ); 26 | data_to_input(fd, (void *)(&data)); 27 | 28 | return 0; 29 | } 30 | -------------------------------------------------------------------------------- /MachSuite/sort/radix/hls.tcl: -------------------------------------------------------------------------------- 1 | open_project sort_syn 2 | 3 | set_top ss_sort 4 | 5 | add_files sort.c 6 | add_files input.data 7 | add_files check.data 8 | add_files -tb ../../common/harness.c 9 | 10 | set clock 10 11 | set part virtex7 12 | 13 | 14 | open_solution solution 15 | set_part $part 16 | create_clock -period $clock 17 | source ./inline_dir 18 | 19 | #config_rtl -reset all -reset_level low 20 | set_clock_uncertainty 0 21 | csynth_design 22 | cosim_design -rtl verilog -tool modelsim -trace_level all 23 | 24 | exit 25 | -------------------------------------------------------------------------------- /MachSuite/sort/radix/inline_dir: -------------------------------------------------------------------------------- 1 | set_directive_inline -off update 2 | set_directive_inline -off hist 3 | set_directive_inline -off init 4 | set_directive_inline -off last_step_scan 5 | 
set_directive_inline -off sum_scan 6 | set_directive_inline -off local_scan 7 | -------------------------------------------------------------------------------- /MachSuite/sort/radix/sort.h: -------------------------------------------------------------------------------- 1 | /* 2 | Implementation based on algorithm described in: 3 | A. Danalis, G. Marin, C. McCurdy, J. S. Meredith, P. C. Roth, K. Spafford, V. Tipparaju, and J. S. Vetter. 4 | The scalable heterogeneous computing (shoc) benchmark suite. 5 | In Proceedings of the 3rd Workshop on General-Purpose Computation on Graphics Processing Units, 2010 6 | */ 7 | 8 | #include 9 | #include 10 | #include 11 | #include "support.h" 12 | 13 | #define TYPE int32_t 14 | #define TYPE_MAX INT32_MAX 15 | 16 | #define SIZE 2048 17 | #define NUMOFBLOCKS 512 18 | 19 | #define ELEMENTSPERBLOCK 4 20 | #define RADIXSIZE 4 21 | #define BUCKETSIZE NUMOFBLOCKS*RADIXSIZE 22 | #define MASK 0x3 23 | 24 | #define SCAN_BLOCK 16 25 | #define SCAN_RADIX BUCKETSIZE/SCAN_BLOCK 26 | 27 | void ss_sort(int* host_a, 28 | int* a, 29 | int* b, 30 | int* bucket, 31 | int* sum); 32 | //////////////////////////////////////////////////////////////////////////////// 33 | // Test harness interface code. 34 | 35 | struct bench_args_t { 36 | int a[SIZE]; 37 | int b[SIZE]; 38 | // Need one extra bucket (for overflow of histogram?) 
39 | int bucket[BUCKETSIZE+1]; 40 | int sum[SCAN_RADIX]; 41 | }; 42 | -------------------------------------------------------------------------------- /MachSuite/spmv/crs/Makefile: -------------------------------------------------------------------------------- 1 | KERN=spmv 2 | ALG=crs 3 | 4 | CFLAGS?=-O3 -Wall -Wno-unused-label 5 | 6 | SRCS=$(KERN).c local_support.c ../../common/support.c 7 | FILES=$(SRCS) $(KERN).h ../../common/support.h 8 | 9 | $(KERN): $(FILES) ../../common/harness.c 10 | $(CC) $(CFLAGS) -I../../common -o $(KERN) $(SRCS) ../../common/harness.c 11 | 12 | run: $(KERN) input.data check.data 13 | ./$(KERN) input.data check.data 14 | 15 | generate: $(FILES) generate.c 16 | $(CC) $(CFLAGS) -I../../common -o generate $(SRCS) generate.c 17 | ./generate 18 | 19 | hls: $(KERN).c $(KERN).h 20 | vivado_hls hls.tcl 21 | 22 | clean: 23 | rm -f $(KERN) generate output.data 24 | 25 | ACCEL_NAME = spmv_crs 26 | TEST_BIN = $(ACCEL_NAME) 27 | export TRACE_OUTPUT_DIR=$(ACCEL_NAME) 28 | ifndef WORKLOAD 29 | export WORKLOAD=spmv 30 | endif 31 | include ../../common/Makefile.tracer 32 | include ../../common/Makefile.gem5 33 | -------------------------------------------------------------------------------- /MachSuite/spmv/crs/hls.tcl: -------------------------------------------------------------------------------- 1 | open_project spmv_syn 2 | 3 | add_files spmv.c 4 | add_files input.data 5 | add_files check.data 6 | add_files -tb ../../common/harness.c 7 | 8 | set_top spmv 9 | 10 | open_solution -reset solution 11 | set_part virtex7 12 | create_clock -period 10 13 | source ./spmv_dir 14 | csynth_design 15 | cosim_design -rtl verilog -tool modelsim 16 | 17 | exit 18 | -------------------------------------------------------------------------------- /MachSuite/spmv/crs/spmv.c: -------------------------------------------------------------------------------- 1 | /* 2 | Based on algorithm described here: 3 | 
http://www.cs.berkeley.edu/~mhoemmen/matrix-seminar/slides/UCB_sparse_tutorial_1.pdf 4 | */ 5 | 6 | #include "spmv.h" 7 | 8 | #ifdef DMA_MODE 9 | #include "gem5/dma_interface.h" 10 | #endif 11 | 12 | void spmv(TYPE* host_val, 13 | int32_t* host_cols, 14 | int32_t* host_rowDelimiters, 15 | TYPE* host_vec, 16 | TYPE* host_out, 17 | TYPE* val, 18 | int32_t* cols, 19 | int32_t* rowDelimiters, 20 | TYPE* vec, 21 | TYPE* out) { 22 | int i, j; 23 | TYPE sum, Si; 24 | 25 | #ifdef DMA_MODE 26 | dmaLoad(rowDelimiters, host_rowDelimiters, (N + 1) * sizeof(int32_t)); 27 | dmaLoad(vec, host_vec, N * sizeof(TYPE)); 28 | dmaLoad(val, host_val, NNZ * sizeof(TYPE)); 29 | dmaLoad(cols, host_cols, NNZ * sizeof(int32_t)); 30 | #else 31 | rowDelimiters = host_rowDelimiters; 32 | vec = host_vec; 33 | val = host_val; 34 | cols = host_cols; 35 | out = host_out; 36 | #endif 37 | 38 | spmv_1 : for(i = 0; i < N; i++){ 39 | sum = 0; Si = 0; 40 | int tmp_begin = rowDelimiters[i]; 41 | int tmp_end = rowDelimiters[i+1]; 42 | spmv_2 : for (j = tmp_begin; j < tmp_end; j++){ 43 | Si = val[j] * vec[cols[j]]; 44 | sum = sum + Si; 45 | } 46 | out[i] = sum; 47 | } 48 | #ifdef DMA_MODE 49 | dmaStore(host_out, out, N * sizeof(TYPE)); 50 | #endif 51 | } 52 | 53 | 54 | -------------------------------------------------------------------------------- /MachSuite/spmv/crs/spmv.h: -------------------------------------------------------------------------------- 1 | /* 2 | Based on algorithm described here: 3 | http://www.cs.berkeley.edu/~mhoemmen/matrix-seminar/slides/UCB_sparse_tutorial_1.pdf 4 | */ 5 | 6 | #include 7 | #include 8 | #include "support.h" 9 | 10 | // These constants valid for the IEEE 494 bus interconnect matrix 11 | #define NNZ 1666 12 | #define N 494 13 | 14 | #define TYPE double 15 | 16 | void spmv(TYPE* host_val, 17 | int32_t* host_cols, 18 | int32_t* host_rowDelimiters, 19 | TYPE* host_vec, 20 | TYPE* host_out, 21 | TYPE* val, 22 | int32_t* cols, 23 | int32_t* rowDelimiters, 24 | TYPE* vec, 
25 | TYPE* out); 26 | //////////////////////////////////////////////////////////////////////////////// 27 | // Test harness interface code. 28 | 29 | struct bench_args_t { 30 | TYPE val[NNZ]; 31 | int32_t cols[NNZ]; 32 | int32_t rowDelimiters[N+1]; 33 | TYPE vec[N]; 34 | TYPE out[N]; 35 | }; 36 | -------------------------------------------------------------------------------- /MachSuite/spmv/crs/spmv_dir: -------------------------------------------------------------------------------- 1 | set_directive_resource -core RAM_1P_BRAM "spmv" val 2 | set_directive_resource -core RAM_1P_BRAM "spmv" cols 3 | set_directive_resource -core RAM_1P_BRAM "spmv" rowDelimiters 4 | set_directive_resource -core RAM_1P_BRAM "spmv" vec 5 | set_directive_resource -core RAM_1P_BRAM "spmv" out 6 | 7 | #set_directive_array_partition -factor 64 -type cyclic spmv val 8 | #set_directive_array_partition -factor 64 -type cyclic spmv cols 9 | #set_directive_array_partition -factor 64 -type cyclic spmv rowDelimiters 10 | #set_directive_array_partition -factor 64 -type cyclic spmv out 11 | 12 | #set_directive_unroll -factor 8 spmv/spmv_1 13 | #set_directive_unroll -factor 8 spmv/spmv_2 14 | 15 | #set_directive_pipeline spmv/spmv_1 16 | set_directive_pipeline spmv/spmv_2 17 | 18 | set_directive_resource -core Mul "spmv" Si 19 | -------------------------------------------------------------------------------- /MachSuite/spmv/crs/symmetry.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Changes a file in MatrixMarket "symmetric" format into one in "general" format 4 | # This makes it easier to parse without changing the input. 
5 | 6 | import sys 7 | 8 | for line in sys.stdin.readlines()[2:]: 9 | (row,col,value) = line.strip().split() 10 | if row==col: 11 | print row,col,value 12 | else: 13 | print row,col,value 14 | print col,row,value 15 | -------------------------------------------------------------------------------- /MachSuite/spmv/ellpack/Makefile: -------------------------------------------------------------------------------- 1 | KERN=spmv 2 | ALG=ellpack 3 | 4 | CFLAGS?=-O3 -Wall -Wno-unused-label 5 | 6 | SRCS=$(KERN).c local_support.c ../../common/support.c 7 | FILES=$(SRCS) $(KERN).h ../../common/support.h 8 | 9 | $(KERN): $(FILES) ../../common/harness.c 10 | $(CC) $(CFLAGS) -I../../common -o $(KERN) $(SRCS) ../../common/harness.c 11 | 12 | run: $(KERN) input.data check.data 13 | ./$(KERN) input.data check.data 14 | 15 | generate: $(FILES) generate.c 16 | $(CC) $(CFLAGS) -I../../common -o generate $(SRCS) generate.c 17 | ./generate 18 | 19 | hls: $(KERN).c $(KERN).h 20 | vivado_hls hls.tcl 21 | 22 | clean: 23 | rm -f $(KERN) generate output.data 24 | 25 | ACCEL_NAME = spmv_ellpack 26 | TEST_BIN = $(ACCEL_NAME) 27 | export TRACE_OUTPUT_DIR=$(ACCEL_NAME) 28 | ifndef WORKLOAD 29 | export WORKLOAD=ellpack 30 | endif 31 | include ../../common/Makefile.tracer 32 | include ../../common/Makefile.gem5 33 | -------------------------------------------------------------------------------- /MachSuite/spmv/ellpack/hls.tcl: -------------------------------------------------------------------------------- 1 | open_project spmv_syn 2 | 3 | add_files spmv.c 4 | add_files input.data 5 | add_files check.data 6 | add_files -tb ../../common/harness.c 7 | 8 | set_top ellpack 9 | 10 | open_solution -reset solution 11 | set_part virtex7 12 | create_clock -period 10 13 | source ./spmv_dir 14 | csynth_design 15 | cosim_design -rtl verilog -tool modelsim -trace_level all 16 | 17 | exit 18 | -------------------------------------------------------------------------------- /MachSuite/spmv/ellpack/spmv.c: 
-------------------------------------------------------------------------------- 1 | /* 2 | Based on algorithm described here: 3 | http://www.cs.berkeley.edu/~mhoemmen/matrix-seminar/slides/UCB_sparse_tutorial_1.pdf 4 | */ 5 | 6 | #include "spmv.h" 7 | 8 | #ifdef DMA_MODE 9 | #include "gem5/dma_interface.h" 10 | #endif 11 | 12 | void ellpack(TYPE* host_nzval, 13 | int32_t* host_cols, 14 | TYPE* host_vec, 15 | TYPE* host_out, 16 | TYPE* nzval, 17 | int32_t* cols, 18 | TYPE* vec, 19 | TYPE* out) 20 | { 21 | int i, j; 22 | TYPE Si; 23 | 24 | #ifdef DMA_MODE 25 | dmaLoad(nzval, host_nzval, (N*L) * sizeof(TYPE)); 26 | dmaLoad(cols, host_cols, (N*L) * sizeof(int32_t)); 27 | dmaLoad(vec, host_vec, (N) * sizeof(TYPE)); 28 | #else 29 | nzval = host_nzval; 30 | cols = host_cols; 31 | vec = host_vec; 32 | out = host_out; 33 | #endif 34 | 35 | ellpack_1 : for (i=0; i 7 | #include 8 | #include "support.h" 9 | 10 | // These constants valid for the IEEE 494 bus interconnect matrix 11 | #define NNZ 1666 12 | #define N 494 13 | #define L 10 14 | 15 | #define TYPE double 16 | 17 | void ellpack(TYPE* host_nzval, 18 | int32_t* host_cols, 19 | TYPE* host_vec, 20 | TYPE* host_out, 21 | TYPE* nzval, 22 | int32_t* cols, 23 | TYPE* vec, 24 | TYPE* out); 25 | //////////////////////////////////////////////////////////////////////////////// 26 | // Test harness interface code. 
27 | 28 | struct bench_args_t { 29 | TYPE nzval[N*L]; 30 | int32_t cols[N*L]; 31 | TYPE vec[N]; 32 | TYPE out[N]; 33 | }; 34 | -------------------------------------------------------------------------------- /MachSuite/spmv/ellpack/spmv_dir: -------------------------------------------------------------------------------- 1 | set_directive_resource -core RAM_1P_BRAM "ellpack" nzval 2 | set_directive_resource -core RAM_1P_BRAM "ellpack" cols 3 | set_directive_resource -core RAM_1P_BRAM "ellpack" vec 4 | set_directive_resource -core RAM_1P_BRAM "ellpack" out 5 | 6 | #set_directive_array_partition -factor 64 -type cyclic ellpack nzval 7 | #set_directive_array_partition -factor 64 -type cyclic ellpack cols 8 | #set_directive_array_partition -factor 64 -type cyclic ellpack vec 9 | #set_directive_array_partition -factor 64 -type cyclic ellpack out 10 | 11 | #set_directive_unroll -factor 8 ellpack/ellpack_1 12 | #set_directive_unroll -factor 8 ellpack/ellpack_2 13 | 14 | #set_directive_pipeline ellpack/ellpack_1 15 | set_directive_pipeline ellpack/ellpack_2 16 | 17 | set_directive_resource -core Mul "ellpack" Si 18 | -------------------------------------------------------------------------------- /MachSuite/spmv/ellpack/spmv_test.c: -------------------------------------------------------------------------------- 1 | //http://www.cs.berkeley.edu/~mhoemmen/matrix-seminar/slides/UCB_sparse_tutorial_1.pdf 2 | #include "spmv.h" 3 | 4 | #define ran (TYPE)(((double) rand() / (RAND_MAX)) * (MAX-MIN) + MIN) 5 | 6 | void fillVal(TYPE nzval[N*L], int colind[N*L], TYPE x[N]){ 7 | int j, cur_indx, i; 8 | srand48(8650341L); 9 | for (i = 0; i < N; i++){ 10 | x[i] = ran; 11 | cur_indx = 0; 12 | for(j=0; j < L; j++){ 13 | //MAKE BETTER!... 14 | ///With... you know... stats. 
15 | cur_indx = (TYPE)(((double) rand() / (RAND_MAX)) * ((L-1) - cur_indx) + cur_indx); 16 | printf("idx %d \n",cur_indx); 17 | if(cur_indx < L){ 18 | nzval[i*L + j] = ran; 19 | colind[i*L +j] = cur_indx; 20 | } 21 | } 22 | } 23 | } 24 | 25 | void initOut(TYPE y[N]){ 26 | int i; 27 | for (i=0; i 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "stencil.h" 11 | 12 | int main(int argc, char **argv) 13 | { 14 | struct bench_args_t data; 15 | int i, fd; 16 | struct prng_rand_t state; 17 | 18 | // Fill data structure 19 | prng_srand(1,&state); 20 | for(i=0; i0 && "Couldn't open input data file" ); 28 | data_to_input(fd, (void *)(&data)); 29 | 30 | return 0; 31 | } 32 | -------------------------------------------------------------------------------- /MachSuite/stencil/stencil2d/hls.tcl: -------------------------------------------------------------------------------- 1 | open_project stencil_syn 2 | 3 | add_files stencil.c -cflags "-I../../common" 4 | add_files input.data 5 | add_files check.data 6 | #add_files -tb ../../common/support.h 7 | add_files -tb ../../common/support.c -cflags "-I../../common -g" 8 | add_files -tb local_support.c -cflags "-I../../common -g" 9 | add_files -tb ../../common/harness.c -cflags "-I../../common -g" 10 | 11 | set_top stencil 12 | open_solution -reset solution 13 | 14 | set_part virtex7 15 | create_clock -period 10 16 | source ./stencil_dir 17 | 18 | 19 | csynth_design 20 | cosim_design -rtl verilog -tool modelsim 21 | 22 | exit 23 | -------------------------------------------------------------------------------- /MachSuite/stencil/stencil2d/stencil.c: -------------------------------------------------------------------------------- 1 | #include "stencil.h" 2 | 3 | #ifdef DMA_MODE 4 | #include "gem5/dma_interface.h" 5 | #endif 6 | 7 | void stencil(TYPE* host_orig, 8 | TYPE* host_sol, 9 | TYPE* orig, 10 | TYPE* sol, 11 | TYPE* filter) { 12 | int r, c, k1, k2; 13 | TYPE temp, mul; 
14 | 15 | #ifdef DMA_MODE 16 | dmaLoad(orig, host_orig, row_size * col_size * sizeof(TYPE)); 17 | // This is used to zero-initialize the array. 18 | dmaLoad(sol, host_sol, row_size * col_size * sizeof(TYPE)); 19 | #else 20 | orig = host_orig; 21 | sol = host_sol; 22 | #endif 23 | 24 | stencil_label1:for (r=0; r 2 | #include 3 | #include "support.h" 4 | 5 | //Define input sizes 6 | #define col_size 64 7 | #define row_size 128 8 | #define f_size 9 9 | 10 | //Data Bounds 11 | #define TYPE int32_t 12 | #define MAX 1000 13 | #define MIN 1 14 | 15 | //Set number of iterations to execute 16 | #define MAX_ITERATION 1 17 | 18 | void stencil(TYPE* host_orig, 19 | TYPE* host_sol, 20 | TYPE* orig, 21 | TYPE* sol, 22 | TYPE* filter); 23 | 24 | //////////////////////////////////////////////////////////////////////////////// 25 | // Test harness interface code. 26 | 27 | struct bench_args_t { 28 | TYPE orig[row_size*col_size]; 29 | TYPE sol[row_size*col_size]; 30 | TYPE filter[f_size]; 31 | }; 32 | -------------------------------------------------------------------------------- /MachSuite/stencil/stencil2d/stencil_dir: -------------------------------------------------------------------------------- 1 | #select functional units you want 2 | set_directive_resource -core Mul "stencil" mul 3 | 4 | #select memory resources 5 | set_directive_resource -core RAM_1P_BRAM "stencil" orig 6 | set_directive_resource -core RAM_1P_BRAM "stencil" sol 7 | 8 | #loop pipelining factors 9 | #set_directive_pipeline stencil/stencil_label1 10 | #set_directive_pipeline stencil/stencil_label2 11 | #set_directive_pipeline stencil/stencil_label3 12 | set_directive_pipeline stencil/stencil_label4 13 | 14 | #loop unrolling 15 | #set_directive_unroll -factor 2 stencil/stencil_label1 16 | 17 | #Array partitioning 18 | #set_directive_array_partition -factor 2 -type cyclic stencil sol 19 | #set_directive_array_partition -factor 2 -type cyclic stencil orig 20 | #set_directive_array_partition -type complete 
stencil filter 21 | -------------------------------------------------------------------------------- /MachSuite/stencil/stencil3d/Makefile: -------------------------------------------------------------------------------- 1 | KERN=stencil 2 | ALG=stencil3d 3 | 4 | CFLAGS?=-O3 -Wall -Wno-unused-label 5 | 6 | SRCS=$(KERN).c local_support.c ../../common/support.c 7 | FILES=$(SRCS) $(KERN).h ../../common/support.h 8 | 9 | $(KERN): $(FILES) ../../common/harness.c 10 | $(CC) $(CFLAGS) -I../../common -o $(KERN) $(SRCS) ../../common/harness.c 11 | 12 | run: $(KERN) input.data check.data 13 | ./$(KERN) input.data check.data 14 | 15 | generate: $(FILES) generate.c 16 | $(CC) $(CFLAGS) -I../../common -o generate $(SRCS) generate.c 17 | ./generate 18 | 19 | hls: $(KERN).c $(KERN).h 20 | vivado_hls hls.tcl 21 | 22 | clean: 23 | rm -f $(KERN) generate output.data 24 | 25 | ACCEL_NAME = stencil_stencil3d 26 | TEST_BIN = $(ACCEL_NAME) 27 | export TRACE_OUTPUT_DIR=$(ACCEL_NAME) 28 | ifndef WORKLOAD 29 | export WORKLOAD=stencil3d 30 | endif 31 | include ../../common/Makefile.tracer 32 | include ../../common/Makefile.gem5 33 | -------------------------------------------------------------------------------- /MachSuite/stencil/stencil3d/generate.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "stencil.h" 11 | 12 | int main(int argc, char **argv) 13 | { 14 | struct bench_args_t data; 15 | int i, fd; 16 | struct prng_rand_t state; 17 | 18 | // 3D discrete Laplacian 19 | data.C[0] = 6; 20 | data.C[1] = -1; 21 | // Random matrix 22 | prng_srand(1,&state); 23 | for(i=0; i0 && "Couldn't open input data file" ); 29 | data_to_input(fd, (void *)(&data)); 30 | 31 | return 0; 32 | } 33 | -------------------------------------------------------------------------------- /MachSuite/stencil/stencil3d/hls.tcl: 
-------------------------------------------------------------------------------- 1 | open_project stencil_syn 2 | 3 | add_files stencil.c 4 | add_files input.data 5 | add_files check.data 6 | add_files -tb ../../common/harness.c 7 | 8 | set_top stencil3d 9 | open_solution -reset solution 10 | 11 | set_part virtex7 12 | create_clock -period 10 13 | source ./stencil_dir 14 | 15 | csynth_design 16 | cosim_design -rtl verilog -tool modelsim -trace_level all 17 | 18 | exit 19 | -------------------------------------------------------------------------------- /MachSuite/stencil/stencil3d/stencil.h: -------------------------------------------------------------------------------- 1 | /* 2 | Implementation based on algorithm described in: 3 | "Stencil computation optimization and auto-tuning on state-of-the-art multicore architectures" 4 | K. Datta, M. Murphy, V. Volkov, S. Williams, J. Carter, L. Oliker, D. Patterson, J. Shalf, K. Yelick 5 | SC 2008 6 | */ 7 | 8 | #include 9 | #include 10 | #include "support.h" 11 | 12 | //Define input sizes 13 | #define height_size 32 14 | #define col_size 32 15 | #define row_size 16 16 | //Data Bounds 17 | #define TYPE int32_t 18 | #define MAX 1000 19 | #define MIN 1 20 | //Convenience Macros 21 | #define SIZE (row_size * col_size * height_size) 22 | #define INDX(_row_size,_col_size,_i,_j,_k) ((_i)+_row_size*((_j)+_col_size*(_k))) 23 | 24 | void stencil3d(TYPE* host_orig, 25 | TYPE* host_sol, 26 | TYPE* C, 27 | TYPE* orig, 28 | TYPE* sol); 29 | 30 | //////////////////////////////////////////////////////////////////////////////// 31 | // Test harness interface code. 
32 | 33 | struct bench_args_t { 34 | TYPE C[2]; 35 | TYPE orig[SIZE]; 36 | TYPE sol[SIZE]; 37 | }; 38 | -------------------------------------------------------------------------------- /MachSuite/stencil/stencil3d/stencil_dir: -------------------------------------------------------------------------------- 1 | #select functional units you want 2 | set_directive_resource -core Mul "stencil3d" mul1 3 | set_directive_resource -core Mul "stencil3d" mul2 4 | 5 | #select memory resources 6 | set_directive_resource -core RAM_1P_BRAM "stencil3d" orig 7 | set_directive_resource -core RAM_1P_BRAM "stencil3d" sol 8 | 9 | #loop pipelining factors 10 | set_directive_pipeline stencil3d/loop_height 11 | set_directive_pipeline stencil3d/loop_col 12 | set_directive_pipeline stencil3d/loop_row 13 | 14 | #loop unrolling 15 | #set_directive_unroll -factor 2 stencil3d/loop_height 16 | #set_directive_unroll -factor 2 stencil3d/loop_col 17 | #set_directive_unroll -factor 2 stencil3d/loop_row 18 | 19 | #Array partitioning 20 | #set_directive_array_partition -factor 2 -type cyclic stencil3d sol 21 | #set_directive_array_partition -factor 2 -type cyclic stencil3d orig 22 | -------------------------------------------------------------------------------- /MachSuite/templates/Makefile: -------------------------------------------------------------------------------- 1 | KERN=FIXME 2 | ALG=FIXME 3 | 4 | CFLAGS?=-O3 -Wall -Wno-unused-label 5 | 6 | SRCS=$(KERN).c support.c ../../common/support.c 7 | FILES=$(SRCS) $(KERN).h ../../common/support.h 8 | 9 | $(KERN): $(FILES) ../../common/harness.c 10 | $(CC) $(CFLAGS) -I../../common -o $(KERN) $(SRCS) ../../common/harness.c 11 | 12 | run: $(KERN) input.data check.data 13 | ./$(KERN) input.data check.data 14 | 15 | generate: $(FILES) generate.c 16 | $(CC) $(CFLAGS) -I../../common -o generate $(SRCS) generate.c 17 | 18 | hls: $(KERN).c $(KERN).h 19 | vivado_hls hls.tcl 20 | 21 | clean: 22 | rm -f $(KERN) 23 | rm -f generate 24 | 
-------------------------------------------------------------------------------- /MachSuite/templates/Makefile_template: -------------------------------------------------------------------------------- 1 | radix: radix.c radix.h ../../common/harness.c 2 | $(CC) $(CFLAGS) -o radix radix.c ../../common/harness.c 3 | 4 | clean: 5 | rm -f radix 6 | -------------------------------------------------------------------------------- /MachSuite/templates/generate_template.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "radix.h" 11 | // Fake benchmark function to satisfy the extern 12 | void ss_sort(int a[N], int b[N], int bucket[BUCKETSIZE], int sum[SCAN_RADIX]) {} 13 | 14 | void generate_binary() 15 | { 16 | struct bench_args_t data; 17 | char *ptr; 18 | int status, i, fd, written=0; 19 | 20 | // Fill data structure 21 | srandom(1); 22 | for(i=0; i0 && "Couldn't open input data file" ); 31 | 32 | ptr = (char *) &data; 33 | while( written=0 && "Couldn't write input data file" ); 36 | written += status; 37 | } 38 | } 39 | 40 | int main(int argc, char **argv) 41 | { 42 | generate_binary(); 43 | return 0; 44 | } 45 | -------------------------------------------------------------------------------- /MachSuite/templates/harness_interface_template.h: -------------------------------------------------------------------------------- 1 | //////////////////////////////////////////////////////////////////////////////// 2 | // Test harness interface code. 
3 | 4 | struct bench_args_t { 5 | int a[N]; 6 | int b[N]; 7 | int bucket[BUCKETSIZE]; 8 | int sum[SCAN_RADIX]; 9 | }; 10 | int INPUT_SIZE = sizeof(struct bench_args_t); 11 | 12 | void ss_sort(int a[N], int b[N], int bucket[BUCKETSIZE], int sum[SCAN_RADIX]); 13 | 14 | void run_benchmark( void *vargs ) { 15 | struct bench_args_t *args = (struct bench_args_t *)vargs; 16 | ss_sort( args->a, args->b, args->bucket, args->sum ); 17 | } 18 | 19 | //////////////////////////////////////////////////////////////////////////////// 20 | -------------------------------------------------------------------------------- /MachSuite/viterbi/viterbi/Makefile: -------------------------------------------------------------------------------- 1 | KERN=viterbi 2 | ALG=viterbi 3 | 4 | CFLAGS?=-O3 -Wall -Wno-unused-label 5 | 6 | SRCS=$(KERN).c local_support.c ../../common/support.c 7 | FILES=$(SRCS) $(KERN).h ../../common/support.h 8 | 9 | $(KERN): $(FILES) ../../common/harness.c 10 | $(CC) $(CFLAGS) -I../../common -o $(KERN) $(SRCS) ../../common/harness.c 11 | 12 | run: $(KERN) input.data check.data 13 | ./$(KERN) input.data check.data 14 | 15 | generate: $(FILES) generate.c 16 | $(CC) $(CFLAGS) -I../../common -o generate $(SRCS) generate.c -lm 17 | ./generate 18 | 19 | hls: $(KERN).c $(KERN).h 20 | vivado_hls hls.tcl 21 | 22 | clean: 23 | rm -f $(KERN) generate output.data 24 | 25 | ACCEL_NAME = viterbi_viterbi 26 | TEST_BIN = $(ACCEL_NAME) 27 | export TRACE_OUTPUT_DIR=$(ACCEL_NAME) 28 | ifndef WORKLOAD 29 | export WORKLOAD=viterbi 30 | endif 31 | include ../../common/Makefile.tracer 32 | include ../../common/Makefile.gem5 33 | -------------------------------------------------------------------------------- /MachSuite/viterbi/viterbi/check.data: -------------------------------------------------------------------------------- 1 | %% 2 | 27 3 | 27 4 | 27 5 | 27 6 | 27 7 | 31 8 | 63 9 | 63 10 | 63 11 | 63 12 | 47 13 | 4 14 | 38 15 | 38 16 | 38 17 | 38 18 | 7 19 | 7 20 | 7 21 | 7 22 | 7 23 | 7 
24 | 7 25 | 7 26 | 2 27 | 2 28 | 2 29 | 43 30 | 52 31 | 52 32 | 43 33 | 43 34 | 43 35 | 43 36 | 43 37 | 44 38 | 44 39 | 32 40 | 9 41 | 9 42 | 15 43 | 45 44 | 45 45 | 45 46 | 45 47 | 45 48 | 45 49 | 0 50 | 55 51 | 55 52 | 55 53 | 30 54 | 13 55 | 13 56 | 13 57 | 13 58 | 13 59 | 13 60 | 57 61 | 57 62 | 21 63 | 21 64 | 21 65 | 21 66 | 7 67 | 41 68 | 41 69 | 41 70 | 41 71 | 17 72 | 17 73 | 30 74 | 41 75 | 41 76 | 58 77 | 58 78 | 58 79 | 31 80 | 54 81 | 54 82 | 54 83 | 54 84 | 54 85 | 54 86 | 54 87 | 54 88 | 54 89 | 54 90 | 54 91 | 54 92 | 52 93 | 52 94 | 52 95 | 21 96 | 21 97 | 21 98 | 28 99 | 18 100 | 18 101 | 40 102 | 40 103 | 40 104 | 40 105 | 40 106 | 40 107 | 46 108 | 46 109 | 2 110 | 2 111 | 2 112 | 53 113 | 53 114 | 53 115 | 55 116 | 38 117 | 57 118 | 57 119 | 57 120 | 57 121 | 57 122 | 57 123 | 57 124 | 57 125 | 57 126 | 57 127 | 30 128 | 30 129 | 5 130 | 5 131 | 5 132 | 5 133 | 5 134 | 5 135 | 5 136 | 5 137 | 30 138 | 30 139 | 26 140 | 38 141 | 38 142 | -------------------------------------------------------------------------------- /MachSuite/viterbi/viterbi/hls.tcl: -------------------------------------------------------------------------------- 1 | open_project viterbi_syn 2 | 3 | add_files viterbi.c 4 | add_files input.data 5 | add_files check.data 6 | add_files -tb ../../common/harness.c 7 | 8 | #add_files -tb viterbi_test.c 9 | 10 | set_top viterbi 11 | 12 | open_solution -reset solution 13 | set_part virtex7 14 | create_clock -period 10 15 | 16 | #source ./viterbi_dir 17 | #config_rtl -reset all -reset_level low 18 | 19 | csynth_design 20 | cosim_design -rtl verilog -tool modelsim 21 | 22 | exit 23 | -------------------------------------------------------------------------------- /MachSuite/viterbi/viterbi/viterbi.h: -------------------------------------------------------------------------------- 1 | /* 2 | Based on: 3 | Lawrence Rabiner. "A Tutorial on Hidden Markov Models and Selected Applications in Speech Recognition." Proc. IEEE, v77, #2. 1989. 
4 | */ 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "support.h" 11 | 12 | #define TYPE double 13 | typedef uint8_t tok_t; 14 | typedef TYPE prob_t; 15 | typedef uint8_t state_t; 16 | typedef int32_t step_t; 17 | 18 | //#define N_STATES 5 19 | //#define N_OBS 32 20 | //#define N_TOKENS 9 21 | #define N_STATES 64 22 | #define N_OBS 140 23 | #define N_TOKENS 64 24 | 25 | int viterbi(prob_t* host_init, 26 | prob_t* host_transition, 27 | prob_t* host_emission, 28 | state_t* host_path, 29 | tok_t* obs, 30 | prob_t* init, 31 | prob_t* transition, 32 | prob_t* emission, 33 | state_t* path); 34 | 35 | //////////////////////////////////////////////////////////////////////////////// 36 | // Test harness interface code. 37 | 38 | struct bench_args_t { 39 | tok_t obs[N_OBS]; 40 | prob_t init[N_STATES]; 41 | prob_t transition[N_STATES*N_STATES]; 42 | prob_t emission[N_STATES*N_TOKENS]; 43 | state_t path[N_OBS]; 44 | }; 45 | -------------------------------------------------------------------------------- /MachSuite/viterbi/viterbi/viterbi_dir: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harvard-acc/ALADDIN/b4444c38e1bc83a6763a92205c24778c1e12dfc1/MachSuite/viterbi/viterbi/viterbi_dir -------------------------------------------------------------------------------- /MachSuite/viterbi/viterbi/viterbi_test.c: -------------------------------------------------------------------------------- 1 | #include "viterbi.h" 2 | 3 | float RR(){ 4 | float x = (float)((float)rand()/(float)RAND_MAX); 5 | return x; 6 | } 7 | 8 | int main() { 9 | int i, j, k; 10 | int Obs[numObs]; 11 | float transMat[numStates*numObs], obsLik[numStates*numObs]; 12 | int finalState; 13 | finalState = 2; 14 | 15 | srandom(1); 16 | for(i=0;i 2 | #include 3 | 4 | #define TYPE int 5 | #define ROWSIZE 32 6 | #define N ROWSIZE*ROWSIZE 7 | #define BLOCKSIZE 8 8 | #define NUMOFBLOCKS N/BLOCKSIZE/BLOCKSIZE 9 | 
-------------------------------------------------------------------------------- /SHOC/common/Makefile.common: -------------------------------------------------------------------------------- 1 | .PHONY: all clean run 2 | 3 | all: $(ACCEL_NAME) 4 | 5 | $(ACCEL_NAME): $(SRCS) 6 | $(CC) $(CFLAGS) -o $(ACCEL_NAME) $(SRCS) 7 | 8 | run: $(ACCEL_NAME) 9 | ./$(ACCEL_NAME) 10 | 11 | clean: 12 | rm -rf $(ACCEL_NAME) 13 | -------------------------------------------------------------------------------- /SHOC/fft/Makefile: -------------------------------------------------------------------------------- 1 | SRCS=fft.c 2 | 3 | ACCEL_NAME = fft 4 | TEST_BIN = $(ACCEL_NAME) 5 | export TRACE_OUTPUT_DIR=$(ACCEL_NAME) 6 | ifndef WORKLOAD 7 | export WORKLOAD=fft1D_512 8 | endif 9 | include ../common/Makefile.common 10 | include ../common/Makefile.tracer 11 | -------------------------------------------------------------------------------- /SHOC/md/Makefile: -------------------------------------------------------------------------------- 1 | SRCS=md.c 2 | 3 | ACCEL_NAME = md 4 | TEST_BIN = $(ACCEL_NAME) 5 | export TRACE_OUTPUT_DIR=$(ACCEL_NAME) 6 | ifndef WORKLOAD 7 | export WORKLOAD=md 8 | endif 9 | include ../common/Makefile.common 10 | include ../common/Makefile.tracer 11 | -------------------------------------------------------------------------------- /SHOC/md/md.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #define TYPE int 5 | 6 | // Problem Constants 7 | #define cutsq 160 // Square of cutoff distance 8 | #define nAtoms 32 9 | #define maxNeighbors 32 // Max number of nearest neighbors 10 | #define domainEdge 200 // Edge length of the cubic domain 11 | #define lj1 15 // LJ constants 12 | #define lj2 20 13 | -------------------------------------------------------------------------------- /SHOC/pp_scan/Makefile: -------------------------------------------------------------------------------- 1 | SRCS=pp_scan.c 2 
| 3 | ACCEL_NAME = pp_scan 4 | TEST_BIN = $(ACCEL_NAME) 5 | export TRACE_OUTPUT_DIR=$(ACCEL_NAME) 6 | ifndef WORKLOAD 7 | export WORKLOAD=pp_scan 8 | endif 9 | include ../common/Makefile.common 10 | include ../common/Makefile.tracer 11 | -------------------------------------------------------------------------------- /SHOC/pp_scan/pp_scan.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #define TYPE int 5 | 6 | #define N 2048 7 | #define SCAN_BLOCK 16 8 | 9 | #define SCAN_RADIX N/SCAN_BLOCK 10 | #define BUCKETSIZE SCAN_BLOCK*SCAN_RADIX 11 | -------------------------------------------------------------------------------- /SHOC/reduction/Makefile: -------------------------------------------------------------------------------- 1 | SRCS=reduction.c 2 | 3 | ACCEL_NAME = reduction 4 | TEST_BIN = $(ACCEL_NAME) 5 | export TRACE_OUTPUT_DIR=$(ACCEL_NAME) 6 | ifndef WORKLOAD 7 | export WORKLOAD=reduction 8 | endif 9 | include ../common/Makefile.common 10 | include ../common/Makefile.tracer 11 | -------------------------------------------------------------------------------- /SHOC/reduction/reduction.c: -------------------------------------------------------------------------------- 1 | #include "reduction.h" 2 | 3 | #ifdef DMA_MODE 4 | #include "gem5/dma_interface.h" 5 | #endif 6 | 7 | int reduction(int *in) 8 | { 9 | int i = 0; int sum = 0; 10 | #ifdef DMA_MODE 11 | dmaLoad(&in[0], 0, NUM*sizeof(int)); 12 | #endif 13 | sum:for (i = 0; i < NUM; i++) 14 | sum += in[i]; 15 | 16 | return sum; 17 | } 18 | 19 | 20 | int main() 21 | { 22 | int *in; 23 | in = (int *) malloc (sizeof(int) * NUM ); 24 | 25 | int i; 26 | int max = 2147483646; 27 | int min = 0; 28 | srand(8650341L); 29 | for (i = 0; i < NUM; i++) 30 | { 31 | in[i] = (int)(min + rand() * 1.0 * (max - min) / (RAND_MAX )); 32 | } 33 | 34 | #ifdef GEM5 35 | resetGem5Stats(); 36 | #endif 37 | int sum = reduction(&in[0]); 38 | #ifdef GEM5 39 | 
dumpGem5Stats("reduction"); 40 | #endif 41 | printf("sum: %d\n", sum); 42 | return 0; 43 | } 44 | -------------------------------------------------------------------------------- /SHOC/reduction/reduction.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #define NUM 2048 5 | -------------------------------------------------------------------------------- /SHOC/ss_sort/Makefile: -------------------------------------------------------------------------------- 1 | SRCS=ss_sort.c 2 | 3 | ACCEL_NAME = ss_sort 4 | TEST_BIN = $(ACCEL_NAME) 5 | export TRACE_OUTPUT_DIR=$(ACCEL_NAME) 6 | ifndef WORKLOAD 7 | export WORKLOAD=ss_sort 8 | endif 9 | include ../common/Makefile.common 10 | include ../common/Makefile.tracer 11 | -------------------------------------------------------------------------------- /SHOC/ss_sort/ss_sort.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #define TYPE int 5 | 6 | #define N 2048 7 | #define NUMOFBLOCKS 512 8 | 9 | #define ELEMENTSPERBLOCK 4 10 | #define RADIXSIZE 4 11 | #define BUCKETSIZE NUMOFBLOCKS*RADIXSIZE 12 | #define MASK 0x3 13 | //SCAN_BLOCK * SCAN_RADIX = BUCKETSIZE 14 | 15 | #define SCAN_BLOCK 16 16 | #define SCAN_RADIX BUCKETSIZE/SCAN_BLOCK 17 | -------------------------------------------------------------------------------- /SHOC/stencil/Makefile: -------------------------------------------------------------------------------- 1 | SRCS=stencil.c 2 | 3 | ACCEL_NAME = stencil 4 | TEST_BIN = $(ACCEL_NAME) 5 | export TRACE_OUTPUT_DIR=$(ACCEL_NAME) 6 | ifndef WORKLOAD 7 | export WORKLOAD=stencil 8 | endif 9 | include ../common/Makefile.common 10 | include ../common/Makefile.tracer 11 | -------------------------------------------------------------------------------- /SHOC/stencil/stencil.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 
3 | #define N 32 4 | -------------------------------------------------------------------------------- /SHOC/triad/Makefile: -------------------------------------------------------------------------------- 1 | SRCS=triad.c 2 | 3 | ACCEL_NAME = triad 4 | TEST_BIN = $(ACCEL_NAME) 5 | export TRACE_OUTPUT_DIR=$(ACCEL_NAME) 6 | ifndef WORKLOAD 7 | export WORKLOAD=triad 8 | endif 9 | include ../common/Makefile.common 10 | include ../common/Makefile.tracer 11 | -------------------------------------------------------------------------------- /SHOC/triad/example/config_example: -------------------------------------------------------------------------------- 1 | partition,cyclic,a,8192,4,2 2 | partition,cyclic,b,8192,4,2 3 | partition,cyclic,c,8192,4,2 4 | unrolling,triad,triad,2 5 | pipeline,triad,1 6 | cycle_time,6 7 | -------------------------------------------------------------------------------- /SHOC/triad/triad.c: -------------------------------------------------------------------------------- 1 | #include "triad.h" 2 | 3 | #ifdef DMA_MODE 4 | #include "gem5/dma_interface.h" 5 | #endif 6 | 7 | void triad(int *a,int *b, int *c, int s){ 8 | #ifdef DMA_MODE 9 | dmaLoad(&a[0], 0*1024*sizeof(int), PAGE_SIZE); 10 | dmaLoad(&a[0], 1*1024*sizeof(int), PAGE_SIZE); 11 | dmaLoad(&b[0], 0*1024*sizeof(int), PAGE_SIZE); 12 | dmaLoad(&b[0], 1*1024*sizeof(int), PAGE_SIZE); 13 | #endif 14 | int i; 15 | triad:for(i=0;i 2 | #include 3 | #include 4 | #include 5 | #define NUM 2048 6 | 7 | void triad(int *a,int *b, int *c, int s); 8 | -------------------------------------------------------------------------------- /common/AladdinExceptions.h: -------------------------------------------------------------------------------- 1 | #ifndef _ALADDIN_EXCEPTIONS_H_ 2 | #define _ALADDIN_EXCEPTIONS_H_ 3 | 4 | #include 5 | #include 6 | 7 | #include "typedefs.h" 8 | 9 | class ExecNode; 10 | 11 | class AladdinException : public std::runtime_error { 12 | public: 13 | AladdinException(const std::string& 
message); 14 | AladdinException(const ExecNode* node, const std::string& message); 15 | }; 16 | 17 | class VirtualAddrLookupException : public AladdinException { 18 | public: 19 | VirtualAddrLookupException(const std::string& array_name); 20 | 21 | protected: 22 | static const std::string helpfulSuggestion; 23 | }; 24 | 25 | class UnknownArrayException : public AladdinException { 26 | public: 27 | UnknownArrayException(const std::string& array_name); 28 | UnknownArrayException(Addr array_addr); 29 | 30 | protected: 31 | static const std::string helpfulSuggestion; 32 | }; 33 | 34 | class IllegalHostMemoryAccessException : public AladdinException { 35 | public: 36 | IllegalHostMemoryAccessException(const std::string& array_name); 37 | IllegalHostMemoryAccessException(const ExecNode* node); 38 | 39 | protected: 40 | static const std::string helpfulSuggestion; 41 | }; 42 | 43 | class ArrayAccessException : public AladdinException { 44 | public: 45 | ArrayAccessException(const std::string& message); 46 | }; 47 | 48 | class AddressTranslationException : public AladdinException { 49 | public: 50 | AddressTranslationException(Addr vaddr, unsigned size); 51 | }; 52 | 53 | #endif 54 | -------------------------------------------------------------------------------- /common/DatabaseConfig.cpp: -------------------------------------------------------------------------------- 1 | #include "DatabaseConfig.h" 2 | 3 | const char* DB_URL = "localhost"; 4 | const char* DB_USER = "user"; 5 | const char* DB_PASS = "password"; 6 | -------------------------------------------------------------------------------- /common/DatabaseConfig.h: -------------------------------------------------------------------------------- 1 | #ifndef __DATABASE_CONFIG_H__ 2 | #define __DATABASE_CONFIG_H__ 3 | 4 | extern const char* DB_URL; 5 | extern const char* DB_USER; 6 | extern const char* DB_PASS; 7 | 8 | #endif 9 | -------------------------------------------------------------------------------- 
/common/DatabaseDeps.h: -------------------------------------------------------------------------------- 1 | #ifndef __DATABASE_DEPS_H__ 2 | #define __DATABASE_DEPS_H__ 3 | 4 | // All MySQL database headers needed. Any file needing database support only 5 | // needs to include this header. 6 | 7 | #ifdef USE_DB 8 | #include "mysql_connection.h" 9 | #include "mysql_driver.h" 10 | #include "cppconn/driver.h" 11 | #include "cppconn/exception.h" 12 | #include "cppconn/resultset.h" 13 | #include "cppconn/statement.h" 14 | #include "DatabaseConfig.h" 15 | #endif 16 | 17 | #endif 18 | -------------------------------------------------------------------------------- /common/MemoryType.h: -------------------------------------------------------------------------------- 1 | #ifndef __MEMORY_TYPE_H__ 2 | #define __MEMORY_TYPE_H__ 3 | 4 | // Defines how an array is mapped to the accelerator and how it can be accessed 5 | // via the host. 6 | typedef enum _MemoryType { spad, reg, dma, acp, cache } MemoryType; 7 | 8 | #endif 9 | -------------------------------------------------------------------------------- /common/Partition.cpp: -------------------------------------------------------------------------------- 1 | #include "Partition.h" 2 | 3 | // TODO: There isn't any reason why this can't be done in the constructor, so 4 | // put it there. 
5 | void Partition::setSize(unsigned _size, unsigned _word_size) { 6 | size = _size; 7 | word_size = _word_size; 8 | num_words = size/word_size; 9 | data.resize(num_words); 10 | for (auto& blk : data) 11 | blk = new uint8_t[word_size]; 12 | } 13 | 14 | Partition::~Partition() { 15 | for (auto& blk : data) { 16 | if (blk) 17 | delete[] blk; 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /common/ReadyPartition.cpp: -------------------------------------------------------------------------------- 1 | #include "ReadyPartition.h" 2 | ReadyPartition::ReadyPartition() : Partition() {} 3 | 4 | ReadyPartition::~ReadyPartition() {} 5 | 6 | void ReadyPartition::setSize(unsigned _size, unsigned _word_size) { 7 | Partition::setSize(_size, _word_size); 8 | ready_bits.resize(num_words); 9 | } 10 | -------------------------------------------------------------------------------- /common/SourceManager.cpp: -------------------------------------------------------------------------------- 1 | #include "SourceEntity.h" 2 | #include "SourceManager.h" 3 | #include "DynamicEntity.h" 4 | 5 | #include 6 | 7 | namespace SrcTypes { 8 | 9 | const src_id_t InvalidId = std::numeric_limits::max(); 10 | 11 | template<> 12 | std::string SourceManager::get_type_prefix() const { 13 | return "F"; 14 | }; 15 | 16 | template<> 17 | std::string SourceManager::get_type_prefix() const { 18 | return "V"; 19 | }; 20 | 21 | template<> 22 | std::string SourceManager::get_type_prefix() const { 23 | return "I"; 24 | }; 25 | 26 | template<> 27 | std::string SourceManager::get_type_prefix