├── .gitignore
├── LICENSE
├── README.md
├── requirements.txt
├── src
│   ├── haoda
│   │   ├── backend
│   │   │   └── xilinx.py
│   │   ├── ir
│   │   │   ├── __init__.py
│   │   │   ├── arithmetic
│   │   │   │   ├── __init__.py
│   │   │   │   └── base.py
│   │   │   └── visitor.py
│   │   └── util.py
│   ├── soda
│   │   ├── codegen
│   │   │   └── xilinx
│   │   │       ├── header.py
│   │   │       ├── hls_kernel.py
│   │   │       ├── host.py
│   │   │       ├── opencl.py
│   │   │       └── rtl_kernel.py
│   │   ├── core.py
│   │   ├── dataflow.py
│   │   ├── grammar.py
│   │   ├── mutator.py
│   │   ├── util.py
│   │   └── visitor.py
│   └── sodac
└── tests
    ├── src
    │   ├── blur.soda
    │   ├── denoise2d.soda
    │   ├── denoise3d.soda
    │   ├── heat3d.soda
    │   ├── jacobi2d.soda
    │   ├── jacobi3d.soda
    │   ├── seidel2d.soda
    │   └── sobel2d.soda
    └── test-compilation.sh

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | sdaccel_profile_summary.csv
2 | sdaccel_profile_summary.html
3 | .Xil/
4 | __pycache__
5 | *.pyc

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2019 UCLA-VAST
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | This repo is archived. For the latest version, please see https://github.com/UCLA-VAST/soda.
2 | 
3 | # SODA Compiler
4 | Stencil with Optimized Dataflow Architecture Compiler
5 | 
6 | ## Publication
7 | 
8 | + Yuze Chi, Jason Cong, Peng Wei, Peipei Zhou. [SODA: Stencil with Optimized Dataflow Architecture](https://doi.org/10.1145/3240765.3240850). In *ICCAD*, 2018.
(Best Paper Candidate) [[PDF]](https://about.blaok.me/pub/iccad18.pdf) [[Slides]](https://about.blaok.me/pub/iccad18.slides.pdf)
9 | 
10 | ## SODA DSL Example
11 | 
12 | # comments start with a hashtag (#)
13 | 
14 | kernel: blur # the kernel name, will be used as the kernel name in HLS
15 | burst width: 512 # DRAM burst I/O width in bits; for Xilinx platforms the default is 512
16 | unroll factor: 16 # how many pixels are generated per cycle
17 | 
18 | # specify the dram bank, type, name, and dimension of the input tile
19 | # the last dimension is not needed and a placeholder '*' must be given
20 | # dram bank is optional
21 | # multiple inputs can be specified but one and only one must specify the dimensions
22 | input dram 1 uint16: input(2000, *)
23 | 
24 | # specify an intermediate stage of computation, may appear 0 or more times
25 | local uint16: blur_x(0, 0) = (input(0, 0) + input(0, 1) + input(0, 2)) / 3
26 | 
27 | # specify the output
28 | # dram bank is optional
29 | output dram 1 uint16: blur_y(0, 0) = (blur_x(0, 0) + blur_x(1, 0) + blur_x(2, 0)) / 3
30 | 
31 | # how many times the whole computation is repeated (only works if input matches output)
32 | iterate: 1
33 | 
34 | ## Getting Started
35 | 
36 | ### Prerequisites
37 | 
38 | + Python 3.5+ and corresponding `pip`
39 | + SDAccel 2018.3 (earlier versions might work but won't be supported)
40 | 
41 | 
How to install Python 3.5+ on Ubuntu 16.04+ and CentOS 7? 42 | 43 | #### Ubuntu 16.04+ 44 | ```bash 45 | sudo apt install python3 python3-pip 46 | ``` 47 | 48 | #### CentOS 7 49 | ```bash 50 | sudo yum install python36 python36-pip 51 | sudo alternatives --install /usr/bin/python3 python3 /usr/bin/python3.6 100 52 | ``` 53 | 54 |
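Whichever route you take, it is worth confirming that the `python3` on your `PATH` is new enough before installing the requirements. A minimal check, nothing SODA-specific assumed:

```python
import sys

# sodac requires Python 3.5+; fail fast with a readable message otherwise.
assert sys.version_info >= (3, 5), 'need Python 3.5+, got %s' % sys.version
```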
55 | 
56 | ### Clone the Repo
57 | git clone https://github.com/UCLA-VAST/soda-compiler.git
58 | cd soda-compiler
59 | python3 -m pip install --user -r requirements.txt
60 | 
61 | ### Parameter Setup
62 | app=blur
63 | platform=xilinx_u200_xdma_201830_1
64 | # The following can be set via sourcing /path/to/xilinx/sdx/settings64.sh
65 | XILINX_SDX=/path/to/xilinx/sdx
66 | XILINX_VIVADO=/path/to/xilinx/vivado
67 | 
68 | ### Generate HLS Kernel Code
69 | src/sodac tests/src/${app}.soda --xocl-kernel ${app}_kernel.cpp
70 | 
71 | ### Generate OpenCL Host Code
72 | src/sodac tests/src/${app}.soda --xocl-header ${app}.h
73 | src/sodac tests/src/${app}.soda --xocl-host ${app}.cpp
74 | 
75 | ### Create Testbench
76 | cat >${app}_run.cpp <<EOF
77 | #include <cstdio>
78 | #include <cstdlib>
79 | 
80 | #include "${app}.h"
81 | 
82 | int ${app}_test(const char* xclbin, const int dims[4]);
83 | int main(int argc, char **argv) {
84 |   if (argc != 4) {
85 |     fprintf(stderr, "Usage: \n  %s <xclbin> <dim 0> <dim 1>\n", argv[0]);
86 |     return 1;
87 |   }
88 |   int dims[4] = {atoi(argv[2]), atoi(argv[3]), 0, 0};
89 |   return ${app}_test(argv[1], dims);
90 | }
91 | EOF
92 | 
93 | ### Compile OpenCL Host Executable
94 | # Please set TILE_SIZE_DIM_0 and UNROLL_FACTOR macros to match the kernel.
95 | g++ -std=c++11 -I${XILINX_SDX}/runtime/include -I${XILINX_VIVADO}/include ${app}.cpp ${app}_run.cpp -o ${app} \
96 |   -lxilinxopencl -DTILE_SIZE_DIM_0=2000 -DUNROLL_FACTOR=2 -fopenmp -Wno-deprecated-declarations -Wall
97 | 
98 | ### Create Emulation Config
99 | emconfigutil -f ${platform}
100 | 
101 | ### Software Emulation
102 | 
103 | #### Compile for Software Emulation
104 | xocc -t sw_emu -f ${platform} --kernel ${app}_kernel --xp prop:kernel.${app}_kernel.kernel_flags="-std=c++0x" \
105 |   -c ${app}_kernel.cpp -o ${app}.sw_emu.xo
106 | 
107 | #### Link for Software Emulation
108 | xocc -t sw_emu -f ${platform} -l ${app}.sw_emu.xo -o ${app}.sw_emu.xclbin
109 | 
110 | #### Run Software Emulation
111 | XCL_EMULATION_MODE=sw_emu ./${app} ${app}.sw_emu.xclbin 2000 100
112 | 
113 | ### High-Level Synthesis
114 | xocc -t hw -f ${platform} --kernel ${app}_kernel --xp prop:kernel.${app}_kernel.kernel_flags="-std=c++0x" \
115 |   -c ${app}_kernel.cpp -o ${app}.hw.xo
116 | 
117 | ### Hardware Emulation
118 | 
119 | #### Link for Hardware Emulation
120 | xocc -t hw_emu -f ${platform} -l ${app}.hw.xo -o ${app}.hw_emu.xclbin
121 | 
122 | #### Run Hardware Emulation
123 | # By default, kernel ports are connected via DRAM bank 1 on the xilinx_u200_xdma_201830_1 platform.
124 | DRAM_IN=1 DRAM_OUT=1 XCL_EMULATION_MODE=hw_emu ./${app} ${app}.hw_emu.xclbin 2000 10
125 | 
126 | ### Hardware Deployment
127 | 
128 | #### Logic Synthesis, Place, and Route
129 | xocc -t hw -f ${platform} -l ${app}.hw.xo -o ${app}.hw.xclbin
130 | 
131 | #### Run Bitstream on FPGA
132 | # By default, kernel ports are connected via DRAM bank 1 on the xilinx_u200_xdma_201830_1 platform.
133 | DRAM_IN=1 DRAM_OUT=1 ./${app} ${app}.hw.xclbin 2000 1000
134 | 
135 | ## Code Snippet Example
136 | 
137 | ### Source Code
138 | 
139 | kernel: jacobi2d
140 | burst width: 512
141 | unroll factor: 2
142 | input float: t1(2000, *)
143 | output float: t0(0, 0) = (t1(0, 1) + t1(1, 0) + t1(0, 0) + t1(0, -1) + t1(-1, 0)) * 0.2f
144 | iterate: 1
145 | 
146 | ### HLS Kernel Code
147 | Each function in the code snippets below is synthesized into an RTL module.
148 | Their arguments are all `hls::stream` FIFOs. Without unrolling, a simple line-buffer pipeline is generated, producing 1 pixel per cycle.
149 | With unrolling, a SODA microarchitecture pipeline is generated, producing 2 pixels per cycle.
150 | 
151 | #### Without Unrolling (`--unroll-factor=1`)
152 | 
153 | #pragma HLS dataflow
154 | Module1Func(
155 |     /*output*/ &from_t1_offset_0_to_t1_offset_1999,
156 |     /*output*/ &from_t1_offset_0_to_t0_pe_0,
157 |     /* input*/ &from_super_source_to_t1_offset_0);
158 | Module2Func(
159 |     /*output*/ &from_t1_offset_1999_to_t1_offset_2000,
160 |     /*output*/ &from_t1_offset_1999_to_t0_pe_0,
161 |     /* input*/ &from_t1_offset_0_to_t1_offset_1999);
162 | Module3Func(
163 |     /*output*/ &from_t1_offset_2000_to_t1_offset_2001,
164 |     /*output*/ &from_t1_offset_2000_to_t0_pe_0,
165 |     /* input*/ &from_t1_offset_1999_to_t1_offset_2000);
166 | Module3Func(
167 |     /*output*/ &from_t1_offset_2001_to_t1_offset_4000,
168 |     /*output*/ &from_t1_offset_2001_to_t0_pe_0,
169 |     /* input*/ &from_t1_offset_2000_to_t1_offset_2001);
170 | Module4Func(
171 |     /*output*/ &from_t1_offset_4000_to_t0_pe_0,
172 |     /* input*/ &from_t1_offset_2001_to_t1_offset_4000);
173 | Module5Func(
174 |     /*output*/ &from_t0_pe_0_to_super_sink,
175 |     /* input*/ &from_t1_offset_0_to_t0_pe_0,
176 |     /* input*/ &from_t1_offset_1999_to_t0_pe_0,
177 |     /* input*/ &from_t1_offset_2000_to_t0_pe_0,
178 |     /* input*/ &from_t1_offset_4000_to_t0_pe_0,
179 |     /* input*/ &from_t1_offset_2001_to_t0_pe_0);
180 | 
181 | In the above code snippet, `Module1Func` to `Module4Func` are forwarding modules; they constitute the data-reuse line buffer.
182 | The line buffer size is approximately two lines of pixels, i.e. 4000 pixels.
183 | `Module5Func` is a computing module; it implements the computation kernel.
184 | The whole design is fully pipelined; however, with only 1 computing module, it can only produce 1 pixel per cycle.
185 | 
186 | #### Unroll 2 Times (`--unroll-factor=2`)
187 | 
188 | #pragma HLS dataflow
189 | Module1Func(
190 |     /*output*/ &from_t1_offset_1_to_t1_offset_1999,
191 |     /*output*/ &from_t1_offset_1_to_t0_pe_0,
192 |     /* input*/ &from_super_source_to_t1_offset_1);
193 | Module1Func(
194 |     /*output*/ &from_t1_offset_0_to_t1_offset_2000,
195 |     /*output*/ &from_t1_offset_0_to_t0_pe_1,
196 |     /* input*/ &from_super_source_to_t1_offset_0);
197 | Module2Func(
198 |     /*output*/ &from_t1_offset_1999_to_t1_offset_2001,
199 |     /*output*/ &from_t1_offset_1999_to_t0_pe_1,
200 |     /* input*/ &from_t1_offset_1_to_t1_offset_1999);
201 | Module3Func(
202 |     /*output*/ &from_t1_offset_2000_to_t1_offset_2002,
203 |     /*output*/ &from_t1_offset_2000_to_t0_pe_1,
204 |     /*output*/ &from_t1_offset_2000_to_t0_pe_0,
205 |     /* input*/ &from_t1_offset_0_to_t1_offset_2000);
206 | Module4Func(
207 |     /*output*/ &from_t1_offset_2001_to_t1_offset_4001,
208 |     /*output*/ &from_t1_offset_2001_to_t0_pe_1,
209 |     /*output*/ &from_t1_offset_2001_to_t0_pe_0,
210 |     /* input*/ &from_t1_offset_1999_to_t1_offset_2001);
211 | Module5Func(
212 |     /*output*/ &from_t1_offset_2002_to_t1_offset_4000,
213 |     /*output*/ &from_t1_offset_2002_to_t0_pe_0,
214 |     /* input*/ &from_t1_offset_2000_to_t1_offset_2002);
215 | Module6Func(
216 |     /*output*/ &from_t1_offset_4001_to_t0_pe_0,
217 |     /* input*/ &from_t1_offset_2001_to_t1_offset_4001);
218 | Module7Func(
219 |     /*output*/ &from_t0_pe_0_to_super_sink,
220 |     /* input*/ &from_t1_offset_1_to_t0_pe_0,
221 |     /* input*/ &from_t1_offset_2000_to_t0_pe_0,
222 |     /* input*/ &from_t1_offset_2001_to_t0_pe_0,
223 |     /* input*/ &from_t1_offset_4001_to_t0_pe_0,
224 |     /* input*/ &from_t1_offset_2002_to_t0_pe_0);
225 | Module8Func(
226 |     /*output*/ &from_t1_offset_4000_to_t0_pe_1,
227 |     /* input*/ &from_t1_offset_2002_to_t1_offset_4000);
228 | Module7Func(
229 |     /*output*/ &from_t0_pe_1_to_super_sink,
230 |     /* input*/ &from_t1_offset_0_to_t0_pe_1,
231 |     /* input*/ &from_t1_offset_1999_to_t0_pe_1,
232 |     /* input*/ &from_t1_offset_2000_to_t0_pe_1,
233 |     /* input*/ &from_t1_offset_4000_to_t0_pe_1,
234 |     /* input*/ &from_t1_offset_2001_to_t0_pe_1);
235 | 
236 | In the above code snippet, `Module1Func` to `Module6Func` and `Module8Func` are forwarding modules; they constitute the reuse buffers of the SODA microarchitecture.
237 | Although unrolled, the reuse buffer size is still approximately two lines of pixels, i.e. 4000 pixels.
238 | `Module7Func` is a computing module; it is instantiated twice.
239 | The whole design is fully pipelined and can produce 2 pixels per cycle.
240 | In general, the unroll factor can be set to any number that satisfies the throughput requirement.
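As a back-of-the-envelope illustration of that sizing (the 300 MHz clock below is an assumed figure, not a measured one), the largest useful unroll factor for a single DRAM bank is the one that consumes a full burst every cycle:

```python
burst_width = 512   # DRAM burst I/O width in bits, from the DSL
pixel_width = 16    # bits per uint16 pixel
clock_mhz = 300     # assumed kernel clock; platform- and design-dependent

unroll_factor = burst_width // pixel_width            # 32 pixels per cycle
pixels_per_second = unroll_factor * clock_mhz * 1e6   # 9.6e9 pixels/s
```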
241 | 
242 | ## Design Considerations
243 | 
244 | + `kernel`, `burst width`, `unroll factor`, `input`, `output`, and `iterate` keywords are mandatory
245 | + For non-iterative stencils, `unroll factor` should be determined by the DRAM bandwidth, i.e. chosen to saturate the external bandwidth, since resources are usually not the bottleneck
246 | + For iterative stencils, prefer using more PEs in a single iteration over implementing more iterations
247 | + Note that `2.0` will be a `double` number. To generate `float`, use `2.0f`; this may help reduce DSP usage
248 | + SODA is tiling-based and the size of the tile is specified in the `input` keyword. The last dimension is a placeholder because it is not needed in the reuse buffer generation
249 | 
250 | ## Projects Using SODA
251 | 
252 | + Yi-Hsiang Lai, Yuze Chi, Yuwei Hu, Jie Wang, Cody Hao Yu, Yuan Zhou, Jason Cong, Zhiru Zhang. [HeteroCL: A Multi-Paradigm Programming Infrastructure for Software-Defined Reconfigurable Computing](https://doi.org/10.1145/3289602.3293910). In *FPGA*, 2019. (Best Paper Candidate) [[PDF]](https://about.blaok.me/pub/fpga19-heterocl.pdf) [[Slides]](https://about.blaok.me/pub/fpga19-heterocl.slides.pdf)
253 | + Yuze Chi, Young-kyu Choi, Jason Cong, Jie Wang. [Rapid Cycle-Accurate Simulator for High-Level Synthesis](https://doi.org/10.1145/3289602.3293918). In *FPGA*, 2019. [[PDF]](https://about.blaok.me/pub/fpga19-flash.pdf) [[Slides]](https://about.blaok.me/pub/fpga19-flash.slides.pdf)
254 | 

--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | textx
2 | cached_property

--------------------------------------------------------------------------------
/src/haoda/backend/xilinx.py:
--------------------------------------------------------------------------------
1 | import contextlib
2 | import collections
3 | import logging
4 | import os
5 | import subprocess
6 | import tarfile
7 | import tempfile
8 | import xml.etree.ElementTree as ET
9 | import zipfile
10 | 
11 | from haoda import util
12 | 
13 | _logger = logging.getLogger().getChild(__name__)
14 | 
15 | class Vivado(subprocess.Popen):
16 |   """Call vivado with the given tcl commands and arguments.
17 | 18 | Args: 19 | commands: string of tcl commands 20 | args: sequence of arguments 21 | """ 22 | def __init__(self, commands, *args): 23 | self.cwd = tempfile.TemporaryDirectory(prefix='vivado-') 24 | self.tcl_file = open(os.path.join(self.cwd.name, 'tcl'), mode='w+') 25 | self.tcl_file.write(commands) 26 | self.tcl_file.flush() 27 | cmd_args = ['vivado', '-mode', 'batch', '-source', self.tcl_file.name, 28 | '-nojournal', '-nolog', '-tclargs', *args] 29 | pipe_args = {'stdout' : subprocess.PIPE, 'stderr' : subprocess.PIPE} 30 | super().__init__(cmd_args, cwd=self.cwd.name, **pipe_args) 31 | 32 | def __exit__(self, *args): 33 | super().__exit__(*args) 34 | self.tcl_file.close() 35 | self.cwd.cleanup() 36 | 37 | class VivadoHls(subprocess.Popen): 38 | """Call vivado_hls with the given tcl commands. 39 | 40 | Args: 41 | commands: string of tcl commands 42 | """ 43 | def __init__(self, commands): 44 | self.cwd = tempfile.TemporaryDirectory(prefix='vivado-hls-') 45 | self.tcl_file = open(os.path.join(self.cwd.name, 'tcl'), mode='w+') 46 | self.tcl_file.write(commands) 47 | self.tcl_file.flush() 48 | cmd_args = ['vivado_hls', '-f', self.tcl_file.name, '-l', '/dev/null'] 49 | pipe_args = {'stdout' : subprocess.PIPE, 'stderr' : subprocess.PIPE} 50 | super().__init__(cmd_args, cwd=self.cwd.name, **pipe_args) 51 | 52 | def __exit__(self, *args): 53 | super().__exit__(*args) 54 | self.tcl_file.close() 55 | self.cwd.cleanup() 56 | 57 | PACKAGEXO_COMMANDS = r''' 58 | set tmp_ip_dir "{tmpdir}/tmp_ip_dir" 59 | set tmp_project "{tmpdir}/tmp_project" 60 | 61 | create_project -force kernel_pack ${{tmp_project}} 62 | add_files -norecurse [glob {hdl_dir}/*.v] 63 | foreach tcl_file [glob -nocomplain {hdl_dir}/*.tcl] {{ 64 | source ${{tcl_file}} 65 | }} 66 | update_compile_order -fileset sources_1 67 | update_compile_order -fileset sim_1 68 | ipx::package_project -root_dir ${{tmp_ip_dir}} -vendor xilinx.com -library RTLKernel -taxonomy /KernelIP -import_files -set_current false 69 | ipx::unload_core ${{tmp_ip_dir}}/component.xml 70 | ipx::edit_ip_in_project -upgrade true -name tmp_edit_project -directory ${{tmp_ip_dir}} ${{tmp_ip_dir}}/component.xml 71 | set_property core_revision 2 [ipx::current_core] 72 | foreach up [ipx::get_user_parameters] {{ 73 | ipx::remove_user_parameter [get_property NAME ${{up}}] [ipx::current_core] 74 | }} 75 | set_property sdx_kernel true [ipx::current_core] 76 | set_property sdx_kernel_type rtl [ipx::current_core] 77 | ipx::create_xgui_files [ipx::current_core] 78 | {bus_ifaces} 79 | ipx::associate_bus_interfaces -busif s_axi_control -clock ap_clk [ipx::current_core] 80 | set_property xpm_libraries {{XPM_CDC XPM_MEMORY XPM_FIFO}} [ipx::current_core] 81 | set_property supported_families {{ }} [ipx::current_core] 82 | set_property auto_family_support_level level_2 [ipx::current_core] 83 | ipx::update_checksums [ipx::current_core] 84 | ipx::save_core [ipx::current_core] 85 | close_project -delete 86 | 87 | package_xo -force -xo_path "{xo_file}" -kernel_name {top_name} -ip_directory ${{tmp_ip_dir}} -kernel_xml {kernel_xml}{cpp_kernels} 88 | ''' 89 | 90 | class PackageXo(Vivado): 91 | """Packages the given files into a Xilinx hardware object. 92 | 93 | Args: 94 | xo_file: name of the generated xo file. 95 | top_name: top-level module name. 96 | kernel_xml: xml description of the kernel. 97 | hdl_dir: directory of all HDL files. 98 | m_axi_names: variable names connected to the m_axi bus. 99 | cpp_kernels: sequence of file names of C++ kernels. 
100 |   """
101 |   def __init__(self, xo_file, top_name, kernel_xml, hdl_dir, m_axi_names,
102 |                cpp_kernels=()):
103 |     self.tmpdir = tempfile.TemporaryDirectory(prefix='package-xo-')
104 |     if _logger.isEnabledFor(logging.INFO):
105 |       for _, _, files in os.walk(hdl_dir):
106 |         for filename in files:
107 |           _logger.info('packing: %s', filename)
108 |     kwargs = {
109 |         'top_name' : top_name,
110 |         'kernel_xml' : kernel_xml,
111 |         'hdl_dir' : hdl_dir,
112 |         'xo_file' : xo_file,
113 |         'bus_ifaces' : '\n'.join(map(
114 |             'ipx::associate_bus_interfaces -busif m_axi_{} -clock ap_clk '
115 |             '[ipx::current_core]'.format, m_axi_names)),
116 |         'tmpdir' : self.tmpdir.name,
117 |         'cpp_kernels' : ''.join(map(' -kernel_files {}'.format, cpp_kernels))
118 |     }
119 |     super().__init__(PACKAGEXO_COMMANDS.format(**kwargs))
120 | 
121 |   def __exit__(self, *args):
122 |     super().__exit__(*args)
123 |     self.tmpdir.cleanup()
124 | 
125 | HLS_COMMANDS = r'''
126 | cd "{project_dir}"
127 | open_project "{project_name}"
128 | set_top {top_name}
129 | {add_kernels}
130 | open_solution "{solution_name}"
131 | set_part {{{part_num}}}
132 | create_clock -period {clock_period} -name default
133 | config_compile -name_max_length 253
134 | config_interface -m_axi_addr64
135 | config_rtl -disable_start_propagation
136 | csynth_design
137 | exit
138 | '''
139 | 
140 | class RunHls(VivadoHls):
141 |   """Runs Vivado HLS for the given kernels and generates HDL files.
142 | 
143 |   Args:
144 |     tarfileobj: file object that will contain the reports and HDL files.
145 |     kernel_files: file names of the kernels.
146 |     top_name: top-level module name.
147 |     clock_period: target clock period.
148 |     part_num: target part number.
149 |   """
150 |   def __init__(self, tarfileobj, kernel_files, top_name, clock_period,
151 |                part_num):
152 |     self.project_dir = tempfile.TemporaryDirectory(prefix='hls-')
153 |     self.project_name = 'project'
154 |     self.solution_name = 'solution'
155 |     self.tarfileobj = tarfileobj
156 |     kwargs = {
157 |         'project_dir' : self.project_dir.name,
158 |         'project_name' : self.project_name,
159 |         'solution_name' : self.solution_name,
160 |         'top_name' : top_name,
161 |         'add_kernels' : '\n'.join(map(
162 |             'add_files "{}" -cflags "-std=c++11"'.format, kernel_files)),
163 |         'part_num' : part_num,
164 |         'clock_period' : clock_period
165 |     }
166 |     super().__init__(HLS_COMMANDS.format(**kwargs))
167 | 
168 |   def __exit__(self, *args):
169 |     super().__exit__(*args)
170 |     if self.returncode == 0:
171 |       with tarfile.open(mode='w', fileobj=self.tarfileobj) as tar:
172 |         solution_dir = os.path.join(self.project_dir.name, self.project_name,
173 |                                     self.solution_name)
174 |         tar.add(os.path.join(solution_dir, 'syn/report'), arcname='report')
175 |         tar.add(os.path.join(solution_dir, 'syn/verilog'), arcname='hdl')
176 |         tar.add(os.path.join(solution_dir, self.solution_name + '.log'),
177 |                 arcname=self.solution_name + '.log')
178 |     self.project_dir.cleanup()
179 | 
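A minimal sketch of how RunHls is meant to be driven (the kernel file, top name, clock period, and part number below are placeholders; in the real flow the latter two would come from get_device_info, defined next):

```python
import tempfile

from haoda.backend import xilinx as backend

with tempfile.TemporaryFile() as tarfileobj:
  # Hypothetical values throughout; RunHls writes the tar on success.
  with backend.RunHls(
      tarfileobj, kernel_files=['blur_kernel.cpp'], top_name='blur_kernel',
      clock_period='3.33', part_num='xcu200-fsgd2104-2-e') as proc:
    _, stderr = proc.communicate()
  if proc.returncode != 0:
    raise RuntimeError(stderr.decode())
  tarfileobj.seek(0)  # the tar with 'report' and 'hdl' members starts here
```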
180 | XILINX_XML_NS = {'xd' : 'http://www.xilinx.com/xd'}
181 | 
182 | def get_device_info(platform_path):
183 |   """Extract device part number and target frequency from SDAccel platform.
184 | 
185 |   Currently only supports 5.x platforms.
186 |   """
187 |   device_name = os.path.basename(platform_path)
188 |   with zipfile.ZipFile(os.path.join(
189 |       platform_path, 'hw', device_name + '.dsa')) as platform:
190 |     with platform.open(device_name + '.hpfm') as metadata:
191 |       platform_info = ET.parse(metadata).find('./xd:component/xd:platformInfo',
192 |                                               XILINX_XML_NS)
193 |       return {
194 |           'clock_period' : platform_info.find(
195 |               "./xd:systemClocks/xd:clock/[@xd:id='0']", XILINX_XML_NS).attrib[
196 |                   '{{{xd}}}period'.format(**XILINX_XML_NS)],
197 |           'part_num' : platform_info.find(
198 |               'xd:deviceInfo', XILINX_XML_NS).attrib[
199 |                   '{{{xd}}}name'.format(**XILINX_XML_NS)]
200 |       }
201 | 
202 | KERNEL_XML_TEMPLATE = r'''
203 | <?xml version="1.0" encoding="UTF-8"?>
204 | <root versionMajor="1" versionMinor="0">
205 |   <kernel name="{top_name}" language="ip" vlnv="xilinx.com:RTLKernel:{top_name}:1.0" attributes="" preferredWorkGroupSizeMultiple="0" workGroupSize="1" interrupt="true">
206 |     <ports>{m_axi_ports}
207 |       <port name="s_axi_control" mode="slave" range="0x1000" dataWidth="32" portType="addressable" base="0x0"/>
208 |     </ports>
209 |     <args>{args}
210 |     </args>
211 |   </kernel>
212 | </root>
213 | '''
214 | 
215 | PORT_TEMPLATE = r'''
216 |       <port name="m_axi_{name}" mode="master" range="0xFFFFFFFF" dataWidth="{width}" portType="addressable" base="0x0"/>
217 | '''
218 | 
219 | ARG_TEMPLATE = r'''
220 |       <arg name="{name}" addressQualifier="{addr_qualifier}" id="{arg_id}" port="{port_name}" size="{size:#x}" offset="{offset:#x}" hostOffset="0x0" hostSize="{host_size:#x}" type="{c_type}"/>
221 | '''
222 | 
223 | def print_kernel_xml(top_name, ports, kernel_xml):
224 |   """Generate kernel.xml file.
225 | 
226 |   Args:
227 |     top_name: name of the top-level kernel function.
228 |     ports: sequence of (port_name, bundle_name, haoda_type, _) of m_axi ports
229 |     kernel_xml: file object to write to.
230 |   """
231 |   m_axi_ports = ''
232 |   args = ''
233 |   offset = 0x10
234 |   arg_id = 0
235 |   bundle_set = set()
236 |   for port_name, bundle_name, haoda_type, _ in ports:
237 |     size = host_size = 8
238 |     if bundle_name not in bundle_set:
239 |       m_axi_ports += PORT_TEMPLATE.format(
240 |           name=bundle_name,
241 |           width=util.get_width_in_bits(haoda_type)).rstrip('\n')
242 |       bundle_set.add(bundle_name)
243 |     args += ARG_TEMPLATE.format(
244 |         name=port_name, addr_qualifier=1, arg_id=arg_id,
245 |         port_name='m_axi_' + bundle_name, c_type=util.get_c_type(haoda_type),
246 |         size=size, offset=offset, host_size=host_size).rstrip('\n')
247 |     offset += size + 4
248 |     arg_id += 1
249 |   args += ARG_TEMPLATE.format(
250 |       name='coalesced_data_num', addr_qualifier=0, arg_id=arg_id,
251 |       port_name='s_axi_control', c_type='uint64_t', size=size, offset=offset,
252 |       host_size=host_size).rstrip('\n')
253 |   kernel_xml.write(KERNEL_XML_TEMPLATE.format(
254 |       top_name=top_name, m_axi_ports=m_axi_ports, args=args))
255 | 
256 | BRAM_FIFO_TEMPLATE = r'''
257 | `timescale 1ns/1ps
258 | 
259 | module {name}_w{width}_d{depth}_A
260 | #(parameter
261 |     MEM_STYLE   = "block",
262 |     DATA_WIDTH  = {width},
263 |     ADDR_WIDTH  = {addr_width},
264 |     DEPTH       = {depth}
265 | )
266 | (
267 |     // system signal
268 |     input  wire                  clk,
269 |     input  wire                  reset,
270 | 
271 |     // write
272 |     output wire                  if_full_n,
273 |     input  wire                  if_write_ce,
274 |     input  wire                  if_write,
275 |     input  wire [DATA_WIDTH-1:0] if_din,
276 | 
277 |     // read
278 |     output wire                  if_empty_n,
279 |     input  wire                  if_read_ce,
280 |     input  wire                  if_read,
281 |     output wire [DATA_WIDTH-1:0] if_dout
282 | );
283 | //------------------------Parameter----------------------
284 | 
285 | //------------------------Local signal-------------------
286 | (* ram_style = MEM_STYLE *)
287 | reg  [DATA_WIDTH-1:0] mem[0:DEPTH-1];
288 | reg  [DATA_WIDTH-1:0] q_buf = 1'b0;
289 | reg  [ADDR_WIDTH-1:0] waddr = 1'b0;
290 | reg  [ADDR_WIDTH-1:0] raddr = 1'b0;
291 | wire [ADDR_WIDTH-1:0] wnext;
292 | wire [ADDR_WIDTH-1:0] rnext;
293 | wire                  push;
294 | wire                  pop;
295 | reg  [ADDR_WIDTH-1:0] usedw = 1'b0;
296 | reg                   full_n = 1'b1;
297 | reg                   empty_n = 1'b0;
298 | reg  [DATA_WIDTH-1:0] q_tmp = 1'b0;
299 | reg                   show_ahead = 1'b0;
300 | reg  [DATA_WIDTH-1:0] dout_buf = 1'b0;
301 | reg                   dout_valid = 1'b0;
302 | 
303 | 
304 | //------------------------Instantiation------------------
306 | //------------------------Task and function-------------- 307 | 308 | //------------------------Body--------------------------- 309 | assign if_full_n = full_n; 310 | assign if_empty_n = dout_valid; 311 | assign if_dout = dout_buf; 312 | assign push = full_n & if_write_ce & if_write; 313 | assign pop = empty_n & if_read_ce & (~dout_valid | if_read); 314 | assign wnext = !push ? waddr : 315 | (waddr == DEPTH - 1) ? 1'b0 : 316 | waddr + 1'b1; 317 | assign rnext = !pop ? raddr : 318 | (raddr == DEPTH - 1) ? 1'b0 : 319 | raddr + 1'b1; 320 | 321 | // waddr 322 | always @(posedge clk) begin 323 | if (reset == 1'b1) 324 | waddr <= 1'b0; 325 | else 326 | waddr <= wnext; 327 | end 328 | 329 | // raddr 330 | always @(posedge clk) begin 331 | if (reset == 1'b1) 332 | raddr <= 1'b0; 333 | else 334 | raddr <= rnext; 335 | end 336 | 337 | // usedw 338 | always @(posedge clk) begin 339 | if (reset == 1'b1) 340 | usedw <= 1'b0; 341 | else if (push & ~pop) 342 | usedw <= usedw + 1'b1; 343 | else if (~push & pop) 344 | usedw <= usedw - 1'b1; 345 | end 346 | 347 | // full_n 348 | always @(posedge clk) begin 349 | if (reset == 1'b1) 350 | full_n <= 1'b1; 351 | else if (push & ~pop) 352 | full_n <= (usedw != DEPTH - 1); 353 | else if (~push & pop) 354 | full_n <= 1'b1; 355 | end 356 | 357 | // empty_n 358 | always @(posedge clk) begin 359 | if (reset == 1'b1) 360 | empty_n <= 1'b0; 361 | else if (push & ~pop) 362 | empty_n <= 1'b1; 363 | else if (~push & pop) 364 | empty_n <= (usedw != 1'b1); 365 | end 366 | 367 | // mem 368 | always @(posedge clk) begin 369 | if (push) 370 | mem[waddr] <= if_din; 371 | end 372 | 373 | // q_buf 374 | always @(posedge clk) begin 375 | q_buf <= mem[rnext]; 376 | end 377 | 378 | // q_tmp 379 | always @(posedge clk) begin 380 | if (reset == 1'b1) 381 | q_tmp <= 1'b0; 382 | else if (push) 383 | q_tmp <= if_din; 384 | end 385 | 386 | // show_ahead 387 | always @(posedge clk) begin 388 | if (reset == 1'b1) 389 | show_ahead <= 1'b0; 390 | else if (push && usedw == pop) 391 | show_ahead <= 1'b1; 392 | else 393 | show_ahead <= 1'b0; 394 | end 395 | 396 | // dout_buf 397 | always @(posedge clk) begin 398 | if (reset == 1'b1) 399 | dout_buf <= 1'b0; 400 | else if (pop) 401 | dout_buf <= show_ahead? q_tmp : q_buf; 402 | end 403 | 404 | // dout_valid 405 | always @(posedge clk) begin 406 | if (reset == 1'b1) 407 | dout_valid <= 1'b0; 408 | else if (pop) 409 | dout_valid <= 1'b1; 410 | else if (if_read_ce & if_read) 411 | dout_valid <= 1'b0; 412 | end 413 | 414 | endmodule 415 | ''' 416 | 417 | SRL_FIFO_TEMPLATE = r''' 418 | // ============================================================== 419 | // File generated by Vivado(TM) HLS - High-Level Synthesis from C, C++ and SystemC 420 | // Version: 2018.2 421 | // Copyright (C) 1986-2018 Xilinx, Inc. All Rights Reserved. 
422 | // 423 | // ============================================================== 424 | 425 | 426 | `timescale 1 ns / 1 ps 427 | 428 | module {name}_w{width}_d{depth}_A_shiftReg ( 429 | clk, 430 | data, 431 | ce, 432 | a, 433 | q); 434 | 435 | parameter DATA_WIDTH = 32'd{width}; 436 | parameter ADDR_WIDTH = 32'd{addr_width}; 437 | parameter DEPTH = {depth_width}'d{depth}; 438 | 439 | input clk; 440 | input [DATA_WIDTH-1:0] data; 441 | input ce; 442 | input [ADDR_WIDTH-1:0] a; 443 | output [DATA_WIDTH-1:0] q; 444 | 445 | reg[DATA_WIDTH-1:0] SRL_SIG [0:DEPTH-1]; 446 | integer i; 447 | 448 | always @ (posedge clk) 449 | begin 450 | if (ce) 451 | begin 452 | for (i=0;i threshold: 629 | self.bram_fifo_module(width, depth) 630 | else: 631 | self.srl_fifo_module(width, depth) 632 | 633 | def bram_fifo_module(self, width, depth, name='fifo'): 634 | """Generate BRAM FIFO with the given parameters. 635 | 636 | Generate a BRAM FIFO module named {name}_w{width}_d{depth}_A. 637 | 638 | Args: 639 | printer: VerilogPrinter to print to. 640 | width: FIFO width 641 | depth: FIFO depth 642 | name: Optionally give the fifo a name prefix, default to 'fifo'. 643 | """ 644 | self._out.write(BRAM_FIFO_TEMPLATE.format( 645 | width=width, depth=depth, name=name, 646 | addr_width=(depth - 1).bit_length())) 647 | 648 | def srl_fifo_module(self, width, depth, name='fifo'): 649 | """Generate SRL FIFO with the given parameters. 650 | 651 | Generate a SRL FIFO module named {name}_w{width}_d{depth}_A. 652 | 653 | Args: 654 | printer: VerilogPrinter to print to. 655 | width: FIFO width 656 | depth: FIFO depth 657 | name: Optionally give the fifo a name prefix, default to 'fifo'. 658 | """ 659 | addr_width = (depth - 1).bit_length() 660 | self._out.write(SRL_FIFO_TEMPLATE.format( 661 | width=width, depth=depth, name=name, addr_width=addr_width, 662 | depth_width=addr_width + 1)) 663 | -------------------------------------------------------------------------------- /src/haoda/ir/__init__.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import copy 3 | import logging 4 | import math 5 | 6 | import cached_property 7 | 8 | from haoda import util 9 | from haoda.ir import visitor 10 | 11 | _logger = logging.getLogger().getChild(__name__) 12 | 13 | GRAMMAR = r''' 14 | Bin: /0[Bb][01]+([Uu][Ll][Ll]?|[Ll]?[Ll]?[Uu]?)/; 15 | Dec: /\d+([Uu][Ll][Ll]?|[Ll]?[Ll]?[Uu]?)/; 16 | Oct: /0[0-7]+([Uu][Ll][Ll]?|[Ll]?[Ll]?[Uu]?)/; 17 | Hex: /0[Xx][0-9a-fA-F]+([Uu][Ll][Ll]?|[Ll]?[Ll]?[Uu]?)/; 18 | Int: ('+'|'-')?(Hex|Bin|Oct|Dec); 19 | Float: /(((\d*\.\d+|\d+\.)([+-]?[Ee]\d+)?)|(\d+[+-]?[Ee]\d+))[FfLl]?/; 20 | Num: Float|Int; 21 | 22 | Type: FixedType | FloatType; 23 | FixedType: /u?int[1-9]\d*(_[1-9]\d*)?/; 24 | FloatType: /float[1-9]\d*(_[1-9]\d*)?/ | 'float' | 'double' | 'half'; 25 | 26 | Let: (haoda_type=Type)? 
66 | class Node():
67 |   """An immutable, hashable IR node.
68 |   """
69 |   SCALAR_ATTRS = ()
70 |   LINEAR_ATTRS = ()
71 | 
72 |   @property
73 |   def ATTRS(self):
74 |     return self.SCALAR_ATTRS + self.LINEAR_ATTRS
75 | 
76 |   def __init__(self, **kwargs):
77 |     for attr in self.SCALAR_ATTRS:
78 |       setattr(self, attr, kwargs.pop(attr))
79 |     for attr in self.LINEAR_ATTRS:
80 |       setattr(self, attr, tuple(kwargs.pop(attr)))
81 | 
82 |   def __hash__(self):
83 |     return hash((tuple(getattr(self, _) for _ in self.SCALAR_ATTRS),
84 |                  tuple(tuple(getattr(self, _)) for _ in self.LINEAR_ATTRS)))
85 | 
86 |   def __eq__(self, other):
87 |     return all(hasattr(other, attr) and
88 |                getattr(self, attr) == getattr(other, attr)
89 |                for attr in self.ATTRS)
90 | 
91 |   @property
92 |   def c_type(self):
93 |     return util.get_c_type(self.haoda_type)
94 | 
95 |   @property
96 |   def width_in_bits(self):
97 |     return util.get_width_in_bits(self.haoda_type)
98 | 
99 |   def visit(self, callback, args=None, pre_recursion=None, post_recursion=None):
100 |     """A general-purpose, flexible, and powerful visitor.
101 | 
102 |     The args parameter will be passed to the callback callable so that it may
103 |     read or write any information from or to the caller.
104 | 
105 |     A copy of self will be made and passed to the callback to avoid destructive
106 |     access.
107 | 
108 |     If a new object is returned by the callback, it will be returned directly
109 |     without recursion.
110 | 
111 |     If the callback returns the same object, its attributes are examined one by
112 |     one: attributes that the callback has already changed are kept as-is, while
113 |     unchanged attributes are visited recursively.
114 | """ 115 | 116 | def callback_wrapper(callback, obj, args): 117 | if callback is None: 118 | return obj 119 | result = callback(obj, args) 120 | if result is not None: 121 | return result 122 | return obj 123 | 124 | self_copy = copy.copy(self) 125 | obj = callback_wrapper(callback, self_copy, args) 126 | if obj is not self_copy: 127 | return obj 128 | self_copy = callback_wrapper(pre_recursion, copy.copy(self), args) 129 | scalar_attrs = {attr: getattr(self_copy, attr).visit( 130 | callback, args, pre_recursion, post_recursion) 131 | if isinstance(getattr(self_copy, attr), Node) 132 | else getattr(self_copy, attr) 133 | for attr in self_copy.SCALAR_ATTRS} 134 | linear_attrs = {attr: tuple(_.visit( 135 | callback, args, pre_recursion, post_recursion) 136 | if isinstance(_, Node) else _ 137 | for _ in getattr(self_copy, attr)) 138 | for attr in self_copy.LINEAR_ATTRS} 139 | 140 | for attr in self.SCALAR_ATTRS: 141 | # old attribute may not exist in mutated object 142 | if not hasattr(obj, attr): 143 | continue 144 | if getattr(obj, attr) is getattr(self, attr): 145 | if isinstance(getattr(obj, attr), Node): 146 | setattr(obj, attr, scalar_attrs[attr]) 147 | for attr in self.LINEAR_ATTRS: 148 | # old attribute may not exist in mutated object 149 | if not hasattr(obj, attr): 150 | continue 151 | setattr(obj, attr, tuple( 152 | c if a is b and isinstance(a, Node) else a 153 | for a, b, c in zip(getattr(obj, attr), getattr(self, attr), 154 | linear_attrs[attr]))) 155 | return callback_wrapper(post_recursion, obj, args) 156 | 157 | class Let(Node): 158 | SCALAR_ATTRS = 'haoda_type', 'name', 'expr' 159 | 160 | def __str__(self): 161 | result = '{} = {}'.format(self.name, unparenthesize(self.expr)) 162 | if self.haoda_type is not None: 163 | result = '{} {}'.format(self.haoda_type, result) 164 | return result 165 | 166 | @property 167 | def haoda_type(self): 168 | if self._haoda_type is None: 169 | return self.expr.haoda_type 170 | return self._haoda_type 171 | 172 | @haoda_type.setter 173 | def haoda_type(self, val): 174 | self._haoda_type = val 175 | 176 | @property 177 | def c_expr(self): 178 | return 'const {} {} = {};'.format(self.c_type, self.name, 179 | unparenthesize(self.expr.c_expr)) 180 | 181 | class Ref(Node): 182 | SCALAR_ATTRS = 'name', 'lat' 183 | LINEAR_ATTRS = ('idx',) 184 | def __init__(self, **kwargs): 185 | super().__init__(**kwargs) 186 | self.idx = tuple(self.idx) 187 | if not hasattr(self, 'haoda_type'): 188 | self.haoda_type = None 189 | # self.lat will be defined in super().__init__(**kwargs) 190 | # pylint: disable=access-member-before-definition 191 | if isinstance(self.lat, str): 192 | self.lat = str2int(self.lat) 193 | 194 | def __str__(self): 195 | result = '{}({})'.format(self.name, ', '.join(map(str, self.idx))) 196 | if self.lat is not None: 197 | result += ' ~{}'.format(self.lat) 198 | return result 199 | 200 | class BinaryOp(Node): 201 | LINEAR_ATTRS = 'operand', 'operator' 202 | def __str__(self): 203 | result = str(self.operand[0]) 204 | for operator, operand in zip(self.operator, self.operand[1:]): 205 | result += ' {} {}'.format(operator, operand) 206 | if self.singleton: 207 | return result 208 | return parenthesize(result) 209 | 210 | @property 211 | def haoda_type(self): 212 | # TODO: derive from all operands 213 | return self.operand[0].haoda_type 214 | 215 | @property 216 | def c_expr(self): 217 | result = self.operand[0].c_expr 218 | for operator, operand in zip(self.operator, self.operand[1:]): 219 | result += ' {} {}'.format(operator, 
operand.c_expr) 220 | if self.singleton: 221 | return result 222 | return parenthesize(result) 223 | 224 | @property 225 | def singleton(self) -> bool: 226 | return len(self.operand) == 1 227 | 228 | class Expr(BinaryOp): 229 | pass 230 | 231 | class LogicAnd(BinaryOp): 232 | pass 233 | 234 | class BinaryOr(BinaryOp): 235 | pass 236 | 237 | class Xor(BinaryOp): 238 | pass 239 | 240 | class BinaryAnd(BinaryOp): 241 | pass 242 | 243 | class EqCmp(BinaryOp): 244 | pass 245 | 246 | class LtCmp(BinaryOp): 247 | pass 248 | 249 | class AddSub(BinaryOp): 250 | pass 251 | 252 | class MulDiv(BinaryOp): 253 | pass 254 | 255 | class Unary(Node): 256 | SCALAR_ATTRS = ('operand',) 257 | LINEAR_ATTRS = ('operator',) 258 | def __str__(self): 259 | return ''.join(self.operator)+str(self.operand) 260 | 261 | @property 262 | def haoda_type(self): 263 | return self.operand.haoda_type 264 | 265 | @property 266 | def c_expr(self): 267 | return ''.join(self.operator)+self.operand.c_expr 268 | 269 | class Operand(Node): 270 | SCALAR_ATTRS = 'cast', 'call', 'ref', 'num', 'var', 'expr' 271 | def __str__(self): 272 | for attr in ('cast', 'call', 'ref', 'num', 'var'): 273 | if getattr(self, attr) is not None: 274 | return str(getattr(self, attr)) 275 | # pylint: disable=useless-else-on-loop 276 | else: 277 | return parenthesize(self.expr) 278 | 279 | @property 280 | def c_expr(self): 281 | for attr in ('cast', 'call', 'ref', 'num', 'var'): 282 | attr = getattr(self, attr) 283 | if attr is not None: 284 | if hasattr(attr, 'c_expr'): 285 | return attr.c_expr 286 | return str(attr) 287 | # pylint: disable=useless-else-on-loop 288 | else: 289 | return parenthesize(self.expr.c_expr) 290 | 291 | @property 292 | def haoda_type(self): 293 | for attr in self.ATTRS: 294 | val = getattr(self, attr) 295 | if val is not None: 296 | if hasattr(val, 'haoda_type'): 297 | return val.haoda_type 298 | if attr == 'num': 299 | if 'u' in val.lower(): 300 | if 'll' in val.lower(): 301 | return 'uint64' 302 | return 'uint32' 303 | if 'll' in val.lower(): 304 | return 'int64' 305 | if 'fl' in val.lower(): 306 | return 'double' 307 | if 'f' in val.lower() or 'e' in val.lower(): 308 | return 'float' 309 | if '.' in val: 310 | return 'double' 311 | return 'int32' 312 | return None 313 | raise util.InternalError('undefined Operand') 314 | 315 | class Cast(Node): 316 | SCALAR_ATTRS = 'haoda_type', 'expr' 317 | def __str__(self): 318 | return '{}{}'.format(self.haoda_type, parenthesize(self.expr)) 319 | 320 | @property 321 | def c_expr(self): 322 | return 'static_cast<{} >{}'.format(self.c_type, 323 | parenthesize(self.expr.c_expr)) 324 | 325 | class Call(Node): 326 | SCALAR_ATTRS = ('name',) 327 | LINEAR_ATTRS = ('arg',) 328 | def __str__(self): 329 | return '{}({})'.format(self.name, ', '.join(map(str, self.arg))) 330 | 331 | @property 332 | def haoda_type(self): 333 | if self.name in ('select',): 334 | return self.arg[1].haoda_type 335 | return self.arg[0].haoda_type 336 | 337 | @property 338 | def c_expr(self): 339 | return '{}({})'.format(self.name, ', '.join(_.c_expr for _ in self.arg)) 340 | 341 | class Var(Node): 342 | SCALAR_ATTRS = ('name',) 343 | LINEAR_ATTRS = ('idx',) 344 | def __str__(self): 345 | return self.name+''.join(map('[{}]'.format, self.idx)) 346 | 347 | @property 348 | def c_expr(self): 349 | return self.name+''.join(map('[{}]'.format, self.idx)) 350 | 351 | class FIFO(Node): 352 | """A reference to another node in a haoda.ir.Expr. 353 | 354 | This is used to represent a read/write from/to a Module in an output's Expr. 
355 |   It replaces Ref in haoda.ir, which is used to represent an element
356 |   reference to a tensor.
357 | 
358 |   Attributes:
359 |     read_module: Module reading from this FIFO.
360 |     read_lat: int, at what cycle of a pipelined loop it is being read.
361 |     write_module: Module writing to this FIFO.
362 |     write_lat: int, at what cycle of a pipelined loop it is being written.
363 |     depth: int, FIFO depth.
364 |   """
365 |   IMMUTABLE_ATTRS = 'read_module', 'write_module'
366 |   SCALAR_ATTRS = 'read_module', 'read_lat', 'write_module', 'write_lat', 'depth'
367 | 
368 |   def __init__(self, write_module, read_module,
369 |                depth=None, write_lat=None, read_lat=None):
370 |     super().__init__(write_module=write_module, read_module=read_module,
371 |                      depth=depth, write_lat=write_lat, read_lat=read_lat)
372 | 
373 |   def __repr__(self):
374 |     return 'fifo[%d]: %s%s => %s%s' % (self.depth, repr(self.write_module),
375 |         '' if self.write_lat is None else ' ~%s'%self.write_lat,
376 |         repr(self.read_module),
377 |         '' if self.read_lat is None else ' ~%s'%self.read_lat)
378 | 
379 |   def __hash__(self):
380 |     return hash(tuple(getattr(self, _) for _ in self.IMMUTABLE_ATTRS))
381 | 
382 |   def __eq__(self, other):
383 |     return all(getattr(self, _) == getattr(other, _)
384 |                for _ in type(self).IMMUTABLE_ATTRS)
385 |   @property
386 |   def edge(self):
387 |     return self.write_module, self.read_module
388 | 
389 |   @property
390 |   def haoda_type(self):
391 |     return self.write_module.exprs[self].haoda_type
392 | 
393 |   @property
394 |   def c_expr(self):
395 |     return 'from_{}_to_{}'.format(self.write_module.name, self.read_module.name)
396 | 
397 | class Module():
398 |   """A node in the dataflow graph.
399 | 
400 |   This is the base class for a dataflow module. It defines the parent (input)
401 |   nodes, children (output) nodes, output expressions, input schedules, and
402 |   output schedules. It also has a name to help identify itself.
403 | 
404 |   Attributes:
405 |     parents: List of parent (input) Modules.
406 |     children: List of child (output) Modules.
407 |     lets: List of haoda.ir.Let expressions.
408 |     exprs: Dict of {FIFO: haoda.ir.Expr}, stores an output's expression.
409 |   """
410 |   def __init__(self):
411 |     """Initializes attributes into empty list or dict.
412 | """ 413 | self.parents = [] 414 | self.children = [] 415 | self.lets = [] 416 | self.exprs = collections.OrderedDict() 417 | 418 | @property 419 | def name(self): 420 | return 'module_%u' % hash(self) 421 | 422 | @property 423 | def fifos(self): 424 | return tuple(self.exprs.keys()) 425 | 426 | @property 427 | def fifo_dict(self): 428 | return {(self, fifo.read_module): fifo for fifo in self.exprs} 429 | 430 | def fifo(self, dst_node): 431 | return self.fifo_dict[(self, dst_node)] 432 | 433 | def get_latency(self, dst_node): 434 | return self.fifo(dst_node).write_lat or 0 435 | 436 | def visit_loads(self, callback, args=None): 437 | obj = copy.copy(self) 438 | obj.lets = tuple(_.visit(callback, args) for _ in self.lets) 439 | obj.exprs = collections.OrderedDict() 440 | for fifo in self.exprs: 441 | obj.exprs[fifo] = self.exprs[fifo].visit(callback, args) 442 | return obj 443 | 444 | @property 445 | def dram_reads(self): 446 | return self._interfaces['dram_reads'] 447 | 448 | @property 449 | def dram_writes(self): 450 | return self._interfaces['dram_writes'] 451 | 452 | @property 453 | def input_fifos(self): 454 | return self._interfaces['input_fifos'] 455 | 456 | @property 457 | def output_fifos(self): 458 | return self._interfaces['output_fifos'] 459 | 460 | @cached_property.cached_property 461 | def _interfaces(self): 462 | # find dram reads 463 | reads_in_lets = tuple(_.expr for _ in self.lets) 464 | reads_in_exprs = tuple(self.exprs.values()) 465 | dram_reads = collections.OrderedDict() 466 | for dram_ref in visitor.get_dram_refs(reads_in_lets + reads_in_exprs): 467 | for bank in dram_ref.dram: 468 | dram_reads[(dram_ref.var, bank)] = (dram_ref, bank) 469 | dram_reads = tuple(dram_reads.values()) 470 | 471 | # find dram writes 472 | writes_in_lets = tuple(_.name for _ in self.lets 473 | if not isinstance(_.name, str)) 474 | dram_writes = collections.OrderedDict() 475 | for dram_ref in visitor.get_dram_refs(writes_in_lets): 476 | for bank in dram_ref.dram: 477 | dram_writes[(dram_ref.var, bank)] = (dram_ref, bank) 478 | dram_writes = tuple(dram_writes.values()) 479 | 480 | output_fifos = tuple(_.c_expr for _ in self.exprs) 481 | input_fifos = tuple(_.c_expr for _ in visitor.get_read_fifo_set(self)) 482 | 483 | 484 | return { 485 | 'dram_writes' : dram_writes, 486 | 'output_fifos' : output_fifos, 487 | 'input_fifos' : input_fifos, 488 | 'dram_reads' : dram_reads 489 | } 490 | 491 | def __str__(self): 492 | return '%s @ 0x%x: %s' % (type(self).__name__, id(self), 493 | self.__dict__) 494 | 495 | def __repr__(self): 496 | return '%s @ 0x%x' % (type(self).__name__, id(self)) 497 | 498 | def add_child(self, child): 499 | """Add a child (low level). 500 | 501 | This method only handles children and parents field; lets and exprs are 502 | not updated. 503 | 504 | Arguments: 505 | child: Module, child being added 506 | """ 507 | if child not in self.children: 508 | self.children.append(child) 509 | if self not in child.parents: 510 | child.parents.append(self) 511 | 512 | def bfs_node_gen(self): 513 | """BFS over descendant nodes. 514 | 515 | This method is a BFS traversal generator over all descendant nodes. 516 | """ 517 | node_queue = collections.deque([self]) 518 | seen_nodes = {self} 519 | while node_queue: 520 | node = node_queue.popleft() 521 | yield node 522 | for child in node.children: 523 | if child not in seen_nodes: 524 | node_queue.append(child) 525 | seen_nodes.add(child) 526 | 527 | def dfs_node_gen(self): 528 | """DFS over descendant nodes. 
529 | 530 | This method is a DFS traversal generator over all descendant nodes. 531 | """ 532 | node_stack = [self] 533 | seen_nodes = {self} 534 | while node_stack: 535 | node = node_stack.pop() 536 | yield node 537 | for child in node.children: 538 | if child not in seen_nodes: 539 | node_stack.append(child) 540 | seen_nodes.add(child) 541 | 542 | def tpo_node_gen(self): 543 | """Traverse descendant nodes in topological order. 544 | 545 | This method is a generator that traverses all descendant nodes in 546 | topological order. 547 | """ 548 | nodes = collections.OrderedDict() 549 | for node in self.bfs_node_gen(): 550 | nodes[node] = len(node.parents) 551 | while nodes: 552 | for node in nodes: 553 | if nodes[node] == 0: 554 | yield node 555 | for child in node.children: 556 | nodes[child] -= 1 557 | del nodes[node] 558 | break 559 | else: 560 | return 561 | 562 | def bfs_edge_gen(self): 563 | """BFS over descendant edges. 564 | 565 | This method is a BFS traversal generator over all descendant edges. 566 | """ 567 | node_queue = collections.deque([self]) 568 | seen_nodes = {self} 569 | while node_queue: 570 | node = node_queue.popleft() 571 | for child in node.children: 572 | yield node, child 573 | if child not in seen_nodes: 574 | node_queue.append(child) 575 | seen_nodes.add(child) 576 | 577 | def dfs_edge_gen(self): 578 | """DFS over descendant edges. 579 | 580 | This method is a DFS traversal generator over all descendant edges. 581 | """ 582 | node_stack = [self] 583 | seen_nodes = {self} 584 | while node_stack: 585 | node = node_stack.pop() 586 | for child in node.children: 587 | yield node, child 588 | if child not in seen_nodes: 589 | node_stack.append(child) 590 | seen_nodes.add(child) 591 | 592 | def get_descendants(self): 593 | """Get all descendant nodes. 594 | 595 | This method returns all descendant nodes as a set. 596 | 597 | Returns: 598 | Set of descendant Module. 599 | """ 600 | return {self}.union(*map(Module.get_descendants, self.children)) 601 | 602 | def get_connections(self): 603 | """Get all descendant edges. 604 | 605 | This method returns all descendant edges as a set. 606 | 607 | Returns: 608 | Set of descendant (src Module, dst Module) tuple. 609 | """ 610 | return ({(self, child) for child in self.children} 611 | .union(*map(Module.get_connections, self.children))) 612 | 613 | 614 | class DelayedRef(Node): 615 | """A delayed FIFO reference. 
616 | 617 | Attributes: 618 | delay: int 619 | ref: FIFO 620 | """ 621 | SCALAR_ATTRS = ('delay', 'ref') 622 | @property 623 | def haoda_type(self): 624 | return self.ref.haoda_type 625 | 626 | def __str__(self): 627 | return '%s delayed %d' % (self.ref, self.delay) 628 | 629 | def __repr__(self): 630 | return str(self) 631 | 632 | def __hash__(self): 633 | return hash((self.delay, self.ref)) 634 | 635 | def __eq__(self, other): 636 | return all(getattr(self, attr) == getattr(other, attr) 637 | for attr in ('delay', 'ref')) 638 | 639 | @property 640 | def buf_name(self): 641 | return '{ref.c_expr}_delayed_{delay}_buf'.format(**self.__dict__) 642 | 643 | @property 644 | def ptr(self): 645 | return '{ref.c_expr}_delayed_{delay}_ptr'.format(**self.__dict__) 646 | 647 | @property 648 | def ptr_type(self): 649 | return 'uint%d' % int(math.log2(self.delay)+1) 650 | 651 | @property 652 | def c_expr(self): 653 | return '{ref.c_expr}_delayed_{delay}'.format(**self.__dict__) 654 | 655 | @property 656 | def c_ptr_type(self): 657 | return util.get_c_type(self.ptr_type) 658 | 659 | @property 660 | def c_ptr_decl(self): 661 | return '{} {} = 0;'.format(self.c_ptr_type, self.ptr) 662 | 663 | @property 664 | def c_buf_ref(self): 665 | return '{}[{}]'.format(self.buf_name, self.ptr) 666 | 667 | @property 668 | def c_buf_decl(self): 669 | return '{} {}[{}];'.format(self.c_type, self.buf_name, self.delay) 670 | 671 | @property 672 | def c_buf_load(self): 673 | return '{} = {};'.format(self.c_expr, self.c_buf_ref) 674 | 675 | @property 676 | def c_buf_store(self): 677 | return '{} = {};'.format(self.c_buf_ref, self.ref.ref_name) 678 | 679 | @property 680 | def c_next_ptr_expr(self): 681 | return '{ptr} < {depth} ? {c_ptr_type}({ptr}+1) : {c_ptr_type}(0)'.format( 682 | ptr=self.ptr, c_ptr_type=self.c_ptr_type, depth=self.delay-1) 683 | 684 | class FIFORef(Node): 685 | """A FIFO reference. 686 | 687 | Attributes: 688 | fifo: FIFO it is linked to 689 | lat: int, at what cycle of a pipelined loop it is being referenced. 690 | ref_id: int, reference id in the current scope 691 | Properties: 692 | c_type: str 693 | c_expr: str 694 | haoda_type: str 695 | ld_name: str 696 | st_name: str 697 | ref_name: str 698 | """ 699 | SCALAR_ATTRS = ('fifo', 'lat', 'ref_id') 700 | LD_PREFIX = 'fifo_ld_' 701 | ST_PREFIX = 'fifo_st_' 702 | REF_PREFIX = 'fifo_ref_' 703 | def __str__(self): 704 | return '<%s fifo_ref_%d%s>' % (self.haoda_type, self.ref_id, 705 | '@%s'%self.lat if self.lat else '') 706 | 707 | def __repr__(self): 708 | return str(self) 709 | 710 | def __hash__(self): 711 | return hash((self.lat, self.ref_id)) 712 | 713 | def __eq__(self, other): 714 | return all(getattr(self, attr) == getattr(other, attr) 715 | for attr in ('lat', 'ref_id')) 716 | 717 | @property 718 | def haoda_type(self): 719 | return self.fifo.haoda_type 720 | 721 | @property 722 | def ld_name(self): 723 | return '{LD_PREFIX}{ref_id}'.format(**self.__dict__, **type(self).__dict__) 724 | 725 | @property 726 | def ref_name(self): 727 | return '{REF_PREFIX}{ref_id}'.format(**self.__dict__, **type(self).__dict__) 728 | 729 | @property 730 | def c_expr(self): 731 | return self.ref_name 732 | 733 | class DRAMRef(Node): 734 | """A DRAM reference. 
735 | 
736 |   Attributes:
737 |     haoda_type: str
738 |     dram: [int], DRAM id it is accessing
739 |     var: str, variable name it is accessing
740 |     offset: int
741 |   """
742 |   SCALAR_ATTRS = 'haoda_type', 'dram', 'var', 'offset'
743 |   def __str__(self):
744 |     return 'dram<bank {} {}@{}>'.format(util.lst2str(self.dram),
745 |                                         self.var, self.offset)
746 | 
747 |   def __repr__(self):
748 |     return str(self)
749 | 
750 |   def __hash__(self):
751 |     return hash((self.dram, self.offset))
752 | 
753 |   def __eq__(self, other):
754 |     return all(getattr(self, attr) == getattr(other, attr)
755 |                for attr in ('dram', 'offset'))
756 |   @property
757 |   def c_expr(self):
758 |     return str(self)
759 | 
760 |   def dram_buf_name(self, bank):
761 |     assert bank in self.dram, 'unexpected bank {}'.format(bank)
762 |     return 'dram_{}_bank_{}_buf'.format(self.var, bank)
763 | 
764 |   def dram_fifo_name(self, bank):
765 |     assert bank in self.dram, 'unexpected bank {}'.format(bank)
766 |     return 'dram_{}_bank_{}_fifo'.format(self.var, bank)
767 | 
768 | class ModuleTrait(Node):
769 |   """An immutable, hashable trait of a dataflow module.
770 | 
771 |   Attributes:
772 |     lets: tuple of lets
773 |     exprs: tuple of exprs
774 |     template_types: tuple of template types (TODO)
775 |     template_ints: tuple of template ints (TODO)
776 | 
777 |   Properties:
778 |     loads: tuple of FIFORefs
779 |   """
780 |   LINEAR_ATTRS = ('lets', 'exprs', 'template_types', 'template_ints')
781 | 
782 |   def __init__(self, node):
783 |     def mutate(obj, loads):
784 |       if isinstance(obj, FIFO):
785 |         if loads:
786 |           if obj not in loads:
787 |             load_id = next(reversed(loads.values())).ref_id+1
788 |           else:
789 |             return loads[obj]
790 |         else:
791 |           load_id = 0
792 |         fifo_ref = FIFORef(fifo=obj, lat=obj.read_lat, ref_id=load_id)
793 |         loads[obj] = fifo_ref
794 |         return fifo_ref
795 |       return obj
796 |     loads = collections.OrderedDict()
797 |     node = node.visit_loads(mutate, loads)
798 |     self.loads = tuple(loads.values())
799 |     super().__init__(lets=tuple(node.lets), exprs=tuple(node.exprs.values()),
800 |                      template_types=tuple(), template_ints=tuple())
801 |     _logger.debug('Signature: %s', self)
802 | 
803 |   def __repr__(self):
804 |     return '%s(loads: %s, lets: %s, exprs: %s)' % (
805 |         type(self).__name__,
806 |         util.idx2str(self.loads),
807 |         util.idx2str(self.lets),
808 |         util.idx2str(self.exprs))
809 | 
810 |   @property
811 |   def dram_reads(self):
812 |     return self._interfaces['dram_reads']
813 | 
814 |   @property
815 |   def dram_writes(self):
816 |     return self._interfaces['dram_writes']
817 | 
818 |   @property
819 |   def input_fifos(self):
820 |     return self._interfaces['input_fifos']
821 | 
822 |   @property
823 |   def output_fifos(self):
824 |     return self._interfaces['output_fifos']
825 | 
826 |   @cached_property.cached_property
827 |   def _interfaces(self):
828 |     # find dram reads
829 |     reads_in_lets = tuple(_.expr for _ in self.lets)
830 |     reads_in_exprs = tuple(self.exprs)
831 |     dram_reads = collections.OrderedDict()
832 |     for dram_ref in visitor.get_dram_refs(reads_in_lets + reads_in_exprs):
833 |       for bank in dram_ref.dram:
834 |         dram_reads[(dram_ref.var, bank)] = (dram_ref, bank)
835 |     dram_reads = tuple(dram_reads.values())
836 | 
837 |     # find dram writes
838 |     writes_in_lets = tuple(_.name for _ in self.lets
839 |                            if not isinstance(_.name, str))
840 |     dram_writes = collections.OrderedDict()
841 |     for dram_ref in visitor.get_dram_refs(writes_in_lets):
842 |       for bank in dram_ref.dram:
843 |         dram_writes[(dram_ref.var, bank)] = (dram_ref, bank)
844 |     dram_writes = tuple(dram_writes.values())
845 | 
846 |     output_fifos = tuple('{}{}'.format(FIFORef.ST_PREFIX, idx)
847 |                          for idx, expr in enumerate(self.exprs))
848 |     input_fifos = tuple(_.ld_name for _ in self.loads)
849 | 
850 |     return {
851 |         'dram_writes' : dram_writes,
852 |         'output_fifos' : output_fifos,
853 |         'input_fifos' : input_fifos,
854 |         'dram_reads' : dram_reads
855 |     }
856 | 
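ModuleTrait is hashable precisely so that structurally identical modules can be detected and share a single generated definition. A minimal sketch of that dedup pattern (the helper below is illustrative, not part of haoda):

```python
import collections

trait_table = collections.OrderedDict()

def trait_id(module):
  """Hypothetical helper: identical traits map to one shared id."""
  trait = ModuleTrait(module)
  return trait_table.setdefault(trait, len(trait_table))
```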
857 | def make_var(val):
858 |   """Make a literal Var from val."""
859 |   return Var(name=val, idx=())
860 | 
861 | def str2int(s, none_val=None):
862 |   if s is None:
863 |     return none_val
864 |   while s[-1] in 'UuLl':
865 |     s = s[:-1]
866 |   if s[0:2] == '0x' or s[0:2] == '0X':
867 |     return int(s, 16)
868 |   if s[0:2] == '0b' or s[0:2] == '0B':
869 |     return int(s, 2)
870 |   if s[0] == '0':
871 |     return int(s, 8)
872 |   return int(s)
873 | 
874 | def parenthesize(expr) -> str:
875 |   return '({})'.format(unparenthesize(expr))
876 | 
877 | def unparenthesize(expr) -> str:
878 |   expr_str = str(expr)
879 |   while expr_str.startswith('(') and expr_str.endswith(')'):
880 |     expr_str = expr_str[1:-1]
881 |   return expr_str
882 | 
883 | def get_result_type(operand1, operand2, operator):
884 |   for t in ('double', 'float') + sum((('int%d_t'%w, 'uint%d_t'%w)
885 |                                       for w in (64, 32, 16, 8)), tuple()):
886 |     if t in (operand1, operand2):
887 |       return t
888 |   raise util.SemanticError('cannot parse type: %s %s %s' %
889 |                            (operand1, operator, operand2))

--------------------------------------------------------------------------------
/src/haoda/ir/arithmetic/__init__.py:
--------------------------------------------------------------------------------
1 | import collections
2 | import logging
3 | 
4 | from haoda.ir.arithmetic import base
5 | 
6 | _logger = logging.getLogger().getChild(__name__)
7 | 
8 | def simplify(expr):
9 |   """Simplifies expressions.
10 | 
11 |   Args:
12 |     expr: A haoda.ir.Node or a sequence of haoda.ir.Node.
13 | 
14 |   Returns:
15 |     Simplified haoda.ir.Node or sequence.
16 |   """
17 | 
18 |   if expr is None:
19 |     _logger.debug('None expr, no simplification.')
20 |     return expr
21 | 
22 |   passes = base.compose(
23 |       base.flatten,
24 |       base.print_tree)
25 | 
26 |   if isinstance(expr, collections.Iterable):
27 |     return type(expr)(map(passes, expr))
28 | 
29 |   return passes(expr)

--------------------------------------------------------------------------------
/src/haoda/ir/arithmetic/base.py:
--------------------------------------------------------------------------------
1 | import functools
2 | import logging
3 | 
4 | from haoda import ir
5 | from haoda import util
6 | 
7 | _logger = logging.getLogger().getChild(__name__)
8 | 
9 | def compose(*funcs):
10 |   """Composes functions. The first function in funcs is invoked first.
11 |   """
12 |   # Somehow pylint gives false positive for f and g.
13 |   # pylint: disable=undefined-variable
14 |   return functools.reduce(lambda g, f: lambda x: f(g(x)), funcs, lambda x: x)
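Note that compose applies its arguments left to right, which is what lets pass lists like the one in simplify above read in execution order. A tiny illustration:

```python
inc = lambda x: x + 1
dbl = lambda x: x * 2
assert compose(inc, dbl)(3) == 8  # inc runs first: (3 + 1) * 2
```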
36 | 37 | Raises: 38 | util.InternalError: if Operand is undefined. 39 | """ 40 | 41 | def visitor(node, args=None): 42 | if isinstance(node, ir.BinaryOp): 43 | 44 | # Flatten singleton BinaryOp 45 | if len(node.operand) == 1: 46 | return flatten(node.operand[0]) 47 | 48 | # Flatten BinaryOp with reduction operators 49 | new_operator, new_operand = [], [] 50 | for child_operator, child_operand in zip((None, *node.operator), 51 | node.operand): 52 | if child_operator is not None: 53 | new_operator.append(child_operator) 54 | # The first operand can always be flattened if the two operations have 55 | # the same type. 56 | if child_operator in (None, '||', '&&', *'|&+*') and \ 57 | type(child_operand) is type(node): 58 | new_operator.extend(child_operand.operator) 59 | new_operand.extend(child_operand.operand) 60 | else: 61 | new_operand.append(child_operand) 62 | # At least 1 operand is flattened. 63 | if len(new_operand) > len(node.operand): 64 | return flatten(type(node)(operator=new_operator, operand=new_operand)) 65 | 66 | # Flatten compound Operand 67 | if isinstance(node, ir.Operand): 68 | for attr in node.ATTRS: 69 | val = getattr(node, attr) 70 | if val is not None: 71 | if isinstance(val, ir.Node): 72 | return flatten(val) 73 | break 74 | else: 75 | raise util.InternalError('undefined Operand') 76 | 77 | # Flatten identity unary operators 78 | if isinstance(node, ir.Unary): 79 | minus_count = node.operator.count('-') 80 | if minus_count % 2 == 0: 81 | plus_count = node.operator.count('+') 82 | if plus_count + minus_count == len(node.operator): 83 | return flatten(node.operand) 84 | not_count = node.operator.count('!') 85 | if not_count % 2 == 0 and not_count == len(node.operator): 86 | return flatten(node.operand) 87 | 88 | return node 89 | 90 | if not isinstance(node, ir.Node): 91 | return node 92 | 93 | return node.visit(visitor) 94 | 95 | def print_tree(node, printer=_logger.debug): 96 | """Prints the node type as a tree. 97 | 98 | Args: 99 | node: ir.Node to print. 100 | printer: A callable that prints one line, defaulting to _logger.debug. 101 | 102 | Returns: 103 | node: Input ir.Node as-is. 104 | """ 105 | 106 | def pre_recursion(node, args): 107 | args[0] += 1 108 | 109 | def post_recursion(node, args): 110 | args[0] -= 1 111 | 112 | def visitor(node, args): 113 | printer('%s+-%s: %s' % (' ' * args[0], type(node).__name__, node)) 114 | 115 | if not isinstance(node, ir.Node): 116 | return node 117 | 118 | printer('root') 119 | return node.visit(visitor, args=[1], pre_recursion=pre_recursion, 120 | post_recursion=post_recursion) 121 | 122 | def propagate_type(node, symbol_table): 123 | def visitor(node, symbol_table): 124 | if node.haoda_type is None: 125 | if isinstance(node, (ir.Ref, ir.Var)): 126 | node.haoda_type = symbol_table[node.name] 127 | return node 128 | return node.visit(visitor, symbol_table) 129 | -------------------------------------------------------------------------------- /src/haoda/ir/visitor.py: -------------------------------------------------------------------------------- 1 | import collections 2 | 3 | from haoda import ir 4 | 5 | def get_dram_refs(obj): 6 | """Get all DRAM references as a tuple. 7 | 8 | Args: 9 | obj: A haoda.ir.Node object or an Iterable of haoda.ir.Node objects. 10 | 11 | Returns: 12 | A tuple of all DRAM references. 13 | 14 | Raises: 15 | TypeError: If obj is not an IR node or a sequence.
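Example (illustrative): for a packing module whose lets store to a DRAM reference such as dram<bank [0, 1] foo@0>, get_dram_refs(lets) returns that single DRAMRef; passing a sequence of nodes concatenates the per-node results in visiting order.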
16 | """ 17 | def visitor(obj, args): 18 | if isinstance(obj, ir.DRAMRef): 19 | args.append(obj) 20 | return obj 21 | if isinstance(obj, collections.Iterable): 22 | return sum(map(get_dram_refs, obj), ()) 23 | dram_refs = [] 24 | if isinstance(obj, ir.Node): 25 | obj.visit(visitor, dram_refs) 26 | else: 27 | raise TypeError('argument is not an IR node or a sequence') 28 | return tuple(dram_refs) 29 | 30 | def get_read_fifo_set(module): 31 | """Get all read FIFOs as a tuple. Each FIFO only appears once. 32 | 33 | Args: 34 | module: A haoda.ir.Module object. 35 | 36 | Returns: 37 | A tuple of all FIFOs that are read in the module. 38 | 39 | Raises: 40 | TypeError: If argument is not a module. 41 | """ 42 | def visitor(obj, args): 43 | if isinstance(obj, ir.FIFO): 44 | args[obj] = None 45 | return obj 46 | fifo_loads = collections.OrderedDict() 47 | if isinstance(module, ir.Module): 48 | module.visit_loads(visitor, fifo_loads) 49 | else: 50 | raise TypeError('argument is not a module') 51 | return tuple(fifo_loads) 52 | -------------------------------------------------------------------------------- /src/haoda/util.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | import logging 3 | import signal 4 | 5 | # constants 6 | COORDS_TILED = 'xyzw' 7 | COORDS_IN_TILE = 'ijkl' 8 | COORDS_IN_ORIG = 'pqrs' 9 | TYPE_WIDTH = { 10 | 'float': 32, 11 | 'double': 64, 12 | 'half': 16 13 | } 14 | MAX_DRAM_BANK = 4 15 | 16 | _logger = logging.getLogger().getChild(__name__) 17 | 18 | class InternalError(Exception): 19 | pass 20 | 21 | class SemanticError(Exception): 22 | pass 23 | 24 | class SemanticWarn(Exception): 25 | pass 26 | 27 | class Printer(): 28 | def __init__(self, out): 29 | self._out = out 30 | self._indent = 0 31 | self._assign = 0 32 | self._comments = [] 33 | self._tab = 2 34 | 35 | def println(self, line='', indent=-1): 36 | if indent < 0: 37 | indent = self._indent 38 | if line: 39 | self._out.write('%s%s\n' % (' '*indent*self._tab, line)) 40 | else: 41 | self._out.write('\n') 42 | 43 | def do_indent(self): 44 | self._indent += 1 45 | 46 | def un_indent(self): 47 | self._indent -= 1 48 | 49 | def do_scope(self, comment=''): 50 | self.println('{') 51 | self.do_indent() 52 | self._comments.append(comment) 53 | 54 | def un_scope(self, comment='', suffix=''): 55 | self.un_indent() 56 | popped_comment = self._comments.pop() 57 | if comment: 58 | self.println('}%s // %s' % (suffix, comment)) 59 | else: 60 | if popped_comment: 61 | self.println('}%s // %s' % (suffix, popped_comment)) 62 | else: 63 | self.println('}%s' % suffix) 64 | 65 | def new_var(self): 66 | self._assign += 1 67 | return self.last_var() 68 | 69 | def last_var(self, offset=-1): 70 | return 'assign_%d' % (self._assign+offset) 71 | 72 | def print_func(self, name, params, suffix='', align=80): 73 | lines = [name+'('] 74 | for param in params: 75 | if ((self._indent + min(1, len(lines)-1))*self._tab+ 76 | len(lines[-1])+len(param+', ')) > align: 77 | lines.append(param+', ') 78 | else: 79 | lines[-1] += param+', ' 80 | if lines[-1][-2:] == ', ': 81 | lines[-1] = lines[-1][:-2]+')'+suffix 82 | line = lines.pop(0) 83 | self.println(line) 84 | if lines: 85 | self.do_indent() 86 | for line in lines: 87 | self.println(line) 88 | self.un_indent() 89 | 90 | @contextlib.contextmanager 91 | def for_(self, *args): 92 | if len(args) == 3: 93 | self.println('for ({}; {}; {}) {{'.format(*args)) 94 | elif len(args) == 2: 95 | self.println('for ({} : {}) {{'.format(*args)) 96 | else: 97 | 
raise InternalError('for_ takes 2 or 3 arguments') 98 | self.do_indent() 99 | yield 100 | self.un_indent() 101 | self.println('}') 102 | 103 | @contextlib.contextmanager 104 | def do_while(self, cond): 105 | self.println('do {') 106 | self.do_indent() 107 | yield 108 | self.un_indent() 109 | self.println('}} while ({});'.format(cond)) 110 | 111 | @contextlib.contextmanager 112 | def if_(self, cond): 113 | self.println('if ({}) {{'.format(cond)) 114 | self.do_indent() 115 | yield 116 | self.un_indent() 117 | self.println('}') 118 | 119 | @contextlib.contextmanager 120 | def elif_(self, cond): 121 | self.un_indent() 122 | self.println('}} else if ({}) {{'.format(cond)) 123 | self.do_indent() 124 | yield 125 | 126 | @contextlib.contextmanager 127 | def else_(self): 128 | self.un_indent() 129 | self.println('} else {') 130 | self.do_indent() 131 | yield 132 | 133 | def print_define(printer, var, val): 134 | printer.println('#ifndef %s' % var) 135 | printer.println('#define %s %d' % (var, val)) 136 | printer.println('#endif//%s' % var) 137 | 138 | def print_guard(printer, var, val): 139 | printer.println('#ifdef %s' % var) 140 | printer.println('#if %s != %d' % (var, val)) 141 | printer.println('#error %s != %d' % (var, val)) 142 | printer.println('#endif//%s != %d' % (var, val)) 143 | printer.println('#endif//%s' % var) 144 | 145 | def get_c_type(haoda_type): 146 | if haoda_type in { 147 | 'uint8', 'uint16', 'uint32', 'uint64', 148 | 'int8', 'int16', 'int32', 'int64'}: 149 | return haoda_type+'_t' 150 | if haoda_type is None: 151 | return None 152 | if haoda_type == 'float32': 153 | return 'float' 154 | if haoda_type == 'float64': 155 | return 'double' 156 | for token in ('int', 'uint'): 157 | if haoda_type.startswith(token): 158 | return 'ap_{}<{}>'.format(token, haoda_type.replace(token, '')) 159 | return haoda_type 160 | 161 | def get_haoda_type(c_type): 162 | return c_type[:-2] if c_type[-2:] == '_t' else c_type 163 | 164 | def get_width_in_bits(haoda_type): 165 | if isinstance(haoda_type, str): 166 | if haoda_type in TYPE_WIDTH: 167 | return TYPE_WIDTH[haoda_type] 168 | for prefix in 'uint', 'int', 'float': 169 | if haoda_type.startswith(prefix): 170 | return int(haoda_type.lstrip(prefix).split('_')[0]) 171 | else: 172 | if hasattr(haoda_type, 'haoda_type'): 173 | return get_width_in_bits(haoda_type.haoda_type) 174 | raise InternalError('unknown haoda type: %s' % haoda_type) 175 | 176 | def get_width_in_bytes(haoda_type): 177 | return (get_width_in_bits(haoda_type)-1)//8+1 178 | 179 | def is_float(haoda_type): 180 | return haoda_type in {'half', 'double'} or haoda_type.startswith('float') 181 | 182 | def idx2str(idx): 183 | return '(%s)' % ', '.join(map(str, idx)) 184 | 185 | def lst2str(idx): 186 | return '[%s]' % ', '.join(map(str, idx)) 187 | 188 | def get_module_name(module_id): 189 | return 'module_%d' % module_id 190 | 191 | def get_func_name(module_id): 192 | return 'Module%dFunc' % module_id 193 | 194 | get_port_name = lambda name, bank: 'bank_{}_{}'.format(bank, name) 195 | get_port_buf_name = lambda name, bank: 'bank_{}_{}_buf'.format(bank, name) 196 | def get_bundle_name(name, bank): 197 | return '{}_bank_{}'.format(name.replace('<', '_').replace('>', ''), bank) 198 | 199 | def pause_for_debugging(): 200 | if _logger.isEnabledFor(logging.DEBUG): 201 | try: 202 | _logger.debug('pausing for debugging... 
send Ctrl-C to resume') 203 | signal.pause() 204 | except KeyboardInterrupt: 205 | pass 206 | -------------------------------------------------------------------------------- /src/soda/codegen/xilinx/header.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from haoda import util 4 | 5 | logger = logging.getLogger().getChild(__name__) 6 | 7 | def print_code(stencil, header_file): 8 | logger.info('generate host header code as %s' % header_file.name) 9 | printer = util.Printer(header_file) 10 | println = printer.println 11 | do_indent = printer.do_indent 12 | un_indent = printer.un_indent 13 | println('#ifndef HALIDE_%s_H_' % stencil.app_name.upper()) 14 | println('#define HALIDE_%s_H_' % stencil.app_name.upper()) 15 | println() 16 | 17 | println('#ifndef HALIDE_ATTRIBUTE_ALIGN') 18 | do_indent() 19 | println('#ifdef _MSC_VER') 20 | do_indent() 21 | println('#define HALIDE_ATTRIBUTE_ALIGN(x) __declspec(align(x))') 22 | un_indent() 23 | println('#else') 24 | do_indent() 25 | println('#define HALIDE_ATTRIBUTE_ALIGN(x) __attribute__((aligned(x)))') 26 | un_indent() 27 | println('#endif') 28 | un_indent() 29 | println('#endif//HALIDE_ATTRIBUTE_ALIGN') 30 | println() 31 | 32 | println('#ifndef BUFFER_T_DEFINED') 33 | println('#define BUFFER_T_DEFINED') 34 | println('#include<stdint.h>') 35 | println('#include<stdbool.h>') 36 | println('typedef struct buffer_t {') 37 | do_indent() 38 | println('uint64_t dev;') 39 | println('uint8_t* host;') 40 | println('int32_t extent[4];') 41 | println('int32_t stride[4];') 42 | println('int32_t min[4];') 43 | println('int32_t elem_size;') 44 | println('HALIDE_ATTRIBUTE_ALIGN(1) bool host_dirty;') 45 | println('HALIDE_ATTRIBUTE_ALIGN(1) bool dev_dirty;') 46 | println('HALIDE_ATTRIBUTE_ALIGN(1) uint8_t _padding[10 - sizeof(void *)];') 47 | un_indent() 48 | println('} buffer_t;') 49 | println('#endif//BUFFER_T_DEFINED') 50 | println() 51 | 52 | println('#ifndef HALIDE_FUNCTION_ATTRS') 53 | println('#define HALIDE_FUNCTION_ATTRS') 54 | println('#endif//HALIDE_FUNCTION_ATTRS') 55 | println() 56 | 57 | tensors = stencil.input_names + stencil.output_names + stencil.param_names 58 | println('int {}({}const char* xclbin) HALIDE_FUNCTION_ATTRS;'.format( 59 | stencil.app_name, 60 | ''.join(map('buffer_t *var_{}_buffer, '.format, tensors)))) 61 | println() 62 | 63 | println('#endif//HALIDE_%s_H_' % stencil.app_name.upper()) 64 | println() 65 | -------------------------------------------------------------------------------- /src/soda/codegen/xilinx/hls_kernel.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import functools 3 | import logging 4 | import operator 5 | 6 | from haoda import ir 7 | from haoda import util 8 | from haoda.ir import visitor 9 | 10 | _logger = logging.getLogger().getChild(__name__) 11 | 12 | def _print_interface(printer, kernel_name, inputs, outputs, super_source): 13 | """Prints the top-level module for the given arguments. 14 | 15 | Prints the top-level interfaces and sub-module instances with proper interface 16 | pragmas, hls::stream declarations and references, and module function calls. 17 | Currently only streaming applications are supported. 18 | 19 | Args: 20 | printer: Printer to which the code is emitted. 21 | kernel_name: str, name of the kernel. 22 | inputs: Sequence of (name, c_type, bank, depth) tuples, specifying the m_axi 23 | input interfaces.
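For example, ('input', 'ap_uint<512>', 1, 65536) would describe one 512-bit port on DRAM bank 1 with depth 65536 (hypothetical values; print_code below builds exactly such tuples from the input statements).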
24 | outputs: Sequence of (name, c_type, bank, depth) tuples, specifying the m_axi 25 | output interfaces. 26 | super_source: SuperSourceNode of a DAG of HAODA nodes. 27 | """ 28 | println = printer.println 29 | do_indent = printer.do_indent 30 | un_indent = printer.un_indent 31 | do_scope = printer.do_scope 32 | un_scope = printer.un_scope 33 | 34 | get_bundle_name = util.get_bundle_name 35 | get_port_name = util.get_port_name 36 | get_port_buf_name = util.get_port_buf_name 37 | 38 | println('extern "C"') 39 | println('{') 40 | println() 41 | println('void %s(' % kernel_name) 42 | do_indent() 43 | for name, c_type, bank, _ in outputs + inputs: 44 | println('{}* {},'.format(c_type, get_port_name(name, bank))) 45 | println('uint64_t coalesced_data_num)') 46 | un_indent() 47 | do_scope() 48 | 49 | for name, c_type, bank, depth in outputs + inputs: 50 | println('#pragma HLS interface m_axi port={} offset=slave depth={} bundle={' 51 | '}'.format(get_port_name(name, bank), depth, 52 | get_bundle_name(name, bank)), 0) 53 | 54 | println() 55 | for name, _, bank, _ in outputs + inputs: 56 | println('#pragma HLS interface s_axilite port={} bundle=control'.format( 57 | get_port_name(name, bank)), 0) 58 | 59 | println('#pragma HLS interface s_axilite port=coalesced_data_num ' 60 | 'bundle=control', 0) 61 | println('#pragma HLS interface s_axilite port=return bundle=control', 0) 62 | println() 63 | 64 | # port buf declarations 65 | for name, c_type, bank, _ in inputs + outputs: 66 | println('hls::stream<Data<{c_type}>> {name}("{name}");'.format( 67 | name=get_port_buf_name(name, bank), c_type=c_type)) 68 | # port buf depths 69 | println('#pragma HLS stream variable={} depth=32'.format( 70 | get_port_buf_name(name, bank)), 0) 71 | println('#pragma HLS data_pack variable={}'.format( 72 | get_port_buf_name(name, bank)), indent=0) 73 | println() 74 | 75 | # internal fifos 76 | for node in super_source.tpo_node_gen(): 77 | for fifo in node.fifos: 78 | println('hls::stream<Data<{0}>> {1}("{1}");'.format(fifo.c_type, 79 | fifo.c_expr)) 80 | println('#pragma HLS stream variable={} depth={}'.format( 81 | fifo.c_expr, 82 | max(fifo.depth, 512 // util.get_width_in_bits(fifo.haoda_type))), 0) 83 | println('#pragma HLS data_pack variable={}'.format(fifo.c_expr), 84 | indent=0) 85 | 86 | println() 87 | 88 | println('#pragma HLS dataflow', 0) 89 | for name, _, bank, _ in inputs: 90 | println('BurstRead(&{}, {}, coalesced_data_num);'.format( 91 | get_port_buf_name(name, bank), get_port_name(name, bank))) 92 | 93 | for node in super_source.tpo_node_gen(): 94 | module_trait_id = super_source.module_table[node][1] 95 | _print_module_func_call(printer, node, module_trait_id) 96 | 97 | for name, _, bank, _ in outputs: 98 | println('BurstWrite({}, &{}, coalesced_data_num);'.format( 99 | get_port_name(name, bank), get_port_buf_name(name, bank))) 100 | 101 | un_scope() 102 | println() 103 | println('}//extern "C"') 104 | 105 | def print_header(printer): 106 | println = printer.println 107 | for header in ['float', 'math', 'stdbool', 'stddef', 'stdint', 'stdio', 108 | 'string', 'ap_int', 'hls_stream']: 109 | println('#include<%s.h>' % header) 110 | println() 111 | 112 | def _print_burst_read(printer): 113 | println = printer.println 114 | do_scope = printer.do_scope 115 | un_scope = printer.un_scope 116 | println('void BurstRead(hls::stream<Data<ap_uint<BURST_WIDTH>>>* to, ap_uint<' 117 | 'BURST_WIDTH>* from, uint64_t data_num)') 118 | do_scope() 119 | println('load_epoch:', 0) 120 | println('for (uint64_t epoch = 0; epoch < data_num;)') 121 | do_scope() 122 | println('#pragma
HLS pipeline II=1', 0) 123 | println('const uint64_t next_epoch = epoch + 1;') 124 | println('WriteData(to, from[epoch], next_epoch < data_num);') 125 | println('epoch = next_epoch;') 126 | un_scope() 127 | un_scope() 128 | 129 | def _print_burst_write(printer): 130 | println = printer.println 131 | do_scope = printer.do_scope 132 | un_scope = printer.un_scope 133 | println('void BurstWrite(ap_uint<BURST_WIDTH>* to, hls::stream<Data<ap_uint<' 134 | 'BURST_WIDTH>>>* from, uint64_t data_num)') 135 | do_scope() 136 | println('store_epoch:', 0) 137 | println('for (uint64_t epoch = 0; epoch < data_num; ++epoch)') 138 | do_scope() 139 | println('#pragma HLS pipeline II=1', 0) 140 | println('ap_uint<BURST_WIDTH> buf;') 141 | println('ReadData(&buf, from);') 142 | println('to[epoch] = buf;') 143 | un_scope() 144 | un_scope() 145 | 146 | def print_code(stencil, output_file): 147 | _logger.info('generate kernel code as %s' % output_file.name) 148 | printer = util.Printer(output_file) 149 | 150 | print_header(printer) 151 | 152 | printer.println() 153 | 154 | util.print_define(printer, 'BURST_WIDTH', stencil.burst_width) 155 | printer.println() 156 | 157 | util.print_guard(printer, 'UNROLL_FACTOR', stencil.unroll_factor) 158 | for i in range(len(stencil.tile_size)-1): 159 | util.print_guard(printer, 'TILE_SIZE_DIM_%d' % i, stencil.tile_size[i]) 160 | util.print_guard(printer, 'BURST_WIDTH', stencil.burst_width) 161 | printer.println() 162 | 163 | _print_data_struct(printer) 164 | _print_reinterpret(printer) 165 | _print_read_data(printer) 166 | _print_write_data(printer) 167 | 168 | _print_burst_read(printer) 169 | _print_burst_write(printer) 170 | 171 | for module_trait_id, module_trait in enumerate(stencil.module_traits): 172 | _print_module_definition(printer, module_trait, module_trait_id, 173 | burst_width=stencil.burst_width) 174 | 175 | outputs = [] 176 | inputs = [] 177 | for stmt in stencil.output_stmts: 178 | for bank in sorted(stmt.dram): 179 | outputs.append((stmt.name, 'ap_uint<%d>' % stencil.burst_width, bank, 180 | 65536)) 181 | for stmt in stencil.input_stmts: 182 | for bank in sorted(stmt.dram): 183 | inputs.append((stmt.name, 'ap_uint<%d>' % stencil.burst_width, bank, 184 | 65536)) 185 | for stmt in stencil.param_stmts: 186 | inputs.append(('var_%s' % stmt.name, stmt.type, 0, 187 | functools.reduce(operator.mul, stmt.size))) 188 | _print_interface(printer, stencil.app_name + '_kernel', inputs, outputs, 189 | stencil.dataflow_super_source) 190 | 191 | def _print_module_func_call(printer, node, module_trait_id, **kwargs): 192 | println = printer.println 193 | print_func = printer.print_func 194 | func_name = util.get_func_name(module_trait_id) 195 | 196 | dram_reads = tuple( 197 | '/* input*/ &' + util.get_port_buf_name(dram_ref.var, bank) 198 | for dram_ref, bank in node.dram_reads) 199 | dram_writes = tuple( 200 | '/*output*/ &' + util.get_port_buf_name(dram_ref.var, bank) 201 | for dram_ref, bank in node.dram_writes) 202 | output_fifos = tuple('/*output*/ &' + _ for _ in node.output_fifos) 203 | input_fifos = tuple('/* input*/ &' + _ for _ in node.input_fifos) 204 | params = dram_writes + output_fifos + input_fifos + dram_reads 205 | 206 | print_func(func_name, params, suffix=';', align=0) 207 | 208 | # pylint: disable=too-many-branches,too-many-statements 209 | def _print_module_definition(printer, module_trait, module_trait_id, **kwargs): 210 | println = printer.println 211 | do_scope = printer.do_scope 212 | un_scope = printer.un_scope 213 | func_name = util.get_func_name(module_trait_id) 214 | func_lower_name =
util.get_module_name(module_trait_id) 215 | ii = 1 216 | 217 | def get_delays(obj, delays): 218 | if isinstance(obj, ir.DelayedRef): 219 | delays.append(obj) 220 | return obj 221 | delays = [] 222 | for let in module_trait.lets: 223 | let.visit(get_delays, delays) 224 | for expr in module_trait.exprs: 225 | expr.visit(get_delays, delays) 226 | _logger.debug('delays: %s', delays) 227 | 228 | fifo_loads = tuple('/* input*/ hls::stream<Data<{}>>* {}'.format( 229 | _.c_type, _.ld_name) for _ in module_trait.loads) 230 | fifo_stores = tuple('/*output*/ hls::stream<Data<{}>>* {}{}'.format( 231 | expr.c_type, ir.FIFORef.ST_PREFIX, idx) 232 | for idx, expr in enumerate(module_trait.exprs)) 233 | 234 | # look for DRAM access 235 | reads_in_lets = tuple(_.expr for _ in module_trait.lets) 236 | writes_in_lets = tuple(_.name for _ in module_trait.lets 237 | if not isinstance(_.name, str)) 238 | reads_in_exprs = module_trait.exprs 239 | dram_reads = visitor.get_dram_refs(reads_in_lets + reads_in_exprs) 240 | dram_writes = visitor.get_dram_refs(writes_in_lets) 241 | dram_read_map = collections.OrderedDict() 242 | dram_write_map = collections.OrderedDict() 243 | all_dram_reads = () 244 | num_bank_map = {} 245 | if dram_reads: # this is an unpacking module 246 | assert not dram_writes, 'cannot read and write DRAM in the same module' 247 | for dram_read in dram_reads: 248 | dram_read_map.setdefault(dram_read.var, 249 | collections.OrderedDict()).setdefault( 250 | dram_read.dram, []).append(dram_read) 251 | _logger.debug('dram read map: %s', dram_read_map) 252 | burst_width = kwargs.pop('burst_width') 253 | for var in dram_read_map: 254 | for dram in dram_read_map[var]: 255 | # number of elements per cycle 256 | batch_size = len(dram_read_map[var][dram]) 257 | dram_read_map[var][dram] = collections.OrderedDict( 258 | (_.offset, _) for _ in dram_read_map[var][dram]) 259 | dram_reads = dram_read_map[var][dram] 260 | num_banks = len(next(iter(dram_reads.values())).dram) 261 | if var in num_bank_map: 262 | assert num_bank_map[var] == num_banks, 'inconsistent num banks' 263 | else: 264 | num_bank_map[var] = num_banks 265 | _logger.debug('dram reads: %s', dram_reads) 266 | assert tuple(sorted(dram_reads.keys())) == tuple(range(batch_size)), \ 267 | 'unexpected DRAM access pattern %s' % dram_reads 268 | batch_width = sum(util.get_width_in_bits(_.haoda_type) 269 | for _ in dram_reads.values()) 270 | del dram_reads 271 | if burst_width * num_banks >= batch_width: 272 | assert burst_width * num_banks % batch_width == 0, \ 273 | 'cannot process such a burst' 274 | # a single burst consumed in multiple cycles 275 | coalescing_factor = burst_width * num_banks // batch_width 276 | ii = coalescing_factor 277 | else: 278 | assert batch_width * num_banks % burst_width == 0, \ 279 | 'cannot process such a burst' 280 | # multiple bursts consumed in a single cycle 281 | # reassemble_factor = batch_width // (burst_width * num_banks) 282 | raise util.InternalError('cannot process such a burst yet') 283 | dram_reads = tuple(next(iter(_.values())) 284 | for _ in dram_read_map[var].values()) 285 | all_dram_reads += dram_reads 286 | fifo_loads += tuple( 287 | '/* input*/ hls::stream<Data<ap_uint<{burst_width}>>>* ' 288 | '{bank_name}'.format( 289 | burst_width=burst_width, bank_name=_.dram_fifo_name(bank)) 290 | for _ in dram_reads for bank in _.dram) 291 | elif dram_writes: # this is a packing module 292 | for dram_write in dram_writes: 293 | dram_write_map.setdefault(dram_write.var, 294 | collections.OrderedDict()).setdefault( 295 | dram_write.dram, []).append(dram_write)
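# dram_write_map maps a variable name to an OrderedDict keyed by its DRAM bank tuple, whose values list the DRAMRefs written per cycle, e.g. {'foo': {(0, 1): [foo@0, foo@1]}} (hypothetical shapes for illustration).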
296 | _logger.debug('dram write map: %s', dram_write_map) 297 | burst_width = kwargs.pop('burst_width') 298 | for var in dram_write_map: 299 | for dram in dram_write_map[var]: 300 | # number of elements per cycle 301 | batch_size = len(dram_write_map[var][dram]) 302 | dram_write_map[var][dram] = collections.OrderedDict( 303 | (_.offset, _) for _ in dram_write_map[var][dram]) 304 | dram_writes = dram_write_map[var][dram] 305 | num_banks = len(next(iter(dram_writes.values())).dram) 306 | if var in num_bank_map: 307 | assert num_bank_map[var] == num_banks, 'inconsistent num banks' 308 | else: 309 | num_bank_map[var] = num_banks 310 | _logger.debug('dram writes: %s', dram_writes) 311 | assert tuple(sorted(dram_writes.keys())) == tuple(range(batch_size)), \ 312 | 'unexpected DRAM access pattern %s' % dram_writes 313 | batch_width = sum(util.get_width_in_bits(_.haoda_type) 314 | for _ in dram_writes.values()) 315 | del dram_writes 316 | if burst_width * num_banks >= batch_width: 317 | assert burst_width * num_banks % batch_width == 0, \ 318 | 'cannot process such a burst' 319 | # a single burst consumed in multiple cycles 320 | coalescing_factor = burst_width * num_banks // batch_width 321 | ii = coalescing_factor 322 | else: 323 | assert batch_width * num_banks % burst_width == 0, \ 324 | 'cannot process such a burst' 325 | # multiple bursts consumed in a single cycle 326 | # reassemble_factor = batch_width // (burst_width * num_banks) 327 | raise util.InternalError('cannot process such a burst yet') 328 | dram_writes = tuple(next(iter(_.values())) 329 | for _ in dram_write_map[var].values()) 330 | fifo_stores += tuple( 331 | '/*output*/ hls::stream<Data<ap_uint<{burst_width}>>>* ' 332 | '{bank_name}'.format( 333 | burst_width=burst_width, bank_name=_.dram_fifo_name(bank)) 334 | for _ in dram_writes for bank in _.dram) 335 | 336 | # print function 337 | printer.print_func('void {func_name}'.format(**locals()), 338 | fifo_stores+fifo_loads, align=0) 339 | do_scope(func_name) 340 | 341 | for dram_ref, bank in module_trait.dram_writes: 342 | println('#pragma HLS data_pack variable = {}'.format( 343 | dram_ref.dram_fifo_name(bank)), 0) 344 | for arg in module_trait.output_fifos: 345 | println('#pragma HLS data_pack variable = %s' % arg, 0) 346 | for arg in module_trait.input_fifos: 347 | println('#pragma HLS data_pack variable = %s' % arg, 0) 348 | for dram_ref, bank in module_trait.dram_reads: 349 | println('#pragma HLS data_pack variable = {}'.format( 350 | dram_ref.dram_fifo_name(bank)), 0) 351 | 352 | # print inter-iteration declarations 353 | for delay in delays: 354 | println(delay.c_buf_decl) 355 | println(delay.c_ptr_decl) 356 | 357 | # print loop 358 | println('{}_epoch:'.format(func_lower_name), indent=0) 359 | println('for (bool enable = true; enable;)') 360 | do_scope('for {}_epoch'.format(func_lower_name)) 361 | println('#pragma HLS pipeline II=%d' % ii, 0) 362 | for delay in delays: 363 | println('#pragma HLS dependence variable=%s inter false' % 364 | delay.buf_name, 0) 365 | 366 | # print emptiness tests 367 | println('if (%s)' % (' && '.join( 368 | '!{fifo}->empty()'.format(fifo=fifo) 369 | for fifo in tuple(_.ld_name for _ in module_trait.loads) + 370 | tuple(_.dram_fifo_name(bank) 371 | for _ in all_dram_reads for bank in _.dram)))) 372 | do_scope('if not empty') 373 | 374 | # print intra-iteration declarations 375 | for fifo_in in module_trait.loads: 376 | println('{fifo_in.c_type} {fifo_in.ref_name};'.format(**locals())) 377 | for var in dram_read_map: 378 | for dram in (next(iter(_.values())) for _ in
dram_read_map[var].values()): 379 | for bank in dram.dram: 380 | println('ap_uint<{}> {};'.format(burst_width, dram.dram_buf_name(bank))) 381 | for var in dram_write_map: 382 | for dram in (next(iter(_.values())) for _ in dram_write_map[var].values()): 383 | for bank in dram.dram: 384 | println('ap_uint<{}> {};'.format(burst_width, dram.dram_buf_name(bank))) 385 | 386 | # print enable conditions 387 | if not dram_write_map: 388 | for fifo_in in module_trait.loads: 389 | println('const bool {fifo_in.ref_name}_enable = ' 390 | 'ReadData(&{fifo_in.ref_name}, {fifo_in.ld_name});'.format(**locals())) 391 | for dram in all_dram_reads: 392 | for bank in dram.dram: 393 | println('const bool {dram_buf_name}_enable = ' 394 | 'ReadData(&{dram_buf_name}, {dram_fifo_name});'.format( 395 | dram_buf_name=dram.dram_buf_name(bank), 396 | dram_fifo_name=dram.dram_fifo_name(bank))) 397 | if not dram_write_map: 398 | println('const bool enabled = %s;' % ( 399 | ' && '.join(tuple('{_.ref_name}_enable'.format(_=_) 400 | for _ in module_trait.loads) + 401 | tuple('{}_enable'.format(_.dram_buf_name(bank)) 402 | for _ in all_dram_reads for bank in _.dram)))) 403 | println('enable = enabled;') 404 | 405 | # print delays (if any) 406 | for delay in delays: 407 | println('const {} {};'.format(delay.c_type, delay.c_buf_load)) 408 | 409 | # print lets 410 | def mutate_dram_ref_for_writes(obj, kwargs): 411 | if isinstance(obj, ir.DRAMRef): 412 | coalescing_idx = kwargs.pop('coalescing_idx') 413 | unroll_factor = kwargs.pop('unroll_factor') 414 | type_width = util.get_width_in_bits(obj.haoda_type) 415 | elem_idx = coalescing_idx * unroll_factor + obj.offset 416 | num_banks = num_bank_map[obj.var] 417 | bank = obj.dram[elem_idx % num_banks] 418 | lsb = (elem_idx // num_banks) * type_width 419 | msb = lsb + type_width - 1 420 | return ir.Var(name='{}({msb}, {lsb})'.format( 421 | obj.dram_buf_name(bank), msb=msb, lsb=lsb), idx=()) 422 | return obj 423 | 424 | # mutate dram ref for writes 425 | if dram_write_map: 426 | for coalescing_idx in range(coalescing_factor): 427 | for fifo_in in module_trait.loads: 428 | if coalescing_idx == coalescing_factor - 1: 429 | prefix = 'const bool {fifo_in.ref_name}_enable = '.format( 430 | fifo_in=fifo_in) 431 | else: 432 | prefix = '' 433 | println('{prefix}ReadData(&{fifo_in.ref_name},' 434 | ' {fifo_in.ld_name});'.format(fifo_in=fifo_in, prefix=prefix)) 435 | if coalescing_idx == coalescing_factor - 1: 436 | println('const bool enabled = %s;' % ( 437 | ' && '.join(tuple('{_.ref_name}_enable'.format(_=_) 438 | for _ in module_trait.loads) + 439 | tuple('{}_enable'.format(_.dram_buf_name(bank)) 440 | for _ in dram_reads for bank in _.dram)))) 441 | println('enable = enabled;') 442 | for idx, let in enumerate(module_trait.lets): 443 | let = let.visit(mutate_dram_ref_for_writes, { 444 | 'coalescing_idx': coalescing_idx, 'unroll_factor': len( 445 | dram_write_map[let.name.var][let.name.dram])}) 446 | println('{} = Reinterpret<ap_uint<{width}>>({});'.format( 447 | let.name, let.expr.c_expr, 448 | width=util.get_width_in_bits(let.expr.haoda_type))) 449 | for var in dram_write_map: 450 | for dram in (next(iter(_.values())) 451 | for _ in dram_write_map[var].values()): 452 | for bank in dram.dram: 453 | println('WriteData({}, {}, enabled);'.format( 454 | dram.dram_fifo_name(bank), dram.dram_buf_name(bank))) 455 | else: 456 | for let in module_trait.lets: 457 | println(let.c_expr) 458 | 459 | def mutate_dram_ref_for_reads(obj, kwargs): 460 | if isinstance(obj, ir.DRAMRef): 461 | coalescing_idx =
kwargs.pop('coalescing_idx') 462 | unroll_factor = kwargs.pop('unroll_factor') 463 | type_width = util.get_width_in_bits(obj.haoda_type) 464 | elem_idx = coalescing_idx * unroll_factor + obj.offset 465 | num_banks = num_bank_map[obj.var] 466 | bank = obj.dram[elem_idx % num_banks] 467 | lsb = (elem_idx // num_banks) * type_width 468 | msb = lsb + type_width - 1 469 | return ir.Var( 470 | name='Reinterpret<{c_type}>(static_cast<ap_uint<{width}>>(' 471 | '{dram_buf_name}({msb}, {lsb})))'.format( 472 | c_type=obj.c_type, dram_buf_name=obj.dram_buf_name(bank), 473 | msb=msb, lsb=lsb, width=msb-lsb+1), idx=()) 474 | return obj 475 | 476 | # mutate dram ref for reads 477 | if dram_read_map: 478 | for coalescing_idx in range(coalescing_factor): 479 | for idx, expr in enumerate(module_trait.exprs): 480 | println('WriteData({}{}, {}, {});'.format( 481 | ir.FIFORef.ST_PREFIX, idx, 482 | expr.visit(mutate_dram_ref_for_reads, { 483 | 'coalescing_idx': coalescing_idx, 'unroll_factor': len( 484 | dram_read_map[expr.var][expr.dram])}).c_expr, 485 | 'true' if coalescing_idx < coalescing_factor - 1 else 'enabled')) 486 | else: 487 | for idx, expr in enumerate(module_trait.exprs): 488 | println('WriteData({}{}, {}({}), enabled);'.format( 489 | ir.FIFORef.ST_PREFIX, idx, expr.c_type, expr.c_expr)) 490 | 491 | for delay in delays: 492 | println(delay.c_buf_store) 493 | println('{} = {};'.format(delay.ptr, delay.c_next_ptr_expr)) 494 | 495 | un_scope() 496 | un_scope() 497 | un_scope() 498 | _logger.debug('printing: %s', module_trait) 499 | 500 | def _print_data_struct(printer): 501 | println = printer.println 502 | println('template<typename T> struct Data') 503 | printer.do_scope() 504 | println('T data;') 505 | println('bool ctrl;') 506 | printer.un_scope(suffix=';') 507 | 508 | def _print_reinterpret(printer): 509 | println = printer.println 510 | println('template<typename To, typename From>') 511 | println('inline To Reinterpret(const From& val)') 512 | printer.do_scope() 513 | println('return reinterpret_cast<const To&>(val);') 514 | printer.un_scope() 515 | 516 | def _print_read_data(printer): 517 | println = printer.println 518 | println('template<typename T> inline bool ReadData' 519 | '(T* data, hls::stream<Data<T>>* from)') 520 | printer.do_scope() 521 | println('#pragma HLS inline', indent=0) 522 | println('const Data<T>& tmp = from->read();') 523 | println('*data = tmp.data;') 524 | println('return tmp.ctrl;') 525 | printer.un_scope() 526 | 527 | def _print_write_data(printer): 528 | println = printer.println 529 | println('template<typename T> inline void WriteData' 530 | '(hls::stream<Data<T>>* to, const T& data, bool ctrl)') 531 | printer.do_scope() 532 | println('#pragma HLS inline', indent=0) 533 | println('Data<T> tmp;') 534 | println('tmp.data = data;') 535 | println('tmp.ctrl = ctrl;') 536 | println('to->write(tmp);') 537 | printer.un_scope() 538 | -------------------------------------------------------------------------------- /src/soda/codegen/xilinx/opencl.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import sys 4 | import tempfile 5 | 6 | from soda.codegen.xilinx import header 7 | from soda.codegen.xilinx import host 8 | from soda.codegen.xilinx import hls_kernel as kernel 9 | from soda.codegen.xilinx import rtl_kernel 10 | 11 | def add_arguments(parser): 12 | parser.add_argument( 13 | '--xocl', type=str, dest='output_dir', metavar='dir', nargs='?', const='', 14 | help='directory to generate kernel and host code; default names are ' 15 | 'used; default to the current working directory; may be overridden by ' 16 |
'--xocl-header, --xocl-host, or --xocl-kernel') 17 | parser.add_argument( 18 | '--xocl-header', type=str, dest='header_file', metavar='file', 19 | help='host C++ header code; overrides --xocl') 20 | parser.add_argument( 21 | '--xocl-host', type=str, dest='host_file', metavar='file', 22 | help='host C++ source code for the Xilinx OpenCL flow; overrides --xocl') 23 | parser.add_argument( 24 | '--xocl-kernel', type=str, dest='kernel_file', metavar='file', 25 | help='Vivado HLS C++ kernel code for the Xilinx OpenCL flow; overrides ' 26 | '--xocl') 27 | parser.add_argument( 28 | '--xocl-platform', type=str, dest='xocl_platform', metavar='dir', 29 | help='SDAccel platform directory of the Xilinx OpenCL flow') 30 | parser.add_argument('--xocl-hw-xo', type=str, dest='xo_file', metavar='file', 31 | help='hardware object file for the Xilinx OpenCL flow') 32 | 33 | def print_code(stencil, args): 34 | if args.kernel_file is not None: 35 | with tempfile.TemporaryFile(mode='w+') as tmp: 36 | kernel.print_code(stencil, tmp) 37 | tmp.seek(0) 38 | if args.kernel_file == '-': 39 | shutil.copyfileobj(tmp, sys.stdout) 40 | else: 41 | with open(args.kernel_file, 'w') as kernel_file: 42 | shutil.copyfileobj(tmp, kernel_file) 43 | 44 | if args.host_file is not None: 45 | with tempfile.TemporaryFile(mode='w+') as tmp: 46 | host.print_code(stencil, tmp) 47 | tmp.seek(0) 48 | if args.host_file == '-': 49 | shutil.copyfileobj(tmp, sys.stdout) 50 | else: 51 | with open(args.host_file, 'w') as host_file: 52 | shutil.copyfileobj(tmp, host_file) 53 | 54 | if args.header_file is not None: 55 | with tempfile.TemporaryFile(mode='w+') as tmp: 56 | header.print_code(stencil, tmp) 57 | tmp.seek(0) 58 | if args.header_file == '-': 59 | shutil.copyfileobj(tmp, sys.stdout) 60 | else: 61 | with open(args.header_file, 'w') as header_file: 62 | shutil.copyfileobj(tmp, header_file) 63 | 64 | if args.xo_file is not None: 65 | with tempfile.TemporaryFile(mode='w+b') as tmp: 66 | rtl_kernel.print_code(stencil, tmp, platform=args.xocl_platform) 67 | tmp.seek(0) 68 | if args.xo_file == '-': 69 | shutil.copyfileobj(tmp, sys.stdout) 70 | else: 71 | with open(args.xo_file, 'wb') as xo_file: 72 | shutil.copyfileobj(tmp, xo_file) 73 | 74 | if args.output_dir is not None and (args.kernel_file is None or 75 | args.host_file is None or 76 | args.header_file is None): 77 | if args.kernel_file is None: 78 | dram_in = args.dram_in if args.dram_in else '_' 79 | dram_out = args.dram_out if args.dram_out else '_' 80 | kernel_file_name = os.path.join( 81 | args.output_dir, '%s_kernel-tile%s-unroll%d-ddr%s.cpp' % ( 82 | stencil.app_name, 83 | 'x'.join('%d'%x for x in stencil.tile_size[:-1]), 84 | stencil.unroll_factor, dram_in + '-' + dram_out)) 85 | else: 86 | kernel_file_name = args.kernel_file 87 | with tempfile.TemporaryFile(mode='w+') as tmp: 88 | kernel.print_code(stencil, tmp) 89 | tmp.seek(0) 90 | with open(kernel_file_name, 'w') as kernel_file: 91 | shutil.copyfileobj(tmp, kernel_file) 92 | if args.host_file is None: 93 | host_file_name = os.path.join(args.output_dir, stencil.app_name + '.cpp') 94 | else: 95 | host_file_name = args.host_file 96 | with tempfile.TemporaryFile(mode='w+') as tmp: 97 | host.print_code(stencil, tmp) 98 | tmp.seek(0) 99 | with open(host_file_name, 'w') as host_file: 100 | shutil.copyfileobj(tmp, host_file) 101 | if args.header_file is None: 102 | header_file_name = os.path.join(args.output_dir, stencil.app_name + '.h') 103 | else: 104 | header_file_name = args.header_file 105 | with tempfile.TemporaryFile(mode='w+')
as tmp: 106 | header.print_code(stencil, tmp) 107 | tmp.seek(0) 108 | with open(header_file_name, 'w') as header_file: 109 | shutil.copyfileobj(tmp, header_file) 110 | -------------------------------------------------------------------------------- /src/soda/codegen/xilinx/rtl_kernel.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import concurrent.futures 3 | import logging 4 | import os 5 | import shutil 6 | import sys 7 | import tarfile 8 | import tempfile 9 | 10 | from haoda import util 11 | from haoda.backend import xilinx as backend 12 | from soda.codegen.xilinx import hls_kernel 13 | 14 | _logger = logging.getLogger().getChild(__name__) 15 | 16 | def print_code(stencil, xo_file, platform=None, jobs=os.cpu_count()): 17 | """Generate hardware object file for the given Stencil. 18 | 19 | Working `vivado` and `vivado_hls` are required in the PATH. 20 | 21 | Args: 22 | stencil: Stencil object to generate from. 23 | xo_file: file object to write to. 24 | platform: path to the SDAccel platform directory. 25 | jobs: maximum number of jobs running in parallel. 26 | """ 27 | 28 | m_axi_names = [] 29 | m_axi_bundles = [] 30 | inputs = [] 31 | outputs = [] 32 | for stmt in stencil.output_stmts + stencil.input_stmts: 33 | for bank in stmt.dram: 34 | haoda_type = 'uint%d' % stencil.burst_width 35 | bundle_name = util.get_bundle_name(stmt.name, bank) 36 | m_axi_names.append(bundle_name) 37 | m_axi_bundles.append((bundle_name, haoda_type)) 38 | 39 | for stmt in stencil.output_stmts: 40 | for bank in stmt.dram: 41 | haoda_type = 'uint%d' % stencil.burst_width 42 | bundle_name = util.get_bundle_name(stmt.name, bank) 43 | outputs.append((util.get_port_name(stmt.name, bank), bundle_name, 44 | haoda_type, util.get_port_buf_name(stmt.name, bank))) 45 | for stmt in stencil.input_stmts: 46 | for bank in stmt.dram: 47 | haoda_type = 'uint%d' % stencil.burst_width 48 | bundle_name = util.get_bundle_name(stmt.name, bank) 49 | inputs.append((util.get_port_name(stmt.name, bank), bundle_name, 50 | haoda_type, util.get_port_buf_name(stmt.name, bank))) 51 | 52 | top_name = stencil.app_name + '_kernel' 53 | 54 | if 'XDEVICE' in os.environ: 55 | xdevice = os.environ['XDEVICE'].replace(':', '_').replace('.', '_') 56 | if platform is None or not os.path.exists(platform): 57 | platform = os.path.join('/opt/xilinx/platforms', xdevice) 58 | if platform is None or not os.path.exists(platform): 59 | if 'XILINX_SDX' in os.environ: 60 | platform = os.path.join(os.environ['XILINX_SDX'], 'platforms', xdevice) 61 | if platform is None or not os.path.exists(platform): 62 | raise ValueError('Cannot determine platform from environment.') 63 | device_info = backend.get_device_info(platform) 64 | 65 | with tempfile.TemporaryDirectory(prefix='sodac-xrtl-') as tmpdir: 66 | dataflow_kernel = os.path.join(tmpdir, 'dataflow_kernel.cpp') 67 | with open(dataflow_kernel, 'w') as dataflow_kernel_obj: 68 | print_dataflow_hls_interface( 69 | util.Printer(dataflow_kernel_obj), top_name, inputs, outputs) 70 | 71 | kernel_xml = os.path.join(tmpdir, 'kernel.xml') 72 | with open(kernel_xml, 'w') as kernel_xml_obj: 73 | backend.print_kernel_xml(top_name, outputs + inputs, kernel_xml_obj) 74 | 75 | kernel_file = os.path.join(tmpdir, 'kernel.cpp') 76 | with open(kernel_file, 'w') as kernel_fileobj: 77 | hls_kernel.print_code(stencil, kernel_fileobj) 78 | 79 | super_source = stencil.dataflow_super_source 80 | with concurrent.futures.ThreadPoolExecutor(max_workers=jobs) as executor: 81 | threads =
[] 82 | for module_id in range(len(super_source.module_traits)): 83 | threads.append(executor.submit( 84 | synthesis_module, tmpdir, [kernel_file], 85 | util.get_func_name(module_id), device_info)) 86 | threads.append(executor.submit( 87 | synthesis_module, tmpdir, [dataflow_kernel], top_name, device_info)) 88 | for future in concurrent.futures.as_completed(threads): 89 | returncode, stdout, stderr = future.result() 90 | log_func = _logger.error if returncode != 0 else _logger.debug 91 | if stdout: 92 | log_func(stdout.decode()) 93 | if stderr: 94 | log_func(stderr.decode()) 95 | if returncode != 0: 96 | util.pause_for_debugging() 97 | sys.exit(returncode) 98 | 99 | hdl_dir = os.path.join(tmpdir, 'hdl') 100 | with open(os.path.join(hdl_dir, 'Dataflow.v'), mode='w') as dataflow_v: 101 | print_top_module(backend.VerilogPrinter(dataflow_v), 102 | stencil.dataflow_super_source, inputs, outputs) 103 | 104 | util.pause_for_debugging() 105 | 106 | xo_filename = os.path.join(tmpdir, stencil.app_name + '.xo') 107 | with backend.PackageXo(xo_filename, top_name, kernel_xml, hdl_dir, 108 | m_axi_names, [dataflow_kernel]) as proc: 109 | stdout, stderr = proc.communicate() 110 | log_func = _logger.error if proc.returncode != 0 else _logger.debug 111 | log_func(stdout.decode()) 112 | log_func(stderr.decode()) 113 | with open(xo_filename, mode='rb') as xo_fileobj: 114 | shutil.copyfileobj(xo_fileobj, xo_file) 115 | 116 | def synthesis_module(tmpdir, kernel_files, module_name, device_info): 117 | """Synthesize a module from kernel files. 118 | 119 | Returns: 120 | (returncode, stdout, stderr) results of the subprocess. 121 | """ 122 | with tempfile.TemporaryFile(mode='w+b') as tarfileobj: 123 | with backend.RunHls( 124 | tarfileobj, kernel_files, module_name, device_info['clock_period'], 125 | device_info['part_num']) as proc: 126 | stdout, stderr = proc.communicate() 127 | if proc.returncode == 0: 128 | tarfileobj.seek(0) 129 | with tarfile.open(mode='r', fileobj=tarfileobj) as tar: 130 | tar.extractall(tmpdir, filter(lambda _: _.name.startswith('hdl'), 131 | tar.getmembers())) 132 | return proc.returncode, stdout, stderr 133 | 134 | FIFO_PORT_SUFFIXES = dict( 135 | data_in='_din', 136 | not_full='_full_n', 137 | write_enable='_write', 138 | data_out='_dout', 139 | not_empty='_empty_n', 140 | read_enable='_read', 141 | not_block='_blk_n') 142 | 143 | 144 | def print_top_module(printer, super_source, inputs, outputs): 145 | println = printer.println 146 | println('`timescale 1 ns / 1 ps') 147 | args = ['ap_clk', 'ap_rst', 'ap_start', 'ap_done', 'ap_continue', 'ap_idle', 148 | 'ap_ready'] 149 | for port_name, _, _, _ in outputs: 150 | args.append('{}_V_V{data_in}'.format(port_name, **FIFO_PORT_SUFFIXES)) 151 | args.append('{}_V_V{not_full}'.format(port_name, **FIFO_PORT_SUFFIXES)) 152 | args.append('{}_V_V{write_enable}'.format(port_name, **FIFO_PORT_SUFFIXES)) 153 | for port_name, _, _, _ in inputs: 154 | args.append('{}_V_V{data_out}'.format(port_name, **FIFO_PORT_SUFFIXES)) 155 | args.append('{}_V_V{not_empty}'.format(port_name, **FIFO_PORT_SUFFIXES)) 156 | args.append('{}_V_V{read_enable}'.format(port_name, **FIFO_PORT_SUFFIXES)) 157 | printer.module('Dataflow', args) 158 | println() 159 | 160 | input_args = 'ap_clk', 'ap_rst', 'ap_start', 'ap_continue' 161 | output_args = 'ap_done', 'ap_idle', 'ap_ready' 162 | 163 | for arg in input_args: 164 | println('input %s;' % arg) 165 | for arg in output_args: 166 | println('output %s;' % arg) 167 | for port_name, _, haoda_type, _ in outputs: 168 | kwargs =
dict(port_name=port_name, **FIFO_PORT_SUFFIXES) 169 | println('output [{}:0] {port_name}_V_V{data_in};'.format( 170 | util.get_width_in_bits(haoda_type) - 1, **kwargs)) 171 | println('input {port_name}_V_V{not_full};'.format(**kwargs)) 172 | println('output {port_name}_V_V{write_enable};'.format(**kwargs)) 173 | for port_name, _, haoda_type, _ in inputs: 174 | kwargs = dict(port_name=port_name, **FIFO_PORT_SUFFIXES) 175 | println('input [{}:0] {port_name}_V_V{data_out};'.format( 176 | util.get_width_in_bits(haoda_type) - 1, **kwargs)) 177 | println('input {port_name}_V_V{not_empty};'.format(**kwargs)) 178 | println('output {port_name}_V_V{read_enable};'.format(**kwargs)) 179 | println() 180 | 181 | println("reg ap_done = 1'b0;") 182 | println("reg ap_idle = 1'b1;") 183 | println("reg ap_ready = 1'b0;") 184 | 185 | for port_name, _, haoda_type, _ in outputs: 186 | kwargs = dict(port_name=port_name, **FIFO_PORT_SUFFIXES) 187 | println('reg [{}:0] {port_name}{data_in};'.format( 188 | util.get_width_in_bits(haoda_type) - 1, **kwargs)) 189 | println('wire {port_name}_V_V{write_enable};'.format(**kwargs)) 190 | for port_name, _, haoda_type, _ in inputs: 191 | println('wire {}_V_V{read_enable};'.format(port_name, **FIFO_PORT_SUFFIXES)) 192 | println('reg ap_rst_n_inv;') 193 | with printer.always('*'): 194 | println('ap_rst_n_inv = ap_rst;') 195 | println() 196 | 197 | with printer.always('posedge ap_clk'): 198 | with printer.if_('ap_rst'): 199 | println("ap_done <= 1'b0;") 200 | println("ap_idle <= 1'b1;") 201 | println("ap_ready <= 1'b0;") 202 | printer.else_() 203 | println('ap_idle <= ~ap_start;') 204 | 205 | for port_name, _, _, _ in outputs: 206 | println('reg {}_V_V{not_block};'.format(port_name, **FIFO_PORT_SUFFIXES)) 207 | for port_name, _, _, _ in inputs: 208 | println('reg {}_V_V{not_block};'.format(port_name, **FIFO_PORT_SUFFIXES)) 209 | 210 | with printer.always('*'): 211 | for port_name, _, _, _ in outputs: 212 | println('{port_name}_V_V{not_block} = {port_name}_V_V{not_full};'.format( 213 | port_name=port_name, **FIFO_PORT_SUFFIXES)) 214 | for port_name, _, _, _ in inputs: 215 | println('{port_name}_V_V{not_block} = {port_name}_V_V{not_empty};'.format( 216 | port_name=port_name, **FIFO_PORT_SUFFIXES)) 217 | println() 218 | 219 | for module in super_source.tpo_node_gen(): 220 | for fifo in module.fifos: 221 | kwargs = { 222 | 'name' : fifo.c_expr, 223 | 'msb' : fifo.width_in_bits - 1, 224 | **FIFO_PORT_SUFFIXES 225 | } 226 | println('wire [{msb}:0] {name}{data_in};'.format(**kwargs)) 227 | println('wire {name}{not_full};'.format(**kwargs)) 228 | println('wire {name}{write_enable};'.format(**kwargs)) 229 | println('wire [{msb}:0] {name}{data_out};'.format(**kwargs)) 230 | println('wire {name}{not_empty};'.format(**kwargs)) 231 | println('wire {name}{read_enable};'.format(**kwargs)) 232 | println() 233 | 234 | args = collections.OrderedDict(( 235 | ('clk', 'ap_clk'), 236 | ('reset', 'ap_rst_n_inv'), 237 | ('if_read_ce', "1'b1"), 238 | ('if_write_ce', "1'b1"), 239 | ('if{data_in}'.format(**kwargs), 240 | '{name}{data_in}'.format(**kwargs)), 241 | ('if{not_full}'.format(**kwargs), 242 | '{name}{not_full}'.format(**kwargs)), 243 | ('if{write_enable}'.format(**kwargs), 244 | '{name}{write_enable}'.format(**kwargs)), 245 | ('if{data_out}'.format(**kwargs), 246 | '{name}{data_out}'.format(**kwargs)), 247 | ('if{not_empty}'.format(**kwargs), 248 | '{name}{not_empty}'.format(**kwargs)), 249 | ('if{read_enable}'.format(**kwargs), 250 | '{name}{read_enable}'.format(**kwargs)) 251 | )) 252 | 
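# Instantiate one width/depth-specialized FIFO per edge; the depth is padded by 2, matching the fifo_module definitions emitted at the end of this function: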
printer.module_instance('fifo_w{width}_d{depth}_A'.format( 253 | width=fifo.width_in_bits, depth=fifo.depth+2), fifo.c_expr, args) 254 | println() 255 | 256 | for module in super_source.tpo_node_gen(): 257 | module_trait, module_trait_id = super_source.module_table[module] 258 | args = collections.OrderedDict((('ap_clk', 'ap_clk'), 259 | ('ap_rst', 'ap_rst_n_inv'), 260 | ('ap_start', "1'b1"))) 261 | for dram_ref, bank in module.dram_writes: 262 | kwargs = dict(port=dram_ref.dram_fifo_name(bank), 263 | fifo=util.get_port_name(dram_ref.var, bank), 264 | **FIFO_PORT_SUFFIXES) 265 | args['{port}_V{data_in}'.format(**kwargs)] = \ 266 | '{fifo}_V_V{data_in}'.format(**kwargs) 267 | args['{port}_V{not_full}'.format(**kwargs)] = \ 268 | '{fifo}_V_V{not_full}'.format(**kwargs) 269 | args['{port}_V{write_enable}'.format(**kwargs)] = \ 270 | '{fifo}_V_V{write_enable}'.format(**kwargs) 271 | for port, fifo in zip(module_trait.output_fifos, module.output_fifos): 272 | kwargs = dict(port=port, fifo=fifo, **FIFO_PORT_SUFFIXES) 273 | args['{port}_V{data_in}'.format(**kwargs)] = \ 274 | '{fifo}{data_in}'.format(**kwargs) 275 | args['{port}_V{not_full}'.format(**kwargs)] = \ 276 | '{fifo}{not_full}'.format(**kwargs) 277 | args['{port}_V{write_enable}'.format(**kwargs)] = \ 278 | '{fifo}{write_enable}'.format(**kwargs) 279 | for port, fifo in zip(module_trait.input_fifos, module.input_fifos): 280 | kwargs = dict(port=port, fifo=fifo, **FIFO_PORT_SUFFIXES) 281 | args['{port}_V{data_out}'.format(**kwargs)] = \ 282 | "{{1'b1, {fifo}{data_out}}}".format(**kwargs) 283 | args['{port}_V{not_empty}'.format(**kwargs)] = \ 284 | '{fifo}{not_empty}'.format(**kwargs) 285 | args['{port}_V{read_enable}'.format(**kwargs)] = \ 286 | '{fifo}{read_enable}'.format(**kwargs) 287 | for dram_ref, bank in module.dram_reads: 288 | kwargs = dict(port=dram_ref.dram_fifo_name(bank), 289 | fifo=util.get_port_name(dram_ref.var, bank), 290 | **FIFO_PORT_SUFFIXES) 291 | args['{port}_V{data_out}'.format(**kwargs)] = \ 292 | "{{1'b1, {fifo}_V_V{data_out}}}".format(**kwargs) 293 | args['{port}_V{not_empty}'.format(**kwargs)] = \ 294 | '{fifo}_V_V{not_empty}'.format(**kwargs) 295 | args['{port}_V{read_enable}'.format(**kwargs)] = \ 296 | '{fifo}_V_V{read_enable}'.format(**kwargs) 297 | printer.module_instance(util.get_func_name(module_trait_id), module.name, 298 | args) 299 | println() 300 | printer.endmodule() 301 | 302 | fifos = set() 303 | for module in super_source.tpo_node_gen(): 304 | for fifo in module.fifos: 305 | fifos.add((fifo.width_in_bits, fifo.depth + 2)) 306 | for fifo in fifos: 307 | printer.fifo_module(*fifo) 308 | 309 | def print_dataflow_hls_interface(printer, top_name, inputs, outputs): 310 | println = printer.println 311 | do_scope = printer.do_scope 312 | un_scope = printer.un_scope 313 | do_indent = printer.do_indent 314 | un_indent = printer.un_indent 315 | m_axi_ports = outputs + inputs 316 | print_func = printer.print_func 317 | 318 | println('#include <stddef.h>') 319 | println('#include <stdint.h>') 320 | println('#include <ap_int.h>') 321 | println('#include <hls_stream.h>') 322 | 323 | println('template<int width>') 324 | print_func('void BurstRead', [ 325 | 'hls::stream<ap_uint<width>>* to', 'ap_uint<width>* from', 326 | 'uint64_t data_num'], align=0) 327 | do_scope() 328 | println('load_epoch:', 0) 329 | with printer.for_('uint64_t epoch = 0', 'epoch < data_num', '++epoch'): 330 | println('#pragma HLS pipeline II=1', 0) 331 | println('to->write(from[epoch]);') 332 | un_scope() 333 | 334 | println('template<int width>') 335 | print_func('void BurstWrite', [ 336 | 'ap_uint<width>* to', 'hls::stream<ap_uint<width>>* from',
337 | 'uint64_t data_num'], align=0) 338 | do_scope() 339 | println('store_epoch:', 0) 340 | with printer.for_('uint64_t epoch = 0', 'epoch < data_num', '++epoch'): 341 | println('#pragma HLS pipeline II=1', 0) 342 | println('to[epoch] = from->read();') 343 | un_scope() 344 | 345 | params = ['hls::stream<{}>* {}'.format(util.get_c_type(haoda_type), name) 346 | for name, _, haoda_type, _ in m_axi_ports] 347 | print_func('void Dataflow', params, align=0) 348 | do_scope() 349 | for name, _, haoda_type, _ in inputs: 350 | println('volatile {c_type} {name}_read;'.format( 351 | c_type=util.get_c_type(haoda_type), name=name)) 352 | for name, _, haoda_type, _ in inputs: 353 | println('{name}_read = {name}->read();'.format(name=name)) 354 | for name, _, haoda_type, _ in outputs: 355 | println( 356 | '{name}->write({c_type}());'.format( 357 | c_type=util.get_c_type(haoda_type), name=name)) 358 | un_scope() 359 | 360 | params = ['{}* {}'.format(util.get_c_type(haoda_type), name) 361 | for name, _, haoda_type, _ in m_axi_ports] 362 | params.append('uint64_t coalesced_data_num') 363 | print_func('void %s' % top_name, params, align=0) 364 | do_scope() 365 | 366 | println('#pragma HLS dataflow', 0) 367 | 368 | for port_name, bundle_name, _, _ in m_axi_ports: 369 | println('#pragma HLS interface m_axi port={} offset=slave bundle={}'.format( 370 | port_name, bundle_name), 0) 371 | for port_name, _, _, _ in m_axi_ports: 372 | println('#pragma HLS interface s_axilite port={} bundle=control'.format( 373 | port_name), 0) 374 | println('#pragma HLS interface s_axilite port=coalesced_data_num ' 375 | 'bundle=control', 0) 376 | println('#pragma HLS interface s_axilite port=return bundle=control', 0) 377 | println() 378 | 379 | for _, _, haoda_type, name in m_axi_ports: 380 | println('hls::stream<{c_type}> {name}("{name}");'.format( 381 | c_type=util.get_c_type(haoda_type), name=name)) 382 | println('#pragma HLS stream variable={name} depth=32'.format(name=name), 0) 383 | 384 | for port_name, _, haoda_type, buf_name in inputs: 385 | print_func('BurstRead', [ 386 | '&{name}'.format(name=buf_name), '{name}'.format(name=port_name), 387 | 'coalesced_data_num'], suffix=';', align=0) 388 | 389 | params = ['&{}'.format(name) for _, _, _, name in m_axi_ports] 390 | printer.print_func('Dataflow', params, suffix=';', align=0) 391 | 392 | for port_name, _, haoda_type, buf_name in outputs: 393 | print_func('BurstWrite', [ 394 | '{name}'.format(name=port_name), '&{name}'.format(name=buf_name), 395 | 'coalesced_data_num'], 396 | suffix=';', align=0) 397 | 398 | un_scope() 399 | -------------------------------------------------------------------------------- /src/soda/core.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import copy 3 | import itertools 4 | import logging 5 | import operator 6 | 7 | import cached_property 8 | 9 | from haoda import ir 10 | from haoda import util 11 | from haoda.ir import arithmetic 12 | from soda import dataflow 13 | from soda import grammar 14 | from soda import util as soda_util 15 | from soda import visitor 16 | from soda import mutator 17 | 18 | _logger = logging.getLogger().getChild(__name__) 19 | 20 | class Tensor(): 21 | """A tensor that corresponds to an input, local, or output. 22 | 23 | This class is used in the high-level DAG for stencil dependency analysis. 24 | Each tensor either is an input tensor, or has at least 1 parent tensor, which 25 | will be used to generate this tensor.
Meanwhile, each tensor either is an 26 | output tensor, or has at least 1 child tensor, which will be computed using 27 | this tensor. 28 | 29 | Attributes: 30 | haoda_type: str, type of the tensor element. 31 | parents: Dict from str of name of Tensor to Tensor. 32 | children: Dict from str of name of Tensor to Tensor. 33 | st_ref: Ref, name, index, and latency stored. 34 | offset: int, shift offset in terms of data elements 35 | lets: Lets of computation. 36 | expr: Expr of computation. 37 | ld_refs: Dict from str of name to dict of Ref loaded. 38 | ld_delays: Dict from str of name to extra delay of the input. 39 | 40 | Property: 41 | name: str, unique in each SODA program. 42 | st_offset: int, stencil offset in terms of data elements. 43 | st_idx, Tuple of int, the index referenced by its parent stage. 44 | ld_indices: Dict from str of name to dict of accessed indices of the input. 45 | ld_offsets: Dict from str of name to dict of offsets of the input. 46 | """ 47 | def __init__(self, stmt, tile_size): 48 | self.haoda_type = stmt.haoda_type 49 | self._tile_size = tile_size 50 | if isinstance(stmt, grammar.LocalStmtOrOutputStmt): 51 | self.st_ref = copy.copy(stmt.ref) 52 | self.st_ref.parent = self 53 | self.lets = stmt.let 54 | self.expr = stmt.expr 55 | elif isinstance(stmt, grammar.InputStmt): 56 | self._name = stmt.name 57 | self.st_ref = None 58 | self.lets = [] 59 | self.expr = None 60 | else: 61 | raise util.InternalError('cannot initialize a Tensor from %s' % 62 | type(stmt)) 63 | _logger.debug('tensor initialized from stmt `%s`', stmt) 64 | # pylint: disable=protected-access 65 | _logger.debug(' at tx position %d', stmt._tx_position) 66 | 67 | # these fields are to be set externally 68 | self.st_delay = 0 69 | self.parents = collections.OrderedDict() 70 | self.children = collections.OrderedDict() 71 | self.ld_refs = collections.OrderedDict() 72 | self.ld_delays = collections.OrderedDict() 73 | 74 | @property 75 | def name(self): 76 | if self.st_ref is not None: 77 | return self.st_ref.name 78 | return self._name 79 | 80 | @property 81 | def st_idx(self): 82 | if self.st_ref is not None: 83 | return self.st_ref.idx 84 | return (0,)*len(self._tile_size) 85 | 86 | @property 87 | def st_offset(self): 88 | return soda_util.serialize(self.st_idx, self._tile_size) + self.st_delay 89 | 90 | @cached_property.cached_property 91 | def ld_indices(self): 92 | return collections.OrderedDict( 93 | (name, collections.OrderedDict((ref.idx, ref) for ref in refs)) 94 | for name, refs in self.ld_refs.items()) 95 | 96 | @cached_property.cached_property 97 | def ld_offsets(self): 98 | return collections.OrderedDict( 99 | (name, collections.OrderedDict( 100 | (soda_util.serialize(ref.idx, self._tile_size), ref) for ref in refs)) 101 | for name, refs in self.ld_refs.items()) 102 | 103 | @property 104 | def c_type(self): 105 | return util.get_c_type(self.haoda_type) 106 | 107 | def propagate_type(self): 108 | if self.expr is None: 109 | return 110 | 111 | var_types = {} 112 | # pylint: disable=access-member-before-definition 113 | for let in self.lets: 114 | var_types[let.name] = let.haoda_type 115 | 116 | def visit_haoda_type(obj, args): 117 | if obj.haoda_type is None: 118 | if isinstance(obj, ir.Var): 119 | obj.haoda_type = var_types[obj.name] 120 | return obj 121 | 122 | self.lets = tuple(_.visit(visit_haoda_type) for _ in self.lets) 123 | self.expr = self.expr.visit(visit_haoda_type) 124 | self.st_ref = self.st_ref.visit(visit_haoda_type) 125 | 126 | def mutate(self, callback, args=None): 127 | 
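# Rewrite lets, expr, and st_ref in place through the visitor callback (e.g. a pass that shifts or renames references):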
self.lets = tuple(_.visit(callback, args) for _ in self.lets) 128 | self.expr = self.expr.visit(callback, args) 129 | self.st_ref = self.st_ref.visit(callback, args) 130 | 131 | def visit_loads(self, callback, args=None): 132 | for let in self.lets: 133 | let.visit(callback, args) 134 | self.expr.visit(callback, args) 135 | 136 | def __str__(self): 137 | return '''Tensor 138 | {haoda_type}: {name} = {expr} 139 | store: {st_ref} with delay {st_delay} 140 | parents: {parents} 141 | children: {children}'''.format( 142 | name=self.name, haoda_type=self.haoda_type, expr=self.expr, 143 | parents=util.idx2str(self.parents), children=util.idx2str(self.children), 144 | st_ref=str(self.st_ref), st_delay=self.st_delay) 145 | 146 | def is_output(self): 147 | return len(self.children) == 0 148 | 149 | def is_input(self): 150 | return len(self.parents) == 0 151 | 152 | def is_producer(self): 153 | return not self.is_output() 154 | 155 | def is_consumer(self): 156 | return not self.is_input() 157 | 158 | class Stencil(): 159 | """ 160 | Attributes: 161 | iterate: int, number of iterations to implement. 162 | burst_width: int, width in bits for DRAM burst access. 163 | app_name: str, application's name. 164 | tile_size: List of int. 165 | unroll_factor: int. 166 | dim: int. 167 | param_stmts: List of ParamStmt. 168 | input_stmts: List of InputStmt. 169 | local_stmts: List of LocalStmt. 170 | output_stmts: List of OutputStmt. 171 | 172 | Cached properties: 173 | tensors: Dict from str of name to Tensor. 174 | input_names: Tuple of str, names of input tensors. 175 | param_names: Tuple of str, names of param tensors. 176 | local_names: Tuple of str, names of local tensors. 177 | output_names: Tuple of str, names of output tensors. 178 | """ 179 | def __init__(self, **kwargs): 180 | self.iterate = kwargs.pop('iterate') 181 | if self.iterate < 1: 182 | raise util.SemanticError('cannot iterate %d times' % self.iterate) 183 | # platform determined 184 | self.burst_width = kwargs.pop('burst_width') 185 | # application determined 186 | self.app_name = kwargs.pop('app_name') 187 | # parameters that can be explored 188 | self.tile_size = tuple(kwargs.pop('tile_size')) 189 | self.unroll_factor = kwargs.pop('unroll_factor') 190 | # stage-independent 191 | self.dim = kwargs.pop('dim') 192 | self.param_stmts = kwargs.pop('param_stmts') 193 | # stage-specific 194 | self.input_stmts = kwargs.pop('input_stmts') 195 | self.local_stmts = kwargs.pop('local_stmts') 196 | self.output_stmts = kwargs.pop('output_stmts') 197 | 198 | if 'dram_in' in kwargs: 199 | dram_in = kwargs.pop('dram_in') 200 | if dram_in is not None: 201 | if ':' in dram_in: 202 | input_stmt_map = {_.name : _ for _ in self.input_stmts} 203 | for dram_map in dram_in.split('^'): 204 | var_name, bank_list = dram_map.split(':') 205 | if var_name not in input_stmt_map: 206 | raise util.SemanticError('no input named `{}`'.format(var_name)) 207 | input_stmt_map[var_name].dram = tuple(map(int, 208 | bank_list.split('.'))) 209 | else: 210 | for input_stmt in self.input_stmts: 211 | input_stmt.dram = tuple(map(int, dram_in.split('.'))) 212 | 213 | if 'dram_out' in kwargs: 214 | dram_out = kwargs.pop('dram_out') 215 | if dram_out is not None: 216 | if ':' in dram_out: 217 | output_stmt_map = {_.name : _ for _ in self.output_stmts} 218 | for dram_map in dram_out.split(','): 219 | var_name, bank_list = dram_map.split(':') 220 | if var_name not in output_stmt_map: 221 | raise util.SemanticError('no output named `{}`'.format(var_name)) 222 | output_stmt_map[var_name].dram =
tuple(map(int, 223 | bank_list.split('.'))) 224 | else: 225 | for output_stmt in self.output_stmts: 226 | output_stmt.dram = tuple(map(int, dram_out.split('.'))) 227 | 228 | if self.iterate > 1: 229 | if len(self.input_stmts) != len(self.output_stmts): 230 | raise util.SemanticError( 231 | 'number of input tensors must match the number of output tensors ' 232 | 'when iterate > 1; currently there are %d input(s) but %d output(s)' % 233 | (len(self.input_stmts), len(self.output_stmts))) 234 | if self.input_types != self.output_types: 235 | raise util.SemanticError( 236 | 'input must have the same type(s) as output when iterate > 1; ' 237 | 'current input has type %s but output has type %s' % 238 | (util.lst2str(self.input_types), util.lst2str(self.output_types))) 239 | _logger.debug('pipeline %d iterations of [%s] -> [%s]' % (self.iterate, 240 | ', '.join('%s: %s' % (stmt.haoda_type, stmt.name) 241 | for stmt in self.input_stmts), 242 | ', '.join('%s: %s' % (stmt.haoda_type, stmt.name) 243 | for stmt in self.output_stmts))) 244 | 245 | for stmt in itertools.chain(self.local_stmts, self.output_stmts): 246 | _logger.debug('simplify %s', stmt.name) 247 | stmt.expr = arithmetic.simplify(stmt.expr) 248 | stmt.let = arithmetic.simplify(stmt.let) 249 | 250 | # soda frontend successfully parsed 251 | # triggers cached property 252 | # replicate tensors for iterative stencil 253 | # pylint: disable=pointless-statement 254 | self.tensors 255 | _logger.debug('producer tensors: [%s]', 256 | ', '.join(tensor.name for tensor in self.producer_tensors)) 257 | _logger.debug('consumer tensors: [%s]', 258 | ', '.join(tensor.name for tensor in self.consumer_tensors)) 259 | 260 | # TODO: build Ref table and Var table 261 | # generate reuse buffers and get haoda nodes 262 | # pylint: disable=pointless-statement 263 | self.dataflow_super_source 264 | _logger.debug('dataflow: %s', self.dataflow_super_source) 265 | 266 | _logger.debug('module table: %s', dict(self.module_table)) 267 | _logger.debug('module traits: %s', self.module_traits) 268 | 269 | @cached_property.cached_property 270 | def dataflow_super_source(self): 271 | return dataflow.create_dataflow_graph(self) 272 | 273 | @property 274 | def module_table(self): 275 | return self.dataflow_super_source.module_table 276 | 277 | @property 278 | def module_traits(self): 279 | return self.dataflow_super_source.module_traits 280 | 281 | @cached_property.cached_property 282 | def input_types(self): 283 | return tuple(tensor.haoda_type for tensor in self.input_stmts) 284 | 285 | @cached_property.cached_property 286 | def param_types(self): 287 | return tuple(tensor.haoda_type for tensor in self.param_stmts) 288 | 289 | @cached_property.cached_property 290 | def local_types(self): 291 | return tuple(tensor.haoda_type for tensor in self.local_stmts) 292 | 293 | @cached_property.cached_property 294 | def output_types(self): 295 | return tuple(tensor.haoda_type for tensor in self.output_stmts) 296 | 297 | @cached_property.cached_property 298 | def input_names(self): 299 | return tuple(stmt.name for stmt in self.input_stmts) 300 | 301 | @cached_property.cached_property 302 | def param_names(self): 303 | return tuple(stmt.name for stmt in self.param_stmts) 304 | 305 | @cached_property.cached_property 306 | def local_names(self): 307 | return tuple(stmt.name for stmt in self.local_stmts) 308 | 309 | @cached_property.cached_property 310 | def output_names(self): 311 | return tuple(stmt.name for stmt in self.output_stmts) 312 | 313 | @cached_property.cached_property 314 | def
symbol_table(self): 315 | """Constructs a mapping from a tensor's name to its type. 316 | 317 | Returns: 318 | tensor_types: dict from name (str) to haoda_type (str). 319 | """ 320 | tensor_types = {} 321 | for name, haoda_type in zip(self.input_names, self.input_types): 322 | tensor_types[name] = haoda_type 323 | for name, haoda_type in zip(self.local_names, self.local_types): 324 | tensor_types[name] = haoda_type 325 | for name, haoda_type in zip(self.output_names, self.output_types): 326 | tensor_types[name] = haoda_type 327 | return tensor_types 328 | 329 | @cached_property.cached_property 330 | def tensors(self): 331 | """Constructs high-level DAG and creates the tensors. 332 | 333 | Returns: 334 | A collections.OrderedDict mapping a tensor's name to the tensor. 335 | """ 336 | # TODO: check for name conflicts 337 | tensor_map = collections.OrderedDict() 338 | for stmt in self.input_stmts: 339 | tensor = Tensor(stmt, self.tile_size) 340 | tensor_map[stmt.name] = tensor 341 | 342 | def name_in_iter(name, iteration): 343 | if name in self.input_names: 344 | if iteration > 0: 345 | return name+'_iter%d' % iteration 346 | return name 347 | if name in self.output_names: 348 | if iteration < self.iterate-1: 349 | return (self.input_names[self.output_names.index(name)]+ 350 | '_iter%d' % (iteration+1)) 351 | return name 352 | if name in self.local_names: 353 | if iteration > 0: 354 | return name+'_iter%d' % iteration 355 | return name 356 | if name in self.param_names: 357 | return name 358 | raise util.InternalError('unknown name: %s' % name) 359 | 360 | for iteration in range(self.iterate): 361 | _logger.debug('iterate %s', iteration) 362 | _logger.debug('map: %s', self.symbol_table) 363 | def mutate_name_callback(obj, mutated): 364 | if isinstance(obj, ir.Ref): 365 | obj.haoda_type = self.symbol_table[obj.name] 366 | # pylint: disable=cell-var-from-loop 367 | obj.name = name_in_iter(obj.name, iteration) 368 | return obj 369 | tensors = [] 370 | for stmt in itertools.chain(self.local_stmts, self.output_stmts): 371 | tensor = Tensor(stmt.visit(mutate_name_callback), self.tile_size) 372 | loads = visitor.get_load_tuple(tensor) 373 | norm_idx = tuple(min(load.idx[d] for load in loads 374 | if load.name not in self.param_names) 375 | for d in range(self.dim)) 376 | if any(norm_idx): 377 | _logger.debug('normalize index of %s: (%s)', 378 | tensor.name, ', '.join(map(str, norm_idx))) 379 | mutator.shift(tensor, norm_idx, excluded=self.param_names) 380 | tensor_map[tensor.name] = tensor 381 | tensors.append(tensor) 382 | 383 | for tensor in tensors: 384 | _logger.debug('%s', tensor) 385 | 386 | for tensor in tensors: 387 | tensor.propagate_type() 388 | loads = visitor.get_load_dict(tensor) 389 | for parent_name, ld_refs in loads.items(): 390 | ld_refs = sorted(ld_refs, key=lambda ref: soda_util.serialize( 391 | ref.idx, self.tile_size)) 392 | parent_tensor = tensor_map[parent_name] 393 | parent_tensor.children[tensor.name] = tensor 394 | tensor.parents[parent_name] = parent_tensor 395 | tensor.ld_refs[parent_name] = ld_refs 396 | 397 | # high-level DAG construction finished 398 | for tensor in tensor_map.values(): 399 | if tensor.name in self.input_names: 400 | _logger.debug('input tensor: %s', tensor) 401 | elif tensor.name in self.output_names: 402 | _logger.debug('output tensor: %s', tensor) 403 | else: 404 | _logger.debug('local tensor: %s', tensor) 405 | return tensor_map 406 | 407 | @cached_property.cached_property 408 | def chronological_tensors(self): 409 | """Computes the offsets of tensors.
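A tensor's offset is finalized only after all of its parent tensors have been processed, so the result is ordered producers-before-consumers.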
410 | 411 | Returns: 412 | A list of Tensor, in chronological order. 413 | """ 414 | _logger.info('calculate tensor offsets') 415 | processing_queue = collections.deque(list(self.input_names)) 416 | processed_tensors = set(self.input_names) 417 | chronological_tensors = list(map(self.tensors.get, self.input_names)) 418 | for tensor in chronological_tensors: 419 | _logger.debug('tensor <%s> is at offset %d' % 420 | (tensor.name, tensor.st_offset)) 421 | _logger.debug('processing queue: %s', processing_queue) 422 | _logger.debug('processed_tensors: %s', processed_tensors) 423 | while processing_queue: 424 | tensor = self.tensors[processing_queue.popleft()] 425 | _logger.debug('inspecting tensor %s\'s children' % tensor.name) 426 | for child in tensor.children.values(): 427 | if ({x.name for x in child.parents.values()} <= processed_tensors 428 | and child.name not in processed_tensors): 429 | # good, all inputs are processed 430 | # can determine offset of current tensor 431 | _logger.debug( 432 | 'input%s for tensor <%s> (i.e. %s) %s processed', 433 | '' if len(child.parents) == 1 else 's', 434 | child.name, 435 | ', '.join([x.name for x in child.parents.values()]), 436 | 'is' if len(child.parents) == 1 else 'are') 437 | stage_offset = soda_util.serialize(child.st_idx, self.tile_size) 438 | 439 | # synchronization check 440 | def sync(tensor, offset): 441 | if tensor is None: 442 | return offset 443 | _logger.debug('index of tensor <%s>: %s', 444 | tensor.name, tensor.st_idx) 445 | stage_offset = soda_util.serialize(tensor.st_idx, self.tile_size) 446 | _logger.debug('offset of tensor <%s>: %d', 447 | tensor.name, stage_offset) 448 | loads = visitor.get_load_dict(tensor) 449 | for name in loads: 450 | loads[name] = tuple(ref.idx for ref in loads[name]) 451 | _logger.debug('loads: %s', ', '.join( 452 | '%s@%s' % (name, util.lst2str(map(util.idx2str, indices))) 453 | for name, indices in loads.items())) 454 | for n in loads: 455 | loads[n] = soda_util.serialize_iter(loads[n], self.tile_size) 456 | for l in loads.values(): 457 | l[0], l[-1] = (stage_offset - max(l), stage_offset - min(l)) 458 | del l[1:-1] 459 | if len(l) == 1: 460 | l.append(l[-1]) 461 | _logger.debug( 462 | 'load offset range in tensor %s: %s', tensor.name, '{%s}' % ( 463 | ', '.join('%s: [%d:%d]' % (n, *v) 464 | for n, v in loads.items()))) 465 | for parent in tensor.parents.values(): 466 | tensor_distance = next(reversed(tensor.ld_offsets[parent.name])) 467 | _logger.debug('tensor distance: %s', tensor_distance) 468 | _logger.debug( 469 | 'want to access tensor <%s> at offset [%d, %d] ' 470 | 'to generate tensor <%s> at offset %d', 471 | parent.name, offset+loads[parent.name][0], 472 | offset+loads[parent.name][-1], tensor.name, offset) 473 | tensor_offset = (parent.st_delay+tensor_distance-stage_offset) 474 | if offset < tensor_offset: 475 | _logger.debug( 476 | 'but tensor <%s> won\'t be available until offset %d', 477 | parent.name, tensor_offset) 478 | offset = tensor_offset 479 | _logger.debug('need to access tensor <%s> at offset [%d, %d] ' 480 | 'to generate tensor <%s> at offset %d', 481 | parent.name, offset+loads[parent.name][0], 482 | offset+loads[parent.name][-1], tensor.name, 483 | offset) 484 | return offset 485 | 486 | _logger.debug('intend to generate tensor <%s> at offset %d', 487 | child.name, child.st_delay) 488 | synced_offset = sync(child, child.st_delay) 489 | _logger.debug('synced offset: %s', synced_offset) 490 | child.st_delay = synced_offset 491 | _logger.debug('decide to generate tensor <%s> at 
offset %d', 492 | child.name, child.st_delay) 493 | 494 | # add delay 495 | for sibling in child.parents.values(): 496 | delay = child.st_delay - (sibling.st_delay + 497 | list(child.ld_offsets[sibling.name].keys())[-1] - stage_offset) 498 | if delay > 0: 499 | _logger.debug( 500 | 'tensor %s arrives at tensor <%s> at offset %d < %d; ' 501 | 'add %d delay', sibling.name, child.name, 502 | sibling.st_delay+next(reversed( 503 | child.ld_offsets[sibling.name]))-stage_offset, 504 | child.st_delay, delay) 505 | else: 506 | _logger.debug( 507 | 'tensor %s arrives at tensor <%s> at offset %d = %d; good', 508 | sibling.name, child.name, sibling.st_delay+next(reversed( 509 | child.ld_offsets[sibling.name]))-stage_offset, 510 | child.st_delay) 511 | child.ld_delays[sibling.name] = max(delay, 0) 512 | _logger.debug('set delay of |%s <- %s| to %d' % 513 | (child.name, sibling.name, child.ld_delays[sibling.name])) 514 | 515 | processing_queue.append(child.name) 516 | processed_tensors.add(child.name) 517 | chronological_tensors.append(child) 518 | else: 519 | for parent in tensor.parents.values(): 520 | if parent.name not in processed_tensors: 521 | _logger.debug('tensor %s requires tensor <%s> as an input', 522 | tensor.name, parent.name) 523 | _logger.debug('but tensor <%s> isn\'t processed yet', 524 | parent.name) 525 | _logger.debug('add %s to scheduling queue', 526 | parent.name) 527 | processing_queue.append(parent.name) 528 | 529 | _logger.debug('tensors in insertion order: [%s]', 530 | ', '.join(map(str, self.tensors))) 531 | _logger.debug('tensors in chronological order: [%s]', 532 | ', '.join(t.name for t in chronological_tensors)) 533 | 534 | for tensor in self.tensors.values(): 535 | for name, indices in tensor.ld_indices.items(): 536 | _logger.debug('stage index: %s@%s <- %s@%s', 537 | tensor.name, util.idx2str(tensor.st_idx), 538 | name, util.lst2str(util.idx2str(idx) for idx in indices)) 539 | for tensor in self.tensors.values(): 540 | if tensor.is_input(): 541 | continue 542 | _logger.debug('stage expr: %s = %s', tensor.st_ref, tensor.expr) 543 | for tensor in self.tensors.values(): 544 | for name, offsets in tensor.ld_offsets.items(): 545 | _logger.debug('stage offset: %s@%d <- %s@%s', 546 | tensor.name, soda_util.serialize(tensor.st_idx, 547 | self.tile_size), 548 | name, util.lst2str(offsets)) 549 | for tensor in self.tensors.values(): 550 | for name, delay in tensor.ld_delays.items(): 551 | _logger.debug('stage delay: %s <- %s delayed %d' % 552 | (tensor.name, name, delay)) 553 | 554 | return chronological_tensors 555 | 556 | @cached_property.cached_property 557 | def input_partition(self): 558 | pixel_width_i = sum(self.pixel_width_i) 559 | if self.burst_width/pixel_width_i*self.dram_bank/2 > self.unroll_factor/2: 560 | return int(self.burst_width/pixel_width_i*self.dram_bank/2) 561 | return int(self.unroll_factor/2) 562 | 563 | @cached_property.cached_property 564 | def output_partition(self): 565 | pixel_width_o = sum(self.pixel_width_o) 566 | if self.burst_width/pixel_width_o*self.dram_bank/2 > self.unroll_factor/2: 567 | return int(self.burst_width/pixel_width_o*self.dram_bank/2) 568 | return int(self.unroll_factor/2) 569 | 570 | @cached_property.cached_property 571 | def pixel_width_i(self): 572 | return list(map(util.get_width_in_bits, self.input_stmts)) 573 | 574 | @cached_property.cached_property 575 | def pixel_width_o(self): 576 | return list(map(util.get_width_in_bits, self.output_stmts)) 577 | 578 | @cached_property.cached_property 579 | def producer_tensors(self): 580 
| return tuple(filter(Tensor.is_producer, self.tensors.values())) 581 | 582 | @cached_property.cached_property 583 | def consumer_tensors(self): 584 | return tuple(filter(Tensor.is_consumer, self.tensors.values())) 585 | 586 | # return [Tensor, ...] 587 | def _get_parent_tensors_for(self, node): 588 | return {x: self.tensors[x] 589 | for x in {x.name for x in node.get_loads() 590 | if x.name not in self.extra_params}} 591 | 592 | # return {name: [(idx, ...), ...]} 593 | def _get_window_for(self, node): 594 | loads = node.get_loads() # [Load, ...] 595 | load_names = {l.name for l in loads 596 | if l.name not in self.extra_params} 597 | windows = {name: sorted({l.idx for l in loads if l.name == name}, 598 | key=lambda x: soda_util.serialize(x, self.tile_size)) 599 | for name in load_names} 600 | _logger.debug('window for %s@(%s) is %s' % 601 | (node.name, ', '.join(map(str, node.expr[0].idx)), windows)) 602 | return windows 603 | 604 | # return [StageExpr, ...] 605 | def _get_expr_for(self, node): 606 | if isinstance(node, grammar.OutputStmt): 607 | return node.expr 608 | if isinstance(node, grammar.LocalStmt): 609 | return node.expr 610 | raise util.SemanticError('cannot get expression for %s' % str(type(node))) 611 | 612 | @cached_property.cached_property 613 | def reuse_buffers(self): 614 | """Constructs the reuse buffers. 615 | 616 | Returns: 617 | A dict mapping a tensor's name to its reuse buffers. 618 | """ 619 | unroll_factor = self.unroll_factor 620 | self._reuse_buffer_lengths = {} 621 | reuse_buffers = {} 622 | for tensor in self.producer_tensors: 623 | reuse_buffer = _get_reuse_buffer(self.tile_size, tensor, unroll_factor) 624 | reuse_buffer_length = {} 625 | reuse_buffers[tensor.name] = reuse_buffer 626 | self._reuse_buffer_lengths[tensor.name] = reuse_buffer_length 627 | first = [True]*unroll_factor 628 | for start, end in reuse_buffer[1:]: 629 | if first[start%unroll_factor]: 630 | first[start%unroll_factor] = False 631 | if start >= unroll_factor: 632 | reuse_buffer_length[end] = end//unroll_factor 633 | continue 634 | reuse_buffer_length[end] = (end-start)//unroll_factor 635 | return reuse_buffers 636 | 637 | @cached_property.cached_property 638 | def all_points(self): 639 | all_points = {} 640 | for tensor in self.producer_tensors: 641 | all_points[tensor.name] = _get_points(self.tile_size, tensor, 642 | self.unroll_factor) 643 | return all_points 644 | 645 | @cached_property.cached_property 646 | def next_fifo(self): 647 | """Constructs the next fifo offset mapping. 648 | 649 | Returns: 650 | A dict mapping a tensor's name and offset to the next offset. 651 | """ 652 | next_fifo = {} 653 | for name, reuse_buffer in self.reuse_buffers.items(): 654 | next_fifo[name] = {} 655 | for start, end in reuse_buffer[1:]: 656 | if start < end: 657 | next_fifo[name][start] = end 658 | _logger.debug('next_fifo: %s' % next_fifo) 659 | return next_fifo 660 | 661 | @cached_property.cached_property 662 | def reuse_buffer_lengths(self): 663 | """Constructs the reuse buffer lengths. 664 | 665 | Returns: 666 | A dict mapping a tensor's name to its reuse buffers' lengths. 667 | """ 668 | # pylint: disable=pointless-statement 669 | self.reuse_buffers 670 | return self._reuse_buffer_lengths 671 | 672 | def _get_reuse_chains(tile_size, tensor, unroll_factor): 673 | """Generates reuse chains for a Tensor. 674 | 675 | Generates reuse chains for a Tensor under the given tile size and unroll 676 | factor. 677 | 678 | Args: 679 | tile_size: An iterable representing the tile size in each dimension.
680 | tensor: A Tensor to which the reuse chains belong. 681 | unroll_factor: An int representing the unroll factor. 682 | 683 | Returns: 684 | A list of tuples where each tuple represents a reuse chain and each 685 | element of the tuple represents the offset from the latest input. 686 | """ 687 | 688 | _logger.debug('get reuse chains of tensor %s', tensor.name) 689 | 690 | def unroll_offsets(offsets, child): 691 | unrolled_offsets = set() 692 | for unroll_idx in range(unroll_factor): 693 | for offset in offsets: 694 | unrolled_offsets.add(max(offsets) + unroll_idx - offset + 695 | child.ld_delays[tensor.name]) 696 | return unrolled_offsets 697 | 698 | A_dag = set() 699 | for child in tensor.children.values(): 700 | A_dag |= unroll_offsets( 701 | soda_util.serialize_iter(child.ld_indices[tensor.name], tile_size), child) 702 | _logger.debug('A† of tensor %s: %s', tensor.name, A_dag) 703 | 704 | chains = [] 705 | for chain_idx in reversed(range(unroll_factor)): 706 | chains.append(tuple(sorted( 707 | offset for offset in A_dag if offset % unroll_factor == chain_idx))) 708 | _logger.debug('reuse chains: %s', chains) 709 | 710 | for idx, chain in enumerate(chains): 711 | _logger.debug('reuse chain %d of tensor %s: %s', idx, tensor.name, chain) 712 | return chains 713 | 714 | def _get_points(tile_size, tensor, unroll_factor): 715 | """Generates offset-to-point mapping for a Tensor. 716 | 717 | Generates a mapping which can be used to determine the accessed point index 718 | from the offset for a Tensor, under the given tile size and unroll factor. 719 | 720 | Args: 721 | tile_size: An iterable representing the tile size in each dimension. 722 | tensor: A Tensor to which the mapping belongs. 723 | unroll_factor: An int representing the unroll factor. 724 | 725 | Returns: 726 | A dict of name str to a dict of offset to a dict of unroll index to 727 | point index. 728 | """ 729 | 730 | all_points = {} # {name: {offset: {unroll_idx: point_idx}}} 731 | for child in tensor.children.values(): 732 | all_points[child.name] = {} 733 | offsets = child.ld_offsets[tensor.name] 734 | for unroll_idx in range(unroll_factor): 735 | for idx, offset in enumerate(offsets): 736 | all_points[child.name].setdefault( 737 | max(offsets) - offset + child.ld_delays[tensor.name] + unroll_idx, 738 | {})[unroll_factor-1-unroll_idx] = idx 739 | for child in tensor.children.values(): 740 | for offset, points in all_points[child.name].items(): 741 | for unroll_idx, point in points.items(): 742 | _logger.debug( 743 | '%s <- %s @ offset=%d <=> %s @ unroll_idx=%d', 744 | child.name, tensor.name, offset, 745 | util.idx2str(list(child.ld_indices[tensor.name].values())[point].idx), 746 | unroll_idx) 747 | return all_points 748 | 749 | def _get_reuse_buffer(tile_size, tensor, unroll_factor): 750 | """Generates reuse buffer for a Tensor. 751 | 752 | Generates a list representing the reuse buffer for a Tensor, under the given 753 | tile size and unroll factor. 754 | 755 | Args: 756 | tile_size: An iterable representing the tile size in each dimension. 757 | tensor: A Tensor to which the mapping belongs. 758 | unroll_factor: An int representing the unroll factor. 759 | 760 | Returns: 761 | A list whose first element is an int representing the length of the 762 | reuse buffer (capacity in data elements), followed by unroll_factor 763 | number of (start, end) tuples, where start and end are the offsets from 764 | the latest input of each piece of the reuse buffer.
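For example, a single reuse chain (0, 2) yields [3, (0, 0), (0, 2)]: capacity 3, one chain head at offset 0, and one (start, end) piece.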
765 | """ 766 | 767 | reuse_buffer = [None] # [length, (start, end), (start, end), ...] 768 | offsets = [] 769 | for chain in _get_reuse_chains(tile_size, tensor, unroll_factor): 770 | reuse_buffer.append((chain[0], chain[0])) 771 | offsets.append(chain[0]) 772 | for j in range(len(chain)-1): 773 | reuse_buffer.append((chain[j], chain[j+1])) 774 | offsets.append(chain[j+1]) 775 | reuse_buffer[0] = max(offsets)+1 776 | _logger.debug('reuse chains of tensor %s: %s' % (tensor.name, reuse_buffer)) 777 | return reuse_buffer 778 | 779 | def get_indices_id(indices): 780 | return '_'.join(str(idx).replace('-', 'm') for idx in indices) 781 | 782 | def get_stencil_distance(stencil_window, tile_size): 783 | return (max(soda_util.serialize_iter(stencil_window, tile_size)) + 784 | soda_util.serialize(get_stencil_window_offset(stencil_window), 785 | tile_size)) 786 | 787 | def get_stencil_dim(points): 788 | dimension = len(next(iter(points))) 789 | return [max_index-min_index+1 for max_index, min_index in zip( 790 | [max([point[dim] for point in points]) for dim in range(dimension)], 791 | [min([point[dim] for point in points]) for dim in range(dimension)])] 792 | 793 | _overall_stencil_window_cache = {} 794 | def get_overall_stencil_window(input_tensor, output_tensor): 795 | if isinstance(input_tensor, collections.Iterable): 796 | all_points = tuple(sorted(set.union(*( 797 | set(get_overall_stencil_window(_, output_tensor)) 798 | for _ in input_tensor)))) 799 | _logger.debug( 800 | 'overall stencil window of %s (%s) <- {%s} is %s (%d points)', 801 | output_tensor.name, ', '.join(['0']*len(output_tensor.st_idx)), 802 | ', '.join(_.name for _ in input_tensor), all_points, len(all_points)) 803 | return all_points 804 | # normalize store index to 0 805 | idx = (id(input_tensor), id(output_tensor)) 806 | if idx in _overall_stencil_window_cache: 807 | return _overall_stencil_window_cache[idx] 808 | _logger.debug('get overall stencil window of %s <- %s', 809 | output_tensor.name, input_tensor.name) 810 | all_points = set() 811 | for name, points in output_tensor.ld_indices.items(): 812 | _logger.debug('%s@%s <- %s', output_tensor.name, 813 | util.idx2str(output_tensor.st_idx), 814 | util.idx2str(points.values())) 815 | if name != input_tensor.name: 816 | recursive_points = get_overall_stencil_window( 817 | input_tensor, output_tensor.parents[name]) 818 | _logger.debug('recursive points: %s', util.idx2str(recursive_points)) 819 | all_points |= set.union(*[{ 820 | tuple(map(lambda a, b, c: a + b - c, _, point, output_tensor.st_idx)) 821 | for _ in recursive_points} for point in points]) 822 | else: 823 | all_points |= {tuple(map(operator.sub, point, output_tensor.st_idx)) 824 | for point in points} 825 | all_points = tuple(sorted(all_points)) 826 | _logger.debug('overall stencil window of %s (%s) <- %s is %s (%d points)', 827 | output_tensor.name, ', '.join(['0']*len(output_tensor.st_idx)), 828 | input_tensor.name, all_points, len(all_points)) 829 | _overall_stencil_window_cache[idx] = all_points 830 | return all_points 831 | 832 | def get_stencil_window_offset(stencil_window): 833 | # only works if window is normalized to store at 0 834 | return tuple(-min(p[d] for p in stencil_window) 835 | for d in range(len(next(iter(stencil_window))))) 836 | -------------------------------------------------------------------------------- /src/soda/dataflow.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import logging 3 | 4 | import cached_property 5 | 6 | 
from haoda import ir 7 | from haoda import util 8 | 9 | _logger = logging.getLogger().getChild(__name__) 10 | 11 | class SuperSourceNode(ir.Module): 12 | """A node representing the super source in the dataflow graph. 13 | 14 | A super source doesn't have parent nodes. 15 | 16 | Attributes: 17 | fwd_nodes: {(tensor_name, offset): node} 18 | cpt_nodes: {(stage_name, pe_id): node} 19 | _paths: {node: [(src, ... dst), ... ]} 20 | _extra_depths: {(src_node, dst_node): extra_depth} 21 | """ 22 | 23 | def find_paths(self, node): 24 | if not hasattr(self, '_paths'): 25 | self._paths = {self: [(self,)]} 26 | for src_node, dst_node in self.dfs_edge_generator(): 27 | self._paths.setdefault(dst_node, []).extend( 28 | path+(dst_node,) for path in self._paths[src_node]) 29 | return self._paths[node] 30 | 31 | # TODO: make this general and move it to haoda.ir 32 | def get_extra_depth(self, edge): 33 | if not hasattr(self, '_extra_depths'): 34 | self._extra_depths = collections.OrderedDict() 35 | node_heights = collections.OrderedDict() 36 | for node in self.tpo_node_gen(): 37 | node_heights[node] = max( 38 | (node_heights[parent] + parent.get_latency(node) 39 | for parent in node.parents), default=0) 40 | for parent in node.parents: 41 | extra_depth = node_heights[node] - ( 42 | node_heights[parent] + parent.get_latency(node)) 43 | if extra_depth > 0: 44 | self._extra_depths[(parent, node)] = extra_depth 45 | _logger.debug('\033[31moops\033[0m, need to add %d to %s', 46 | extra_depth, (parent, node)) 47 | return self._extra_depths.get(edge, 0) 48 | 49 | @property 50 | def name(self): 51 | return 'super_source' 52 | 53 | @cached_property.cached_property 54 | def module_table(self): 55 | """Returns a Node to (module_trait, module_trait_id) map.""" 56 | self._module_traits = collections.OrderedDict() 57 | module_table = collections.OrderedDict() 58 | for node in self.tpo_node_gen(): 59 | self._module_traits.setdefault(ir.ModuleTrait(node), []).append(node) 60 | for idx, module_trait in enumerate(self._module_traits): 61 | for node in self._module_traits[module_trait]: 62 | module_table[node] = module_trait, idx 63 | return module_table 64 | 65 | @cached_property.cached_property 66 | def module_traits(self): 67 | # pylint: disable=pointless-statement 68 | self.module_table 69 | return tuple(self._module_traits) 70 | 71 | class SuperSinkNode(ir.Module): 72 | """A node representing the super sink in the dataflow graph. 73 | 74 | A super sink doesn't have child nodes. 75 | """ 76 | @property 77 | def name(self): 78 | return 'super_sink' 79 | 80 | 81 | class ForwardNode(ir.Module): 82 | """A node representing a forward module in the dataflow graph. 83 | 84 | Attributes: 85 | tensor: Tensor corresponding to this node. 86 | offset: Int representing the offset of this tensor. 87 | """ 88 | def __init__(self, **kwargs): 89 | super().__init__() 90 | self.tensor = kwargs.pop('tensor') 91 | self.offset = kwargs.pop('offset') 92 | 93 | def __repr__(self): 94 | return '\033[32mforward %s @%d\033[0m' % (self.tensor.name, self.offset) 95 | 96 | @property 97 | def name(self): 98 | return '{}_offset_{}'.format(self.tensor.name, self.offset) 99 | 100 | class ComputeNode(ir.Module): 101 | """A node representing a compute module in the dataflow graph. 102 | 103 | Attributes: 104 | tensor: Tensor corresponding to this node. 105 | pe_id: Int representing the PE id.
106 | fifo_map: {str: {idx: Node}} 107 | """ 108 | def __init__(self, **kwargs): 109 | super().__init__() 110 | self.tensor = kwargs.pop('tensor') 111 | self.pe_id = kwargs.pop('pe_id') 112 | self.fifo_map = collections.defaultdict(dict) 113 | 114 | def __repr__(self): 115 | return '\033[31mcompute %s #%d\033[0m' % (self.tensor.name, self.pe_id) 116 | 117 | @property 118 | def name(self): 119 | return '{}_pe_{}'.format(self.tensor.name, self.pe_id) 120 | 121 | # pylint: disable=too-many-branches,too-many-statements 122 | def create_dataflow_graph(stencil): 123 | chronological_tensors = stencil.chronological_tensors 124 | super_source = SuperSourceNode() 125 | super_sink = SuperSinkNode() 126 | 127 | # {(tensor_name, offset): node} 128 | super_source.fwd_nodes = collections.OrderedDict() 129 | 130 | # {(stage_name, pe_id): node} 131 | super_source.cpt_nodes = collections.OrderedDict() 132 | 133 | def color_id(node): 134 | if node.__class__ is (ir.Module): 135 | return repr(node) 136 | if node.__class__ is SuperSourceNode: 137 | return '\033[33msuper source\033[0m' 138 | if node.__class__ is SuperSinkNode: 139 | return '\033[36msuper sink\033[0m' 140 | if node.__class__ is ForwardNode: 141 | return '\033[32mforward %s @%d\033[0m' % (node.tensor.name, node.offset) 142 | if node.__class__ is ComputeNode: 143 | return '\033[31mcompute %s #%d\033[0m' % (node.tensor.name, node.pe_id) 144 | return 'unknown node' 145 | 146 | def color_attr(node): 147 | result = [] 148 | for k, v in node.__dict__.items(): 149 | if (node.__class__, k) in ((SuperSourceNode, 'parents'), 150 | (SuperSinkNode, 'children')): 151 | continue 152 | if k in ('parents', 'children'): 153 | result.append('%s: [%s]' % (k, ', '.join(map(color_id, v)))) 154 | else: 155 | result.append('%s: %s' % (k, repr(v))) 156 | return '{%s}' % ', '.join(result) 157 | 158 | def color_print(node): 159 | return '%s: %s' % (color_id(node), color_attr(node)) 160 | 161 | print_node = color_id 162 | 163 | next_fifo = stencil.next_fifo 164 | all_points = stencil.all_points 165 | reuse_buffers = stencil.reuse_buffers 166 | 167 | def add_fwd_nodes(src_name): 168 | dsts = all_points[src_name] 169 | reuse_buffer = reuse_buffers[src_name][1:] 170 | nodes_to_add = [] 171 | for dst_point_dicts in dsts.values(): 172 | for offset in dst_point_dicts: 173 | if (src_name, offset) in super_source.fwd_nodes: 174 | continue 175 | fwd_node = ForwardNode( 176 | tensor=stencil.tensors[src_name], offset=offset) 177 | #depth=stencil.get_reuse_buffer_length(src_name, offset)) 178 | _logger.debug('create %s', print_node(fwd_node)) 179 | # init_offsets is the start of each reuse chain 180 | init_offsets = [start for start, end in reuse_buffer if start == end] 181 | if offset in init_offsets: 182 | if src_name in stencil.input_names: 183 | # fwd from external input 184 | super_source.add_child(fwd_node) 185 | else: 186 | # fwd from output of last stage 187 | # tensor name and offset are used to find the cpt node 188 | cpt_offset = next(unroll_idx 189 | for unroll_idx in range(stencil.unroll_factor) 190 | if init_offsets[unroll_idx] == offset) 191 | cpt_node = super_source.cpt_nodes[(src_name, cpt_offset)] 192 | cpt_node.add_child(fwd_node) 193 | super_source.fwd_nodes[(src_name, offset)] = fwd_node 194 | if offset in next_fifo[src_name]: 195 | nodes_to_add.append( 196 | (fwd_node, (src_name, next_fifo[src_name][offset]))) 197 | for src_node, key in nodes_to_add: 198 | # fwd from another fwd node 199 | src_node.add_child(super_source.fwd_nodes[key]) 200 | 201 | for input_name 
in stencil.input_names: 202 | add_fwd_nodes(input_name) 203 | 204 | for tensor in chronological_tensors: 205 | if tensor.is_input(): 206 | continue 207 | for unroll_index in range(stencil.unroll_factor): 208 | pe_id = stencil.unroll_factor-1-unroll_index 209 | cpt_node = ComputeNode(tensor=tensor, pe_id=pe_id) 210 | _logger.debug('create %s', print_node(cpt_node)) 211 | super_source.cpt_nodes[(tensor.name, pe_id)] = cpt_node 212 | for input_name, input_window in tensor.ld_indices.items(): 213 | for i in range(len(input_window)): 214 | offset = next(offset for offset, points in 215 | all_points[input_name][tensor.name].items() 216 | if pe_id in points and points[pe_id] == i) 217 | fwd_node = super_source.fwd_nodes[(input_name, offset)] 218 | _logger.debug(' access %s', print_node(fwd_node)) 219 | fwd_node.add_child(cpt_node) 220 | if tensor.is_output(): 221 | for pe_id in range(stencil.unroll_factor): 222 | super_source.cpt_nodes[tensor.name, pe_id].add_child(super_sink) 223 | else: 224 | add_fwd_nodes(tensor.name) 225 | 226 | # pylint: disable=too-many-nested-blocks 227 | for src_node in super_source.tpo_node_gen(): 228 | for dst_node in src_node.children: 229 | # 5 possible edge types: 230 | # 1. src => fwd 231 | # 2. fwd => fwd 232 | # 3. fwd => cpt 233 | # 4. cpt => fwd 234 | # 5. cpt => sink 235 | if isinstance(src_node, SuperSourceNode): 236 | write_lat = 0 237 | elif isinstance(src_node, ForwardNode): 238 | write_lat = 2 239 | elif isinstance(src_node, ComputeNode): 240 | write_lat = src_node.tensor.st_ref.lat 241 | else: 242 | raise util.InternalError('unexpected source node: %s' % repr(src_node)) 243 | 244 | depth = 0 245 | fifo = ir.FIFO(src_node, dst_node, depth, write_lat) 246 | if isinstance(src_node, SuperSourceNode): 247 | lets = [] 248 | # TODO: build an index somewhere 249 | for stmt in stencil.input_stmts: 250 | if stmt.name == dst_node.tensor.name: 251 | break 252 | else: 253 | raise util.InternalError('cannot find tensor %s' % 254 | dst_node.tensor.name) 255 | expr = ir.DRAMRef(haoda_type=dst_node.tensor.haoda_type, 256 | # pylint: disable=undefined-loop-variable 257 | dram=stmt.dram, 258 | var=dst_node.tensor.name, 259 | offset=stencil.unroll_factor-1-dst_node.offset) 260 | elif isinstance(src_node, ForwardNode): 261 | if isinstance(dst_node, ComputeNode): 262 | dst = src_node.tensor.children[dst_node.tensor.name] 263 | src_name = src_node.tensor.name 264 | unroll_idx = dst_node.pe_id 265 | point = all_points[src_name][dst.name][src_node.offset][unroll_idx] 266 | idx = list(dst.ld_indices[src_name].values())[point].idx 267 | _logger.debug('%s%s referenced by <%s> @ unroll_idx=%d is %s', 268 | src_name, util.idx2str(idx), dst.name, unroll_idx, 269 | print_node(src_node)) 270 | dst_node.fifo_map[src_name][idx] = fifo 271 | lets = [] 272 | delay = stencil.reuse_buffer_lengths[src_node.tensor.name]\ 273 | [src_node.offset] 274 | offset = src_node.offset - delay 275 | for parent in src_node.parents: # fwd node has only 1 parent 276 | for fifo_r in parent.fifos: 277 | if fifo_r.edge == (parent, src_node): 278 | break 279 | if delay > 0: 280 | # TODO: build an index somewhere 281 | for let in src_node.lets: 282 | # pylint: disable=undefined-loop-variable 283 | if isinstance(let.expr, ir.DelayedRef) and let.expr.ref == fifo_r: 284 | var_name = let.name 285 | var_type = let.haoda_type 286 | break 287 | else: 288 | var_name = 'let_%d' % len(src_node.lets) 289 | # pylint: disable=undefined-loop-variable 290 | var_type = fifo_r.haoda_type 291 | lets.append(ir.Let( 292 | 
haoda_type=var_type, name=var_name, 293 | expr=ir.DelayedRef(delay=delay, ref=fifo_r))) 294 | expr = ir.Var(name=var_name, idx=[]) 295 | expr.haoda_type = var_type 296 | else: 297 | expr = fifo_r # pylint: disable=undefined-loop-variable 298 | elif isinstance(src_node, ComputeNode): 299 | def replace_refs_callback(obj, args): 300 | if isinstance(obj, ir.Ref): 301 | _logger.debug('replace %s with %s', obj, 302 | # pylint: disable=cell-var-from-loop 303 | src_node.fifo_map[obj.name][obj.idx]) 304 | # pylint: disable=cell-var-from-loop 305 | return src_node.fifo_map[obj.name][obj.idx] 306 | return obj 307 | _logger.debug('lets: %s', src_node.tensor.lets) 308 | lets = [_.visit(replace_refs_callback) for _ in src_node.tensor.lets] 309 | _logger.debug('replaced lets: %s', lets) 310 | _logger.debug('expr: %s', src_node.tensor.expr) 311 | expr = src_node.tensor.expr.visit(replace_refs_callback) 312 | _logger.debug('replaced expr: %s', expr) 313 | # TODO: build an index somewhere 314 | if isinstance(dst_node, SuperSinkNode): 315 | for stmt in stencil.output_stmts: 316 | if stmt.name == src_node.tensor.name: 317 | break 318 | else: 319 | raise util.InternalError('cannot find tensor %s' % 320 | src_node.tensor.name) 321 | dram_ref = ir.DRAMRef(haoda_type=src_node.tensor.haoda_type, 322 | # pylint: disable=undefined-loop-variable 323 | dram=stmt.dram, var=src_node.tensor.name, 324 | offset=src_node.pe_id) 325 | dst_node.lets.append(ir.Let( 326 | haoda_type=None, name=dram_ref, expr=fifo)) 327 | else: 328 | raise util.InternalError('unexpected node of type %s' % type(src_node)) 329 | 330 | src_node.exprs[fifo] = expr 331 | src_node.lets.extend(_ for _ in lets if _ not in src_node.lets) 332 | _logger.debug('fifo [%d]: %s%s => %s', fifo.depth, color_id(src_node), 333 | '' if fifo.write_lat is None else ' ~%d' % fifo.write_lat, 334 | color_id(dst_node)) 335 | 336 | for src_node in super_source.tpo_node_gen(): 337 | for dst_node in src_node.children: 338 | src_node.fifo(dst_node).depth += super_source.get_extra_depth( 339 | (src_node, dst_node)) 340 | 341 | for src_node, dst_node in super_source.bfs_edge_gen(): 342 | fifo = src_node.fifo(dst_node) 343 | _logger.debug('fifo [%d]: %s%s => %s', fifo.depth, color_id(src_node), 344 | '' if fifo.write_lat is None else ' ~%d' % fifo.write_lat, 345 | color_id(dst_node)) 346 | return super_source 347 | -------------------------------------------------------------------------------- /src/soda/grammar.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from haoda import ir 4 | from soda import util 5 | 6 | _logger = logging.getLogger().getChild(__name__) 7 | 8 | GRAMMAR = r''' 9 | SodaProgram: 10 | ( 11 | ('burst' 'width' ':' burst_width=INT) 12 | ('iterate' ':' iterate=INT) 13 | ('kernel' ':' app_name=ID) 14 | ('unroll' 'factor' ':' unroll_factor=INT) 15 | (input_stmts=InputStmt)+ 16 | (param_stmts=ParamStmt)* 17 | (local_stmts=LocalStmt)* 18 | (output_stmts=OutputStmt)+ 19 | )#; 20 | 21 | YesOrNo: 'yes'|'no'; 22 | 23 | Comment: /\s*#.*$/; 24 | 25 | FuncName: 'cos'|'sin'|'tan'|'acos'|'asin'|'atan'|'atan2'| 26 | 'cosh'|'sinh'|'tanh'|'acosh'|'asinh'|'atanh'| 27 | 'exp'|'frexp'|'ldexp'|'log'|'log10'|'modf'|'exp2'|'expm1'|'ilogb'|'log1p'|'log2'|'logb'|'scalbn'|'scalbln'| 28 | 'pow'|'sqrt'|'cbrt'|'hypot'| 29 | 'erf'|'erfc'|'tgamma'|'lgamma'| 30 | 'ceil'|'floor'|'fmod'|'trunc'|'round'|'lround'|'llround'|'rint'|'lrint'|'llrint'|'nearbyint'|'remainder'|'remquo'| 31 | 
'copysign'|'nan'|'nextafter'|'nexttoward'|'fdim'|'fmax'|'fmin'|'fabs'|'abs'|'fma'| 32 | 'min'|'max'|'select'; 33 | 34 | InputStmt: 'input' ('dram' dram=INT ('.' dram=INT)*)? haoda_type=Type ':' name=ID ('(' (tile_size=INT ',')* '*' ')')?; 35 | LocalStmt: 'local' haoda_type=Type ':' (let=Let)* ref=Ref '=' expr=Expr; 36 | OutputStmt: 'output' ('dram' dram=INT ('.' dram=INT)*)? haoda_type=Type ':' (let=Let)* ref=Ref '=' expr=Expr; 37 | 38 | ParamStmt: 'param' ('dram' dram=INT ('.' dram=INT)*)? haoda_type=Type (',' attr=ParamAttr)* ':' name=ID ('[' size=INT ']')*; 39 | ParamAttr: 'dup' dup=Int | partitioning=Partitioning; 40 | Partitioning: 41 | 'partition' strategy='complete' ('dim' '=' dim=Int)? | 42 | 'partition' strategy='cyclic' 'factor' '=' factor=Int ('dim' '=' dim=Int)?; 43 | ''' + ir.GRAMMAR 44 | 45 | class InputStmt(ir.Node): 46 | """Node for input statement, represents a tiled input tensor. 47 | 48 | Attributes: 49 | haoda_type: Type of this input tensor. 50 | dram: [int], dram id used to read this input 51 | name: str, name of this input tensor. 52 | tile_size: list of tile sizes. The last dimension should be 0. 53 | """ 54 | SCALAR_ATTRS = 'haoda_type', 'name' 55 | LINEAR_ATTRS = ('tile_size', 'dram',) 56 | def __init__(self, **kwargs): 57 | super().__init__(**kwargs) 58 | # pylint: disable=access-member-before-definition 59 | if not self.dram: 60 | self.dram = (0,) 61 | self.tile_size += (0,) 62 | 63 | def __str__(self): 64 | result = 'input {}: {}'.format(self.haoda_type, self.name) 65 | if self.tile_size[:-1]: 66 | result += '({}, *)'.format(', '.join(map(str, self.tile_size[:-1]))) 67 | return result 68 | 69 | class LocalStmtOrOutputStmt(ir.Node): 70 | SCALAR_ATTRS = 'haoda_type', 'ref', 'expr' 71 | LINEAR_ATTRS = ('let',) 72 | def __init__(self, **kwargs): 73 | super().__init__(**kwargs) 74 | var_types = {} 75 | # pylint: disable=access-member-before-definition 76 | for let in self.let: 77 | var_types[let.name] = let.haoda_type 78 | def set_var_type(obj, var_types): 79 | if isinstance(obj, ir.Var) and obj.name in var_types: 80 | obj.haoda_type = var_types[obj.name] 81 | return obj 82 | self.let = tuple(_.visit(set_var_type, var_types) for _ in self.let) 83 | self.expr = self.expr.visit(set_var_type, var_types) 84 | 85 | @property 86 | def name(self): 87 | return self.ref.name 88 | 89 | def __str__(self): 90 | if self.let: 91 | let = '\n {}\n '.format('\n '.join(map(str, self.let))) 92 | else: 93 | let = '' 94 | return '{} {}:{} {} = {}'.format(type(self).__name__[:-4].lower(), 95 | self.haoda_type, let, self.ref, 96 | ir.unparenthesize(self.expr)) 97 | 98 | class LocalStmt(LocalStmtOrOutputStmt): 99 | pass 100 | 101 | class OutputStmt(LocalStmtOrOutputStmt): 102 | LINEAR_ATTRS = LocalStmtOrOutputStmt.LINEAR_ATTRS + ('dram',) 103 | def __init__(self, **kwargs): 104 | super().__init__(**kwargs) 105 | # pylint: disable=access-member-before-definition 106 | if not self.dram: 107 | self.dram = (0,) 108 | 109 | class ParamStmt(ir.Node): 110 | SCALAR_ATTRS = 'haoda_type', 'attr', 'name', 'size' 111 | LINEAR_ATTRS = ('dram',) 112 | def __str__(self): 113 | return 'param {}{}: {}{}'.format( 114 | self.haoda_type, ''.join(map(', {}'.format, self.attr)), 115 | self.name, ''.join(map('[{}]'.format, self.size))) 116 | 117 | class ParamAttr(ir.Node): 118 | SCALAR_ATTRS = 'dup', 'partitioning' 119 | def __str__(self): 120 | if self.dup is not None: 121 | return 'dup {}'.format(self.dup) 122 | result = 'partition {0.strategy}'.format(self.partitioning) 123 | if self.partitioning.strategy == 
'cyclic': 124 | result += ' factor={}'.format(self.partitioning.factor) 125 | if self.partitioning.dim is not None: 126 | result += ' dim={}'.format(self.partitioning.dim) 127 | return result 128 | 129 | class SodaProgram(ir.Node): 130 | SCALAR_ATTRS = ('burst_width', 'iterate', 'app_name', 'unroll_factor', 131 | 'input_stmts', 'param_stmts', 'local_stmts', 'output_stmts') 132 | def __init__(self, **kwargs): 133 | super().__init__(**kwargs) 134 | for node in self.input_stmts: 135 | if hasattr(self, 'tile_size'): 136 | # pylint: disable=access-member-before-definition 137 | if self.tile_size != node.tile_size: 138 | msg = ('tile size %s doesn\'t match previous one %s' % 139 | # pylint: disable=access-member-before-definition 140 | (node.tile_size, self.tile_size)) 141 | raise util.SemanticError(msg) 142 | elif node.tile_size[:-1]: 143 | self.tile_size = node.tile_size 144 | self.dim = len(self.tile_size) 145 | # deal with 1D case 146 | if not hasattr(self, 'tile_size'): 147 | # pylint: disable=undefined-loop-variable 148 | self.tile_size = node.tile_size 149 | self.dim = len(self.tile_size) 150 | 151 | def __str__(self): 152 | return '\n'.join(filter(None, ( 153 | 'burst width: {}'.format(self.burst_width), 154 | 'iterate: {}'.format(self.iterate), 155 | 'kernel: {}'.format(self.app_name), 156 | 'unroll factor: {}'.format(self.unroll_factor), 157 | '\n'.join(map(str, self.input_stmts)), 158 | '\n'.join(map(str, self.param_stmts)), 159 | '\n'.join(map(str, self.local_stmts)), 160 | '\n'.join(map(str, self.output_stmts))))) 161 | 162 | CLASSES = ( 163 | InputStmt, 164 | LocalStmt, 165 | OutputStmt, 166 | ir.Let, 167 | ir.Ref, 168 | ir.Expr, 169 | ir.LogicAnd, 170 | ir.BinaryOr, 171 | ir.Xor, 172 | ir.BinaryAnd, 173 | ir.EqCmp, 174 | ir.LtCmp, 175 | ir.AddSub, 176 | ir.MulDiv, 177 | ir.Unary, 178 | ir.Operand, 179 | ir.Cast, 180 | ir.Call, 181 | ir.Var, 182 | ParamStmt, 183 | ParamAttr, 184 | SodaProgram, 185 | ) 186 | -------------------------------------------------------------------------------- /src/soda/mutator.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Optional, Set, Tuple 2 | import collections 3 | import logging 4 | import operator 5 | import types 6 | 7 | from haoda import ir 8 | from soda import core 9 | from soda import visitor as soda_visitor 10 | 11 | _logger = logging.getLogger().getChild(__name__) 12 | 13 | def shift(obj, offset, excluded=(), op=operator.sub, verbose=False): 14 | """Shift haoda.ir.Ref with the given offset. 15 | 16 | All haoda.ir.Ref, excluding the given names, will be shifted with the 17 | given offset using the given operator. The operator will be applied pointwise 18 | on the original index and the given offset. 19 | 20 | Args: 21 | obj: A haoda.ir.Node or a soda.core.Tensor object. 22 | offset: Second operand given to the operator. 23 | excluded: Sequence of names to be excluded from the mutation. Defaults to (). 24 | op: Shifting operator. Should be either add or sub. Defaults to sub. 25 | verbose: Whether to log shifts. Defaults to False. 26 | Returns: 27 | Mutated obj. If obj is an IR node, it will be a different object than the 28 | input. If obj is a tensor, it will be the same object but with fields 29 | mutated.
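For example, with the default subtraction operator, a reference a(1, 2) shifted by offset (1, 1) becomes a(0, 1).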
30 | """ 31 | if op not in (operator.add, operator.sub): 32 | _logger.warn('shifting with neither + nor -, which most likely is an error') 33 | def visitor(obj, args): 34 | if isinstance(obj, ir.Ref): 35 | if obj.name not in excluded: 36 | new_idx = tuple(op(a, b) for a, b in zip(obj.idx, offset)) 37 | if verbose: 38 | _logger.debug('reference %s(%s) shifted to %s(%s)', 39 | obj.name, ', '.join(map(str, obj.idx)), 40 | obj.name, ', '.join(map(str, new_idx))) 41 | obj.idx = new_idx 42 | if isinstance(obj, ir.Node): 43 | return obj.visit(visitor) 44 | if isinstance(obj, core.Tensor): 45 | obj.mutate(visitor) 46 | else: 47 | raise TypeError('argument is not an IR node or a tensor') 48 | return obj 49 | 50 | def normalize(obj): 51 | """Make the least access index 0. 52 | 53 | Works on an ir.Node or an iterable of ir.Nodes. If it is shifted, a different 54 | object is constructed and returned. Otherwise, obj will be returned as-is. 55 | 56 | Args: 57 | obj: A node or an iterable of nodes. 58 | Returns: 59 | Normalized node or iterable. 60 | Raises: 61 | TypeError: If argument is not an ir.Node or an iterable of ir.Nodes. 62 | """ 63 | if isinstance(obj, types.GeneratorType): 64 | return normalize(tuple(obj)) 65 | norm_idx = soda_visitor.get_normalize_index(obj) 66 | shifter = lambda x: shift(x, norm_idx) if any(norm_idx) else x 67 | if isinstance(obj, collections.Iterable): 68 | return type(obj)(map(shifter, obj)) 69 | if isinstance(obj, ir.Node): 70 | return shifter(obj) 71 | raise TypeError('argument is not an ir.Node or an iterable of ir.Nodes') 72 | -------------------------------------------------------------------------------- /src/soda/util.py: -------------------------------------------------------------------------------- 1 | import functools 2 | import operator 3 | 4 | def serialize(vec, tile_size): 5 | return sum((vec[i]*functools.reduce(operator.mul, tile_size[:i]) 6 | for i in range(1, len(tile_size))), 7 | vec[0]) 8 | 9 | def serialize_iter(iterative, tile_size): 10 | return [serialize(x, tile_size) for x in iterative] 11 | 12 | def deserialize(offset, tile_size): 13 | return tuple(deserialize_generator(offset, tile_size)) 14 | 15 | def deserialize_generator(offset, tile_size): 16 | for size in tile_size[:-1]: 17 | yield offset % size 18 | offset = offset // size 19 | yield offset 20 | -------------------------------------------------------------------------------- /src/soda/visitor.py: -------------------------------------------------------------------------------- 1 | import collections 2 | 3 | from haoda import ir 4 | from soda import core 5 | 6 | def get_load_tuple(obj): 7 | """Get all load references as a tuple. 8 | 9 | Args: 10 | obj: A haoda.ir.Node object or a soda.core.Tensor object. 11 | 12 | Returns: 13 | A tuple of all the load references. 14 | 15 | Raises: 16 | TypeError: If obj is not an IR node or a Tensor. 17 | """ 18 | def visitor(obj, loads): 19 | if isinstance(obj, ir.Ref): 20 | loads.append(obj) 21 | return obj 22 | loads = [] 23 | if isinstance(obj, ir.Node): 24 | obj.visit(visitor, loads) 25 | elif isinstance(obj, core.Tensor): 26 | obj.visit_loads(visitor, loads) 27 | else: 28 | raise TypeError('argument is not an IR node or a Tensor') 29 | return tuple(loads) 30 | 31 | def get_load_set(obj): 32 | """Get all unique load references as a tuple. 33 | 34 | Args: 35 | obj: A haoda.ir.Node object. 36 | 37 | Returns: 38 | A tuple of all unique loads. 39 | 40 | Raises: 41 | TypeError: If obj is not an IR node. 
42 | """ 43 | def visitor(obj, loads): 44 | if isinstance(obj, ir.Ref): 45 | loads[obj] = None 46 | return obj 47 | loads = collections.OrderedDict() 48 | if isinstance(obj, ir.Node): 49 | obj.visit(visitor, loads) 50 | else: 51 | raise TypeError('argument is not an IR node or a Tensor') 52 | return tuple(loads) 53 | 54 | def get_load_dict(obj): 55 | """Get all load references as a dict mapping names to lists of loads. 56 | 57 | Args: 58 | obj: A soda.core.Tensor object. 59 | 60 | Returns: 61 | A dict mapping accessed tensor names to the corresponding lists of loads. 62 | 63 | Raises: 64 | TypeError: If obj is not a Tensor. 65 | """ 66 | def visitor(obj, loads): 67 | if isinstance(obj, ir.Ref): 68 | loads.setdefault(obj.name, []).append(obj) 69 | return obj 70 | loads = collections.OrderedDict() 71 | if isinstance(obj, core.Tensor): 72 | obj.visit_loads(visitor, loads) 73 | else: 74 | raise TypeError('argument is not a Tensor') 75 | return loads 76 | 77 | def get_normalize_index(obj) -> tuple: 78 | """Get the normalize index that will make the least access index 0. 79 | 80 | Args: 81 | obj: A node or an iterable of nodes. 82 | Returns: 83 | Normalize index as a tuple. 84 | Raises: 85 | TypeError: If argument is not an ir.Node or an iterable of ir.Nodes. 86 | """ 87 | if not isinstance(obj, (collections.Iterable, ir.Node)): 88 | raise TypeError('argument is not an ir.Node or an iterable of ir.Nodes') 89 | if isinstance(obj, ir.Node): 90 | obj = (obj,) 91 | try: 92 | return min(sum(map(get_load_tuple, obj), ()), 93 | key=lambda load: tuple(reversed(load.idx))).idx 94 | except ValueError as e: 95 | if str(e) == 'min() arg is an empty sequence': 96 | return () 97 | raise e 98 | -------------------------------------------------------------------------------- /src/sodac: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | import argparse 3 | import logging 4 | import os 5 | import sys 6 | 7 | import textx 8 | 9 | from haoda import util 10 | from soda import core 11 | from soda import grammar 12 | from soda.codegen.xilinx import opencl as xocl 13 | 14 | logging.basicConfig(level=logging.WARNING, 15 | format='%(levelname)s:%(name)s:%(lineno)d: %(message)s') 16 | logger = logging.getLogger().getChild(os.path.basename(__file__)) 17 | 18 | def main(): 19 | parser = argparse.ArgumentParser( 20 | prog='sodac', 21 | description='Stencil with Optimized Dataflow Architecture ' 22 | '(SODA) compiler') 23 | parser.add_argument('--verbose', '-v', 24 | action='count', 25 | dest='verbose', 26 | help='increase verbosity') 27 | parser.add_argument('--quiet', '-q', 28 | action='count', 29 | dest='quiet', 30 | help='decrease verbosity') 31 | parser.add_argument('--burst-width', 32 | type=int, 33 | dest='burst_width', 34 | help='override burst width') 35 | parser.add_argument('--unroll-factor', 36 | type=int, 37 | metavar='UNROLL_FACTOR', 38 | dest='unroll_factor', 39 | help='override unroll factor') 40 | parser.add_argument('--tile-size', 41 | type=int, 42 | nargs='+', 43 | metavar='TILE_SIZE', 44 | dest='tile_size', 45 | help='override tile size; ' 46 | '0 means no overriding on that dimension') 47 | parser.add_argument('--dram-in', 48 | type=str, 49 | dest='dram_in', 50 | help='override DRAM configuration for input') 51 | parser.add_argument('--dram-out', 52 | type=str, 53 | dest='dram_out', 54 | help='override DRAM configuration for output') 55 | parser.add_argument('--iterate', 56 | type=int, 57 | metavar='#ITERATION', 58 | dest='iterate', 59 | 
help='override iterate directive; ' 60 | 'repeat execution multiple times iteratively') 61 | parser.add_argument(type=str, 62 | dest='soda_src', 63 | metavar='file', 64 | help='soda source code') 65 | 66 | xocl.add_arguments(parser.add_argument_group('Xilinx OpenCL backend')) 67 | 68 | args = parser.parse_args() 69 | verbose = 0 if args.verbose is None else args.verbose 70 | quiet = 0 if args.quiet is None else args.quiet 71 | logging_level = (quiet-verbose)*10+logging.getLogger().getEffectiveLevel() 72 | if logging_level > logging.CRITICAL: 73 | logging_level = logging.CRITICAL 74 | if logging_level < logging.DEBUG: 75 | logging_level = logging.DEBUG 76 | logging.getLogger().setLevel(logging_level) 77 | logger.info('set log level to %s', logging.getLevelName(logging_level)) 78 | # TODO: check tile size 79 | 80 | soda_mm = textx.metamodel_from_str(grammar.GRAMMAR, classes=grammar.CLASSES) 81 | logger.info('build metamodel') 82 | try: 83 | if args.soda_src == '-': 84 | soda_file_name = sys.stdin.name 85 | soda_model = soda_mm.model_from_str(sys.stdin.read()) 86 | else: 87 | with open(args.soda_src, 'r') as soda_file: 88 | soda_model = soda_mm.model_from_str(soda_file.read()) 89 | soda_file_name = soda_file.name 90 | logger.info('%s parsed as soda file', soda_file_name) 91 | logger.debug('soda program parsed:\n %s', 92 | str(soda_model).replace('\n', '\n ')) 93 | 94 | tile_size = [] 95 | for dim in range(soda_model.dim-1): 96 | if (args.tile_size is not None and 97 | dim < len(args.tile_size) and 98 | args.tile_size[dim] > 0): 99 | tile_size.append(args.tile_size[dim]) 100 | else: 101 | tile_size.append(soda_model.tile_size[dim]) 102 | tile_size.append(0) 103 | 104 | if args.unroll_factor is not None: 105 | unroll_factor = args.unroll_factor 106 | else: 107 | unroll_factor = soda_model.unroll_factor 108 | 109 | stencil = core.Stencil( 110 | burst_width=args.burst_width if args.burst_width is not None 111 | else soda_model.burst_width, 112 | iterate=args.iterate if args.iterate is not None 113 | else soda_model.iterate, 114 | dram_in=args.dram_in, 115 | dram_out=args.dram_out, 116 | app_name=soda_model.app_name, 117 | input_stmts=soda_model.input_stmts, 118 | param_stmts=soda_model.param_stmts, 119 | local_stmts=soda_model.local_stmts, 120 | output_stmts=soda_model.output_stmts, 121 | dim=soda_model.dim, 122 | tile_size=tile_size, 123 | unroll_factor=unroll_factor) 124 | 125 | logger.debug('stencil obtained: %s', stencil) 126 | 127 | xocl.print_code(stencil, args) 128 | 129 | except textx.exceptions.TextXSyntaxError as e: 130 | logger.error(e) 131 | sys.exit(1) 132 | except util.SemanticError as e: 133 | logger.error(e) 134 | sys.exit(1) 135 | except util.SemanticWarn as w: 136 | logger.warning(w) 137 | 138 | if __name__ == '__main__': 139 | main() 140 | -------------------------------------------------------------------------------- /tests/src/blur.soda: -------------------------------------------------------------------------------- 1 | kernel: blur 2 | burst width: 512 3 | unroll factor: 16 4 | input uint16: input(2000, *) 5 | local uint16: 6 | blur_x(0, 0) = (input(0, 0) + input(0, 1) + input(0, 2)) / 3 7 | output uint16: 8 | blur_y(0, 0) = (blur_x(0, 0) + blur_x(1, 0) + blur_x(2, 0)) / 3 9 | iterate: 1 10 | -------------------------------------------------------------------------------- /tests/src/denoise2d.soda: -------------------------------------------------------------------------------- 1 | kernel: denoise2d 2 | burst width: 512 3 | unroll factor: 4 4 | iterate: 1 5 | 6 | input 
/tests/src/blur.soda:
--------------------------------------------------------------------------------
kernel: blur
burst width: 512
unroll factor: 16
input uint16: input(2000, *)
local uint16:
  blur_x(0, 0) = (input(0, 0) + input(0, 1) + input(0, 2)) / 3
output uint16:
  blur_y(0, 0) = (blur_x(0, 0) + blur_x(1, 0) + blur_x(2, 0)) / 3
iterate: 1
--------------------------------------------------------------------------------
/tests/src/denoise2d.soda:
--------------------------------------------------------------------------------
kernel: denoise2d
burst width: 512
unroll factor: 4
iterate: 1

input float: f
input float: u(32, *)
local float: diff_u(0, 0) = u(0, 0) - u( 0, -1)
local float: diff_d(0, 0) = u(0, 0) - u( 0, 1)
local float: diff_l(0, 0) = u(0, 0) - u(-1, 0)
local float: diff_r(0, 0) = u(0, 0) - u( 1, 0)
local float: g(0, 0) = 1.0f / sqrt(1.0f+
    diff_u(0, 0) * diff_u(0, 0)+
    diff_d(0, 0) * diff_d(0, 0)+
    diff_l(0, 0) * diff_l(0, 0)+
    diff_r(0, 0) * diff_r(0, 0))
local float: r0(0, 0) = u(0, 0) * f(0, 0) * 4.9f
local float:
  r1(0, 0) = (r0(0, 0) * (2.5f + r0(0, 0) * (10.2f + r0(0, 0))))*
             (4.3f + r0(0, 0) * (5.4f + r0(0, 0) * ( 6.3f + r0(0, 0))))
output float:
  output(0, 0) = (u(0, 0) + 7.7f *
      (u( 0, 1) * g( 0, 1) +
       u( 0, -1) * g( 0, -1) +
       u(-1, 0) * g(-1, 0) +
       u( 1, 0) * g( 1, 0) +
       5.7f * f(0, 0) * r1(0, 0))) * (11.1f + 7.7f *
      (g( 0, 1) +
       g( 0, -1) +
       g(-1, 0) +
       g( 1, 0) + 5.7f))
--------------------------------------------------------------------------------
/tests/src/denoise3d.soda:
--------------------------------------------------------------------------------
kernel: denoise3d
burst width: 512
unroll factor: 2
iterate: 1

input float: f
input float: u(32, 32, *)
local float: diff_u(0, 0, 0) = u(0, 0, 0) - u( 0, -1, 0)
local float: diff_d(0, 0, 0) = u(0, 0, 0) - u( 0, 1, 0)
local float: diff_l(0, 0, 0) = u(0, 0, 0) - u(-1, 0, 0)
local float: diff_r(0, 0, 0) = u(0, 0, 0) - u( 1, 0, 0)
local float: diff_i(0, 0, 0) = u(0, 0, 0) - u( 0, 0, -1)
local float: diff_o(0, 0, 0) = u(0, 0, 0) - u( 0, 0, 1)
local float: g(0, 0, 0) = 1.0f / sqrt(0.00005f+
    diff_u(0, 0, 0)*diff_u(0, 0, 0)+
    diff_d(0, 0, 0)*diff_d(0, 0, 0)+
    diff_l(0, 0, 0)*diff_l(0, 0, 0)+
    diff_r(0, 0, 0)*diff_r(0, 0, 0)+
    diff_i(0, 0, 0)*diff_i(0, 0, 0)+
    diff_o(0, 0, 0)*diff_o(0, 0, 0))
local float: r0(0, 0, 0) = u(0, 0, 0) * f(0, 0, 0) * (1.0f/0.03f)
local float:
  r1(0, 0, 0) = (r0(0, 0, 0) * (2.38944f + r0(0, 0, 0) * (0.950037f + r0(0, 0, 0)))) /
                (4.65314f + r0(0, 0, 0) * (2.57541f + r0(0, 0, 0) * (1.48937f + r0(0, 0, 0))))
output float:
  output(0, 0, 0) = (u(0, 0, 0) + 5.0f * (u(1, 0, 0) * g(1, 0, 0) +
      u(-1, 0, 0) * g(-1, 0, 0) + u(0, 1, 0) * g(0, 1, 0) +
      u(0, -1, 0) * g(0, -1, 0) + u(0, 0, 1) * g(0, 0, 1) +
      u(0, 0, -1) * g(0, 0, -1) + (1.0f/0.03f) * f(0, 0, 0)*r1(0, 0, 0))) /
      (1.0f + 5.0f*(g(1, 0, 0) + g(-1, 0, 0) + g(0, 1, 0) + g(0, -1, 0) +
      g(0, 0, 1) + g(0, 0, -1) + (1.0f/0.03f)))
--------------------------------------------------------------------------------
/tests/src/heat3d.soda:
--------------------------------------------------------------------------------
kernel: heat3d
burst width: 512
unroll factor: 2
input float: in(32, 32, *)
output float: out(0, 0, 0) =
    .125f * (in(1, 0, 0) - 2.f * in(0, 0, 0) + in(-1, 0, 0)) +
    .125f * (in(0, 1, 0) - 2.f * in(0, 0, 0) + in( 0, -1, 0)) +
    .125f * (in(0, 0, 1) - 2.f * in(0, 0, 0) + in( 0, 0, -1)) +
    in(0, 0, 0)
iterate: 2
--------------------------------------------------------------------------------
/tests/src/jacobi2d.soda:
--------------------------------------------------------------------------------
kernel: jacobi2d
burst width: 512
unroll factor: 2
input float: t1(32, *)
output float: t0(0, 0) = (
    t1( 0, 1) + t1( 1, 0) + t1( 0, 0) + t1( 0, -1) + t1(-1, 0)) * 0.2f
iterate: 2
--------------------------------------------------------------------------------
/tests/src/jacobi3d.soda:
--------------------------------------------------------------------------------
kernel: jacobi3d
burst width: 512
unroll factor: 2
input float: t1(32, 32, *)
output float: t0(0, 0, 0) = (t1(0, 0, 0)
    + t1(1, 0, 0) + t1(-1, 0, 0)
    + t1(0, 1, 0) + t1( 0, -1, 0)
    + t1(0, 0, 1) + t1( 0, 0, -1)
    ) * 0.142857142f
iterate: 2
--------------------------------------------------------------------------------
/tests/src/seidel2d.soda:
--------------------------------------------------------------------------------
kernel: seidel2d
burst width: 512
unroll factor: 2
input float: input(32, *)
local float:
  tmp(0, 0) = (input(-1, 0) + input( 0, 0) + input( 1, 0)) * .3333333f
output float:
  output(0, 0) = (tmp(0, -1) + tmp(0, 0) + tmp(0, 1)) * .3333333f
iterate: 2
--------------------------------------------------------------------------------
/tests/src/sobel2d.soda:
--------------------------------------------------------------------------------
kernel: sobel2d
burst width: 512
unroll factor: 2
input uint16: img(32, *)
local uint16: mag_x(0, 0) =
    (img(1, -1) - img(-1, -1)) +
    (img(1, 0) - img(-1, 0)) * 3 +
    (img(1, 1) - img(-1, 1))
local uint16: mag_y(0, 0) =
    (img(-1, 1) - img(-1, -1)) +
    (img( 0, 1) - img( 0, -1)) * 3 +
    (img( 1, 1) - img( 1, -1))
output uint16:
  mag(0, 0) = 65535 - (mag_x(0, 0) * mag_x(0, 0) + mag_y(0, 0) * mag_y(0, 0))
iterate: 1
--------------------------------------------------------------------------------
/tests/test-compilation.sh:
--------------------------------------------------------------------------------
#!/bin/bash
set -eo pipefail
# Quote "$0" and "$@" so paths and extra flags with spaces survive expansion.
base_dir="$(dirname "$0")/.."
include_path="$(vivado_hls -l /dev/null -root_dir|tail -n2|head -n1)/include"
for file in "${base_dir}/tests/src/"*.soda
do
  echo -n "Compiling $(basename "${file}") ..."
  "${base_dir}/src/sodac" "${file}" --xocl-kernel - "$@" \
    | g++ -x c++ -std=c++11 -fsyntax-only "-I${include_path}" -c -
  echo " PASS"
done
--------------------------------------------------------------------------------
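To make the DSL semantics concrete: the two statements in tests/src/blur.soda above chain a horizontal and a vertical 3-tap average. The following pure-Python reference of that computation is illustrative only and unrelated to the generated HLS code; integer division models the uint16 arithmetic, and the (i, j) index order mirrors the DSL's access offsets.

    # blur_x(0, 0) = (input(0, 0) + input(0, 1) + input(0, 2)) / 3
    # blur_y(0, 0) = (blur_x(0, 0) + blur_x(1, 0) + blur_x(2, 0)) / 3
    def blur(img):
      h, w = len(img), len(img[0])
      blur_x = [[(img[i][j] + img[i][j + 1] + img[i][j + 2]) // 3
                 for j in range(w - 2)] for i in range(h)]
      return [[(blur_x[i][j] + blur_x[i + 1][j] + blur_x[i + 2][j]) // 3
               for j in range(w - 2)] for i in range(h - 2)]

    print(blur([[9 * i + 3 * j for j in range(4)] for i in range(4)]))
    # prints [[12, 15], [21, 24]]: on a linear ramp the blur reproduces the
    # value at the center of each 3x3 window.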