├── .gitignore ├── .pylintrc ├── .travis.yml ├── CHANGELOG.md ├── LICENSE ├── MANIFEST.in ├── README.rst ├── nn_dataflow ├── __init__.py ├── core │ ├── __init__.py │ ├── buf_shr_scheme.py │ ├── cost.py │ ├── data_category_enum.py │ ├── data_dim_loops.py │ ├── data_layout.py │ ├── fmap_range.py │ ├── int_range.py │ ├── inter_layer_pipeline.py │ ├── layer.py │ ├── loop_blocking.py │ ├── loop_blocking_scheme.py │ ├── loop_blocking_solver.py │ ├── loop_enum.py │ ├── map_strategy.py │ ├── mem_hier_enum.py │ ├── nested_loop_desc.py │ ├── network.py │ ├── nn_dataflow.py │ ├── nn_dataflow_scheme.py │ ├── node_region.py │ ├── option.py │ ├── parallel_enum.py │ ├── partition.py │ ├── partition_scheme.py │ ├── phy_dim2.py │ ├── pipeline_segment.py │ ├── pipeline_segment_timing.py │ ├── resource.py │ ├── scheduling.py │ └── scheduling_constraint.py ├── nns │ ├── __init__.py │ ├── alex_net.py │ ├── googlenet.py │ ├── lstm_gnmt.py │ ├── lstm_phoneme.py │ ├── lstm_showtell.py │ ├── mlp_l.py │ ├── mlp_m.py │ ├── mlp_s.py │ ├── resnet152.py │ ├── resnet50.py │ ├── vgg19_net.py │ ├── vgg_net.py │ └── zfnet.py ├── tests │ ├── __init__.py │ ├── dataflow_test │ │ ├── __init__.py │ │ ├── test_nn_dataflow.py │ │ └── test_scheduling.py │ ├── loop_blocking_test │ │ ├── __init__.py │ │ ├── test_loop_blocking.py │ │ ├── test_loop_blocking_fixture.py │ │ ├── test_loop_blocking_partition.py │ │ ├── test_loop_blocking_scheme.py │ │ └── test_loop_blocking_solver.py │ ├── map_strategy_test │ │ ├── __init__.py │ │ ├── test_map_strategy.py │ │ ├── test_map_strategy_eyeriss.py │ │ └── test_map_strategy_fixture.py │ ├── nns_test │ │ ├── __init__.py │ │ └── test_nns.py │ ├── partition_test │ │ ├── __init__.py │ │ ├── test_gen_partition.py │ │ ├── test_partition_fixture.py │ │ ├── test_proc_data_range.py │ │ └── test_unit_nhops_to_proc_region.py │ ├── pipeline_test │ │ ├── __init__.py │ │ ├── test_inter_layer_pipeline.py │ │ ├── test_pipeline_fixture.py │ │ ├── test_pipeline_segment.py │ │ └── 
test_pipeline_segment_timing.py │ ├── tool_test │ │ ├── __init__.py │ │ ├── test_nn_dataflow_search.py │ │ └── test_nn_layer_stats.py │ └── unit_test │ │ ├── __init__.py │ │ ├── test_buf_shr_scheme.py │ │ ├── test_cost.py │ │ ├── test_data_dim_loops.py │ │ ├── test_data_layout.py │ │ ├── test_fmap_range.py │ │ ├── test_int_range.py │ │ ├── test_layer.py │ │ ├── test_nested_loop_desc.py │ │ ├── test_network.py │ │ ├── test_nn_dataflow_scheme.py │ │ ├── test_node_region.py │ │ ├── test_option.py │ │ ├── test_partition_scheme.py │ │ ├── test_phy_dim2.py │ │ ├── test_resource.py │ │ ├── test_scheduling_condition.py │ │ ├── test_scheduling_constraint.py │ │ ├── test_scheduling_result.py │ │ ├── test_util.py │ │ └── test_version.py ├── tools │ ├── __init__.py │ ├── nn_dataflow_search.py │ └── nn_layer_stats.py ├── util.py └── version.py ├── requirements.txt └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[cod] 2 | *$py.class 3 | 4 | eggs/ 5 | .eggs/ 6 | *.egg-info/ 7 | *.egg 8 | 9 | # Editor related. 
10 | settings.json -------------------------------------------------------------------------------- /.pylintrc: -------------------------------------------------------------------------------- 1 | # pylint configuration file 2 | 3 | [MASTER] 4 | extension-pkg-whitelist=numpy 5 | 6 | [MESSAGES CONTROL] 7 | disable= 8 | I0011, # locally-disabled, 9 | C0305, # trailing-newlines, 10 | C0325, # superfluous-parens, 11 | C0415, # import-outside-toplevel, 12 | W0105, # pointless-string-statement, 13 | W0141, # bad-builtin, 14 | 15 | [BASIC] 16 | # Allow single-char and two-char variable names 17 | variable-rgx=[a-z_][a-z0-9_]*$ 18 | 19 | [DESIGN] 20 | max-args=25 21 | max-attributes=15 22 | max-branches=25 23 | max-locals=50 24 | max-module-lines=2000 25 | max-statements=200 26 | 27 | [SIMILARITIES] 28 | ignore-imports=yes 29 | 30 | [TYPECHECK] 31 | ignored-modules=numpy 32 | ignored-classes=numpy 33 | 34 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | python: 4 | - 3.6 5 | 6 | install: 7 | - pip install . 8 | - pip install -r requirements.txt 9 | - pip install pylint 10 | - pip install coveralls 11 | 12 | script: 13 | - pytest -n 12 --cov=nn_dataflow 14 | 15 | after_success: 16 | - pylint --disable=R0801 nn_dataflow/ 17 | - coveralls 18 | 19 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2020, Tsinghua IDEAL, Stanford MAST 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 
11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include *.txt *.md *.rst 2 | include LICENSE 3 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | .. image:: https://travis-ci.org/stanford-mast/nn_dataflow.svg?branch=master 2 | :target: https://travis-ci.org/stanford-mast/nn_dataflow 3 | .. 
image:: https://coveralls.io/repos/github/stanford-mast/nn_dataflow/badge.svg?branch=master 4 | :target: https://coveralls.io/github/stanford-mast/nn_dataflow?branch=master 5 | 6 | 7 | Neural Network Dataflow Scheduling 8 | ================================== 9 | 10 | This Python tool allows you to explore the energy-efficient dataflow scheduling 11 | for neural networks (NNs), including array mapping, loop blocking and 12 | reordering, and (coarse-grained) parallel processing within and across layers. 13 | 14 | For hardware, we assume an Eyeriss-style NN accelerator [Chen16]_, i.e., a 2D 15 | array of processing elements (PEs) with a local register file in each PE, and a 16 | global SRAM buffer shared by all PEs. We further support a tiled architecture 17 | with multiple nodes that can partition and process the NN computations in 18 | parallel. Each node is an Eyeriss-style engine as above. 19 | 20 | In software, we decouple the dataflow scheduling into three subproblems: 21 | 22 | - Array mapping, which deals with mapping one 2D convolution computation (one 23 | 2D ifmap convolves with one 2D filter to get one 2D ofmap) onto the hardware 24 | PE array. We support row stationary mapping [Chen16]_. 25 | - Loop blocking and reordering, which decides the order between all 2D 26 | convolutions by blocking and reordering the nested loops. We support 27 | exhaustive search over all blocking and reordering schemes [Yang16]_, and 28 | analytical bypass solvers [Gao17]_. 29 | - Parallel processing, which partitions the NN computations across the multiple 30 | tiled engines. We support both intra-layer and inter-layer parallelism. For 31 | intra-layer, we support batch partitioning, fmap partitioning, output 32 | partitioning, input partitioning, and the combination between them (hybrid) 33 | [Gao17]_. We also explore various dataflow optimizations including access 34 | forwarding and buffer sharing [Gao19]_. We use exhaustive search within each 35 | layer. 
For inter-layer, we support spatial pipelining (inter-layer 36 | pipelining) and temporal pipelining (time multiplexing without writing back 37 | intermediate data) as well as their optimized scheduling [Gao19]_. We use 38 | layer-wise greedy beam search across layers. 39 | 40 | See the details in our ASPLOS'17 [Gao17]_ and ASPLOS'19 [Gao19]_ papers. 41 | 42 | If you use this tool in your work, we kindly request that you reference our 43 | paper(s) below, and send us a citation of your work. 44 | 45 | - Gao et al., "TETRIS: Scalable and Efficient Neural Network Acceleration with 46 | 3D Memory", in ASPLOS, April 2017. 47 | 48 | - Gao et al., "TANGRAM: Optimized Coarse-Grained Dataflow for Scalable NN 49 | Accelerators", in ASPLOS. April 2019. 50 | 51 | 52 | Install 53 | ------- 54 | 55 | ``nn_dataflow`` supports Python 3.6 and above. 56 | 57 | ``nn_dataflow`` can be directly used without installation if you have first 58 | defined the environment variable ``PYTHONPATH`` to include the top directory path. 59 | See the Usage section below for details. 60 | 61 | ``nn_dataflow`` has been registered on `PyPI 62 | `_, so it can be installed through 63 | ``pip`` as:: 64 | 65 | > pip install nn-dataflow 66 | 67 | And ``pip`` will take care of all dependencies. 68 | 69 | To only install ``nn_dataflow`` in local user install directory (without 70 | ``sudo``), and/or to install in editable mode, at the top directory do:: 71 | 72 | > pip install --user -e . 73 | 74 | 75 | Usage 76 | ----- 77 | 78 | First, define the NN structure in ``nn_dataflow/nns``. We already defined 79 | several popular NNs for you, including AlexNet, VGG-16, GoogLeNet, ResNet-152, 80 | etc. 81 | 82 | Then, use ``nn_dataflow/tools/nn_dataflow_search.py`` to search for the optimal 83 | dataflow for the NN. 
For detailed options, type:: 84 | 85 | > python ./nn_dataflow/tools/nn_dataflow_search.py -h 86 | 87 | You can specify NN batch size and word size, PE array dimensions, number of 88 | tile nodes, register file and global buffer capacity, and the energy cost of 89 | all components. Note that, the energy cost of array bus should be the average 90 | energy of transferring the data from the buffer to one PE, *not* local neighbor 91 | transfer; the unit static energy cost should be the static energy of *all* 92 | nodes in one clock cycle. 93 | 94 | Other options include: 95 | 96 | - ``-g``, ``--goal``: ``E``, ``D``, or ``ED``. the optimization goal, e(nergy), 97 | d(elay), or ED product. 98 | - ``--mem-type``: ``2D`` or ``3D``. With 2D memory, memory channels are only on 99 | the four corners of the chip; with 3D memory, memory channels are on the top 100 | of all tile nodes (one per each). 101 | - ``--bus-width``: the multicast bus bit width in the PE array for one data 102 | type. Set to 0 to ignore multicast overheads. 103 | - ``--dram-bw``: ``float`` or ``inf``. Total DRAM bandwidth for all tile nodes, 104 | in bytes per cycle. 105 | - ``--disable-bypass``: a combination of ``i``, ``o``, ``f``, whether to 106 | disallow global buffer bypass for ifmaps, ofmaps, and weights. 107 | - ``--solve-loopblocking``: whether to use analytical bypass solvers for loop 108 | blocking and reordering. See [Gao17]_. 109 | - ``--hybrid-partitioning``: whether to use hybrid partitioning in [Gao17]_. 110 | If not enabled, use naive partitioning, i.e., fmap partitioning for CONV 111 | layers, and output partitioning for FC layers. 112 | - ``--batch-partitioning`` and ``--ifmap-partitioning``: whether the hybrid 113 | partitioning also explores batch and input partitioning. 114 | - ``--enable-access-forwarding``: access forwarding, where the nodes fetch 115 | disjoint subsets of data and forward them to other nodes. See [Gao19]_. 
116 | - ``--enable-gbuf-sharing``: buffer sharing, where the global buffer capacity is 117 | shared across nodes through NoC. See [Gao19]_. 118 | - ``--enable-save-writeback``: allow to elide the intermediate data writeback to 119 | memory when switching between layers if it is possible to store the entire 120 | data set in on-chip buffers. 121 | - ``--interlayer-partition``: whether to use inter-layer pipelining to 122 | partition resources across multiple layers and process them simultaneously. 123 | - ``--layer-pipeline-time-overhead``, ``--layer-pipeline-max-degree``: 124 | constrain the configuration space of inter-layer pipelining, by specifying 125 | the maximum execution time overhead, or the maximum pipelining degree. 126 | - ``--disable-interlayer-opt``: disable optimizations and only allow basic 127 | inter-layer pipelining. 128 | 129 | 130 | Code Structure 131 | -------------- 132 | 133 | - ``nn_dataflow`` 134 | - ``core`` 135 | - Top-level dataflow exploration: ``nn_dataflow``, 136 | ``nn_dataflow_scheme``. 137 | - Layer scheduling: ``scheduling``. 138 | - Array mapping: ``map_strategy``. 139 | - Loop blocking and reordering: ``loop_blocking``, 140 | ``loop_blocking_scheme``, ``loop_blocking_solver``. 141 | - Intra-layer partitioning: ``partition``, ``partition_scheme``, 142 | ``buf_shr_scheme``. 143 | - Inter-layer pipelining: ``inter_layer_pipeline``, 144 | ``pipeline_segment``. 145 | - Network and layer: ``network``, ``layer``. 146 | - ``nns``: example NN definitions. 147 | - ``tests``: unit tests. 148 | - ``tools``: executables. 149 | 150 | 151 | Verification and Testing 152 | ------------------------ 153 | 154 | To verify the tool against the Eyeriss result [Chen16]_, see 155 | ``nn_dataflow/tests/dataflow_test/test_nn_dataflow.py``. 
156 | 157 | To run (unit) tests, do one of the following:: 158 | 159 | > python -m unittest discover 160 | 161 | > python -m pytest 162 | 163 | > pytest 164 | 165 | To check code coverage with ``pytest-cov`` plug-in:: 166 | 167 | > pytest --cov=nn_dataflow 168 | 169 | 170 | Copyright & License 171 | ------------------- 172 | 173 | ``nn_dataflow`` is free software; you can redistribute it and/or modify it 174 | under the terms of the `BSD License `__ as published by the Open 175 | Source Initiative, revised version. 176 | 177 | ``nn_dataflow`` was originally written by Mingyu Gao at Stanford University, 178 | and per Stanford University policy, the copyright of this original code remains 179 | with the Board of Trustees of Leland Stanford Junior University. 180 | 181 | 182 | References 183 | ---------- 184 | 185 | .. [Gao19] Gao, Yang, Pu, Horowitz, and Kozyrakis, `TANGRAM: Optimized 186 | Coarse-Grained Dataflow for Scalable NN Accelerators 187 | `__, in ASPLOS. April, 2019. 188 | 189 | .. [Gao17] Gao, Pu, Yang, Horowitz, and Kozyrakis, `TETRIS: Scalable and 190 | Efficient Neural Network Acceleration with 3D Memory 191 | `__, in ASPLOS. April, 2017. 192 | 193 | .. [Chen16] Chen, Emer, and Sze, `Eyeriss: A Spatial Architecture for 194 | Energy-Efficient Dataflow for Convolutional Neural Networks 195 | `__, in ISCA. June, 2016. 196 | 197 | .. [Yang16] Yang, Pu, Rister, Bhagdikar, Richardson, Kvatinsky, 198 | Ragan-Kelley, Pedram, and Horowitz, `A Systematic Approach to Blocking 199 | Convolutional Neural Networks `__, arXiv 200 | preprint, 2016. 
201 | 202 | -------------------------------------------------------------------------------- /nn_dataflow/__init__.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | __version__ = '2.1' 18 | 19 | -------------------------------------------------------------------------------- /nn_dataflow/core/__init__.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | from . import loop_blocking 18 | from . import loop_blocking_solver 19 | from . import partition 20 | from . import data_category_enum as DataCategoryEnum 21 | from . import loop_enum as LoopEnum 22 | from . 
import mem_hier_enum as MemHierEnum 23 | from . import parallel_enum as ParallelEnum 24 | from .buf_shr_scheme import BufShrScheme 25 | from .cost import Cost 26 | from .data_dim_loops import DataDimLoops 27 | from .data_layout import DataLayout 28 | from .fmap_range import FmapPosition, FmapRange, FmapRangeMap 29 | from .int_range import IntRange 30 | from .inter_layer_pipeline import InterLayerPipeline 31 | from .layer import Layer, InputLayer, ConvLayer, FCLayer, \ 32 | LocalRegionLayer, PoolingLayer, EltwiseLayer 33 | from .loop_blocking_scheme import LoopBlockingScheme 34 | from .map_strategy import MapStrategy, MapStrategyEyeriss 35 | from .nested_loop_desc import NestedLoopDesc 36 | from .network import Network 37 | from .node_region import NodeRegion 38 | from .nn_dataflow_scheme import NNDataflowScheme 39 | from .option import Option 40 | from .partition_scheme import PartitionScheme 41 | from .phy_dim2 import PhyDim2 42 | from .pipeline_segment import PipelineSegment 43 | from .pipeline_segment_timing import PipelineSegmentTiming 44 | from .resource import Resource 45 | from .scheduling import SchedulingCondition, SchedulingResult, Scheduling 46 | from .scheduling_constraint import SchedulingConstraint, \ 47 | SchedulingConstraintLayerPipeline 48 | 49 | from .nn_dataflow import NNDataflow 50 | 51 | -------------------------------------------------------------------------------- /nn_dataflow/core/cost.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 
8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | from collections import namedtuple 18 | 19 | from . import mem_hier_enum as me 20 | 21 | COST_LIST = ['mac_op', 22 | 'mem_hier', 23 | 'noc_hop', 24 | 'idl_unit', 25 | ] 26 | 27 | class Cost(namedtuple('Cost', COST_LIST)): 28 | ''' 29 | Cost specification, including MAC operation cost, memory hierarchy cost, 30 | NoC hop cost, and idle unit-time cost. 31 | ''' 32 | 33 | def __new__(cls, *args, **kwargs): 34 | ntp = super(Cost, cls).__new__(cls, *args, **kwargs) 35 | 36 | if hasattr(ntp.mac_op, '__len__'): 37 | raise TypeError('Cost: mac_op must be a scalar') 38 | if not isinstance(ntp.mem_hier, tuple): 39 | raise TypeError('Cost: mem_hier must be a tuple') 40 | if len(ntp.mem_hier) != me.NUM: 41 | raise ValueError('Cost: mem_hier must have length {}' 42 | .format(me.NUM)) 43 | if hasattr(ntp.noc_hop, '__len__'): 44 | raise TypeError('Cost: noc_hop must be a scalar') 45 | if hasattr(ntp.idl_unit, '__len__'): 46 | raise TypeError('Cost: idl_unit must be a scalar') 47 | 48 | return ntp 49 | 50 | def mem_hier_at(self, mhe): 51 | ''' 52 | Return cost of memory hierarchy level `mhe`. 
53 | ''' 54 | try: 55 | return self.mem_hier[mhe] 56 | except (IndexError, TypeError): 57 | return None 58 | 59 | -------------------------------------------------------------------------------- /nn_dataflow/core/data_category_enum.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | ''' 18 | Enum for data types. 19 | ''' 20 | FIL = 0 21 | IFM = 1 22 | OFM = 2 23 | NUM = 3 24 | 25 | -------------------------------------------------------------------------------- /nn_dataflow/core/data_dim_loops.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | from . import loop_enum as le 18 | from .. 
import util 19 | 20 | class DataDimLoops(util.ContentHashClass): 21 | ''' 22 | A tuple of loops that are the dimensions of the data. 23 | ''' 24 | 25 | def __init__(self, *lpe_list): 26 | for lpe in lpe_list: 27 | if lpe not in range(le.NUM): 28 | raise ValueError('DataDimLoops: arguments must be LoopEnum.') 29 | 30 | self.lpe_tuple = tuple(sorted(set(lpe_list))) 31 | 32 | def loops(self): 33 | ''' 34 | Get the loops that are the dimensions of the data. 35 | ''' 36 | return self.lpe_tuple 37 | 38 | def take(self, lpe_indexed): 39 | ''' 40 | Get the elements in `lpe_indexed` that correspond to the loops of the 41 | data. 42 | ''' 43 | return [lpe_indexed[lpe] for lpe in self.lpe_tuple] 44 | 45 | def drop(self, lpe_indexed): 46 | ''' 47 | Get the elements in `lpe_indexed` that do not correspond to the loops 48 | of the data. 49 | ''' 50 | return [lpe_indexed[lpe] for lpe in range(le.NUM) 51 | if lpe not in self.lpe_tuple] 52 | 53 | def __repr__(self): 54 | return '{}({})'.format( 55 | self.__class__.__name__, 56 | ', '.join([repr(lpe) for lpe in self.lpe_tuple])) 57 | 58 | -------------------------------------------------------------------------------- /nn_dataflow/core/data_layout.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 
15 | """ 16 | 17 | from collections import namedtuple 18 | import itertools 19 | 20 | from .fmap_range import FmapPosition, FmapRange, FmapRangeMap 21 | from .node_region import NodeRegion 22 | from .partition_scheme import PartitionScheme 23 | 24 | DATA_LAYOUT_LIST = ['frngs', 25 | 'regions', 26 | 'parts', 27 | ] 28 | 29 | class DataLayout(namedtuple('DataLayout', DATA_LAYOUT_LIST)): 30 | ''' 31 | The data layout for batched i/ofmap. 32 | ''' 33 | 34 | def __new__(cls, *args, **kwargs): 35 | ntp = super(DataLayout, cls).__new__(cls, *args, **kwargs) 36 | 37 | if not isinstance(ntp.frngs, tuple): 38 | raise TypeError('DataLayout: frngs must be a tuple.') 39 | for fr in ntp.frngs: 40 | if not isinstance(fr, FmapRange): 41 | raise TypeError('DataLayout: elements in frngs must be a ' 42 | 'FmapRange object.') 43 | if not isinstance(ntp.regions, tuple): 44 | raise TypeError('DataLayout: regions must be a tuple.') 45 | for nr in ntp.regions: 46 | if not isinstance(nr, NodeRegion): 47 | raise TypeError('DataLayout: elements in regions must be a ' 48 | 'NodeRegion object.') 49 | if not isinstance(ntp.parts, tuple): 50 | raise TypeError('DataLayout: parts must be a tuple.') 51 | for p in ntp.parts: 52 | if not isinstance(p, PartitionScheme): 53 | raise TypeError('DataLayout: elements in parts must be a ' 54 | 'PartitionScheme object.') 55 | 56 | cls._validate_frngs(ntp.frngs) 57 | cls._validate_parts(ntp.parts, ntp.regions) 58 | 59 | if not len(ntp.frngs) == len(ntp.regions) == len(ntp.parts): 60 | raise ValueError('DataLayout: {} must have the same length.' 61 | .format(', '.join(DATA_LAYOUT_LIST))) 62 | 63 | return ntp 64 | 65 | def complete_fmap_range(self): 66 | ''' 67 | Get the complete FmapRange, i.e., a perfect hyper cube starting from 68 | origin point (0, ..., 0) with no holes. 
69 | ''' 70 | return FmapRange(self.frngs[0].fp_beg, self.frngs[-1].fp_end) 71 | 72 | def fmap_range_map(self): 73 | ''' 74 | Get an `FmapRangeMap` instance, mapping from fmap range to absolute 75 | node coordinate. 76 | ''' 77 | frmap = FmapRangeMap() 78 | 79 | for frng, region, part in zip(self.frngs, self.regions, self.parts): 80 | 81 | for pidx in part.gen_pidx(): 82 | pcoord = part.coordinate(region, pidx) 83 | pfrng = part.fmap_range(frng, pidx) 84 | 85 | frmap.add(pfrng, pcoord) 86 | 87 | return frmap 88 | 89 | def nhops_to(self, fmap_range, *dest_list, **kwargs): 90 | ''' 91 | Get the total number of hops to transfer the FmapRange `fmap_range` to 92 | destinations `dest_list` given as a list of absolute coordinates. 93 | 94 | If `forwarding` is True, the data can be forwarded between destinations 95 | rather than all from the source. 96 | ''' 97 | forwarding = kwargs.pop('forwarding', False) 98 | if kwargs: 99 | raise ValueError('DataLayout: method nhops_to() got an unexpected ' 100 | 'keyword argument: {}.' 101 | .format(kwargs.popitem()[0])) 102 | 103 | # The number of hops to transfer data to each destination individually. 104 | nhops_list = [0] * len(dest_list) 105 | 106 | for frng, region, part in zip(self.frngs, self.regions, self.parts): 107 | 108 | # Skip non-overlapped fmap range. 109 | if fmap_range.overlap_size(frng) == 0: 110 | continue 111 | 112 | for pidx in part.gen_pidx(): 113 | psrc = part.coordinate(region, pidx) 114 | pfrng = part.fmap_range(frng, pidx) 115 | size = fmap_range.overlap_size(pfrng) 116 | 117 | nhops_list = [n + size * d.hop_dist(psrc) 118 | for n, d in zip(nhops_list, dest_list)] 119 | 120 | if forwarding: 121 | # The number of hops to the first node and its coordinate. 122 | nhops, coord = min(zip(nhops_list, dest_list)) 123 | 124 | # Size of all data. 125 | total_size = self.complete_fmap_range().overlap_size(fmap_range) 126 | 127 | # Data can be forwarded from all sources to any destination. 
128 | src_set = {coord} 129 | dst_set = set(dest_list) - src_set 130 | 131 | while dst_set: 132 | # Each forward step, get the min-distance pair of source and 133 | # destination. 134 | src, dst = min(itertools.product(src_set, dst_set), 135 | key=lambda sd: sd[1].hop_dist(sd[0])) 136 | dst_set.remove(dst) 137 | src_set.add(dst) 138 | nhops += total_size * dst.hop_dist(src) 139 | 140 | else: 141 | nhops = sum(nhops_list) 142 | 143 | return nhops 144 | 145 | def is_in(self, *regions): 146 | ''' 147 | Whether the layout is completely in the given NodeRegion's `regions`. 148 | Region types must match. Each fmap range can be split into multiple 149 | given regions. 150 | ''' 151 | return all(any(region.type == r.type and r.contains_node(coord) 152 | for r in regions) 153 | for region in self.regions for coord in region.iter_node()) 154 | 155 | @classmethod 156 | def concat(cls, *data_layout_list): 157 | ''' 158 | Concatenate multiple `DataLayout` objects along the channel dimension. 159 | ''' 160 | frngs = [] 161 | regions = [] 162 | parts = [] 163 | 164 | n_offset = 0 165 | 166 | for dl in data_layout_list: 167 | 168 | # Check type. 169 | if not isinstance(dl, DataLayout): 170 | raise TypeError('DataLayout: only DataLayout object can be ' 171 | 'concatenated.') 172 | 173 | # Concatenate frngs along n dimension. 174 | for frng in dl.frngs: 175 | fpb = frng.fp_beg 176 | fpe = frng.fp_end 177 | frng2 = FmapRange(FmapPosition(b=fpb.b, n=fpb.n + n_offset, 178 | h=fpb.h, w=fpb.w), 179 | FmapPosition(b=fpe.b, n=fpe.n + n_offset, 180 | h=fpe.h, w=fpe.w)) 181 | frngs.append(frng2) 182 | n_offset += frng.size('n') 183 | 184 | # Regions and partitions are the same. 185 | regions += dl.regions 186 | parts += dl.parts 187 | 188 | return DataLayout(frngs=tuple(frngs), regions=tuple(regions), 189 | parts=tuple(parts)) 190 | 191 | @classmethod 192 | def _validate_frngs(cls, frngs): 193 | ''' 194 | Validate the fmap ranges. 
195 | ''' 196 | if not frngs: 197 | raise ValueError('DataLayout: no frngs.') 198 | 199 | _, n_end = frngs[0].beg_end('n') 200 | bhw_beg_end = frngs[0].beg_end('b', 'h', 'w') 201 | 202 | if frngs[0].fp_beg != FmapPosition(0, 0, 0, 0): 203 | raise ValueError('DataLayout: frngs must begin at 0.') 204 | 205 | for frng in frngs[1:]: 206 | if frng.beg_end('b', 'h', 'w') != bhw_beg_end: 207 | raise ValueError('DataLayout: frng dim b, h, w mismatch.') 208 | nb, ne = frng.beg_end('n') 209 | if nb != n_end: 210 | raise ValueError('DataLayout: frng dim n is discontinuous.') 211 | n_end = ne 212 | 213 | @classmethod 214 | def _validate_parts(cls, parts, regions): 215 | ''' 216 | Validate the partitioning schemes. 217 | ''' 218 | for region, part in zip(regions, parts): 219 | if not part.is_applicable_to_fmap_range(): 220 | raise ValueError('DataLayout: invalid partitioning scheme for ' 221 | 'fmap range.') 222 | 223 | if any(pd > rd for pd, rd in zip(part.dim(), region.dim)): 224 | raise ValueError('DataLayout: partitioning scheme does not fit ' 225 | 'in node region.') 226 | 227 | -------------------------------------------------------------------------------- /nn_dataflow/core/int_range.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 
15 | """ 16 | 17 | from collections import namedtuple 18 | import numbers 19 | 20 | class IntRange(namedtuple('IntRange', ['beg', 'end'])): 21 | ''' 22 | A range of integer numbers. 23 | ''' 24 | 25 | def __new__(cls, *args, **kwargs): 26 | ntp = super(IntRange, cls).__new__(cls, *args, **kwargs) 27 | 28 | if not isinstance(ntp.beg, numbers.Integral): 29 | raise TypeError('IntRange: begin value must be an integer.') 30 | if not isinstance(ntp.end, numbers.Integral): 31 | raise TypeError('IntRange: end value must be an integer.') 32 | if ntp.beg > ntp.end: 33 | raise ValueError('IntRange: begin value {} > end value {}?' 34 | .format(ntp.beg, ntp.end)) 35 | 36 | return ntp 37 | 38 | def size(self): 39 | ''' 40 | Get the size of the range. 41 | ''' 42 | return self.end - self.beg 43 | 44 | def empty(self): 45 | ''' 46 | Whether the range is empty. 47 | ''' 48 | return self.beg == self.end 49 | 50 | def range(self): 51 | ''' 52 | Generator for the range. 53 | ''' 54 | for v in range(self.beg, self.end): 55 | yield v 56 | 57 | def overlap(self, other): 58 | ''' 59 | Get the overlapped IntRange of the two. 60 | ''' 61 | if not isinstance(other, IntRange): 62 | raise TypeError('IntRange: an IntRange object is required.') 63 | try: 64 | return IntRange(max(self.beg, other.beg), min(self.end, other.end)) 65 | except ValueError: 66 | # Non-overlapped. 67 | return IntRange(0, 0) 68 | 69 | def offset(self, val): 70 | ''' 71 | Get a new IntRange by offseting `val`. 
72 | ''' 73 | return IntRange(self.beg + val, self.end + val) 74 | 75 | -------------------------------------------------------------------------------- /nn_dataflow/core/loop_blocking.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | import heapq 18 | import itertools 19 | from multiprocessing.pool import Pool 20 | 21 | from . import loop_blocking_solver 22 | from . import loop_enum as le 23 | from .. import util 24 | from .buf_shr_scheme import BufShrScheme 25 | from .layer import ConvLayer 26 | from .loop_blocking_scheme import LoopBlockingScheme 27 | 28 | ''' 29 | Loop blocking optimization. 30 | 31 | Include loop blocking and reordering. 32 | 33 | For our problem, only deal with nifm, nofm, and batch loops. 34 | ''' 35 | 36 | def skip_conv(bl_ts, bl_ords): 37 | ''' 38 | Skip the given loop blocking scheme for CONV layer, if it has regularized 39 | equivalent, or it is suboptimal. 40 | 41 | Equivalence of loop blocking schemes: 42 | 43 | - changing the position of a trivial loop (with blocking factor 1) makes no 44 | difference to the access pattern. 45 | - reorder non-innermost non-trivial loops has no effect on reuse, although 46 | the access pattern changes. 
47 | 48 | Therefore a scheme is regularized if: 49 | 50 | - all the trivial loops (with blocking factor 1) are at the outermost of 51 | this level, and are in order, i.e., smaller LoopEnum at inner. 52 | - the non-innermost non-trivial loops are in order, i.e., smaller LoopEnum 53 | at inner. 54 | 55 | A scheme is suboptimal if the closest innermost non-trivial loop of an 56 | outer level (skipping the levels with all trivial loops) is the same type 57 | (i.e., has the same LoopEnum value) as one of the non-innermost non-trivial 58 | loops of this level. For the last (innermost) level, all non-trivial loops 59 | should be considered, i.e., no innermost non-trivial loop. 60 | 61 | This is because an equivalent scheme can reorder the non-innermost loops to 62 | put the one loop adjacent to the outer-level innermost loop. Then this loop 63 | can be merged to the outer level, which results in the same access pattern 64 | but has smaller data size for this level. 65 | ''' 66 | 67 | outer_level_innermost_nt_loop = None 68 | 69 | for t_, ord_ in itertools.zip_longest(bl_ts, bl_ords, fillvalue=None): 70 | 71 | # Non-trivial loops. 72 | nt_loops = [lpe for lpe in range(le.NUM) if t_[lpe] > 1] 73 | 74 | # Innermost non-trivial loops. 75 | try: 76 | innermost_nt_loop = min(nt_loops, key=lambda lpe, o=ord_: o[lpe]) 77 | except (ValueError, TypeError): 78 | # All trivial loops, or order is None type (last level). 79 | innermost_nt_loop = None 80 | 81 | # Scheme is suboptimal if the outer-level innermost non-trivial loop is 82 | # a non-innermost non-trivial loops at this level. 83 | if outer_level_innermost_nt_loop != innermost_nt_loop \ 84 | and outer_level_innermost_nt_loop in nt_loops: 85 | return True 86 | if innermost_nt_loop is not None: 87 | outer_level_innermost_nt_loop = innermost_nt_loop 88 | 89 | if ord_: 90 | # Order the LoopEnum values, from innermost to outermost. 
91 | # The sort key is a three-tuple: 92 | # - innermost non-trivial loop should be kept at the innermost. 93 | # - non-trivial loops should be inside trivial loops. 94 | # - within each part, order by LoopEnum value. 95 | lp_ord = sorted(range(le.NUM), 96 | key=lambda lpe, inl=innermost_nt_loop, nls=nt_loops: 97 | (lpe != inl, lpe not in nls, lpe)) 98 | 99 | if any(lp_ord[ord_[lpe]] != lpe for lpe in range(le.NUM)): 100 | return True 101 | 102 | return False 103 | 104 | 105 | def _loop_blocking_cmp_key(options, cost): 106 | if options.opt_goal == 'ed': 107 | return lambda lbs: lbs.get_access_cost(cost) * lbs.time 108 | if options.opt_goal == 'd': 109 | return lambda lbs: (lbs.time, lbs.get_access_cost(cost)) 110 | assert options.opt_goal == 'e' 111 | return lambda lbs: (lbs.get_access_cost(cost), lbs.time) 112 | 113 | 114 | def _gen_loopblocking_perprocess( 115 | nested_loop_desc, resource, bufshr, constraint, cost, options, 116 | gen_tifm, gen_tofm, gen_tbat, gen_ords): 117 | 118 | def _gen_bl_ts(): 119 | ''' 120 | Generator for blocking factors. 121 | 122 | Transpose LoopEnum-major to BL-major. 123 | ''' 124 | gen_lp_ts = [None] * le.NUM 125 | gen_lp_ts[le.IFM], gen_lp_ts[le.OFM], gen_lp_ts[le.BAT] = \ 126 | constraint.filter_gen_ts(gen_tifm, gen_tofm, gen_tbat) 127 | for lp_ts in itertools.product(*gen_lp_ts): 128 | bl_ts = tuple(zip(*lp_ts)) 129 | yield bl_ts 130 | 131 | def _sweep(): 132 | ''' Sweep all. 
''' 133 | is_conv_loops = (nested_loop_desc.data_loops == ConvLayer.data_loops()) 134 | for bl_ts, bl_ords in itertools.product(_gen_bl_ts(), gen_ords): 135 | if is_conv_loops and skip_conv(bl_ts, bl_ords): 136 | continue 137 | if not constraint.is_valid_top_bl(bl_ts[0], bl_ords[0]): 138 | continue 139 | lbs = LoopBlockingScheme( 140 | nested_loop_desc, bl_ts, bl_ords, resource, bufshr, 141 | options) 142 | yield lbs 143 | 144 | return heapq.nsmallest(options.ntops, _sweep(), 145 | key=_loop_blocking_cmp_key(options, cost)) 146 | 147 | 148 | def gen_loopblocking(nested_loop_desc, resource, part, constraint, cost, 149 | options): 150 | ''' 151 | Generator for loop blocking. 152 | ''' 153 | 154 | # Buffer sharing scheme. 155 | bufshr = BufShrScheme(resource.proc_region, part, 156 | nested_loop_desc.data_loops) 157 | 158 | # Solver only works for CONV layer. 159 | if options.sw_solve_loopblocking \ 160 | and nested_loop_desc.data_loops == ConvLayer.data_loops(): 161 | gen = loop_blocking_solver.gen_loopblocking_gbuf_reside 162 | 163 | for bl_ts, bl_ords in gen(nested_loop_desc, resource, options): 164 | lbs = LoopBlockingScheme(nested_loop_desc, bl_ts, bl_ords, 165 | resource, bufshr, options) 166 | if constraint.is_valid_top_bl(lbs.bl_ts[0], lbs.bl_ords[0]): 167 | yield lbs 168 | return 169 | 170 | ## Exhaustive search. 171 | 172 | results = [] 173 | 174 | def retrieve_result(): 175 | ''' Retrieve results from multiprocessing.Pool. ''' 176 | for r in results: 177 | for t in r.get(timeout=3600): 178 | yield t 179 | 180 | def retrieve_result_st(): 181 | ''' Retrieve results from single-process processing. ''' 182 | for r in results: 183 | for t in r: 184 | yield t 185 | 186 | if options.nprocesses > 1: 187 | pool = Pool(processes=options.nprocesses) 188 | apply_func = pool.apply_async 189 | retrieve_func = retrieve_result() 190 | else: 191 | pool = None 192 | apply_func = util.apply 193 | retrieve_func = retrieve_result_st() 194 | 195 | # Exhaustive generators. 
196 | gen_tifm = util.factorize(nested_loop_desc.loopcnt[le.IFM], 3) 197 | gen_tofm = util.factorize(nested_loop_desc.loopcnt[le.OFM], 3) 198 | gen_tbat = util.factorize(nested_loop_desc.loopcnt[le.BAT], 3) 199 | gen_ords = itertools.product(itertools.permutations(range(le.NUM)), 200 | itertools.permutations(range(le.NUM))) 201 | 202 | # Split the design space for multiprocessing. 203 | # Let each process factorize tbat and orders, which constantly have many 204 | # factors that can amortize the multiprocessing overhead. 205 | # Note that we must materialize them into lists, since generators cannot be 206 | # pickled. See 207 | # http://peadrop.com/blog/2009/12/29/why-you-cannot-pickle-generators/ 208 | list_tbat = list(gen_tbat) 209 | list_ords = list(gen_ords) 210 | for tifm, tofm in itertools.product(gen_tifm, gen_tofm): 211 | r = apply_func(_gen_loopblocking_perprocess, 212 | (nested_loop_desc, resource, bufshr, constraint, cost, 213 | options, [tifm], [tofm], list_tbat, list_ords)) 214 | results.append(r) 215 | 216 | for lbs in heapq.nsmallest(options.ntops, retrieve_func, 217 | key=_loop_blocking_cmp_key(options, cost)): 218 | yield lbs 219 | 220 | if pool is not None: 221 | pool.close() 222 | pool.join() 223 | 224 | -------------------------------------------------------------------------------- /nn_dataflow/core/loop_blocking_solver.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 
12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | import math 18 | 19 | from . import data_category_enum as de 20 | from . import loop_enum as le 21 | from .. import util 22 | from .layer import ConvLayer 23 | 24 | ''' 25 | Analytical solvers for loop blocking. 26 | ''' 27 | 28 | def _solve_gbuf_reside(nested_loop_desc, resource, reside_dce): 29 | ''' 30 | Solve the analytical optimal loop blocking scheme, with the given data 31 | category `reside_dce` is the only one in GBUF; all the other data 32 | categories bypass GBUF. 33 | 34 | At the GBUF blocking level, the loops for the reside data category are at 35 | the outer, meaning it is only accessed once into GBUF. The others bypass 36 | GBUF and are streamed multiple times from DRAM to REGF. 37 | 38 | Let x, y, z be the three LoopEnum values, and x, y are for `reside_dce`, 39 | then the nested loop is: 40 | 41 | tx0/ty0, tz0, (tz1 = 1), tx1/ty1 (= 1, ?), tx2/ty2/tz2 42 | 43 | Note that tz1 = 1 is required since tz0 is the innermost of the outer level 44 | (otherwise tz1 can merge into tz0). The REGF level can only allow one 45 | non-trivial loop, so either tx1 or ty1 must also be 1. 46 | 47 | Opt I. 48 | 49 | min accesses to DRAM = 50 | (Nx * Ny * sgxy) * fxy + (Ny * Nz * sgyz) * fyz * tx0 51 | + (Nx * Nz * sgxz) * fxz * ty0 52 | s.t. 53 | 1 <= tx0 <= Nx 54 | 1 <= ty0 <= Ny 55 | (Nx // tx0) * (Ny // ty0) * sgxy <= Sgbuf 56 | min{(srxy + srxz) * (Nx // tx0) + sryz, 57 | (srxy + sryz) * (Ny // ty0) + srxz} <= Sregf 58 | 59 | Nx, Ny, Nz are the total loop factors. 60 | sgxy, sgyz, sgxz are the data unit sizes in GBUF. 61 | srxy, sryz, srxz are the data unit sizes in REGF. 62 | 63 | The last constraint is for the feasibility of REGF capacity. Note that tz2 64 | could be 1. If ty2 is minimized to 1 (so ty1 is not 1), tx1 must be 1; 65 | similarly, if tx2 is minimized to 1 (so tx1 is not 1), ty1 must be 1. 
At 66 | least one of these two cases must be feasible for REGF capacity. 67 | 68 | Although opt I is a convex optimization, we need to further require tx0 and 69 | ty0 to be factors of Nx and Ny, respectively. So we use exhaustive search 70 | to solve opt I. 71 | 72 | Opt II. 73 | 74 | min fetch to GBUF for `reside_dce` = 75 | 1 if tx1 = ty1 = 1 76 | tz0 elsewise 77 | s.t. 78 | tx2 * ty2 * srxy + ty2 * tz2 * sryz + tx2 * tz2 * srxz <= Sregf 79 | 80 | If tx1 and ty1 could be 1, which means the reside data category could put 81 | all GBUF data Nx // tx0 and Ny // ty0 directly into REGF, then it is the 82 | optimal case. 83 | 84 | Otherwise, since tz1 = 1, min tz0 is equivalent to 85 | 86 | max tz2 = 87 | (Sregf - tx2 * ty2 * srxy) / (ty2 * sryz + tx2 * srxz) 88 | 89 | Special adjustment. 90 | 91 | The above model assumes tz0 is a non-trivial loop. If the final solution 92 | has tz0 = 1, the bypass data categories may not bypass. For example, if ty0 93 | is the innermost loop of the top level, data xz will have 1 fetch to DRAM, 94 | but ty0 fetch to GBUF. So we have to adjust the scheme by merging tx1 or 95 | ty1 into tx0 or ty0, and ensure it to be the inner loop at the top level. 96 | ''' 97 | 98 | ldce = [reside_dce] # xy, yz, xz 99 | llpe = [] # x, y, z 100 | lfacc = [] # xy, yz, xz 101 | 102 | if ldce[0] == de.FIL: 103 | llpe += [le.IFM, le.OFM, le.BAT] 104 | ldce += [de.OFM, de.IFM] 105 | lfacc += [1., 2., 1.] 106 | elif ldce[0] == de.IFM: 107 | llpe += [le.IFM, le.BAT, le.OFM] 108 | ldce += [de.OFM, de.FIL] 109 | lfacc += [1., 2., 1.] 110 | else: 111 | assert ldce[0] == de.OFM 112 | llpe += [le.OFM, le.BAT, le.IFM] 113 | ldce += [de.IFM, de.FIL] 114 | lfacc += [2., 1., 1.] 
115 | 116 | lnum = [nested_loop_desc.loopcnt[lpe] for lpe in llpe] # x, y, z 117 | lsgbuf = [nested_loop_desc.usize_gbuf_of(dce) for dce in ldce] # xy, yz, xz 118 | lsregf = [nested_loop_desc.usize_regf_of(dce) for dce in ldce] # xy, yz, xz 119 | 120 | size_gbuf, size_regf = resource.size_gbuf, resource.size_regf 121 | 122 | def goal_opt1(tx0, ty0): 123 | ''' Opt I goal function. min goal(). ''' 124 | lnumloops = [lnum[0] * lnum[1], lnum[1] * lnum[2], lnum[0] * lnum[2]] 125 | ltloops = [1, tx0, ty0] 126 | return sum(util.prod(tpl) for tpl 127 | in zip(lnumloops, lsgbuf, lfacc, ltloops)) 128 | 129 | def constraints_opt1(tx0, ty0): 130 | ''' Opt I constraints. s.t. constraints(). ''' 131 | if (lnum[0] // tx0) * (lnum[1] // ty0) * lsgbuf[0] > size_gbuf: 132 | return False 133 | if min(lnum[0] // tx0 * (lsregf[0] + lsregf[2]) + lsregf[1], 134 | lnum[1] // ty0 * (lsregf[0] + lsregf[1]) + lsregf[2]) \ 135 | > size_regf: 136 | return False 137 | return True 138 | 139 | # Exhaustive search for opt I. 140 | min_goal = float('inf') 141 | for tx0_, _ in util.factorize(lnum[0], 2): 142 | for ty0_, _ in util.factorize(lnum[1], 2): 143 | # Satisfy constraints. 144 | if not constraints_opt1(tx0_, ty0_): 145 | continue 146 | # Minimize goal. 147 | goal = goal_opt1(tx0_, ty0_) 148 | if goal < min_goal: 149 | min_goal = goal 150 | tx0, ty0 = tx0_, ty0_ 151 | 152 | def goal_opt2(tx2, ty2): 153 | ''' Opt II goal function. max goal(). ''' 154 | tz2 = (size_regf - tx2 * ty2 * lsregf[0]) * 1. \ 155 | / (ty2 * lsregf[1] + tx2 * lsregf[2]) 156 | if tz2 < 0: 157 | return -float('inf') 158 | tz2_adj = util.closest_factor(lnum[2], tz2) 159 | if tz2_adj[0] <= tz2: 160 | return tz2_adj[0] 161 | return -float('inf') 162 | 163 | # Try tx1 = ty1 = 1. 164 | tx2, ty2 = lnum[0] // tx0, lnum[1] // ty0 165 | tz2 = goal_opt2(tx2, ty2) 166 | 167 | if math.isinf(tz2): 168 | # Candidates of tx2, ty2. 169 | txy2_cands = [(1, lnum[1] // ty0), (lnum[0] // tx0, 1)] 170 | 171 | # Select. 
172 | tx2, ty2 = max(txy2_cands, key=lambda txy2: goal_opt2(*txy2)) 173 | tz2 = goal_opt2(tx2, ty2) 174 | 175 | assert not math.isinf(tz2) 176 | tz0 = lnum[2] // tz2 177 | tx1 = lnum[0] // tx0 // tx2 178 | ty1 = lnum[1] // ty0 // ty2 179 | 180 | # Loop orders. 181 | # Loop z is at the innermost of the top level. Do not care x, y. 182 | bl_ord_0 = [0] * le.NUM 183 | bl_ord_0[llpe[0]] = 2 184 | bl_ord_0[llpe[1]] = 1 185 | bl_ord_0[llpe[2]] = 0 186 | # The non-1 loop x or y is at the innermost of the middle level. 187 | bl_ord_1 = [0] * le.NUM 188 | bl_ord_1[llpe[0]] = 0 if tx1 > 1 else 1 189 | bl_ord_1[llpe[1]] = 1 if tx1 > 1 else 0 190 | bl_ord_1[llpe[2]] = 2 191 | 192 | # Special adjustment when tz0 = 1: merge tx1/ty1 into tx0/ty0. 193 | if tz0 == 1: 194 | tx0 *= tx1 195 | tx1 = 1 196 | ty0 *= ty1 197 | ty1 = 1 198 | # Also maintain the order. 199 | bl_ord_0 = bl_ord_1 200 | 201 | # Compose return values. 202 | lp_ts = [None] * le.NUM 203 | lp_ts[llpe[0]] = (tx0, tx1, tx2) 204 | lp_ts[llpe[1]] = (ty0, ty1, ty2) 205 | lp_ts[llpe[2]] = (tz0, 1, tz2) 206 | bl_ts = tuple(zip(*lp_ts)) 207 | 208 | bl_ords = (tuple(bl_ord_0), tuple(bl_ord_1)) 209 | 210 | return bl_ts, bl_ords 211 | 212 | 213 | def gen_loopblocking_gbuf_reside(nested_loop_desc, resource, options): 214 | ''' 215 | Generator for loop blocking schemes that are solved from gbuf reside 216 | analytical models. 217 | ''' 218 | if nested_loop_desc.data_loops != ConvLayer.data_loops(): 219 | raise ValueError('loop_blocking_solver: solver only applies to ' 220 | 'CONV layer nested loops') 221 | 222 | reside_dce_list = [] 223 | # reside_dce_list is a list of DataCategoryEnum, each element is a config 224 | # with only that data category in gbuf, i.e., the others are all bypassed. 
225 | for reside_dce in range(de.NUM): 226 | if all(options.sw_gbuf_bypass[dce] for dce in range(de.NUM) 227 | if dce != reside_dce): 228 | reside_dce_list.append(reside_dce) 229 | 230 | for reside_dce in reside_dce_list: 231 | yield _solve_gbuf_reside(nested_loop_desc, resource, reside_dce) 232 | 233 | -------------------------------------------------------------------------------- /nn_dataflow/core/loop_enum.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | ''' 18 | Enum for loop types. 19 | ''' 20 | IFM = 0 21 | OFM = 1 22 | BAT = 2 23 | NUM = 3 24 | 25 | -------------------------------------------------------------------------------- /nn_dataflow/core/mem_hier_enum.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. 
See the BSD-3 License for more details.

You should have received a copy of the Modified BSD-3 License along with this
program. If not, see .
"""

'''
Enum for memory hierarchy.
'''
DRAM = 0
GBUF = 1
ITCN = 2
REGF = 3
NUM = 4

-------------------------------------------------------------------------------- /nn_dataflow/core/nested_loop_desc.py: --------------------------------------------------------------------------------
""" $lic$
Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of
Stanford University

This program is free software: you can redistribute it and/or modify it under
the terms of the Modified BSD-3 License as published by the Open Source
Initiative.

This program is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE. See the BSD-3 License for more details.

You should have received a copy of the Modified BSD-3 License along with this
program. If not, see .
"""

from collections import namedtuple

from . import data_category_enum as de
from . import loop_enum as le
from . import mem_hier_enum as me
from .. import util
from .data_dim_loops import DataDimLoops

# Field names of the NestedLoopDesc namedtuple.
NESTED_LOOP_DESC_LIST = ['loopcnt',
                         'usize_gbuf',
                         'usize_regf',
                         'unit_access',
                         'unit_ops',
                         'unit_time',
                         'data_loops',
                        ]

class NestedLoopDesc(namedtuple('NestedLoopDesc', NESTED_LOOP_DESC_LIST)):
    '''
    Naive nested loop description.

    For our problem, only deal with the loops given by `LoopEnum`.
    '''

    def __new__(cls, *args, **kwargs):
        ntp = super(NestedLoopDesc, cls).__new__(cls, *args, **kwargs)

        # loopcnt: one total loop count per LoopEnum.
        if not isinstance(ntp.loopcnt, tuple):
            raise TypeError('NestedLoopDesc: loopcnt must be a tuple.')
        if len(ntp.loopcnt) != le.NUM:
            raise ValueError('NestedLoopDesc: loopcnt must have length {}.'
                             .format(le.NUM))

        # usize_gbuf / usize_regf: one unit size per DataCategoryEnum.
        if not isinstance(ntp.usize_gbuf, tuple):
            raise TypeError('NestedLoopDesc: usize_gbuf must be a tuple.')
        if not isinstance(ntp.usize_regf, tuple):
            raise TypeError('NestedLoopDesc: usize_regf must be a tuple.')
        if len(ntp.usize_gbuf) != de.NUM:
            raise ValueError('NestedLoopDesc: usize_gbuf must have length {}.'
                             .format(de.NUM))
        if len(ntp.usize_regf) != de.NUM:
            raise ValueError('NestedLoopDesc: usize_regf must have length {}.'
                             .format(de.NUM))

        # unit_access: a MemHierEnum-indexed tuple of DataCategoryEnum-indexed
        # tuples of per-loop-body access counts.
        if not isinstance(ntp.unit_access, tuple):
            raise TypeError('NestedLoopDesc: unit_access must be a tuple.')
        if len(ntp.unit_access) != me.NUM:
            raise ValueError('NestedLoopDesc: unit_access must have length {}.'
                             .format(me.NUM))
        for ua in ntp.unit_access:
            if not isinstance(ua, tuple):
                raise TypeError('NestedLoopDesc: element in unit_access '
                                'must be a tuple.')
            if len(ua) != de.NUM:
                raise ValueError('NestedLoopDesc: element in unit_access '
                                 'must have length {}.'.format(de.NUM))

        # data_loops: which loop dims each data category depends on.
        # NOTE(review): unit_ops and unit_time are not validated here.
        if not isinstance(ntp.data_loops, tuple):
            raise TypeError('NestedLoopDesc: data_loops must be a tuple.')
        if len(ntp.data_loops) != de.NUM:
            raise ValueError('NestedLoopDesc: data_loops must have length {}.'
                             .format(de.NUM))
        for dls in ntp.data_loops:
            if not isinstance(dls, DataDimLoops):
                raise TypeError('NestedLoopDesc: element in data_loops '
                                'must be a DataDimLoops instance.')

        return ntp

    def usize_gbuf_of(self, dce):
        '''
        Get the occupied gbuf size by data category `dce` for one loop body.
        '''
        return self.usize_gbuf[dce]

    def usize_regf_of(self, dce):
        '''
        Get the occupied regf size by data category `dce` for one loop body.
        '''
        return self.usize_regf[dce]

    def unit_access_at_of(self, mhe, dce=None):
        '''
        Get the number of accesses for one loop body at memory hierarchy `mhe`
        of data category `dce`.

        If `dce` is None, return total accesses of all data.
        '''
        if dce is None:
            return sum(self.unit_access[mhe])
        return self.unit_access[mhe][dce]

    def total_ops(self):
        '''
        Get the total number of ops for all loops.
        '''
        return self.unit_ops * util.prod(self.loopcnt)

    def total_access_at_of(self, mhe, dce=None):
        '''
        Get the total number of accesses, i.e., accessing all data once, at
        memory hierarchy `mhe` of data category `dce`.

        If `dce` is None, return total accesses of all data.
        '''
        if dce is None:
            return sum(self.total_access_at_of(mhe, dce2)
                       for dce2 in range(de.NUM))

        # Scale per-body accesses by only the loop dims this data depends on.
        return self.unit_access_at_of(mhe, dce) \
            * util.prod(self.data_loops[dce].take(self.loopcnt))

-------------------------------------------------------------------------------- /nn_dataflow/core/node_region.py: --------------------------------------------------------------------------------
""" $lic$
Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of
Stanford University

This program is free software: you can redistribute it and/or modify it under
the terms of the Modified BSD-3 License as published by the Open Source
Initiative.

This program is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE. See the BSD-3 License for more details.

You should have received a copy of the Modified BSD-3 License along with this
program. If not, see .
"""

import itertools
from collections import namedtuple

from .. import util
from .phy_dim2 import PhyDim2

# Field names of the NodeRegion namedtuple.
NODE_REGION_LIST = ['dim',
                    'origin',
                    'dist',
                    'type',
                    'wtot',
                    'wbeg',
                   ]

class NodeRegion(namedtuple('NodeRegion', NODE_REGION_LIST)):
    '''
    A node region defined by the dimension and origin offset.

    The `type` attribute specifies the region type, which could be `PROC` for
    computation processing nodes or 'DRAM' for off-chip data storage nodes.

    The node region can be optionally folded along the w dimension in a zig-zag
    manner. The folding scheme is defined by (wtot, wbeg). `wtot` is always
    positive, representing the number of nodes between two turns (total width).
    `wbeg` is the number of nodes before reaching the first turning boundary,
    with its sign representing the direction. E.g.,

    ...
    ******************
              ********
              | wbeg |

    or

    ...
    ******************
    *********
    | -wbeg |

    With folded region, `origin` points to the first node.

    NOTE: we cannot overload __contains__ and __iter__ as a node container,
    because the base namedtuple already defines them.
    '''

    # Type enums.
    PROC = 0
    DRAM = 1
    NUM = 2

    def __new__(cls, *args, **kwargs):

        # Set default values.
        kwargs2 = kwargs.copy()
        if len(args) <= NODE_REGION_LIST.index('dist'):
            kwargs2.setdefault('dist', PhyDim2(1, 1))
        if len(args) <= NODE_REGION_LIST.index('wtot'):
            # Default to dim.w but we haven't checked dim yet. Replace later.
            kwargs2.setdefault('wtot', None)
        if len(args) <= NODE_REGION_LIST.index('wbeg'):
            # Default to wtot. Also replace later.
            kwargs2.setdefault('wbeg', None)

        ntp = super(NodeRegion, cls).__new__(cls, *args, **kwargs2)

        if not isinstance(ntp.dim, PhyDim2):
            raise TypeError('NodeRegion: dim must be a PhyDim2 object.')
        if not isinstance(ntp.origin, PhyDim2):
            raise TypeError('NodeRegion: origin must be a PhyDim2 object.')
        if not isinstance(ntp.dist, PhyDim2):
            raise TypeError('NodeRegion: dist must be a PhyDim2 object.')

        if ntp.type not in range(cls.NUM):
            raise ValueError('NodeRegion: type must be a valid type enum.')

        # Now dim is validated; resolve the deferred wtot/wbeg defaults.
        if ntp.wtot is None:
            ntp = ntp._replace(wtot=ntp.dim.w)
        if ntp.wbeg is None:
            ntp = ntp._replace(wbeg=ntp.wtot)

        if not isinstance(ntp.wtot, int):
            raise TypeError('NodeRegion: wtot must be an int.')
        if not isinstance(ntp.wbeg, int):
            raise TypeError('NodeRegion: wbeg must be an int.')

        # Empty regions skip the wbeg range check.
        if not (0 < abs(ntp.wbeg) <= ntp.wtot) and ntp.dim.size() > 0:
            raise ValueError('NodeRegion: |wbeg| must be in (0, wtot].')

        return ntp

    def contains_node(self, coordinate):
        ''' Whether the region contains the given absolute node coordinate. '''
        # Linear scan over all nodes in the (folded) region.
        return coordinate in self.iter_node()

    def iter_node(self):
        ''' Iterate through all absolute node coordinates in the region. '''
        for rel_coord in itertools.product(*[range(d) for d in self.dim]):
            yield self.rel2abs(PhyDim2(*rel_coord))

    def rel2abs(self, rel_coordinate):
        ''' Convert relative node coordinate to absolute node coordinate. '''
        if not isinstance(rel_coordinate, PhyDim2):
            raise TypeError('NodeRegion: relative coordinate must be '
                            'a PhyDim2 object.')
        if not all(0 <= c < d for c, d in zip(rel_coordinate, self.dim)):
            raise ValueError('NodeRegion: relative coordinate {} is not in '
                             'node region {}.'.format(rel_coordinate, self))

        # Add starting offset to start from the boundary before the first node,
        # then modulo wtot to get the delta h and w to this boundary point.
        h, w = divmod(rel_coordinate.w + self.wtot - abs(self.wbeg), self.wtot)
        # Direction for w, changing every time when h increments.
        direction = (-1 if self.wbeg < 0 else 1) * (-1 if h % 2 else 1)
        # Make w relative to the left boundary.
        w = w if direction > 0 else self.wtot - 1 - w

        abs_coordinate = self.origin \
                + PhyDim2(h=h * self.dim.h + rel_coordinate.h,
                          w=w - (self.wtot - self.wbeg if self.wbeg > 0
                                 else -self.wbeg - 1)) \
                * self.dist

        return abs_coordinate

    def allocate(self, request_list):
        '''
        Allocate node subregions spatially within the node region according to
        the given `request_list` which is a list of numbers of nodes requested.

        Return a list of NodeRegion instances, whose origins are absolute
        offset (not relative to the origin of self). The allocation may fail if
        and only if the total number of nodes requested is larger than the
        number of nodes in the region, in which case an empty list is returned.

        The strategy is to allocate stripe-wise in a zig-zag order, allowing
        for folding in width. We first determine a stripe height as the
        greatest common divisor of the requested numbers of nodes. Then
        allocate each request as (stripe height, request size / stripe height)
        to fill in the stripe, and move to the next stripe after the current
        one is filled. If the width of a request is larger than the remaining
        width of the current stripe, we use up the remaining width, and fold
        the request width to the next stripe.
        '''

        if sum(request_list) > self.dim.size():
            return []

        # Stripe height divides every request size, so each request tiles
        # exactly (hstrp x width) nodes.
        hstrp = util.gcd(self.dim.h, *request_list)
        subregions = []

        wtot = self.dim.w
        ofs_h, ofs_w = 0, 0
        move_right = True

        for req in request_list:

            # Subregion.
            assert req % hstrp == 0
            width = req // hstrp

            subdim = PhyDim2(hstrp, width)
            if move_right:
                origin = PhyDim2(ofs_h, ofs_w)
                wbeg = min(wtot - ofs_w, width)
                assert wbeg > 0
            else:
                # Zig-zag: odd stripes start from the right edge and move left.
                origin = PhyDim2(ofs_h, self.dim.w - ofs_w - 1)
                wbeg = -min(wtot - ofs_w, width)
                assert wbeg < 0

            subregions.append(NodeRegion(dim=subdim,
                                         origin=self.origin \
                                                 + origin * self.dist,
                                         dist=self.dist,
                                         type=self.type,
                                         wtot=wtot,
                                         wbeg=wbeg))

            # Move the offset
            ofs_w += width
            while ofs_w >= self.dim.w:
                # Overflow, fold to the next stripe.
                ofs_w -= self.dim.w
                ofs_h += hstrp
                move_right = not move_right

        # Not moved outside the region.
        assert ofs_h + hstrp <= self.dim.h or ofs_w == 0

        return subregions

-------------------------------------------------------------------------------- /nn_dataflow/core/option.py: --------------------------------------------------------------------------------
""" $lic$
Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of
Stanford University

This program is free software: you can redistribute it and/or modify it under
the terms of the Modified BSD-3 License as published by the Open Source
Initiative.
class Option(namedtuple('Option', OPTION_LIST)):
    '''
    Schedule options.

    A namedtuple over OPTION_LIST with keyword defaults emulated in __new__:
    positional and keyword arguments are merged, missing options get their
    defaults, and cross-option consistency is validated.
    '''

    def __new__(cls, *args, **kwargs):

        # Reject more positional arguments than there are options.
        if len(args) > len(OPTION_LIST):
            raise TypeError('Option: can take at most {} arguments ({} given).'
                            .format(len(OPTION_LIST), len(args)))

        # Reject unknown keyword arguments, naming the first offender.
        if not set(kwargs).issubset(OPTION_LIST):
            raise TypeError('Option: got an unexpected keyword argument {}.'
                            .format(next(k for k in kwargs
                                         if k not in OPTION_LIST)))

        # Combine args and kwargs.
        kwdict = kwargs.copy()
        for k, v in zip(OPTION_LIST, args):
            if k in kwdict:
                raise TypeError('Option: got multiple values for '
                                'keyword argument {}.'
                                .format(k))
            kwdict[k] = v

        # Defaults for any options not given.
        kwdict.setdefault('sw_gbuf_bypass', (False,) * de.NUM)
        kwdict.setdefault('sw_solve_loopblocking', False)
        kwdict.setdefault('hw_access_forwarding', False)
        kwdict.setdefault('hw_gbuf_sharing', False)
        kwdict.setdefault('hw_gbuf_save_writeback', False)
        kwdict.setdefault('partition_hybrid', False)
        kwdict.setdefault('partition_batch', False)
        kwdict.setdefault('partition_ifmaps', False)
        kwdict.setdefault('partition_interlayer', False)
        kwdict.setdefault('layer_pipeline_time_ovhd', float('inf'))
        kwdict.setdefault('layer_pipeline_max_degree', float('inf'))
        kwdict.setdefault('layer_pipeline_opt', True)
        kwdict.setdefault('opt_goal', 'e')
        kwdict.setdefault('ntops', 1)
        kwdict.setdefault('nprocesses', 1)
        kwdict.setdefault('verbose', False)

        assert set(kwdict) == set(OPTION_LIST)

        ntp = super(Option, cls).__new__(cls, **kwdict)

        # sw_gbuf_bypass is a per-data-category tuple of booleans.
        if not isinstance(ntp.sw_gbuf_bypass, tuple):
            raise TypeError('Option: sw_gbuf_bypass must be a tuple')
        if len(ntp.sw_gbuf_bypass) != de.NUM:
            raise ValueError('Option: sw_gbuf_bypass must have length {}'
                             .format(de.NUM))

        # Mutually exclusive / implied option combinations.
        if ntp.sw_solve_loopblocking and ntp.hw_gbuf_sharing:
            raise ValueError('Option: sw_solve_loopblocking and '
                             'hw_gbuf_sharing cannot be simultaneously '
                             'enabled.')

        if ntp.hw_access_forwarding and ntp.hw_gbuf_sharing:
            raise ValueError('Option: hw_access_forwarding is implied by '
                             'hw_gbuf_sharing, thus cannot be both enabled.')

        if ntp.sw_solve_loopblocking and ntp.hw_gbuf_save_writeback:
            raise ValueError('Option: sw_solve_loopblocking and '
                             'hw_gbuf_save_writeback cannot be simultaneously '
                             'enabled.')

        if ntp.partition_ifmaps and not ntp.partition_hybrid:
            raise ValueError('Option: partition_ifmaps requires '
                             'partition_hybrid to be set.')

        # NOTE(review): KeyError is an odd choice for a type check (TypeError
        # would fit better), but callers/tests may depend on it — confirm
        # before changing.
        if not isinstance(ntp.layer_pipeline_time_ovhd, (int, float)):
            raise KeyError('Option: layer_pipeline_time_ovhd must be a '
                           'number.')
        # NOTE(review): the check allows 0 although the message says
        # "positive"; presumably "non-negative" is meant — verify intent.
        if ntp.layer_pipeline_time_ovhd < 0:
            raise ValueError('Option: layer_pipeline_time_ovhd must be '
                             'positive.')

        if not isinstance(ntp.layer_pipeline_max_degree, (int, float)):
            raise KeyError('Option: layer_pipeline_max_degree must be a '
                           'number.')
        if ntp.layer_pipeline_max_degree < 0:
            raise ValueError('Option: layer_pipeline_max_degree must be '
                             'positive.')

        # Optimization goal: 'e'nergy, 'd'elay, or 'ed' product.
        if ntp.opt_goal not in ['e', 'd', 'ed']:
            raise ValueError('Option: opt_goal is invalid, must be one of '
                             '\'e\', \'d\', and \'ed\'.')

        return ntp

    @staticmethod
    def option_list():
        ''' List of options. '''
        return OPTION_LIST
class PhyDim2(namedtuple('PhyDim2', ['h', 'w'])):
    '''
    Denote a physical 2D dimension (height `h` x width `w`).

    Arithmetic operators are element-wise; a scalar operand is broadcast to
    both dimensions.
    '''

    def size(self):
        ''' Total size, i.e., h * w. '''
        return int(reduce(mul, self, 1))

    def hop_dist(self, other):
        ''' Hop (Manhattan) distance between two coordinates. '''
        if not isinstance(other, PhyDim2):
            raise TypeError('PhyDim2: hop_dist only applies on two PhyDim2 '
                            'instances.')
        return abs(self.h - other.h) + abs(self.w - other.w)

    def __add__(self, other):
        ''' Return element-wise `self + other`; scalars broadcast. '''
        if not isinstance(other, PhyDim2):
            other = PhyDim2(other, other)
        return PhyDim2(*map(add, self, other))

    def __sub__(self, other):
        ''' Return element-wise `self - other`; scalars broadcast. '''
        if not isinstance(other, PhyDim2):
            other = PhyDim2(other, other)
        return PhyDim2(*map(sub, self, other))

    def __neg__(self):
        ''' Return element-wise negative. '''
        return PhyDim2(*map(neg, self))

    def __mul__(self, other):
        ''' Return element-wise `self * other`; scalars broadcast. '''
        if not isinstance(other, PhyDim2):
            other = PhyDim2(other, other)
        return PhyDim2(*map(mul, self, other))

    # Scalar-on-the-left multiplication behaves identically.
    __rmul__ = __mul__
40 | ''' 41 | 42 | def __new__(cls, *args, **kwargs): 43 | ntp = super(Resource, cls).__new__(cls, *args, **kwargs) 44 | 45 | if not isinstance(ntp.proc_region, NodeRegion): 46 | raise TypeError('Resource: proc_region must be ' 47 | 'a NodeRegion instance.') 48 | if ntp.proc_region.type != NodeRegion.PROC: 49 | raise ValueError('Resource: proc_region must have type PROC.') 50 | 51 | if not isinstance(ntp.dram_region, NodeRegion): 52 | raise TypeError('Resource: dram_region must be ' 53 | 'a NodeRegion instance.') 54 | if ntp.dram_region.type != NodeRegion.DRAM: 55 | raise ValueError('Resource: dram_region must have type DRAM.') 56 | 57 | if not isinstance(ntp.src_data_region, NodeRegion): 58 | raise TypeError('Resource: src_data_region must be ' 59 | 'a NodeRegion instance.') 60 | if not isinstance(ntp.dst_data_region, NodeRegion): 61 | raise TypeError('Resource: dst_data_region must be ' 62 | 'a NodeRegion instance.') 63 | 64 | if not isinstance(ntp.dim_array, PhyDim2): 65 | raise TypeError('Resource: dim_array must be a PhyDim2 object.') 66 | 67 | if hasattr(ntp.size_gbuf, '__len__'): 68 | raise TypeError('Resource: size_gbuf must be a scalar') 69 | if hasattr(ntp.size_regf, '__len__'): 70 | raise TypeError('Resource: size_regf must be a scalar') 71 | 72 | if not isinstance(ntp.array_bus_width, int) \ 73 | and not math.isinf(ntp.array_bus_width): 74 | raise TypeError('Resource: array_bus_width must be an integer ' 75 | 'or infinity.') 76 | if ntp.array_bus_width <= 0: 77 | raise ValueError('Resource: array_bus_width must be positive.') 78 | 79 | if not isinstance(ntp.dram_bandwidth, (float, int)): 80 | raise TypeError('Resource: dram_bandwidth must be a number') 81 | if ntp.dram_bandwidth <= 0: 82 | raise ValueError('Resource: dram_bandwidth must be positive.') 83 | 84 | if not isinstance(ntp.no_time_mux, bool): 85 | raise TypeError('Resource: no_time_mux must be boolean') 86 | 87 | return ntp 88 | 89 | 
-------------------------------------------------------------------------------- /nn_dataflow/core/scheduling_constraint.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | import numbers 18 | 19 | from . import loop_enum as le 20 | from .. import util 21 | from .loop_blocking_scheme import LoopBlockingScheme 22 | 23 | class SchedulingConstraint(util.ContentHashClass): 24 | ''' 25 | Layer scheduling constraint, which constrains top loop blocking factors. 26 | ''' 27 | 28 | def __init__(self, topbat=0, topifm=0, topofm=0, update_dict=None): 29 | ''' 30 | `topbat`, `topifm`, `topofm` specify the top-level loop blocking 31 | factors. 32 | 33 | `update_dict` specifies lazily updated rules to refine the constraint 34 | with previous scheduling results. It should be a mapping, from previous 35 | layer name to a function which takes two arguments: self, and the 36 | SchedulingResult instance of that layer. 
37 | ''' 38 | if any(n < 0 or not isinstance(n, numbers.Integral) 39 | for n in [topbat, topifm, topofm]): 40 | raise ValueError('SchedulingConstraint: ' 41 | 'constrained factors must be positive integers.') 42 | 43 | if not update_dict: 44 | update_dict = {} 45 | if not isinstance(update_dict, dict): 46 | raise TypeError('SchedulingConstraint: ' 47 | 'update_dict must be a dict instance.') 48 | update_dict = util.HashableDict.fromdict(update_dict) 49 | for val in update_dict.values(): 50 | if not callable(val): 51 | raise TypeError('SchedulingConstraint: ' 52 | 'values in update_dict must be callable.') 53 | 54 | self.topbat = topbat 55 | self.topifm = topifm 56 | self.topofm = topofm 57 | self.update_dict = update_dict 58 | 59 | def is_valid_top_bl(self, top_bl_t, top_bl_ord): 60 | ''' 61 | Whether the given `top_bl_t` and `top_bl_lpe` are valid with the 62 | constraint. 63 | ''' 64 | if self.update_dict: 65 | raise ValueError('SchedulingConstraint: update_dict is not empty, ' 66 | 'rules have not been updated.') 67 | 68 | if self.topbat and self.topbat != top_bl_t[le.BAT]: 69 | return False 70 | if self.topifm and self.topifm != top_bl_t[le.IFM]: 71 | return False 72 | if self.topofm and self.topofm != top_bl_t[le.OFM]: 73 | return False 74 | 75 | del top_bl_ord 76 | 77 | return True 78 | 79 | def is_valid_part(self, part): 80 | ''' 81 | Whether the given `part` is valid with the constraint. 82 | ''' 83 | # pylint: disable=unused-argument 84 | if self.update_dict: 85 | raise ValueError('SchedulingConstraint: update_dict is not empty, ' 86 | 'rules have not been updated.') 87 | 88 | return True 89 | 90 | def filter_gen_ts(self, gen_tifm, gen_tofm, gen_tbat): 91 | ''' Get the filtered generators for loop blocking factors. 
''' 92 | return self._filter_gen(gen_tifm, self.topifm), \ 93 | self._filter_gen(gen_tofm, self.topofm), \ 94 | self._filter_gen(gen_tbat, self.topbat) 95 | 96 | def update_by_prev(self, prev_results): 97 | ''' 98 | Based on the previous layer scheduling results `prev_results` as a 99 | mapping from previous layer name to SchedulingResult instance, use the 100 | rules specified by `update_dict` to update the constraint. 101 | ''' 102 | for layer_name in self.update_dict: 103 | self.update_dict[layer_name](self, prev_results[layer_name]) 104 | self.update_dict = util.HashableDict() # clear updated rules. 105 | 106 | @staticmethod 107 | def _filter_gen(gen, topt=0): 108 | ''' Get a new generator which filters the top factor. ''' 109 | for tpl in gen: 110 | if topt in (0, tpl[0]): 111 | yield tpl 112 | 113 | def __repr__(self): 114 | return '{}({})'.format( 115 | self.__class__.__name__, 116 | ', '.join(['{}={}'.format(k, repr(v)) 117 | for k, v in self.__dict__.items()])) 118 | 119 | 120 | class SchedulingConstraintLayerPipeline(SchedulingConstraint): 121 | ''' 122 | Layer scheduling constraint for inter-layer pipelining. 123 | 124 | Constraint includes: 125 | - topbat: top BAT loop blocking factor, which decides the number of groups 126 | for batch pipelining. It must match between all layers in a pipeline 127 | segment. 128 | - topifm/topofm: top IFM/OFM blocking factor, which decides the number of 129 | groups for fmap data forwarding between adjacent spatial scheduled layers 130 | in a pipeline segment. It must match between forwarding 131 | source/destination layers. 132 | - fbifm/fbofm: whether to fully buffer the fmap data of the layer on-chip. 133 | It indicates the baseline double-buffering between pipelined layers. 134 | 135 | For loop orders, the BAT loop must be at the outermost for batch 136 | pipelining. Then the loop associated with the forwarded data (IFM or OFM) 137 | must follow at the second outermost. 
If a data category (IFM or OFM) is 138 | fully buffered, then the corresponding loop is a trivial loop, which can be 139 | at any where. 140 | ''' 141 | 142 | def __init__(self, topbat=0, topifm=0, topofm=0, fbifm=False, fbofm=False, 143 | update_dict=None): 144 | 145 | if fbifm: 146 | # Fully-buffered IFM <=> topifm = 1. 147 | if topifm not in (0, 1): 148 | raise ValueError('SchedulingConstraintLayerPipeline: ' 149 | 'fully-buffered IFM implies topifm = 1.') 150 | topifm = 1 151 | 152 | if fbofm: 153 | # Fully-buffered OFM <=> topofm = 1. 154 | if topofm not in (0, 1): 155 | raise ValueError('SchedulingConstraintLayerPipeline: ' 156 | 'fully-buffered OFM implies topofm = 1.') 157 | topofm = 1 158 | 159 | if topifm > 1 and topofm > 1: 160 | raise ValueError('SchedulingConstraintLayerPipeline: ' 161 | 'impossible to have both topifm and topofm > 1, ' 162 | 'at least one of IFM and OFM must be a trivial ' 163 | 'loop (= 1) or not constrained (= 0).') 164 | 165 | super(SchedulingConstraintLayerPipeline, self).__init__( 166 | topbat=topbat, topifm=topifm, topofm=topofm, 167 | update_dict=update_dict) 168 | 169 | def is_valid_top_bl(self, top_bl_t, top_bl_ord): 170 | 171 | if not super(SchedulingConstraintLayerPipeline, self).is_valid_top_bl( 172 | top_bl_t, top_bl_ord): 173 | return False 174 | 175 | # Loop orders. 176 | # Ordered loops from outer to inner. 177 | ord_lpe = LoopBlockingScheme.ordered_loops(top_bl_t, top_bl_ord, 178 | lpe_only=True) 179 | if self.topbat > 1: 180 | if ord_lpe.pop(0) != le.BAT: 181 | return False 182 | # topifm and topofm cannot trigger together. 
183 | if self.topifm > 1: 184 | if ord_lpe.pop(0) != le.IFM: 185 | return False 186 | if self.topofm > 1: 187 | if ord_lpe.pop(0) != le.OFM: 188 | return False 189 | 190 | return True 191 | 192 | -------------------------------------------------------------------------------- /nn_dataflow/nns/__init__.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | def import_network(name): 18 | ''' 19 | Import an example network. 20 | ''' 21 | import importlib 22 | 23 | if name not in all_networks(): 24 | raise ImportError('nns: NN {} has not been defined!'.format(name)) 25 | netmod = importlib.import_module('.' + name, 'nn_dataflow.nns') 26 | network = netmod.NN 27 | return network 28 | 29 | 30 | def all_networks(): 31 | ''' 32 | Get all defined networks. 33 | ''' 34 | import os 35 | 36 | nns_dir = os.path.dirname(os.path.abspath(__file__)) 37 | nns = [f[:-len('.py')] for f in os.listdir(nns_dir) 38 | if f.endswith('.py') and not f.startswith('__')] 39 | return list(sorted(nns)) 40 | 41 | 42 | def add_lstm_cell(network, name, size, xin, cin=None, hin=None): 43 | ''' 44 | Add a LSTM cell named `name` to the `network`, with the dimension `size`. 45 | `xin`, `cin`, `hin` are the layers' names whose outputs are x_t, C_{t-1}, 46 | h_{t-1}, respectively. 
Return the layers' names whose outputs are C_t, h_t. 47 | ''' 48 | from nn_dataflow.core import Network 49 | from nn_dataflow.core import InputLayer, FCLayer, EltwiseLayer 50 | 51 | if not isinstance(network, Network): 52 | raise TypeError('add_lstm_cell: network must be a Network instance.') 53 | 54 | if cin is None: 55 | cin = '{}_cinit'.format(name) 56 | network.add_ext(cin, InputLayer(size, 1)) 57 | if hin is None: 58 | hin = '{}_hinit'.format(name) 59 | network.add_ext(hin, InputLayer(size, 1)) 60 | 61 | if (cin not in network) or (hin not in network) or (xin not in network): 62 | raise ValueError('add_lstm_cell: cin {}, hin {}, xin {} must all be ' 63 | 'in the network.'.format(cin, hin, xin)) 64 | 65 | def gate_name(gate): 66 | ''' Name of a gate. ''' 67 | return '{}_{}gate'.format(name, gate) 68 | 69 | # Candidate. 70 | cand_name = '{}_cand'.format(name) 71 | prevs = (hin, xin) if hin else (xin,) 72 | network.add(cand_name, FCLayer(len(prevs) * size, size), prevs=prevs) 73 | 74 | # Three gates. 75 | prevs = (hin, xin) if hin else (xin,) 76 | for g in ['i', 'f', 'o']: 77 | network.add(gate_name(g), FCLayer(len(prevs) * size, size), prevs=prevs) 78 | 79 | # C_t. 80 | cout_name = '{}_cout'.format(name) 81 | cout_f_name = cout_name + '_f' 82 | prevs = (cin, gate_name('f')) if cin else (gate_name('f'),) 83 | network.add(cout_f_name, EltwiseLayer(size, 1, len(prevs)), prevs=prevs) 84 | cout_i_name = cout_name + '_i' 85 | prevs = (cand_name, gate_name('i')) 86 | network.add(cout_i_name, EltwiseLayer(size, 1, 2), prevs=prevs) 87 | prevs = (cout_i_name, cout_f_name) 88 | network.add(cout_name, EltwiseLayer(size, 1, 2), prevs=prevs) 89 | 90 | # h_t. 
91 | hout_name = '{}_hout'.format(name) 92 | prevs = (cout_name, gate_name('o')) 93 | network.add(hout_name, EltwiseLayer(size, 1, 2), prevs=prevs) 94 | 95 | return cout_name, hout_name 96 | 97 | -------------------------------------------------------------------------------- /nn_dataflow/nns/alex_net.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | from nn_dataflow.core import Network 18 | from nn_dataflow.core import InputLayer, ConvLayer, FCLayer, PoolingLayer 19 | 20 | ''' 21 | AlexNet 22 | 23 | Krizhevsky, Sutskever, and Hinton, 2012 24 | ''' 25 | 26 | NN = Network('AlexNet') 27 | 28 | NN.set_input_layer(InputLayer(3, 224)) 29 | 30 | NN.add('conv1_a', ConvLayer(3, 48, 55, 11, 4), prevs=(NN.INPUT_LAYER_KEY,)) 31 | NN.add('conv1_b', ConvLayer(3, 48, 55, 11, 4), prevs=(NN.INPUT_LAYER_KEY,)) 32 | NN.add('pool1_a', PoolingLayer(48, 27, 3, strd=2), prevs=('conv1_a',)) 33 | NN.add('pool1_b', PoolingLayer(48, 27, 3, strd=2), prevs=('conv1_b',)) 34 | # Norm layer is ignored. 
35 | 36 | NN.add('conv2_a', ConvLayer(48, 128, 27, 5), prevs=('pool1_a',)) 37 | NN.add('conv2_b', ConvLayer(48, 128, 27, 5), prevs=('pool1_b',)) 38 | NN.add('pool2_a', PoolingLayer(128, 13, 3, strd=2), prevs=('conv2_a',)) 39 | NN.add('pool2_b', PoolingLayer(128, 13, 3, strd=2), prevs=('conv2_b',)) 40 | # Norm layer is ignored. 41 | 42 | NN.add('conv3_a', ConvLayer(256, 192, 13, 3), prevs=('pool2_a', 'pool2_b')) 43 | NN.add('conv3_b', ConvLayer(256, 192, 13, 3), prevs=('pool2_a', 'pool2_b')) 44 | NN.add('conv4_a', ConvLayer(192, 192, 13, 3), prevs=('conv3_a',)) 45 | NN.add('conv4_b', ConvLayer(192, 192, 13, 3), prevs=('conv3_b',)) 46 | NN.add('conv5_a', ConvLayer(192, 128, 13, 3), prevs=('conv4_a',)) 47 | NN.add('conv5_b', ConvLayer(192, 128, 13, 3), prevs=('conv4_b',)) 48 | NN.add('pool3_a', PoolingLayer(128, 6, 3, strd=2), prevs=('conv5_a',)) 49 | NN.add('pool3_b', PoolingLayer(128, 6, 3, strd=2), prevs=('conv5_b',)) 50 | 51 | NN.add('fc1', FCLayer(256, 4096, 6), prevs=('pool3_a', 'pool3_b')) 52 | NN.add('fc2', FCLayer(4096, 4096)) 53 | NN.add('fc3', FCLayer(4096, 1000)) 54 | 55 | -------------------------------------------------------------------------------- /nn_dataflow/nns/googlenet.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 
15 | """ 16 | 17 | from nn_dataflow.core import Network 18 | from nn_dataflow.core import InputLayer, ConvLayer, FCLayer, PoolingLayer 19 | 20 | ''' 21 | GoogLeNet 22 | 23 | ILSVRC 2014 24 | ''' 25 | 26 | NN = Network('GoogLeNet') 27 | 28 | NN.set_input_layer(InputLayer(3, 224)) 29 | 30 | NN.add('conv1', ConvLayer(3, 64, 112, 7, 2)) 31 | NN.add('pool1', PoolingLayer(64, 56, 3, strd=2)) 32 | # Norm layer is ignored. 33 | 34 | NN.add('conv2_3x3_reduce', ConvLayer(64, 64, 56, 1)) 35 | NN.add('conv2_3x3', ConvLayer(64, 192, 56, 3)) 36 | # Norm layer is ignored. 37 | NN.add('pool2', PoolingLayer(192, 28, 3, strd=2)) 38 | 39 | 40 | def add_inception(network, incp_id, sfmap, nfmaps_in, nfmaps_1, nfmaps_3r, 41 | nfmaps_3, nfmaps_5r, nfmaps_5, nfmaps_pool, prevs): 42 | ''' Add an inception module to the network. ''' 43 | pfx = 'inception_{}_'.format(incp_id) 44 | # 1x1 branch. 45 | network.add(pfx + '1x1', ConvLayer(nfmaps_in, nfmaps_1, sfmap, 1), 46 | prevs=prevs) 47 | # 3x3 branch. 48 | network.add(pfx + '3x3_reduce', ConvLayer(nfmaps_in, nfmaps_3r, sfmap, 1), 49 | prevs=prevs) 50 | network.add(pfx + '3x3', ConvLayer(nfmaps_3r, nfmaps_3, sfmap, 3)) 51 | # 5x5 branch. 52 | network.add(pfx + '5x5_reduce', ConvLayer(nfmaps_in, nfmaps_5r, sfmap, 1), 53 | prevs=prevs) 54 | network.add(pfx + '5x5', ConvLayer(nfmaps_5r, nfmaps_5, sfmap, 5)) 55 | # Pooling branch. 56 | network.add(pfx + 'pool_proj', ConvLayer(nfmaps_in, nfmaps_pool, sfmap, 1), 57 | prevs=prevs) 58 | # Merge branches. 59 | return (pfx + '1x1', pfx + '3x3', pfx + '5x5', pfx + 'pool_proj') 60 | 61 | 62 | _PREVS = ('pool2',) 63 | 64 | # Inception 3. 65 | _PREVS = add_inception(NN, '3a', 28, 192, 64, 96, 128, 16, 32, 32, 66 | prevs=_PREVS) 67 | _PREVS = add_inception(NN, '3b', 28, 256, 128, 128, 192, 32, 96, 64, 68 | prevs=_PREVS) 69 | 70 | NN.add('pool3', PoolingLayer(480, 14, 3, strd=2), prevs=_PREVS) 71 | _PREVS = ('pool3',) 72 | 73 | # Inception 4. 
74 | _PREVS = add_inception(NN, '4a', 14, 480, 192, 96, 208, 16, 48, 64, 75 | prevs=_PREVS) 76 | _PREVS = add_inception(NN, '4b', 14, 512, 160, 112, 224, 24, 64, 64, 77 | prevs=_PREVS) 78 | _PREVS = add_inception(NN, '4c', 14, 512, 128, 128, 256, 24, 64, 64, 79 | prevs=_PREVS) 80 | _PREVS = add_inception(NN, '4d', 14, 512, 112, 144, 288, 32, 64, 64, 81 | prevs=_PREVS) 82 | _PREVS = add_inception(NN, '4e', 14, 528, 256, 160, 320, 32, 128, 128, 83 | prevs=_PREVS) 84 | 85 | NN.add('pool4', PoolingLayer(832, 7, 3, strd=2), prevs=_PREVS) 86 | _PREVS = ('pool4',) 87 | 88 | # Inception 5. 89 | _PREVS = add_inception(NN, '5a', 7, 832, 256, 160, 320, 32, 128, 128, 90 | prevs=_PREVS) 91 | _PREVS = add_inception(NN, '5b', 7, 832, 384, 192, 384, 48, 128, 128, 92 | prevs=_PREVS) 93 | 94 | NN.add('pool5', PoolingLayer(1024, 1, 7), prevs=_PREVS) 95 | 96 | NN.add('fc', FCLayer(1024, 1000)) 97 | 98 | -------------------------------------------------------------------------------- /nn_dataflow/nns/lstm_gnmt.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | from nn_dataflow.core import Network 18 | from nn_dataflow.core import InputLayer, EltwiseLayer 19 | 20 | from nn_dataflow.nns import add_lstm_cell 21 | 22 | ''' 23 | LSTM from GNMT. 
24 | 25 | Sutskever, Vinyals, Le, Google, NIPS 2014 26 | ''' 27 | 28 | NN = Network('GNMT') 29 | 30 | NN.set_input_layer(InputLayer(1000, 1)) 31 | 32 | NL = 4 33 | 34 | # Word embedding is a simple lookup. 35 | # Exclude or ignore embedding processing. 36 | WE = NN.INPUT_LAYER_KEY 37 | 38 | # layered LSTM. 39 | X = WE 40 | for l in range(NL): 41 | cell = 'cell_l{}'.format(l) 42 | C, H = add_lstm_cell(NN, cell, 1000, X) 43 | X = H 44 | 45 | # log(p), softmax. 46 | NN.add('Wd', EltwiseLayer(1000, 1, 1), prevs=(X,)) 47 | 48 | -------------------------------------------------------------------------------- /nn_dataflow/nns/lstm_phoneme.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | from nn_dataflow.core import Network 18 | from nn_dataflow.core import InputLayer, FCLayer 19 | 20 | from nn_dataflow.nns import add_lstm_cell 21 | 22 | ''' 23 | LSTM for phoneme classification. 24 | 25 | Graves and Schmidhuber, 2005 26 | ''' 27 | 28 | NN = Network('PHONEME') 29 | 30 | NN.set_input_layer(InputLayer(26, 1)) 31 | 32 | # Input. 33 | NN.add('We', FCLayer(26, 140), prevs=(NN.INPUT_LAYER_KEY,)) 34 | 35 | # LSTM. 36 | C, H = add_lstm_cell(NN, 'cell', 140, 'We') 37 | 38 | # Output. 
39 | NN.add('Wd', FCLayer(140, 61), prevs=(H,)) 40 | 41 | -------------------------------------------------------------------------------- /nn_dataflow/nns/lstm_showtell.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | from nn_dataflow.core import Network 18 | from nn_dataflow.core import InputLayer, EltwiseLayer 19 | 20 | from nn_dataflow.nns import add_lstm_cell 21 | 22 | ''' 23 | LSTM from Show and Tell. 24 | 25 | Vinyals et al., Google, CVPR 2015 26 | ''' 27 | 28 | NN = Network('ShowTell') 29 | 30 | NN.set_input_layer(InputLayer(512, 1)) 31 | 32 | # Word embedding is a simple lookup. 33 | # Exclude or ignore embedding processing. 34 | WE = NN.INPUT_LAYER_KEY 35 | 36 | # LSTM. 37 | C, H = add_lstm_cell(NN, 'cell', 512, WE) 38 | 39 | # log(p), softmax. 
40 | NN.add('Wd', EltwiseLayer(512, 1, 1), prevs=(H,)) 41 | 42 | -------------------------------------------------------------------------------- /nn_dataflow/nns/mlp_l.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | from nn_dataflow.core import Network 18 | from nn_dataflow.core import InputLayer, FCLayer 19 | 20 | ''' 21 | MLP-L 22 | 23 | PRIME, 2016 24 | ''' 25 | 26 | NN = Network('MLP-L') 27 | 28 | NN.set_input_layer(InputLayer(784, 1)) 29 | 30 | NN.add('fc1', FCLayer(784, 1500)) 31 | NN.add('fc2', FCLayer(1500, 1000)) 32 | NN.add('fc3', FCLayer(1000, 500)) 33 | NN.add('fc4', FCLayer(500, 10)) 34 | 35 | -------------------------------------------------------------------------------- /nn_dataflow/nns/mlp_m.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 
8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | from nn_dataflow.core import Network 18 | from nn_dataflow.core import InputLayer, FCLayer 19 | 20 | ''' 21 | MLP-M 22 | 23 | PRIME, 2016 24 | ''' 25 | 26 | NN = Network('MLP-M') 27 | 28 | NN.set_input_layer(InputLayer(784, 1)) 29 | 30 | NN.add('fc1', FCLayer(784, 1000)) 31 | NN.add('fc2', FCLayer(1000, 500)) 32 | NN.add('fc3', FCLayer(500, 250)) 33 | NN.add('fc4', FCLayer(250, 10)) 34 | 35 | -------------------------------------------------------------------------------- /nn_dataflow/nns/mlp_s.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 
15 | """ 16 | 17 | from nn_dataflow.core import Network 18 | from nn_dataflow.core import InputLayer, FCLayer 19 | 20 | ''' 21 | MLP-S 22 | 23 | PRIME, 2016 24 | ''' 25 | 26 | NN = Network('MLP-S') 27 | 28 | NN.set_input_layer(InputLayer(784, 1)) 29 | 30 | NN.add('fc1', FCLayer(784, 500)) 31 | NN.add('fc2', FCLayer(500, 250)) 32 | NN.add('fc3', FCLayer(250, 10)) 33 | 34 | -------------------------------------------------------------------------------- /nn_dataflow/nns/resnet152.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | from nn_dataflow.core import Network 18 | from nn_dataflow.core import InputLayer, ConvLayer, FCLayer, \ 19 | PoolingLayer, EltwiseLayer 20 | 21 | ''' 22 | ResNet-152 23 | 24 | He, Zhang, Ren, and Sun, 2015 25 | ''' 26 | 27 | NN = Network('ResNet') 28 | 29 | NN.set_input_layer(InputLayer(3, 224)) 30 | 31 | NN.add('conv1', ConvLayer(3, 64, 112, 7, 2)) 32 | NN.add('pool1', PoolingLayer(64, 56, 3, 2)) 33 | 34 | RES_PREV = 'pool1' 35 | 36 | for i in range(3): 37 | NN.add('conv2_{}_a'.format(i), ConvLayer(64 if i == 0 else 256, 64, 56, 1)) 38 | NN.add('conv2_{}_b'.format(i), ConvLayer(64, 64, 56, 3)) 39 | NN.add('conv2_{}_c'.format(i), ConvLayer(64, 256, 56, 1)) 40 | 41 | # With residual shortcut. 
42 | if i == 0: 43 | NN.add('conv2_br', ConvLayer(64, 256, 56, 1), prevs=(RES_PREV,)) 44 | RES_PREV = 'conv2_br' 45 | NN.add('conv2_{}_res'.format(i), EltwiseLayer(256, 56, 2), 46 | prevs=(RES_PREV, 'conv2_{}_c'.format(i))) 47 | RES_PREV = 'conv2_{}_res'.format(i) 48 | 49 | for i in range(8): 50 | NN.add('conv3_{}_a'.format(i), 51 | ConvLayer(256, 128, 28, 1, 2) if i == 0 52 | else ConvLayer(512, 128, 28, 1)) 53 | NN.add('conv3_{}_b'.format(i), ConvLayer(128, 128, 28, 3)) 54 | NN.add('conv3_{}_c'.format(i), ConvLayer(128, 512, 28, 1)) 55 | 56 | # With residual shortcut. 57 | if i == 0: 58 | NN.add('conv3_br', ConvLayer(256, 512, 28, 1, 2), prevs=(RES_PREV,)) 59 | RES_PREV = 'conv3_br' 60 | NN.add('conv3_{}_res'.format(i), EltwiseLayer(512, 28, 2), 61 | prevs=(RES_PREV, 'conv3_{}_c'.format(i))) 62 | RES_PREV = 'conv3_{}_res'.format(i) 63 | 64 | for i in range(36): 65 | NN.add('conv4_{}_a'.format(i), 66 | ConvLayer(512, 256, 14, 1, 2) if i == 0 67 | else ConvLayer(1024, 256, 14, 1)) 68 | NN.add('conv4_{}_b'.format(i), ConvLayer(256, 256, 14, 3)) 69 | NN.add('conv4_{}_c'.format(i), ConvLayer(256, 1024, 14, 1)) 70 | 71 | # With residual shortcut. 72 | if i == 0: 73 | NN.add('conv4_br', ConvLayer(512, 1024, 14, 1, 2), prevs=(RES_PREV,)) 74 | RES_PREV = 'conv4_br' 75 | NN.add('conv4_{}_res'.format(i), EltwiseLayer(1024, 14, 2), 76 | prevs=(RES_PREV, 'conv4_{}_c'.format(i))) 77 | RES_PREV = 'conv4_{}_res'.format(i) 78 | 79 | for i in range(3): 80 | NN.add('conv5_{}_a'.format(i), 81 | ConvLayer(1024, 512, 7, 1, 2) if i == 0 82 | else ConvLayer(2048, 512, 7, 1)) 83 | NN.add('conv5_{}_b'.format(i), ConvLayer(512, 512, 7, 3)) 84 | NN.add('conv5_{}_c'.format(i), ConvLayer(512, 2048, 7, 1)) 85 | 86 | # With residual shortcut. 
87 | if i == 0: 88 | NN.add('conv5_br', ConvLayer(1024, 2048, 7, 1, 2), prevs=(RES_PREV,)) 89 | RES_PREV = 'conv5_br' 90 | NN.add('conv5_{}_res'.format(i), EltwiseLayer(2048, 7, 2), 91 | prevs=(RES_PREV, 'conv5_{}_c'.format(i))) 92 | RES_PREV = 'conv5_{}_res'.format(i) 93 | 94 | NN.add('pool5', PoolingLayer(2048, 1, 7)) 95 | 96 | NN.add('fc', FCLayer(2048, 1000)) 97 | 98 | -------------------------------------------------------------------------------- /nn_dataflow/nns/resnet50.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | from nn_dataflow.core import Network 18 | from nn_dataflow.core import InputLayer, ConvLayer, FCLayer, \ 19 | PoolingLayer, EltwiseLayer 20 | 21 | ''' 22 | ResNet-50 23 | 24 | He, Zhang, Ren, and Sun, 2015 25 | ''' 26 | 27 | NN = Network('ResNet') 28 | 29 | NN.set_input_layer(InputLayer(3, 224)) 30 | 31 | NN.add('conv1', ConvLayer(3, 64, 112, 7, 2)) 32 | NN.add('pool1', PoolingLayer(64, 56, 3, 2)) 33 | 34 | RES_PREV = 'pool1' 35 | 36 | for i in range(3): 37 | NN.add('conv2_{}_a'.format(i), ConvLayer(64 if i == 0 else 256, 64, 56, 1)) 38 | NN.add('conv2_{}_b'.format(i), ConvLayer(64, 64, 56, 3)) 39 | NN.add('conv2_{}_c'.format(i), ConvLayer(64, 256, 56, 1)) 40 | 41 | # With residual shortcut. 
42 | if i == 0: 43 | NN.add('conv2_br', ConvLayer(64, 256, 56, 1), prevs=(RES_PREV,)) 44 | RES_PREV = 'conv2_br' 45 | NN.add('conv2_{}_res'.format(i), EltwiseLayer(256, 56, 2), 46 | prevs=(RES_PREV, 'conv2_{}_c'.format(i))) 47 | RES_PREV = 'conv2_{}_res'.format(i) 48 | 49 | for i in range(4): 50 | NN.add('conv3_{}_a'.format(i), 51 | ConvLayer(256, 128, 28, 1, 2) if i == 0 52 | else ConvLayer(512, 128, 28, 1)) 53 | NN.add('conv3_{}_b'.format(i), ConvLayer(128, 128, 28, 3)) 54 | NN.add('conv3_{}_c'.format(i), ConvLayer(128, 512, 28, 1)) 55 | 56 | # With residual shortcut. 57 | if i == 0: 58 | NN.add('conv3_br', ConvLayer(256, 512, 28, 1, 2), prevs=(RES_PREV,)) 59 | RES_PREV = 'conv3_br' 60 | NN.add('conv3_{}_res'.format(i), EltwiseLayer(512, 28, 2), 61 | prevs=(RES_PREV, 'conv3_{}_c'.format(i))) 62 | RES_PREV = 'conv3_{}_res'.format(i) 63 | 64 | for i in range(6): 65 | NN.add('conv4_{}_a'.format(i), 66 | ConvLayer(512, 256, 14, 1, 2) if i == 0 67 | else ConvLayer(1024, 256, 14, 1)) 68 | NN.add('conv4_{}_b'.format(i), ConvLayer(256, 256, 14, 3)) 69 | NN.add('conv4_{}_c'.format(i), ConvLayer(256, 1024, 14, 1)) 70 | 71 | # With residual shortcut. 72 | if i == 0: 73 | NN.add('conv4_br', ConvLayer(512, 1024, 14, 1, 2), prevs=(RES_PREV,)) 74 | RES_PREV = 'conv4_br' 75 | NN.add('conv4_{}_res'.format(i), EltwiseLayer(1024, 14, 2), 76 | prevs=(RES_PREV, 'conv4_{}_c'.format(i))) 77 | RES_PREV = 'conv4_{}_res'.format(i) 78 | 79 | for i in range(3): 80 | NN.add('conv5_{}_a'.format(i), 81 | ConvLayer(1024, 512, 7, 1, 2) if i == 0 82 | else ConvLayer(2048, 512, 7, 1)) 83 | NN.add('conv5_{}_b'.format(i), ConvLayer(512, 512, 7, 3)) 84 | NN.add('conv5_{}_c'.format(i), ConvLayer(512, 2048, 7, 1)) 85 | 86 | # With residual shortcut. 
87 | if i == 0: 88 | NN.add('conv5_br', ConvLayer(1024, 2048, 7, 1, 2), prevs=(RES_PREV,)) 89 | RES_PREV = 'conv5_br' 90 | NN.add('conv5_{}_res'.format(i), EltwiseLayer(2048, 7, 2), 91 | prevs=(RES_PREV, 'conv5_{}_c'.format(i))) 92 | RES_PREV = 'conv5_{}_res'.format(i) 93 | 94 | NN.add('pool5', PoolingLayer(2048, 1, 7)) 95 | 96 | NN.add('fc', FCLayer(2048, 1000)) 97 | -------------------------------------------------------------------------------- /nn_dataflow/nns/vgg19_net.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 
15 | """ 16 | 17 | from nn_dataflow.core import Network 18 | from nn_dataflow.core import InputLayer, ConvLayer, FCLayer, PoolingLayer 19 | 20 | ''' 21 | VGGNet-19 22 | 23 | Simonyan and Zisserman, 2014 24 | ''' 25 | 26 | NN = Network('VGG19') 27 | 28 | NN.set_input_layer(InputLayer(3, 224)) 29 | 30 | NN.add('conv1', ConvLayer(3, 64, 224, 3)) 31 | NN.add('conv2', ConvLayer(64, 64, 224, 3)) 32 | NN.add('pool1', PoolingLayer(64, 112, 2)) 33 | 34 | NN.add('conv3', ConvLayer(64, 128, 112, 3)) 35 | NN.add('conv4', ConvLayer(128, 128, 112, 3)) 36 | NN.add('pool2', PoolingLayer(128, 56, 2)) 37 | 38 | NN.add('conv5', ConvLayer(128, 256, 56, 3)) 39 | NN.add('conv6', ConvLayer(256, 256, 56, 3)) 40 | NN.add('conv7', ConvLayer(256, 256, 56, 3)) 41 | NN.add('conv8', ConvLayer(256, 256, 56, 3)) 42 | NN.add('pool3', PoolingLayer(256, 28, 2)) 43 | 44 | NN.add('conv9', ConvLayer(256, 512, 28, 3)) 45 | NN.add('conv10', ConvLayer(512, 512, 28, 3)) 46 | NN.add('conv11', ConvLayer(512, 512, 28, 3)) 47 | NN.add('conv12', ConvLayer(512, 512, 28, 3)) 48 | NN.add('pool4', PoolingLayer(512, 14, 2)) 49 | 50 | NN.add('conv13', ConvLayer(512, 512, 14, 3)) 51 | NN.add('conv14', ConvLayer(512, 512, 14, 3)) 52 | NN.add('conv15', ConvLayer(512, 512, 14, 3)) 53 | NN.add('conv16', ConvLayer(512, 512, 14, 3)) 54 | NN.add('pool5', PoolingLayer(512, 7, 2)) 55 | 56 | NN.add('fc1', FCLayer(512, 4096, 7)) 57 | NN.add('fc2', FCLayer(4096, 4096)) 58 | NN.add('fc3', FCLayer(4096, 1000)) 59 | 60 | -------------------------------------------------------------------------------- /nn_dataflow/nns/vgg_net.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 
8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | from nn_dataflow.core import Network 18 | from nn_dataflow.core import InputLayer, ConvLayer, FCLayer, PoolingLayer 19 | 20 | ''' 21 | VGGNet-16 22 | 23 | Simonyan and Zisserman, 2014 24 | ''' 25 | 26 | NN = Network('VGG') 27 | 28 | NN.set_input_layer(InputLayer(3, 224)) 29 | 30 | NN.add('conv1', ConvLayer(3, 64, 224, 3)) 31 | NN.add('conv2', ConvLayer(64, 64, 224, 3)) 32 | NN.add('pool1', PoolingLayer(64, 112, 2)) 33 | 34 | NN.add('conv3', ConvLayer(64, 128, 112, 3)) 35 | NN.add('conv4', ConvLayer(128, 128, 112, 3)) 36 | NN.add('pool2', PoolingLayer(128, 56, 2)) 37 | 38 | NN.add('conv5', ConvLayer(128, 256, 56, 3)) 39 | NN.add('conv6', ConvLayer(256, 256, 56, 3)) 40 | NN.add('conv7', ConvLayer(256, 256, 56, 3)) 41 | NN.add('pool3', PoolingLayer(256, 28, 2)) 42 | 43 | NN.add('conv8', ConvLayer(256, 512, 28, 3)) 44 | NN.add('conv9', ConvLayer(512, 512, 28, 3)) 45 | NN.add('conv10', ConvLayer(512, 512, 28, 3)) 46 | NN.add('pool4', PoolingLayer(512, 14, 2)) 47 | 48 | NN.add('conv11', ConvLayer(512, 512, 14, 3)) 49 | NN.add('conv12', ConvLayer(512, 512, 14, 3)) 50 | NN.add('conv13', ConvLayer(512, 512, 14, 3)) 51 | NN.add('pool5', PoolingLayer(512, 7, 2)) 52 | 53 | NN.add('fc1', FCLayer(512, 4096, 7)) 54 | NN.add('fc2', FCLayer(4096, 4096)) 55 | NN.add('fc3', FCLayer(4096, 1000)) 56 | 57 | -------------------------------------------------------------------------------- /nn_dataflow/nns/zfnet.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This 
program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | from nn_dataflow.core import Network 18 | from nn_dataflow.core import InputLayer, ConvLayer, FCLayer, PoolingLayer 19 | 20 | ''' 21 | ZFNet 22 | 23 | Zeiler and Fergus, 2013 24 | ''' 25 | 26 | NN = Network('ZFNet') 27 | 28 | NN.set_input_layer(InputLayer(3, 224)) 29 | 30 | NN.add('conv1', ConvLayer(3, 96, 110, 7, 2)) 31 | NN.add('pool1', PoolingLayer(96, 55, 3, strd=2)) 32 | # Norm layer is ignored. 33 | NN.add('conv2', ConvLayer(96, 256, 26, 5, 2)) 34 | NN.add('pool2', PoolingLayer(256, 13, 3, strd=2)) 35 | # Norm layer is ignored. 36 | NN.add('conv3', ConvLayer(256, 512, 13, 3)) 37 | NN.add('conv4', ConvLayer(512, 1024, 13, 3)) 38 | NN.add('conv5', ConvLayer(1024, 512, 13, 3)) 39 | NN.add('pool3', PoolingLayer(512, 6, 3, strd=2)) 40 | NN.add('fc1', FCLayer(512, 4096, 6)) 41 | NN.add('fc2', FCLayer(4096, 4096)) 42 | NN.add('fc3', FCLayer(4096, 1000)) 43 | 44 | -------------------------------------------------------------------------------- /nn_dataflow/tests/__init__.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 
8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | -------------------------------------------------------------------------------- /nn_dataflow/tests/dataflow_test/__init__.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | -------------------------------------------------------------------------------- /nn_dataflow/tests/loop_blocking_test/__init__.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 
12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | from .test_loop_blocking_fixture import TestLoopBlockingFixture 18 | 19 | -------------------------------------------------------------------------------- /nn_dataflow/tests/loop_blocking_test/test_loop_blocking.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | from nn_dataflow.core import loop_blocking 18 | from nn_dataflow.core import DataCategoryEnum as de 19 | 20 | from . import TestLoopBlockingFixture 21 | 22 | class TestLoopBlocking(TestLoopBlockingFixture): 23 | ''' Tests for loop_blocking module. ''' 24 | 25 | def test_skip_not_reg(self): 26 | ''' skip non-regularized. ''' 27 | 28 | for sch in self._gen_loopblocking_all(): 29 | 30 | skip = loop_blocking.skip_conv(*sch) 31 | reg_sch = self._regularized_scheme(*sch) 32 | 33 | if not skip: 34 | self.assertEqual(reg_sch, sch, 35 | 'test_skip_not_reg: non-skipped {} should be ' 36 | 'regularized to {}' 37 | .format(sch, reg_sch)) 38 | continue 39 | 40 | lbs = self._lbs(*sch, rsrckey='LG') 41 | reg_lbs = self._lbs(*reg_sch, rsrckey='LG') 42 | 43 | self.assertFalse(loop_blocking.skip_conv(*reg_sch), 44 | 'test_skip_not_reg: regularized {} is skipped.' 
45 | .format(reg_sch)) 46 | self.assertAlmostEqual(lbs.get_access_cost(self.cost), 47 | reg_lbs.get_access_cost(self.cost), 48 | msg=('test_skip_not_reg: cost mismatch. ' 49 | 'orig {}, reg {}.' 50 | .format(sch, reg_sch))) 51 | self.assertListEqual(lbs.get_access(), reg_lbs.get_access(), 52 | msg=('test_skip_not_reg: access mismatch. ' 53 | 'orig {}, reg {}.' 54 | .format(sch, reg_sch))) 55 | size = self._get_lbs_size(lbs) 56 | reg_size = self._get_lbs_size(reg_lbs) 57 | self.assertTrue(all(all(ss1 >= ss2 for ss1, ss2 in zip(s1, s2)) 58 | for s1, s2 in zip(size, reg_size)), 59 | 'test_skip_not_reg: reg size is larger than eqv.\n' 60 | 'org {} has size {}\nreg {} has size {}' 61 | .format(sch, size, reg_sch, reg_size)) 62 | 63 | def test_skip_ratio(self): 64 | ''' skip ratio. ''' 65 | 66 | cnts = [0, 0] 67 | 68 | for bl_ts, bl_ords in self._gen_loopblocking_all(): 69 | 70 | skip = loop_blocking.skip_conv(bl_ts, bl_ords) 71 | cnts[skip] += 1 72 | 73 | skip_ratio = 1. * cnts[True] / sum(cnts) 74 | self.assertGreater(skip_ratio, 0.95, 75 | 'test_skip_ratio: skip ratio {} too low.' 76 | .format(skip_ratio)) 77 | 78 | def test_gen_loopblocking_all(self): 79 | ''' gen_loopblocking cover all. ''' 80 | 81 | exp_cnt = 0 82 | for bl_ts, bl_ords in self._gen_loopblocking_all(): 83 | exp_cnt += 1 if not loop_blocking.skip_conv(bl_ts, bl_ords) else 0 84 | 85 | cnt = 0 86 | for _ in self._gen_loopblocking(rsrckey='LG'): 87 | cnt += 1 88 | 89 | self.assertEqual(cnt, exp_cnt) 90 | 91 | def test_gen_loopblocking_mp(self): 92 | ''' gen_loopblocking multiprocessing. ''' 93 | 94 | cnt1 = 0 95 | for _ in self._gen_loopblocking(rsrckey='LG'): 96 | cnt1 += 1 97 | 98 | cnt8 = 0 99 | for _ in self._gen_loopblocking(rsrckey='LG', optkey='MP'): 100 | cnt8 += 1 101 | 102 | self.assertEqual(cnt1, cnt8) 103 | 104 | def test_gen_loopblocking_no_eqv(self): 105 | ''' gen_loopblocking no equivalent. 
''' 106 | 107 | acc_dict = {} 108 | 109 | for lbs in self._gen_loopblocking(rsrckey='LG', skip_invalid=True): 110 | 111 | # Make the keys hashable (list -> tuple). 112 | size = tuple(tuple(ss for ss in s) for s in self._get_lbs_size(lbs)) 113 | access = tuple(tuple(int(aa) for aa in a) for a in lbs.access) 114 | keys = (size, access) 115 | 116 | self.assertNotIn(keys, acc_dict, 117 | 'test_gen_loopblocking_no_eqv: found equivalents. ' 118 | 'keys: access {} size {}' 119 | .format(access, size)) 120 | acc_dict[keys] = lbs 121 | 122 | def test_gen_loopblocking_ntops(self): 123 | ''' gen_loopblocking ntops. ''' 124 | 125 | tops = list(self._gen_loopblocking(rsrckey='LG', optkey='NTOPS')) 126 | 127 | cost_prev = -float('inf') 128 | 129 | for lbs in self._gen_loopblocking(rsrckey='LG', skip_invalid=True): 130 | 131 | cost_curr = lbs.get_access_cost(self.cost) 132 | self.assertLessEqual(cost_prev, cost_curr) 133 | cost_prev = cost_curr 134 | 135 | if tops: 136 | top_lbs = tops.pop(0) 137 | self.assertAlmostEqual(cost_curr, 138 | top_lbs.get_access_cost(self.cost)) 139 | 140 | def test_gen_loopblocking_byp_sol(self): 141 | ''' gen_loopblocking using bypass solvers. ''' 142 | 143 | cnt = 0 144 | 145 | for lbs in self._gen_loopblocking(optkey='BYPSOL'): 146 | 147 | self.assertTrue(lbs.is_valid()) 148 | 149 | cnt += 1 150 | 151 | self.assertLessEqual(cnt, 8) 152 | 153 | def test_gen_loopblocking_cstr(self): 154 | ''' gen_loopblocking with constraint. ''' 155 | 156 | for lbs in self._gen_loopblocking(rsrckey='LG', cstr=self.cstr): 157 | 158 | self.assertTrue(self.cstr.is_valid_top_bl(lbs.bl_ts[0], 159 | lbs.bl_ords[0])) 160 | 161 | def test_gen_loopblocking_cstr_sol(self): 162 | ''' gen_loopblocking using bypass solvers with constraint. 
''' 163 | 164 | cnt1 = len(list(self._gen_loopblocking(optkey='BYPSOL'))) 165 | 166 | lbs_list = list(self._gen_loopblocking(optkey='BYPSOL', cstr=self.cstr)) 167 | self.assertTrue(all( 168 | self.cstr.is_valid_top_bl(lbs.bl_ts[0], lbs.bl_ords[0]) 169 | for lbs in lbs_list)) 170 | cnt2 = len(lbs_list) 171 | 172 | self.assertLessEqual(cnt2, cnt1) 173 | 174 | def _gen_loopblocking(self, wlkey='BASE', rsrckey='BASE', 175 | optkey='BASE', cstr=None, skip_invalid=False): 176 | ''' gen_loopblocking trampoline. ''' 177 | if cstr is None: 178 | cstr = self.none_cstr 179 | for lbs in loop_blocking.gen_loopblocking( 180 | self.nld[wlkey], self.resource[rsrckey], self.part, cstr, 181 | self.cost, self.options[optkey]): 182 | if not skip_invalid or lbs.is_valid(): 183 | yield lbs 184 | 185 | @staticmethod 186 | def _get_lbs_size(lbs): 187 | ''' Get the size info. ''' 188 | assert lbs.is_valid() 189 | return [[lbs.data_size(bl, dce) for dce in range(de.NUM)] 190 | for bl in range(lbs.BL.NUM)] 191 | 192 | -------------------------------------------------------------------------------- /nn_dataflow/tests/loop_blocking_test/test_loop_blocking_solver.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 
15 | """ 16 | 17 | from nn_dataflow.core import DataCategoryEnum as de 18 | from nn_dataflow.core import loop_blocking_solver 19 | from nn_dataflow.core import MemHierEnum as me 20 | from nn_dataflow.core import Option 21 | 22 | from . import TestLoopBlockingFixture 23 | 24 | class TestLoopBlockingSolver(TestLoopBlockingFixture): 25 | ''' Tests for loop_blocking_solver module. ''' 26 | 27 | def setUp(self): 28 | 29 | super(TestLoopBlockingSolver, self).setUp() 30 | 31 | # Bypass solver for each reside data category. 32 | self.optkeys_bypsol = ['BYPSOL_{}'.format(dce) for dce in range(de.NUM)] 33 | 34 | for reside_dce in range(de.NUM): 35 | opt_dict = self.options['BYPSOL']._asdict() 36 | byp = [True] * de.NUM 37 | byp[reside_dce] = False 38 | opt_dict['sw_gbuf_bypass'] = tuple(byp) 39 | 40 | self.options[self.optkeys_bypsol[reside_dce]] = Option(**opt_dict) 41 | 42 | def test_reside_sol(self): 43 | ''' Data reside solution. ''' 44 | 45 | for reside_dce in range(de.NUM): 46 | 47 | optkey = self.optkeys_bypsol[reside_dce] 48 | 49 | for bl_ts, bl_ords \ 50 | in loop_blocking_solver.gen_loopblocking_gbuf_reside( 51 | self.nld['BASE'], self.resource['BASE'], 52 | self.options[optkey]): 53 | 54 | lbs = self._lbs(bl_ts, bl_ords, optkey=optkey) 55 | 56 | self.assertTrue(lbs.stored_in_gbuf[reside_dce]) 57 | self.assertFalse(any(lbs.stored_in_gbuf[dce] 58 | for dce in range(de.NUM) 59 | if dce != reside_dce)) 60 | 61 | def test_reside_sol_opt(self, rsrckey='BASE', wlkey='BASE'): 62 | ''' Data reside solution optimal. ''' 63 | 64 | def _cost(lbs): 65 | access = lbs.get_access() 66 | return [int(sum(access[me.DRAM])), int(sum(access[me.GBUF]))] 67 | 68 | min_sch_dict = {} 69 | sol_sch_dict = {} 70 | 71 | # Among all schemes that bypass all non-reside data categories. 
72 | for bl_ts, bl_ords in self._gen_loopblocking_all(wlkey=wlkey): 73 | 74 | lbs = self._lbs(bl_ts, bl_ords, wlkey=wlkey, rsrckey=rsrckey, 75 | optkey='BYP') 76 | if not lbs.is_valid(): 77 | continue 78 | 79 | all_reside_dce = [dce for dce in range(de.NUM) 80 | if lbs.stored_in_gbuf[dce]] 81 | # Only look at the cases with one or none reside data category. 82 | if not all_reside_dce: 83 | min_sch = min_sch_dict.get(None, None) 84 | if not min_sch or _cost(lbs) < min_sch: 85 | min_sch_dict[None] = _cost(lbs) 86 | elif len(all_reside_dce) == 1: 87 | dce, = all_reside_dce 88 | min_sch = min_sch_dict.get(dce, None) 89 | if not min_sch or _cost(lbs) < min_sch: 90 | min_sch_dict[dce] = _cost(lbs) 91 | 92 | # Solve each reside data category. 93 | for reside_dce in range(de.NUM): 94 | 95 | optkey = self.optkeys_bypsol[reside_dce] 96 | 97 | for bl_ts, bl_ords \ 98 | in loop_blocking_solver.gen_loopblocking_gbuf_reside( 99 | self.nld[wlkey], self.resource[rsrckey], 100 | self.options[optkey]): 101 | 102 | lbs = self._lbs(bl_ts, bl_ords, wlkey=wlkey, rsrckey=rsrckey, 103 | optkey='BYP') 104 | self.assertTrue(lbs.is_valid()) 105 | self.assertFalse(any(lbs.stored_in_gbuf[dce] 106 | for dce in range(de.NUM) 107 | if dce != reside_dce)) 108 | 109 | true_reside_dce = reside_dce \ 110 | if lbs.stored_in_gbuf[reside_dce] else None 111 | 112 | sol_sch = sol_sch_dict.get(true_reside_dce, None) 113 | if not sol_sch or _cost(lbs) < sol_sch: 114 | sol_sch_dict[true_reside_dce] = _cost(lbs) 115 | 116 | self.assertTrue(sol_sch_dict.items() <= min_sch_dict.items(), 117 | 'test_reside_sol_opt: wlkey {} rsrckey {}: ' 118 | 'solutions do not cover all optimal ones. ' 119 | 'sol {} opt {}.' 120 | .format(wlkey, rsrckey, sol_sch_dict, min_sch_dict)) 121 | 122 | self.assertListEqual( 123 | min(sol_sch_dict.values()), min(min_sch_dict.values()), 124 | 'test_reside_sol_opt: wlkey {} rsrckey {}: ' 125 | 'solutions do not cover the optimal one. sol {} opt {}.' 
126 | .format(wlkey, rsrckey, sol_sch_dict, min_sch_dict)) 127 | 128 | def test_reside_sol_opt_resource(self): 129 | ''' Data reside solution optimal with different resources. ''' 130 | 131 | for rsrckey in ['LG', 'SM']: 132 | 133 | self.test_reside_sol_opt(rsrckey=rsrckey) 134 | 135 | def test_reside_sol_opt_pool(self): 136 | ''' Data reside solution optimal with PoolingLayer. ''' 137 | 138 | with self.assertRaisesRegex(ValueError, 'loop_blocking_solver: .*'): 139 | self.test_reside_sol_opt(wlkey='POOL') 140 | 141 | def test_reside_sol_opt_zero(self): 142 | ''' Data reside solution optimal with zero size. ''' 143 | 144 | for wlkey in ['ZERO_FIL', 'ZERO_IFM']: 145 | 146 | self.test_reside_sol_opt(wlkey=wlkey) 147 | 148 | def test_reside_sol_cnt(self): 149 | ''' Data reside solution count. ''' 150 | 151 | all_set = set(loop_blocking_solver.gen_loopblocking_gbuf_reside( 152 | self.nld['BASE'], self.resource['BASE'], self.options['BYPSOL'])) 153 | 154 | union_set = set() 155 | reside_set_list = [] 156 | 157 | for reside_dce in range(de.NUM): 158 | 159 | optkey = self.optkeys_bypsol[reside_dce] 160 | 161 | s = set(loop_blocking_solver.gen_loopblocking_gbuf_reside( 162 | self.nld['BASE'], self.resource['BASE'], self.options[optkey])) 163 | 164 | reside_set_list.append(s) 165 | union_set |= s 166 | 167 | self.assertSetEqual(all_set, union_set) 168 | self.assertEqual(len(union_set), sum(len(s) for s in reside_set_list)) 169 | 170 | -------------------------------------------------------------------------------- /nn_dataflow/tests/map_strategy_test/__init__.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 
8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | from .test_map_strategy_fixture import TestMapStrategyFixture 18 | 19 | -------------------------------------------------------------------------------- /nn_dataflow/tests/map_strategy_test/test_map_strategy.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | from nn_dataflow.core import MapStrategy 18 | 19 | from . import TestMapStrategyFixture 20 | 21 | class TestMapStrategy(TestMapStrategyFixture): 22 | ''' Tests for basic MapStrategy class. ''' 23 | 24 | def setUp(self): 25 | 26 | super(TestMapStrategy, self).setUp() 27 | 28 | self.layer = self.convlayers['conv1'] 29 | self.dim_array = self.resource['BASE'].dim_array 30 | 31 | def test_args(self): 32 | ''' Constructor arguments. 
''' 33 | ms = MapStrategy(self.layer, 4, 1, self.dim_array) 34 | 35 | self.assertEqual(ms.layer, self.layer) 36 | self.assertEqual(ms.batch_size, 4) 37 | self.assertEqual(ms.dim_array, self.dim_array) 38 | 39 | def test_inv_args(self): 40 | ''' Constructor arguments invalid. ''' 41 | with self.assertRaisesRegex(TypeError, 'MapStrategy: .*layer.*'): 42 | _ = MapStrategy(None, 4, 1, self.dim_array) 43 | 44 | with self.assertRaisesRegex(ValueError, 'MapStrategy: .*occupancy.*'): 45 | _ = MapStrategy(self.layer, 4, -.1, self.dim_array) 46 | with self.assertRaisesRegex(ValueError, 'MapStrategy: .*occupancy.*'): 47 | _ = MapStrategy(self.layer, 4, 1.1, self.dim_array) 48 | 49 | with self.assertRaisesRegex(TypeError, 'MapStrategy: .*dim_array.*'): 50 | _ = MapStrategy(self.layer, 4, 1, None) 51 | 52 | def test_utilization(self): 53 | ''' Accessor utilization. ''' 54 | ms = MapStrategy(self.layer, 4, 1, self.dim_array) 55 | 56 | with self.assertRaisesRegex(NotImplementedError, 'MapStrategy: .*'): 57 | _ = ms.utilization() 58 | 59 | def test_gen_nested_loop_desc(self): 60 | ''' Generator gen_nested_loop_desc. ''' 61 | ms = MapStrategy(self.layer, 4, 1, self.dim_array) 62 | 63 | with self.assertRaisesRegex(NotImplementedError, 'MapStrategy: .*'): 64 | _ = ms.gen_nested_loop_desc() 65 | 66 | -------------------------------------------------------------------------------- /nn_dataflow/tests/map_strategy_test/test_map_strategy_fixture.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 

This program is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE. See the BSD-3 License for more details.

You should have received a copy of the Modified BSD-3 License along with this
program. If not, see .
"""

import unittest
from collections import OrderedDict

from nn_dataflow.core import ConvLayer, FCLayer, LocalRegionLayer, PoolingLayer
from nn_dataflow.core import NodeRegion
from nn_dataflow.core import PhyDim2
from nn_dataflow.core import Resource

class TestMapStrategyFixture(unittest.TestCase):
    ''' Base fixture class for MapStrategy tests. '''

    def setUp(self):

        # AlexNet.
        # Conv/FC layer shapes follow the AlexNet topology -- TODO confirm
        # the (nifm, nofm, fmap, filter, stride) argument meanings against
        # the ConvLayer definition.
        self.convlayers = OrderedDict()
        self.convlayers['conv1'] = ConvLayer(3, 96, 55, 11, 4)
        self.convlayers['conv2'] = ConvLayer(48, 256, 27, 5)
        self.convlayers['conv3'] = ConvLayer(256, 384, 13, 3)
        self.convlayers['conv4'] = ConvLayer(192, 384, 13, 3)
        self.convlayers['conv5'] = ConvLayer(192, 256, 13, 3)
        self.fclayers = {}
        self.fclayers['fc1'] = FCLayer(256, 4096, 6)
        self.fclayers['fc2'] = FCLayer(4096, 4096)
        self.fclayers['fc3'] = FCLayer(4096, 1000)

        # LocalRegionLayer.
        self.lrlayers = {}
        self.lrlayers['pool1'] = PoolingLayer(64, 7, 2)
        self.lrlayers['pool2'] = PoolingLayer(29, 13, 3)
        self.lrlayers['pool3'] = PoolingLayer(32, 7, 2, strd=3)
        self.lrlayers['lr1'] = LocalRegionLayer(32, 7, nreg=5, sreg=1)
        self.lrlayers['lr2'] = LocalRegionLayer(32, 7, nreg=5, sreg=1, strd=2)

        # Fake layers.
        self.fake_layers = {}
        # With irregular nifm/nofm.
        self.fake_layers['IRR'] = ConvLayer(255, 383, 13, 3)
        # With small numbers of fmaps.
        self.fake_layers['SM'] = ConvLayer(5, 3, 13, 3)
        # With large FIL height.
        self.fake_layers['LGFIL'] = ConvLayer(64, 64, 13, 22)

        # Resource.
        # Single-node system: one 1x1 PROC region; the same 1x1 DRAM region
        # serves as both source and destination data regions.
        self.resource = {}
        proc_region = NodeRegion(origin=PhyDim2(0, 0), dim=PhyDim2(1, 1),
                                 type=NodeRegion.PROC)
        data_region = NodeRegion(origin=PhyDim2(0, 0), dim=PhyDim2(1, 1),
                                 type=NodeRegion.DRAM)
        # Eyeriss, ISSCC'16, JSSC'17.
        # Infinite bus width and DRAM bandwidth so tests are not
        # bandwidth-bound.
        self.resource['BASE'] = Resource(
            proc_region=proc_region, dram_region=data_region,
            src_data_region=data_region, dst_data_region=data_region,
            dim_array=PhyDim2(12, 14), size_gbuf=108*1024, size_regf=520,
            array_bus_width=float('inf'), dram_bandwidth=float('inf'),
            no_time_mux=False)


--------------------------------------------------------------------------------
/nn_dataflow/tests/nns_test/__init__.py:
--------------------------------------------------------------------------------
""" $lic$
Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of
Stanford University

This program is free software: you can redistribute it and/or modify it under
the terms of the Modified BSD-3 License as published by the Open Source
Initiative.

This program is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE. See the BSD-3 License for more details.

You should have received a copy of the Modified BSD-3 License along with this
program. If not, see .
15 | """ 16 | 17 | -------------------------------------------------------------------------------- /nn_dataflow/tests/nns_test/test_nns.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | import unittest 18 | 19 | from nn_dataflow.core import Network 20 | from nn_dataflow.core import InputLayer 21 | 22 | import nn_dataflow.nns as nns 23 | 24 | class TestNNs(unittest.TestCase): 25 | ''' Tests for NN definitions. ''' 26 | 27 | def test_all_networks(self): 28 | ''' Get all_networks. ''' 29 | self.assertIn('alex_net', nns.all_networks()) 30 | self.assertIn('vgg_net', nns.all_networks()) 31 | self.assertGreater(len(nns.all_networks()), 5) 32 | 33 | def test_import_network(self): 34 | ''' Get import_network. ''' 35 | for name in nns.all_networks(): 36 | network = nns.import_network(name) 37 | self.assertIsInstance(network, Network) 38 | 39 | def test_import_network_invalid(self): 40 | ''' Get import_network invalid. ''' 41 | with self.assertRaisesRegex(ImportError, 'nns: .*defined.*'): 42 | _ = nns.import_network('aaa') 43 | 44 | def test_add_lstm_cell(self): 45 | ''' Add LSTM cell. 
''' 46 | net = Network('LSTM') 47 | net.set_input_layer(InputLayer(512, 1)) 48 | c, h = nns.add_lstm_cell(net, 'cell0', 512, 49 | net.INPUT_LAYER_KEY, net.INPUT_LAYER_KEY, 50 | net.INPUT_LAYER_KEY) 51 | c, h = nns.add_lstm_cell(net, 'cell1', 512, 52 | net.INPUT_LAYER_KEY, c, h) 53 | c, h = nns.add_lstm_cell(net, 'cell2', 512, 54 | net.INPUT_LAYER_KEY, c, h) 55 | num_weights = 0 56 | for layer in net: 57 | try: 58 | num_weights += net[layer].total_filter_size() 59 | except AttributeError: 60 | pass 61 | self.assertEqual(num_weights, 512 * 512 * 2 * 4 * 3) 62 | 63 | def test_add_lstm_cell_invalid_type(self): 64 | ''' Add LSTM cell with invalid type. ''' 65 | with self.assertRaisesRegex(TypeError, 'add_lstm_cell: .*network.*'): 66 | _ = nns.add_lstm_cell(InputLayer(512, 1), 'cell0', 512, 67 | None, None, None) 68 | 69 | def test_add_lstm_cell_not_in(self): 70 | ''' Add LSTM cell input not in. ''' 71 | net = Network('LSTM') 72 | net.set_input_layer(InputLayer(512, 1)) 73 | with self.assertRaisesRegex(ValueError, 'add_lstm_cell: .*in.*'): 74 | _ = nns.add_lstm_cell(net, 'cell0', 512, 75 | 'a', net.INPUT_LAYER_KEY, 76 | net.INPUT_LAYER_KEY) 77 | 78 | net = Network('LSTM') 79 | net.set_input_layer(InputLayer(512, 1)) 80 | with self.assertRaisesRegex(ValueError, 'add_lstm_cell: .*in.*'): 81 | _ = nns.add_lstm_cell(net, 'cell0', 512, 82 | net.INPUT_LAYER_KEY, 'a', 83 | net.INPUT_LAYER_KEY) 84 | 85 | net = Network('LSTM') 86 | net.set_input_layer(InputLayer(512, 1)) 87 | with self.assertRaisesRegex(ValueError, 'add_lstm_cell: .*in.*'): 88 | _ = nns.add_lstm_cell(net, 'cell0', 512, 89 | net.INPUT_LAYER_KEY, net.INPUT_LAYER_KEY, 90 | 'a') 91 | 92 | -------------------------------------------------------------------------------- /nn_dataflow/tests/partition_test/__init__.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | 
This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | from .test_partition_fixture import TestPartitionFixture 18 | 19 | -------------------------------------------------------------------------------- /nn_dataflow/tests/partition_test/test_partition_fixture.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | import itertools 18 | import unittest 19 | 20 | from nn_dataflow.core import partition 21 | from nn_dataflow.core import ConvLayer, FCLayer, LocalRegionLayer, PoolingLayer 22 | from nn_dataflow.core import Option 23 | from nn_dataflow.core import PartitionScheme 24 | from nn_dataflow.core import ParallelEnum as pe 25 | from nn_dataflow.core import PhyDim2 26 | from nn_dataflow import util 27 | 28 | class TestPartitionFixture(unittest.TestCase): 29 | ''' Base fixture class for Partition tests. 
''' 30 | 31 | def setUp(self): 32 | 33 | self.layers = {} 34 | self.layers['BASE'] = ConvLayer(64, 64, 28, 3) 35 | self.layers['FC'] = FCLayer(4096, 1000, 6) 36 | self.layers['POOL'] = PoolingLayer(32, 7, 3, strd=2) 37 | self.layers['LR'] = LocalRegionLayer(32, 7, nreg=5, sreg=1) 38 | # With irregular nifm/nofm. 39 | self.layers['IRR'] = ConvLayer(255, 383, 13, 3) 40 | # With small numbers of fmaps. 41 | self.layers['SM'] = ConvLayer(5, 3, 13, 3) 42 | # Super small networks. No partitioning schemes. 43 | self.layers['SSM1'] = ConvLayer(1, 1, 2, 3) 44 | self.layers['SSM2'] = FCLayer(2, 2) 45 | self.layers['SSM3'] = PoolingLayer(1, 2, 2) 46 | 47 | self.batch_size = 8 48 | 49 | self.dim_nodes = {} 50 | self.dim_nodes['BASE'] = PhyDim2(4, 4) 51 | self.dim_nodes['LG'] = PhyDim2(10, 10) 52 | self.dim_nodes['PRIME'] = PhyDim2(3, 3) 53 | 54 | self.options = {} 55 | # Irrelevant options. 56 | optdict = {'ntops': 10000} 57 | self.options['BASE'] = Option(partition_hybrid=True, 58 | partition_batch=True, 59 | partition_ifmaps=True, 60 | **optdict) 61 | self.options['NOBATP'] = Option(partition_hybrid=True, 62 | partition_batch=False, 63 | partition_ifmaps=True, 64 | **optdict) 65 | self.options['NOINPP'] = Option(partition_hybrid=True, 66 | partition_batch=True, 67 | partition_ifmaps=False, 68 | **optdict) 69 | self.options['NOHYB'] = Option(partition_hybrid=False, 70 | partition_batch=True, 71 | partition_ifmaps=False, 72 | **optdict) 73 | self.options['ACCFWD'] = Option(partition_hybrid=True, 74 | partition_batch=True, 75 | partition_ifmaps=True, 76 | hw_access_forwarding=True, 77 | **optdict) 78 | self.options['BUFSHR'] = Option(partition_hybrid=True, 79 | partition_batch=True, 80 | partition_ifmaps=True, 81 | hw_gbuf_sharing=True, 82 | **optdict) 83 | 84 | def _gen_partition(self, wlkey='BASE', dnkey='BASE', optkey='BASE', 85 | guaranteed=False): 86 | ''' Generate PartitionScheme. 
''' 87 | for part in partition.gen_partition(self.layers[wlkey], 88 | self.batch_size, 89 | self.dim_nodes[dnkey], 90 | self.options[optkey], 91 | guaranteed=guaranteed): 92 | yield part 93 | 94 | def _gen_partition_full(self, wlkey='BASE', dnkey='BASE'): 95 | ''' Generate all PartitionScheme regardless of equivalence. ''' 96 | 97 | layer = self.layers[wlkey] 98 | dim_nodes = self.dim_nodes[dnkey] 99 | 100 | for ph, pw in itertools.product(util.factorize(dim_nodes.h, pe.NUM), 101 | util.factorize(dim_nodes.w, pe.NUM)): 102 | 103 | pdims = [PhyDim2(h, w) for h, w in zip(ph, pw)] 104 | 105 | # BATP. 106 | if self.batch_size % pdims[pe.BATP].size() != 0: 107 | continue 108 | 109 | # OUTP. 110 | if not util.approx_dividable(layer.nofm, pdims[pe.OUTP].size()): 111 | continue 112 | 113 | # OFMP. 114 | if not util.approx_dividable(layer.hofm, pdims[pe.OFMP].h) \ 115 | or not util.approx_dividable(layer.wofm, pdims[pe.OFMP].w): 116 | continue 117 | 118 | # INPP. 119 | if isinstance(layer, ConvLayer): 120 | if not util.approx_dividable(layer.nifm, 121 | pdims[pe.INPP].size()): 122 | continue 123 | elif isinstance(layer, LocalRegionLayer): 124 | if pdims[pe.INPP].size() > 1: 125 | continue 126 | 127 | # Fully utilize one dimension. 128 | pdims_no_ofmp = pdims[:pe.OFMP] + pdims[pe.OFMP + 1:] 129 | if any(pd.h != 1 and pd.h != dim_nodes.h 130 | and pd.w != 1 and pd.w != dim_nodes.w 131 | for pd in pdims_no_ofmp): 132 | continue 133 | 134 | for order in itertools.permutations(range(pe.NUM)): 135 | 136 | # Batch parallelism should be at the top. 
137 | filtered_order = [pae for pae in order 138 | if pdims[pae].size() > 1] 139 | if pe.BATP in filtered_order and filtered_order[0] != pe.BATP: 140 | continue 141 | 142 | yield PartitionScheme(order=order, pdims=pdims) 143 | 144 | -------------------------------------------------------------------------------- /nn_dataflow/tests/pipeline_test/__init__.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | from .test_pipeline_fixture import TestPipelineFixture 18 | 19 | -------------------------------------------------------------------------------- /nn_dataflow/tests/tool_test/__init__.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 
12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | -------------------------------------------------------------------------------- /nn_dataflow/tests/tool_test/test_nn_dataflow_search.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | import unittest 18 | 19 | import os 20 | import subprocess 21 | 22 | class TestNNDataflowSearch(unittest.TestCase): 23 | ''' Tests for NN dataflow search tool. ''' 24 | 25 | def setUp(self): 26 | cwd = os.path.dirname(os.path.abspath(__file__)) 27 | self.cwd = os.path.join(cwd, '..', '..', '..') 28 | self.assertTrue(os.path.isdir(self.cwd)) 29 | self.assertTrue(os.path.isdir( 30 | os.path.join(self.cwd, 'nn_dataflow', 'tools'))) 31 | 32 | self.args = ['python3', '-m', 'nn_dataflow.tools.nn_dataflow_search', 33 | 'alex_net', '--batch', '1', 34 | '--node', '1', '1', '--array', '16', '16', 35 | '--regf', '512', '--gbuf', '131072'] 36 | 37 | def test_default_invoke(self): 38 | ''' Default invoke. ''' 39 | ret = self._call(self.args) 40 | self.assertEqual(ret, 0) 41 | 42 | def test_3d_mem(self): 43 | ''' With 3D memory. 
''' 44 | ret = self._call(self.args + ['--mem-type', '3D']) 45 | self.assertEqual(ret, 0) 46 | 47 | def test_no_dataflow(self): 48 | ''' No dataflow scheme found. ''' 49 | args = self.args[:] 50 | args[args.index('--gbuf') + 1] = '2' 51 | args += ['--disable-bypass', 'i', 'o', 'f'] 52 | ret = self._call(args) 53 | self.assertEqual(ret, 2) 54 | 55 | def _call(self, args): 56 | with open(os.devnull, 'w') as output: 57 | result = subprocess.call(args, cwd=self.cwd, 58 | stderr=subprocess.STDOUT, 59 | stdout=output) 60 | 61 | return result 62 | 63 | -------------------------------------------------------------------------------- /nn_dataflow/tests/tool_test/test_nn_layer_stats.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | import unittest 18 | 19 | import os 20 | import subprocess 21 | 22 | class TestNNLayerStats(unittest.TestCase): 23 | ''' Tests for NN layer stats tool. 
''' 24 | 25 | def setUp(self): 26 | cwd = os.path.dirname(os.path.abspath(__file__)) 27 | self.cwd = os.path.join(cwd, '..', '..', '..') 28 | self.assertTrue(os.path.isdir(self.cwd)) 29 | self.assertTrue(os.path.isdir( 30 | os.path.join(self.cwd, 'nn_dataflow', 'tools'))) 31 | 32 | self.args = ['python3', '-m', 'nn_dataflow.tools.nn_layer_stats', 33 | 'alex_net', '-b', '16'] 34 | 35 | def test_default_invoke(self): 36 | ''' Default invoke. ''' 37 | ret = self._call(self.args) 38 | self.assertEqual(ret, 0) 39 | 40 | def _call(self, args): 41 | with open(os.devnull, 'w') as output: 42 | result = subprocess.call(args, cwd=self.cwd, 43 | stderr=subprocess.STDOUT, 44 | stdout=output) 45 | return result 46 | 47 | -------------------------------------------------------------------------------- /nn_dataflow/tests/unit_test/__init__.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 
15 | """ 16 | 17 | -------------------------------------------------------------------------------- /nn_dataflow/tests/unit_test/test_cost.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | import unittest 18 | 19 | from nn_dataflow.core import Cost 20 | from nn_dataflow.core import MemHierEnum as me 21 | 22 | class TestCost(unittest.TestCase): 23 | ''' Tests for Cost. ''' 24 | 25 | def test_valid_args(self): 26 | ''' Valid arguments. ''' 27 | cost = Cost(mac_op=1, 28 | mem_hier=(200, 6, 2, 1), 29 | noc_hop=10, 30 | idl_unit=0, 31 | ) 32 | self.assertEqual(cost.mac_op, 1, 'mac_op') 33 | self.assertEqual(cost.mem_hier, (200, 6, 2, 1), 'mem_hier') 34 | self.assertEqual(cost.noc_hop, 10, 'noc_hop') 35 | self.assertEqual(cost.idl_unit, 0, 'idl_unit') 36 | 37 | def test_invalid_mac_op(self): 38 | ''' Invalid mac_op. ''' 39 | with self.assertRaisesRegex(TypeError, 'Cost: .*mac_op.*'): 40 | _ = Cost(mac_op=(1, 2), 41 | mem_hier=(200, 6, 2, 1), 42 | noc_hop=10, 43 | idl_unit=0, 44 | ) 45 | 46 | def test_invalid_mem_hier_type(self): 47 | ''' Invalid mem_hier type. 
''' 48 | with self.assertRaisesRegex(TypeError, 'Cost: .*mem_hier.*'): 49 | _ = Cost(mac_op=1, 50 | mem_hier=200, 51 | noc_hop=10, 52 | idl_unit=0, 53 | ) 54 | with self.assertRaisesRegex(TypeError, 'Cost: .*mem_hier.*'): 55 | _ = Cost(mac_op=1, 56 | mem_hier=[200, 6, 2, 1], 57 | noc_hop=10, 58 | idl_unit=0, 59 | ) 60 | 61 | def test_invalid_mem_hier_len(self): 62 | ''' Invalid mem_hier len. ''' 63 | with self.assertRaisesRegex(ValueError, 'Cost: .*mem_hier.*'): 64 | _ = Cost(mac_op=1, 65 | mem_hier=(200, 6), 66 | noc_hop=10, 67 | idl_unit=0, 68 | ) 69 | 70 | def test_invalid_noc_hop(self): 71 | ''' Invalid noc_hop. ''' 72 | with self.assertRaisesRegex(TypeError, 'Cost: .*noc_hop.*'): 73 | _ = Cost(mac_op=1, 74 | mem_hier=(200, 6, 2, 1), 75 | noc_hop=[10, 10], 76 | idl_unit=0, 77 | ) 78 | 79 | def test_invalid_idl_unit(self): 80 | ''' Invalid idl_unit. ''' 81 | with self.assertRaisesRegex(TypeError, 'Cost: .*idl_unit.*'): 82 | _ = Cost(mac_op=1, 83 | mem_hier=(200, 6, 2, 1), 84 | noc_hop=10, 85 | idl_unit=set([1, 2]), 86 | ) 87 | 88 | def test_mem_hier_at(self): 89 | ''' Accessor mem_hier. ''' 90 | cost = Cost(mac_op=1, 91 | mem_hier=(200, 6, 2, 1), 92 | noc_hop=10, 93 | idl_unit=0, 94 | ) 95 | self.assertEqual(cost.mem_hier_at(me.DRAM), 200, 'mem_hier: DRAM') 96 | self.assertEqual(cost.mem_hier_at(me.GBUF), 6, 'mem_hier: GBUF') 97 | self.assertEqual(cost.mem_hier_at(me.ITCN), 2, 'mem_hier: ITCN') 98 | self.assertEqual(cost.mem_hier_at(me.REGF), 1, 'mem_hier: REGF') 99 | 100 | def test_mem_hier_at_error(self): 101 | ''' Accessor mem_hier error. 
''' 102 | cost = Cost(mac_op=1, 103 | mem_hier=(200, 6, 2, 1), 104 | noc_hop=10, 105 | idl_unit=0, 106 | ) 107 | self.assertIsNone(cost.mem_hier_at(me.NUM)) 108 | self.assertIsNone(cost.mem_hier_at(None)) 109 | 110 | -------------------------------------------------------------------------------- /nn_dataflow/tests/unit_test/test_data_dim_loops.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | import itertools 18 | import unittest 19 | 20 | from nn_dataflow.core import DataDimLoops 21 | from nn_dataflow.core import LoopEnum as le 22 | 23 | class TestDataDimLoops(unittest.TestCase): 24 | ''' Tests for DataDimLoops. ''' 25 | 26 | def test_valid_args(self): 27 | ''' Valid arguments. ''' 28 | ddls = DataDimLoops(le.IFM, le.OFM) 29 | self.assertTupleEqual(ddls.loops(), (le.IFM, le.OFM)) 30 | 31 | ddls = DataDimLoops(le.BAT, le.IFM, le.OFM) 32 | self.assertTupleEqual(ddls.loops(), (le.IFM, le.OFM, le.BAT)) 33 | 34 | def test_valid_repeated_args(self): 35 | ''' Valid repeated arguments. ''' 36 | ddls = DataDimLoops(le.IFM, le.OFM, le.IFM, le.IFM) 37 | self.assertTupleEqual(ddls.loops(), (le.IFM, le.OFM)) 38 | 39 | ddls = DataDimLoops(*([le.BAT] * 10)) 40 | self.assertTupleEqual(ddls.loops(), (le.BAT,)) 41 | 42 | def test_invalid_args(self): 43 | ''' Invalid arguments. 
        '''
        with self.assertRaisesRegex(ValueError,
                                    'DataDimLoops: .*LoopEnum.*'):
            _ = DataDimLoops(le.NUM + 1)

        with self.assertRaisesRegex(ValueError,
                                    'DataDimLoops: .*LoopEnum.*'):
            _ = DataDimLoops(le.IFM, le.NUM)

    def test_loops(self):
        ''' Get loops. '''
        for loops in self._gen_loop_combs():
            ddls = DataDimLoops(*loops)
            self.assertTupleEqual(ddls.loops(), loops)

    def test_take(self):
        ''' take. '''
        lst = [str(lpe) for lpe in range(le.NUM)]

        # take() keeps exactly the list entries at the loop dimensions.
        for loops in self._gen_loop_combs():
            ddls = DataDimLoops(*loops)
            sublst = ddls.take(lst)

            self.assertEqual(len(sublst), len(loops))
            self.assertListEqual(sublst, [str(lpe) for lpe in loops])

    def test_drop(self):
        ''' drop. '''
        lst = [str(lpe) for lpe in range(le.NUM)]

        # drop() keeps the complementary entries.
        for loops in self._gen_loop_combs():
            ddls = DataDimLoops(*loops)
            sublst = ddls.drop(lst)

            self.assertEqual(len(sublst), le.NUM - len(loops))

    def test_take_and_drop(self):
        ''' take and drop. '''
        lst = [str(lpe) for lpe in range(le.NUM)]

        # take() and drop() must partition the list: disjoint and covering.
        for loops in self._gen_loop_combs():
            ddls = DataDimLoops(*loops)
            takelst = ddls.take(lst)
            droplst = ddls.drop(lst)

            self.assertEqual(len(takelst) + len(droplst), le.NUM)
            self.assertTrue(set(takelst).isdisjoint(set(droplst)))
            self.assertSetEqual(set(takelst) | set(droplst), set(lst))

    def test_repr(self):
        ''' __repr__. '''
        # repr() must round-trip through eval() back to an equal object.
        # pylint: disable=eval-used
        for loops in self._gen_loop_combs():
            ddls = DataDimLoops(*loops)
            self.assertEqual(eval(repr(ddls)), ddls)

    @staticmethod
    def _gen_loop_combs():
        ''' Generate all combinations of LoopEnum with all lengths. '''
        for num in range(1, le.NUM + 1):
            for comb in itertools.combinations(range(le.NUM), num):
                yield comb


--------------------------------------------------------------------------------
/nn_dataflow/tests/unit_test/test_int_range.py:
--------------------------------------------------------------------------------
""" $lic$
Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of
Stanford University

This program is free software: you can redistribute it and/or modify it under
the terms of the Modified BSD-3 License as published by the Open Source
Initiative.

This program is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE. See the BSD-3 License for more details.

You should have received a copy of the Modified BSD-3 License along with this
program. If not, see .
"""

import unittest

from nn_dataflow.core import IntRange

class TestIntRange(unittest.TestCase):
    ''' Tests for IntRange. '''

    def test_valid_args(self):
        ''' Valid arguments. '''
        ir1 = IntRange(1, 7)
        self.assertEqual(ir1.beg, 1)
        self.assertEqual(ir1.end, 7)
        ir2 = IntRange(-3, 0)
        self.assertEqual(ir2.beg, -3)
        self.assertEqual(ir2.end, 0)
        # beg == end is allowed and denotes an empty range.
        ir3 = IntRange(4, 4)
        self.assertEqual(ir3.beg, 4)
        self.assertEqual(ir3.end, 4)

    def test_invalid_args(self):
        ''' Invalid arguments.
        '''
        # Both bounds must be integers.
        with self.assertRaisesRegex(TypeError, 'IntRange: .*beg.*'):
            _ = IntRange(7.2, 3)
        with self.assertRaisesRegex(TypeError, 'IntRange: .*end.*'):
            _ = IntRange(7, None)

        # beg must not exceed end.
        with self.assertRaisesRegex(ValueError, 'IntRange: .*beg.*end.*'):
            _ = IntRange(7, 3)
        with self.assertRaisesRegex(ValueError, 'IntRange: .*beg.*end.*'):
            _ = IntRange(-3, -7)

    def test_size(self):
        ''' Get size. '''
        # size == end - beg.
        ir1 = IntRange(1, 7)
        self.assertEqual(ir1.size(), 6)
        ir2 = IntRange(-3, 0)
        self.assertEqual(ir2.size(), 3)
        ir3 = IntRange(4, 4)
        self.assertEqual(ir3.size(), 0)

    def test_empty(self):
        ''' Get empty. '''
        ir1 = IntRange(1, 7)
        self.assertFalse(ir1.empty())
        ir2 = IntRange(-3, 0)
        self.assertFalse(ir2.empty())
        ir3 = IntRange(4, 4)
        self.assertTrue(ir3.empty())

    def test_range(self):
        ''' Get range. '''
        # range() iterates beg inclusive to end exclusive.
        ir1 = IntRange(1, 7)
        self.assertEqual(len(set(ir1.range())), ir1.size())
        ir2 = IntRange(-3, 0)
        self.assertListEqual(list(ir2.range()), [-3, -2, -1])
        ir3 = IntRange(4, 4)
        self.assertEqual(len(list(ir3.range())), 0)

    def test_overlap(self):
        ''' Get overlap. '''
        # overlap() intersects two ranges and is commutative.
        ir1 = IntRange(-11, 5)
        ir2 = IntRange(3, 8)
        ir_ovlp = ir1.overlap(ir2)
        self.assertEqual(ir_ovlp, IntRange(3, 5))
        self.assertEqual(ir1.overlap(ir2), ir2.overlap(ir1))

        ir3 = IntRange(-3, 3)
        ir_ovlp = ir1.overlap(ir3)
        self.assertEqual(ir_ovlp, IntRange(-3, 3))

        # Disjoint ranges overlap into an empty range.
        ir4 = IntRange(8, 10)
        ir_ovlp = ir1.overlap(ir4)
        self.assertTrue(ir_ovlp.empty())

    def test_overlap_error(self):
        ''' Get overlap error. '''
        # Only another IntRange is accepted, not a bare tuple.
        ir = IntRange(-11, 5)
        with self.assertRaisesRegex(TypeError, 'IntRange: .*'):
            ir.overlap((0, 1))

    def test_offset(self):
        ''' Get offset. '''
        # offset() shifts both bounds by the given amount.
        ir1 = IntRange(1, 7)
        self.assertEqual(ir1.offset(3), IntRange(4, 10))
        ir2 = IntRange(-3, 0)
        self.assertEqual(ir2.offset(-2), IntRange(-5, -2))


--------------------------------------------------------------------------------
/nn_dataflow/tests/unit_test/test_option.py:
--------------------------------------------------------------------------------
""" $lic$
Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of
Stanford University

This program is free software: you can redistribute it and/or modify it under
the terms of the Modified BSD-3 License as published by the Open Source
Initiative.

This program is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE. See the BSD-3 License for more details.

You should have received a copy of the Modified BSD-3 License along with this
program. If not, see .
"""

import unittest

from nn_dataflow.core import Option

class TestOption(unittest.TestCase):
    ''' Tests for Option. '''

    def test_valid_kwargs(self):
        ''' Valid keyword arguments. '''
        options = Option(sw_gbuf_bypass=(False, False, False),
                         sw_solve_loopblocking=False,
                         hw_access_forwarding=False,
                         hw_gbuf_sharing=False,
                         partition_hybrid=True,
                         partition_batch=False,
                         partition_ifmaps=False,
                         partition_interlayer=False,
                         opt_goal='ed',
                         ntops=10,
                         nprocesses=16,
                         verbose=False
                         )
        self.assertEqual(options.sw_gbuf_bypass, (False, False, False),
                         'sw_gbuf_bypass')
        self.assertEqual(options.sw_solve_loopblocking, False,
                         'sw_solve_loopblocking')
        self.assertEqual(options.hw_access_forwarding, False,
                         'hw_access_forwarding')
        self.assertEqual(options.hw_gbuf_sharing, False,
                         'hw_gbuf_sharing')
        self.assertEqual(options.partition_hybrid, True,
                         'partition_hybrid')
        self.assertEqual(options.partition_batch, False,
                         'partition_batch')
        self.assertEqual(options.partition_ifmaps, False,
                         'partition_ifmaps')
        self.assertEqual(options.partition_interlayer, False,
                         'partition_interlayer')
        self.assertEqual(options.opt_goal, 'ed', 'opt_goal')
        self.assertEqual(options.ntops, 10, 'ntops')
        self.assertEqual(options.nprocesses, 16, 'nprocesses')
        self.assertEqual(options.verbose, False, 'verbose')

    def test_valid_args(self):
        ''' Valid arguments. '''
        # The first two fields can also be given positionally.
        options = Option((False, True, False), True)
        self.assertEqual(options.sw_gbuf_bypass, (False, True, False),
                         'sw_gbuf_bypass')
        self.assertEqual(options.sw_solve_loopblocking, True,
                         'sw_solve_loopblocking')

    def test_default_args(self):
        ''' Default arguments.
''' 70 | options = Option() 71 | self.assertTupleEqual(options.sw_gbuf_bypass, (False, False, False)) 72 | self.assertEqual(options.sw_solve_loopblocking, False) 73 | self.assertEqual(options.partition_hybrid, False) 74 | self.assertEqual(options.partition_batch, False) 75 | self.assertEqual(options.partition_ifmaps, False) 76 | self.assertEqual(options.opt_goal, 'e') 77 | self.assertEqual(options.ntops, 1) 78 | self.assertEqual(options.nprocesses, 1) 79 | self.assertEqual(options.verbose, False) 80 | 81 | def test_invalid_args(self): 82 | ''' Invalid args. ''' 83 | with self.assertRaisesRegex(TypeError, 'Option: .*at most.*100'): 84 | _ = Option(*[None] * 100) 85 | 86 | def test_invalid_kwargs(self): 87 | ''' Invalid kwargs. ''' 88 | with self.assertRaisesRegex(TypeError, 'Option: .*bad.*'): 89 | _ = Option(bad='') 90 | 91 | def test_invalid_both_args_kwargs(self): 92 | ''' Invalid both args and kwargs are given. ''' 93 | with self.assertRaisesRegex(TypeError, 'Option: .*sw_gbuf_bypass.*'): 94 | _ = Option((False,) * 3, sw_gbuf_bypass=(False,) * 3) 95 | 96 | def test_invalid_swgbyp_type(self): 97 | ''' Invalid sw_gbuf_bypass type. ''' 98 | with self.assertRaisesRegex(TypeError, 'Option: .*sw_gbuf_bypass.*'): 99 | _ = Option(sw_gbuf_bypass=[False, False, False]) 100 | 101 | def test_invalid_swgbyp_len(self): 102 | ''' Invalid sw_gbuf_bypass len. ''' 103 | with self.assertRaisesRegex(ValueError, 'Option: .*sw_gbuf_bypass.*'): 104 | _ = Option(sw_gbuf_bypass=(False, False)) 105 | 106 | def test_invalid_swsol_hwbufshr(self): 107 | ''' Invalid sw_solve_loopblocking and hw_gbuf_sharing comb. ''' 108 | with self.assertRaisesRegex(ValueError, 109 | 'Option: .*sw_solve_loopblocking.*' 110 | 'hw_gbuf_sharing.*'): 111 | _ = Option(sw_solve_loopblocking=True, hw_gbuf_sharing=True) 112 | 113 | def test_invalid_hwaccfwd_hwbufshr(self): 114 | ''' Invalid hw_access_forwarding and hw_gbuf_sharing comb. 
''' 115 | with self.assertRaisesRegex(ValueError, 116 | 'Option: .*hw_access_forwarding.*' 117 | 'hw_gbuf_sharing.*'): 118 | _ = Option(hw_access_forwarding=True, hw_gbuf_sharing=True) 119 | 120 | def test_invalid_swsol_hwswb(self): 121 | ''' Invalid sw_solve_loopblocking and hw_gbuf_save_writeback comb. ''' 122 | with self.assertRaisesRegex(ValueError, 123 | 'Option: .*sw_solve_loopblocking.*' 124 | 'hw_gbuf_save_writeback.*'): 125 | _ = Option(sw_solve_loopblocking=True, hw_gbuf_save_writeback=True) 126 | 127 | def test_invalid_part_hybrid_ifmaps(self): 128 | ''' Invalid partition_hybrid and partition_ifmaps comb. ''' 129 | with self.assertRaisesRegex(ValueError, 130 | 'Option: .*partition_ifmaps.*' 131 | 'partition_hybrid.*'): 132 | _ = Option(partition_hybrid=False, partition_ifmaps=True) 133 | 134 | def test_invalid_time_ovhd(self): 135 | ''' Invalid layer_pipeline_time_ovhd. ''' 136 | with self.assertRaisesRegex(KeyError, 137 | 'Option: .*layer_pipeline_time_ovhd.*'): 138 | _ = Option(layer_pipeline_time_ovhd=None) 139 | 140 | with self.assertRaisesRegex(ValueError, 141 | 'Option: .*layer_pipeline_time_ovhd.*'): 142 | _ = Option(layer_pipeline_time_ovhd=-1) 143 | 144 | def test_invalid_max_degree(self): 145 | ''' Invalid layer_pipeline_max_degree. ''' 146 | with self.assertRaisesRegex(KeyError, 147 | 'Option: .*layer_pipeline_max_degree.*'): 148 | _ = Option(layer_pipeline_max_degree=None) 149 | 150 | with self.assertRaisesRegex(ValueError, 151 | 'Option: .*layer_pipeline_max_degree.*'): 152 | _ = Option(layer_pipeline_max_degree=-1) 153 | 154 | def test_invalid_opt_goal(self): 155 | ''' Invalid opt_goal. ''' 156 | with self.assertRaisesRegex(ValueError, 'Option: .*opt_goal.*'): 157 | _ = Option(opt_goal='o') 158 | with self.assertRaisesRegex(ValueError, 'Option: .*opt_goal.*'): 159 | _ = Option(opt_goal='E') 160 | 161 | def test_option_list(self): 162 | ''' Accessor option_list. 
''' 163 | options = Option() 164 | self.assertCountEqual(options.option_list(), options._fields) 165 | 166 | -------------------------------------------------------------------------------- /nn_dataflow/tests/unit_test/test_phy_dim2.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | import unittest 18 | 19 | from nn_dataflow.core import PhyDim2 20 | 21 | class TestPhyDim2(unittest.TestCase): 22 | ''' Tests for PhyDim2. ''' 23 | 24 | def test_valid_args(self): 25 | ''' Valid arguments. ''' 26 | dim = PhyDim2(14, 12) 27 | self.assertEqual(dim.h, 14, 'h') 28 | self.assertEqual(dim.w, 12, 'w') 29 | 30 | def test_size(self): 31 | ''' Get size. ''' 32 | dim = PhyDim2(14, 12) 33 | self.assertEqual(dim.size(), 14 * 12, 'size') 34 | 35 | def test_add(self): 36 | ''' Operation add. ''' 37 | dim1 = PhyDim2(14, 12) 38 | dim2 = PhyDim2(5, 3) 39 | self.assertTupleEqual(dim1 + dim2, (19, 15), 'add') 40 | self.assertTupleEqual(dim1 + 3, (17, 15), 'add') 41 | 42 | def test_sub(self): 43 | ''' Operation sub. ''' 44 | dim1 = PhyDim2(14, 12) 45 | dim2 = PhyDim2(5, 3) 46 | self.assertTupleEqual(dim1 - dim2, (9, 9), 'sub') 47 | self.assertTupleEqual(dim1 - 3, (11, 9), 'sub') 48 | 49 | def test_neg(self): 50 | ''' Operation neg. 
''' 51 | dim1 = PhyDim2(14, 12) 52 | dim2 = PhyDim2(5, 3) 53 | self.assertTupleEqual(-dim1, (-14, -12), 'neg') 54 | self.assertTupleEqual(-dim2, (-5, -3), 'neg') 55 | 56 | def test_mul(self): 57 | ''' Operation mul. ''' 58 | dim1 = PhyDim2(14, 12) 59 | dim2 = PhyDim2(5, 3) 60 | self.assertTupleEqual(dim1 * dim2, (70, 36), 'mul') 61 | self.assertTupleEqual(dim1 * 2, (28, 24), 'mul') 62 | self.assertTupleEqual(2 * dim1, (28, 24), 'rmul') 63 | 64 | def test_hop_dist(self): 65 | ''' Get hop distance. ''' 66 | dim1 = PhyDim2(14, 12) 67 | dim2 = PhyDim2(5, 20) 68 | self.assertEqual(dim1.hop_dist(dim2), 9 + 8, 'hop_dist') 69 | self.assertEqual(dim2.hop_dist(dim1), 9 + 8, 'hop_dist') 70 | 71 | def test_hop_dist_error(self): 72 | ''' Get hop distance. ''' 73 | dim1 = PhyDim2(14, 12) 74 | with self.assertRaisesRegex(TypeError, 'hop_dist'): 75 | _ = dim1.hop_dist((5, 20)) 76 | 77 | -------------------------------------------------------------------------------- /nn_dataflow/tests/unit_test/test_scheduling_condition.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 
15 | """ 16 | 17 | import unittest 18 | 19 | from nn_dataflow.core import DataLayout 20 | from nn_dataflow.core import FmapRange 21 | from nn_dataflow.core import NodeRegion 22 | from nn_dataflow.core import ParallelEnum as pe 23 | from nn_dataflow.core import PartitionScheme 24 | from nn_dataflow.core import PhyDim2 25 | from nn_dataflow.core import Resource 26 | from nn_dataflow.core import SchedulingCondition 27 | from nn_dataflow.core import SchedulingConstraint 28 | 29 | class TestSchedulingCondition(unittest.TestCase): 30 | ''' Tests for SchedulingCondition. ''' 31 | 32 | def setUp(self): 33 | 34 | self.resource = Resource( 35 | proc_region=NodeRegion(origin=PhyDim2(0, 0), dim=PhyDim2(1, 1), 36 | type=NodeRegion.PROC), 37 | dram_region=NodeRegion(origin=PhyDim2(0, 0), dim=PhyDim2(1, 1), 38 | type=NodeRegion.DRAM), 39 | src_data_region=NodeRegion(origin=PhyDim2(0, 0), dim=PhyDim2(1, 1), 40 | type=NodeRegion.DRAM), 41 | dst_data_region=NodeRegion(origin=PhyDim2(0, 0), dim=PhyDim2(1, 1), 42 | type=NodeRegion.DRAM), 43 | dim_array=PhyDim2(16, 16), size_gbuf=65536, size_regf=64, 44 | array_bus_width=float('inf'), dram_bandwidth=float('inf'), 45 | no_time_mux=False) 46 | 47 | self.none_cstr = SchedulingConstraint() 48 | 49 | part = PartitionScheme(order=range(pe.NUM), pdims=[(1, 1)] * pe.NUM) 50 | self.ifmap_layout = DataLayout(frngs=(FmapRange((0, 0, 0, 0), 51 | (2, 4, 16, 16)),), 52 | regions=(self.resource.src_data_region,), 53 | parts=(part,)) 54 | 55 | self.sched_seq = (2, 0, 0) 56 | 57 | def test_valid_args(self): 58 | ''' Valid arguments. 
''' 59 | condition = SchedulingCondition(resource=self.resource, 60 | constraint=self.none_cstr, 61 | ifmap_layout=self.ifmap_layout, 62 | sched_seq=self.sched_seq) 63 | self.assertEqual(condition.resource, self.resource) 64 | self.assertEqual(condition.constraint, self.none_cstr) 65 | self.assertEqual(condition.ifmap_layout, self.ifmap_layout) 66 | self.assertTupleEqual(condition.sched_seq, self.sched_seq) 67 | 68 | def test_invalid_resource(self): 69 | ''' Invalid resource. ''' 70 | with self.assertRaisesRegex(TypeError, 71 | 'SchedulingCondition: .*resource.*'): 72 | _ = SchedulingCondition(resource=None, 73 | constraint=self.none_cstr, 74 | ifmap_layout=self.ifmap_layout, 75 | sched_seq=self.sched_seq) 76 | 77 | def test_invalid_constraint(self): 78 | ''' Invalid constraint. ''' 79 | with self.assertRaisesRegex(TypeError, 80 | 'SchedulingCondition: .*constraint.*'): 81 | _ = SchedulingCondition(resource=self.resource, 82 | constraint=None, 83 | ifmap_layout=self.ifmap_layout, 84 | sched_seq=self.sched_seq) 85 | 86 | def test_invalid_ifmap_layout(self): 87 | ''' Invalid ifmap_layout. ''' 88 | with self.assertRaisesRegex(TypeError, 89 | 'SchedulingCondition: .*ifmap_layout.*'): 90 | _ = SchedulingCondition(resource=self.resource, 91 | constraint=self.none_cstr, 92 | ifmap_layout=None, 93 | sched_seq=self.sched_seq) 94 | 95 | def test_invalid_sched_seq(self): 96 | ''' Invalid sched_seq. 
''' 97 | with self.assertRaisesRegex(TypeError, 98 | 'SchedulingCondition: .*sched_seq.*'): 99 | _ = SchedulingCondition(resource=self.resource, 100 | constraint=self.none_cstr, 101 | ifmap_layout=self.ifmap_layout, 102 | sched_seq=list(self.sched_seq)) 103 | 104 | with self.assertRaisesRegex(ValueError, 105 | 'SchedulingCondition: .*sched_seq.*'): 106 | _ = SchedulingCondition(resource=self.resource, 107 | constraint=self.none_cstr, 108 | ifmap_layout=self.ifmap_layout, 109 | sched_seq=self.sched_seq[:-1]) 110 | 111 | -------------------------------------------------------------------------------- /nn_dataflow/tests/unit_test/test_scheduling_result.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | import unittest 18 | from collections import OrderedDict 19 | 20 | from nn_dataflow.core import DataLayout 21 | from nn_dataflow.core import FmapRange 22 | from nn_dataflow.core import NodeRegion 23 | from nn_dataflow.core import ParallelEnum as pe 24 | from nn_dataflow.core import PartitionScheme 25 | from nn_dataflow.core import PhyDim2 26 | from nn_dataflow.core import SchedulingResult 27 | 28 | class TestSchedulingResult(unittest.TestCase): 29 | ''' Tests for SchedulingResult. 
''' 30 | 31 | def setUp(self): 32 | 33 | self.scheme = OrderedDict([('cost', 9.876 + 1.234), 34 | ('time', 123.4), 35 | ('ops', 1234), 36 | ('num_nodes', 4), 37 | ('cost_op', 1.234), 38 | ('cost_access', 9.876), 39 | ('cost_noc', 0), 40 | ('cost_static', 0), 41 | ('proc_time', 59), 42 | ('bus_time', 40), 43 | ('dram_time', 120), 44 | ('access', [[2, 3, 4], 45 | [30, 40, 50], 46 | [400, 500, 600], 47 | [5000, 6000, 7000]]), 48 | ('remote_gbuf_access', [0, 0, 0]), 49 | ('total_nhops', [123, 456, 789]), 50 | ('fetch', [[1, 2, 1], [3, 4, 5]]), 51 | ]) 52 | 53 | part = PartitionScheme(order=range(pe.NUM), pdims=[(1, 1)] * pe.NUM) 54 | self.ofmap_layout = DataLayout( 55 | frngs=(FmapRange((0, 0, 0, 0), (2, 4, 16, 16)),), 56 | regions=(NodeRegion(origin=PhyDim2(0, 0), dim=PhyDim2(1, 1), 57 | type=NodeRegion.DRAM),), 58 | parts=(part,)) 59 | 60 | self.sched_seq = (2, 0, 0) 61 | 62 | def test_valid_args(self): 63 | ''' Valid arguments. ''' 64 | result = SchedulingResult(scheme=self.scheme, 65 | ofmap_layout=self.ofmap_layout, 66 | sched_seq=self.sched_seq) 67 | self.assertIn('ops', result.scheme) 68 | self.assertIn('total_nhops', result.scheme) 69 | self.assertEqual(result.ofmap_layout, self.ofmap_layout) 70 | self.assertTupleEqual(result.sched_seq, self.sched_seq) 71 | 72 | def test_invalid_scheme(self): 73 | ''' Invalid scheme. ''' 74 | with self.assertRaisesRegex(TypeError, 75 | 'SchedulingResult: .*scheme.*'): 76 | _ = SchedulingResult(scheme={}, 77 | ofmap_layout=self.ofmap_layout, 78 | sched_seq=self.sched_seq) 79 | 80 | def test_invalid_ofmap_layout(self): 81 | ''' Invalid ofmap_layout. ''' 82 | with self.assertRaisesRegex(TypeError, 83 | 'SchedulingResult: .*ofmap_layout.*'): 84 | _ = SchedulingResult(scheme=self.scheme, 85 | ofmap_layout=None, 86 | sched_seq=self.sched_seq) 87 | 88 | def test_invalid_sched_seq(self): 89 | ''' Invalid sched_seq. 
''' 90 | with self.assertRaisesRegex(TypeError, 91 | 'SchedulingResult: .*sched_seq.*'): 92 | _ = SchedulingResult(scheme=self.scheme, 93 | ofmap_layout=self.ofmap_layout, 94 | sched_seq=list(self.sched_seq)) 95 | 96 | with self.assertRaisesRegex(ValueError, 97 | 'SchedulingResult: .*sched_seq.*'): 98 | _ = SchedulingResult(scheme=self.scheme, 99 | ofmap_layout=self.ofmap_layout, 100 | sched_seq=self.sched_seq[:-1]) 101 | 102 | def test_total_cost(self): 103 | ''' Accessor total_cost. ''' 104 | result = SchedulingResult(scheme=self.scheme, 105 | ofmap_layout=self.ofmap_layout, 106 | sched_seq=self.sched_seq) 107 | self.assertAlmostEqual(result.total_cost, 1.234 + 9.876) 108 | 109 | def test_total_time(self): 110 | ''' Accessor total_time. ''' 111 | result = SchedulingResult(scheme=self.scheme, 112 | ofmap_layout=self.ofmap_layout, 113 | sched_seq=self.sched_seq) 114 | self.assertAlmostEqual(result.total_time, 123.4) 115 | 116 | self.assertGreaterEqual(result.total_time, result.total_node_time) 117 | self.assertGreaterEqual(result.total_time, result.total_dram_time) 118 | 119 | def test_total_node_time(self): 120 | ''' Accessor total_node_time. ''' 121 | result = SchedulingResult(scheme=self.scheme, 122 | ofmap_layout=self.ofmap_layout, 123 | sched_seq=self.sched_seq) 124 | self.assertAlmostEqual(result.total_node_time, max(59, 40)) 125 | 126 | scheme = self.scheme 127 | scheme['bus_time'] = 100 128 | result = SchedulingResult(scheme=scheme, 129 | ofmap_layout=self.ofmap_layout, 130 | sched_seq=self.sched_seq) 131 | self.assertAlmostEqual(result.total_node_time, max(59, 100)) 132 | 133 | def test_total_dram_time(self): 134 | ''' Accessor total_dram_time. ''' 135 | result = SchedulingResult(scheme=self.scheme, 136 | ofmap_layout=self.ofmap_layout, 137 | sched_seq=self.sched_seq) 138 | self.assertAlmostEqual(result.total_dram_time, 120) 139 | 140 | def test_total_proc_time(self): 141 | ''' Accessor total_proc_time. 
''' 142 | result = SchedulingResult(scheme=self.scheme, 143 | ofmap_layout=self.ofmap_layout, 144 | sched_seq=self.sched_seq) 145 | self.assertAlmostEqual(result.total_proc_time, 59) 146 | 147 | scheme = self.scheme 148 | scheme['bus_time'] = 100 149 | result = SchedulingResult(scheme=scheme, 150 | ofmap_layout=self.ofmap_layout, 151 | sched_seq=self.sched_seq) 152 | self.assertAlmostEqual(result.total_proc_time, 59) 153 | 154 | def test_total_ops(self): 155 | ''' Accessor total_ops. ''' 156 | result = SchedulingResult(scheme=self.scheme, 157 | ofmap_layout=self.ofmap_layout, 158 | sched_seq=self.sched_seq) 159 | self.assertEqual(result.total_ops, 1234) 160 | 161 | def test_total_accesses(self): 162 | ''' Accessor total_cost. ''' 163 | result = SchedulingResult(scheme=self.scheme, 164 | ofmap_layout=self.ofmap_layout, 165 | sched_seq=self.sched_seq) 166 | self.assertSequenceEqual(result.total_accesses, 167 | [9, 120, 1500, 18000]) 168 | 169 | def test_total_accesses_rgbuf(self): 170 | ''' Accessor total_accesses remote gbuf. ''' 171 | scheme = self.scheme.copy() 172 | scheme['remote_gbuf_access'] = [10, 20, 30] 173 | result = SchedulingResult(scheme=scheme, 174 | ofmap_layout=self.ofmap_layout, 175 | sched_seq=self.sched_seq) 176 | self.assertSequenceEqual(result.total_accesses, 177 | [9, 120 + 60, 1500, 18000]) 178 | 179 | def test_total_noc_hops(self): 180 | ''' Accessor total_noc_hops. ''' 181 | result = SchedulingResult(scheme=self.scheme, 182 | ofmap_layout=self.ofmap_layout, 183 | sched_seq=self.sched_seq) 184 | self.assertEqual(result.total_noc_hops, 1368) 185 | 186 | def test_num_nodes(self): 187 | ''' Accessor num_nodes. 
''' 188 | result = SchedulingResult(scheme=self.scheme, 189 | ofmap_layout=self.ofmap_layout, 190 | sched_seq=self.sched_seq) 191 | self.assertEqual(result.num_nodes, 4) 192 | 193 | -------------------------------------------------------------------------------- /nn_dataflow/tests/unit_test/test_version.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | import unittest 18 | 19 | from nn_dataflow import version 20 | 21 | class TestVersion(unittest.TestCase): 22 | ''' Tests for version. ''' 23 | 24 | def test_get_version(self): 25 | ''' get_version. ''' 26 | ver_raw = version.get_version() 27 | ver_lcl = version.get_version(with_local=True) 28 | self.assertIn(ver_raw, ver_lcl) 29 | 30 | -------------------------------------------------------------------------------- /nn_dataflow/tools/__init__.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 
8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | -------------------------------------------------------------------------------- /nn_dataflow/tools/nn_layer_stats.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | import argparse 18 | import sys 19 | 20 | from nn_dataflow.core import ConvLayer, FCLayer 21 | 22 | from nn_dataflow.nns import import_network 23 | 24 | KILO = 1024. 25 | MILLION = 1024.*1024. 26 | 27 | STR_FMT_NAME_LEN = '30' 28 | STR_FMT_NUMB_LEN = '12' 29 | STR_FMT_NUMB_PCS = '2' 30 | 31 | STR_FMT_NAME = '{:' + STR_FMT_NAME_LEN + 's}' 32 | STR_FMT_NUMB_HDER = '{:>' + STR_FMT_NUMB_LEN + '}' 33 | STR_FMT_NUMB = '{:' + STR_FMT_NUMB_LEN + '.' + STR_FMT_NUMB_PCS + 'f}' 34 | 35 | def layer_stats(args): 36 | ''' Print stats of layers in the network. 
''' 37 | 38 | network = import_network(args.net) 39 | word_bytes = (args.word + 7) // 8 40 | batch = args.batch 41 | 42 | hder_fmt = ','.join([STR_FMT_NAME] + [STR_FMT_NUMB_HDER] * 5) + '\n' 43 | line_fmt = ','.join([STR_FMT_NAME] + [STR_FMT_NUMB] * 5) + '\n' 44 | line_sep = '-' * int(STR_FMT_NAME_LEN) + '\n' 45 | 46 | # Header. 47 | sys.stdout.write(hder_fmt 48 | .format('Layer', 49 | 'Ifmap/kB', 'Ofmap/kB', 'Weight/kB', 50 | 'MACs/M', 'MinOptBuf/kB')) 51 | 52 | # Aggregate stats. 53 | max_fmaps = 0 54 | max_filters = 0 55 | max_ops = 0 56 | sum_fmaps = 0 57 | sum_filters = 0 58 | sum_ops = 0 59 | convs = 0 60 | fcs = 0 61 | 62 | for name in network: 63 | 64 | layer = network[name] 65 | 66 | if isinstance(layer, FCLayer): 67 | fcs += 1 68 | elif isinstance(layer, ConvLayer): 69 | convs += 1 70 | 71 | ifmap_size = layer.total_ifmap_size(batch, word_bytes) / KILO 72 | ofmap_size = layer.total_ofmap_size(batch, word_bytes) / KILO 73 | try: 74 | filter_size = layer.total_filter_size(word_bytes) / KILO 75 | except AttributeError: 76 | filter_size = 0 77 | 78 | ops = layer.total_ops(batch) / MILLION 79 | 80 | # The minimum optimal buffer size is the sum of the full size (two 81 | # dimensions) for one data category, the size of one dimension for the 82 | # second, and the size of one point for the third. 
83 | min_opt_buf_size = min( 84 | filter_size + (ifmap_size + ofmap_size / layer.nofm) / batch, 85 | filter_size + (ifmap_size / layer.nifm + ofmap_size) / batch, 86 | ifmap_size + (ofmap_size + filter_size / layer.nifm) / layer.nofm, 87 | ifmap_size + (ofmap_size / batch + filter_size) / layer.nofm, 88 | ofmap_size + (ifmap_size + filter_size / layer.nofm) / layer.nifm, 89 | ofmap_size + (ifmap_size / batch + filter_size) / layer.nifm) 90 | 91 | sys.stdout.write(line_fmt 92 | .format(name, 93 | ifmap_size, ofmap_size, filter_size, 94 | ops, min_opt_buf_size)) 95 | 96 | max_fmaps = max(max_fmaps, ofmap_size) 97 | max_filters = max(max_filters, filter_size) 98 | max_ops = max(max_ops, ops) 99 | sum_fmaps += ofmap_size 100 | sum_filters += filter_size 101 | sum_ops += ops 102 | 103 | sys.stdout.write(line_sep) 104 | 105 | sys.stdout.write(line_fmt 106 | .format('MAX', 107 | float('nan'), max_fmaps, max_filters, 108 | max_ops, float('nan'))) 109 | sys.stdout.write(line_fmt 110 | .format('SUM', 111 | float('nan'), sum_fmaps, sum_filters, 112 | sum_ops, float('nan'))) 113 | 114 | sys.stdout.write(line_sep) 115 | 116 | sys.stdout.write('# CONV layers = {}, # FC layers = {}\n' 117 | .format(convs, fcs)) 118 | 119 | 120 | def argparser(): 121 | ''' Argument parser. 
''' 122 | 123 | ap = argparse.ArgumentParser() 124 | 125 | ap.add_argument('net', 126 | help='network name, should be a .py file under examples') 127 | 128 | ap.add_argument('-b', '--batch', type=int, default=1, 129 | help='batch size') 130 | ap.add_argument('-w', '--word', type=int, default=16, 131 | help='word size in bits') 132 | 133 | return ap 134 | 135 | 136 | if __name__ == '__main__': 137 | layer_stats(argparser().parse_args()) 138 | 139 | -------------------------------------------------------------------------------- /nn_dataflow/util.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | from functools import reduce 18 | import math 19 | from operator import mul 20 | 21 | ''' 22 | Utilities. 23 | ''' 24 | 25 | class ContentHashClass(): 26 | ''' 27 | Class using the content instead of the object ID for hash. 28 | 29 | Such class instance can be used as key in dictionary. 30 | ''' 31 | # pylint: disable=too-few-public-methods 32 | 33 | def __eq__(self, other): 34 | if isinstance(other, self.__class__): 35 | return self.__dict__ == other.__dict__ 36 | return NotImplemented 37 | 38 | def __ne__(self, other): 39 | r = self.__eq__(other) 40 | if r is NotImplemented: 41 | # "not" NotImplemented will be True. 
42 | return r 43 | return not r 44 | 45 | def __hash__(self): 46 | return hash(frozenset(self.__dict__.items())) 47 | 48 | 49 | class HashableDict(dict): 50 | ''' Hashable dict. ''' 51 | def __eq__(self, other): 52 | if isinstance(other, self.__class__): 53 | return (frozenset(self), frozenset(self.values())) \ 54 | == (frozenset(other), frozenset(other.values())) 55 | return NotImplemented 56 | 57 | def __ne__(self, other): 58 | r = self.__eq__(other) 59 | if r is NotImplemented: 60 | # "not" NotImplemented will be True. 61 | return r 62 | return not r 63 | 64 | def __hash__(self): 65 | return hash((frozenset(self), frozenset(self.values()))) 66 | 67 | def copy(self): 68 | return self.__class__.fromdict(self) 69 | 70 | def __setitem__(self, key, val): 71 | raise KeyError('Cannot insert items to HashableDict.') 72 | 73 | def __delitem__(self, key): 74 | raise KeyError('Cannot delete items from HashableDict.') 75 | 76 | def setdefault(self, key, default=None): 77 | del key, default 78 | raise KeyError('Cannot insert items to HashableDict.') 79 | 80 | def update(self, other): 81 | del other 82 | raise KeyError('Cannot insert items to HashableDict.') 83 | 84 | def pop(self, key, default=None): 85 | del key, default 86 | raise KeyError('Cannot delete items from HashableDict.') 87 | 88 | def popitem(self): 89 | raise KeyError('Cannot delete items from HashableDict.') 90 | 91 | def clear(self): 92 | raise KeyError('Cannot delete items from HashableDict.') 93 | 94 | @classmethod 95 | def fromdict(cls, other, keyfunc=None, valfunc=None): 96 | ''' 97 | Construct a HashableDict from a normal dict instance. 98 | 99 | The keys and values can be modified during the translation. 
100 | ''' 101 | if not isinstance(other, dict): 102 | raise TypeError('HashableDict: fromdict expects a dict argument.') 103 | 104 | keyfunc = keyfunc if keyfunc else lambda x: x 105 | valfunc = valfunc if valfunc else lambda x: x 106 | 107 | return cls((keyfunc(k), valfunc(v)) for k, v in other.items()) 108 | 109 | 110 | def idivc(valx, valy): 111 | ''' 112 | Integer division and ceiling. 113 | 114 | Return the min integer that is no less than `valx / valy`. 115 | ''' 116 | if math.isinf(valy): 117 | if math.isinf(valx): 118 | return float('nan') 119 | return 0 120 | return (valx + valy - 1) // valy 121 | 122 | 123 | def prod(lst): 124 | ''' Get the product of a list. ''' 125 | return reduce(mul, lst, 1) 126 | 127 | 128 | def approx_dividable(total, num, rel_overhead=0.1, abs_overhead=1): 129 | ''' Whether it is reasonable to divide `total` into `num` parts. 130 | `rel_overhead` is the allowed max padding overhead measured 131 | relatively; `abs_overhead` is the allowed max padding 132 | overhead measured by absolute value.''' 133 | return total >= num and isclose( 134 | idivc(total, num) * num, total, 135 | rel_tol=rel_overhead, abs_tol=abs_overhead) 136 | 137 | 138 | def factorize(value, num, limits=None): 139 | ''' 140 | Factorize given `value` into `num` numbers. Return a tuple of length 141 | `num`. 142 | 143 | Iterate over factor combinations of which the product is `value`. 144 | 145 | `limits` is a (num-1)-length tuple, specifying the upper limits for the 146 | first num-1 factors. 147 | ''' 148 | if limits is None: 149 | limits = [float('inf')] * (num - 1) 150 | assert len(limits) >= num - 1 151 | limits = list(limits[:num-1]) + [float('inf')] 152 | 153 | factors = [1] * num 154 | while True: 155 | # Calculate the last factor. 
def closest_factor(value, factor):
    '''
    Return, as a tuple, the largest factor of `value` that is no larger
    than `factor` (if any), followed by the smallest factor of `value`
    that is no smaller than `factor` (if any).
    '''
    if not isinstance(value, int):
        raise TypeError('value must be integers.')

    if value < 0 or factor < 0:
        raise ValueError('arguments must not be negative.')

    res = tuple()

    # Largest no-larger factor: search downwards from floor(factor).
    if factor >= 1:
        cand = math.floor(factor)
        while cand == 0 or value % cand != 0:
            cand -= 1
        assert cand <= factor and value % cand == 0
        res += (cand,)

    # Smallest no-smaller factor: search upwards from ceil(factor).
    if factor <= abs(value):
        cand = math.ceil(factor)
        while cand == 0 or value % cand != 0:
            cand += 1
        assert cand >= factor and value % cand == 0
        res += (cand,)

    return res


def get_ith_range(rng, idx, num):
    '''
    Evenly divide the full range `rng`, a (begin, end) pair, into `num`
    parts, and return the `idx`-th part as a (begin, end) pair.
    '''
    span = rng[1] - rng[0]
    beg = rng[0] + idx * span // num
    end = rng[0] + (idx + 1) * span // num
    assert end <= rng[1]
    return beg, end


def gcd(*values):
    '''
    The greatest common divisor of the given positive integers.
    '''
    if any(not isinstance(v, int) for v in values):
        raise TypeError('value must be integers.')
    if any(v <= 0 for v in values):
        raise ValueError('arguments must be positive.')
    if not values:
        raise ValueError('must give at least 1 value.')

    if len(values) != 2:
        # reduce() returns a single value unchanged and folds gcd()
        # pairwise over three or more values.
        return reduce(gcd, values)

    a, b = values
    # Euclid's algorithm.
    while b:
        a, b = b, a % b
    return a


def lcm(*values):
    '''
    The least common multiple of the given positive integers.
    '''
    if any(not isinstance(v, int) for v in values):
        raise TypeError('value must be integers.')
    if any(v <= 0 for v in values):
        raise ValueError('arguments must be positive.')
    if not values:
        raise ValueError('must give at least 1 value.')

    if len(values) != 2:
        # Same folding scheme as gcd().
        return reduce(lcm, values)

    a, b = values
    return a * b // gcd(a, b)


def isclose(vala, valb, rel_tol=1e-9, abs_tol=0.0):
    '''
    Whether two values are close to each other.

    Same semantics as math.isclose() in Python 3.5 (PEP 485).
    '''
    diff = abs(vala - valb)
    return diff <= rel_tol * max(abs(vala), abs(valb)) or diff <= abs_tol


def assert_float_eq_int(vfloat, vint, message=''):
    '''
    Check the given float value is within 1 of the given int value; raise
    AssertionError carrying the optional message otherwise.
    '''
    if abs(vfloat - vint) > 1:
        raise AssertionError(f'{message} {vfloat} != {vint}')


def apply(func, argv):
    '''
    Call `func` with the argument sequence `argv`, like the Python 2
    built-in apply().
    '''
    return func(*argv)
import hashlib
import os
import string
import subprocess

from . import __version__

def _command_output(args, cwd):
    ''' Run command `args` in directory `cwd`; return stripped stdout bytes. '''
    return subprocess.check_output(args, cwd=cwd).strip()

def get_version(with_local=False):
    ''' Get the version number, optionally with the local version number. '''

    version = __version__

    if with_local:
        # Directory of this file; used as the working dir for git commands.
        cwd = os.path.dirname(os.path.abspath(__file__))

        # `git rev-parse` exits nonzero when not inside a git work tree;
        # stdout/stderr are discarded, only the exit code matters.
        with open(os.devnull, 'w') as devnull:
            result = subprocess.call(['git', 'rev-parse'], cwd=cwd,
                                     stderr=subprocess.STDOUT,
                                     stdout=devnull)
            if result != 0:
                # Not in git repo.
                return version # pragma: no cover

        # Dirty summary.
        # Abbreviate `git diff --shortstat` output, e.g.
        # "2 files changed, 3 insertions(+), 1 deletion(-)" -> "2fc3a1d".
        # The mixed leading spaces in the patterns are harmless: all
        # remaining spaces and commas are stripped by the final replaces.
        short_stat = _command_output(['git', 'diff', 'HEAD', '--shortstat'],
                                     cwd).decode() \
                .replace('files changed', 'fc').replace('file changed', 'fc') \
                .replace('insertions(+)', 'a').replace(' insertion(+)', 'a') \
                .replace('deletions(-)', 'd').replace(' deletion(-)', 'd') \
                .replace(',', '').replace(' ', '')
        # Short digest of the uncommitted diff, so different dirty states
        # of the same commit yield different local versions.
        diff_hash = hashlib.md5(_command_output(['git', 'diff', 'HEAD'], cwd)) \
                .hexdigest()[:8]
        # Empty when the work tree is clean.
        dirty = '' if not short_stat else '-' + short_stat + '-' + diff_hash

        # Git describe.
        # Nearest tag (or commit hash with --always), with the dirty
        # summary appended via --dirty when the work tree is modified.
        desc = _command_output(['git', 'describe', '--tags', '--always',
                                '--dirty={}'.format(dirty)],
                               cwd).decode()
        # PEP 440 local version separator.
        version += '+' + desc

    # A version string must not contain whitespace.
    assert not any(w in version for w in string.whitespace)
    return version
import os
import re
import setuptools

# Package name and one-line description used in the setup metadata below.
PACKAGE = 'nn_dataflow'
DESC = 'Explore the energy-efficient dataflow scheduling for neural networks.'

def _get_version():
    '''
    Parse __version__ out of the package __init__.py without importing the
    package, so no package code runs at build time.
    '''
    here = os.path.abspath(os.path.dirname(__file__))
    with open(os.path.join(here, PACKAGE, '__init__.py'), 'r') as fh:
        matches = re.findall(r'^\s*__version__\s*=\s*[\'"]([^\'"]+)[\'"]',
                             fh.read(), re.M)
        # Use the last assignment if there are multiple.
        if matches:
            return matches[-1]
    # Fallback placeholder when no assignment is found.
    return '0.0.0'

def _readme():
    ''' Read the README file, used as the long description. '''
    here = os.path.abspath(os.path.dirname(__file__))
    with open(os.path.join(here, 'README.rst'), 'r') as fh:
        return fh.read()

setuptools.setup(
    name=PACKAGE,
    version=_get_version(),
    description=DESC,

    author='Mingyu Gao',
    author_email='mgao12@stanford.edu',
    long_description=_readme(),
    url='https://github.com/stanford-mast/nn_dataflow',
    license='BSD 3-clause',

    packages=setuptools.find_packages(),

    # NOTE(review): test-only tools (coverage, pytest*) and the stdlib
    # argparse are listed as runtime requirements -- consider moving them
    # to extras_require/tests_require; kept as-is to preserve behavior.
    install_requires=[
        'argparse',
        'coverage>=4',
        'fastcache>=1',
        'pytest>=3',
        'pytest-cov>=2',
        'pytest-xdist>=1',
        'sympy>=1',
    ],

    entry_points={
        'console_scripts': [
            'nn_dataflow_search=nn_dataflow.tools.nn_dataflow_search:main',
        ]
    },

    keywords='neural-network scheduling dataflow optimizer',
    classifiers=[
        'Development Status :: 3 - Alpha',
        'Intended Audience :: Developers',
        'Intended Audience :: Science/Research',
        'License :: OSI Approved :: BSD License',
        'Programming Language :: Python :: 3.6',
        'Programming Language :: Python :: 3.7',
        'Topic :: Scientific/Engineering :: Artificial Intelligence',
        'Topic :: System :: Hardware',
    ],
)