├── .gitignore ├── .pylintrc ├── .travis.yml ├── CHANGELOG.md ├── LICENSE ├── MANIFEST.in ├── README.rst ├── nn_dataflow ├── __init__.py ├── core │ ├── __init__.py │ ├── buf_shr_scheme.py │ ├── cost.py │ ├── data_category_enum.py │ ├── data_dim_loops.py │ ├── data_layout.py │ ├── fmap_range.py │ ├── int_range.py │ ├── inter_layer_pipeline.py │ ├── layer.py │ ├── loop_blocking.py │ ├── loop_blocking_scheme.py │ ├── loop_blocking_solver.py │ ├── loop_enum.py │ ├── map_strategy.py │ ├── mem_hier_enum.py │ ├── nested_loop_desc.py │ ├── network.py │ ├── nn_dataflow.py │ ├── nn_dataflow_scheme.py │ ├── node_region.py │ ├── option.py │ ├── parallel_enum.py │ ├── partition.py │ ├── partition_scheme.py │ ├── phy_dim2.py │ ├── pipeline_segment.py │ ├── pipeline_segment_timing.py │ ├── resource.py │ ├── scheduling.py │ └── scheduling_constraint.py ├── nns │ ├── __init__.py │ ├── alex_net.py │ ├── googlenet.py │ ├── lstm_gnmt.py │ ├── lstm_phoneme.py │ ├── lstm_showtell.py │ ├── mlp_l.py │ ├── mlp_m.py │ ├── mlp_s.py │ ├── resnet152.py │ ├── resnet50.py │ ├── vgg19_net.py │ ├── vgg_net.py │ └── zfnet.py ├── tests │ ├── __init__.py │ ├── dataflow_test │ │ ├── __init__.py │ │ ├── test_nn_dataflow.py │ │ └── test_scheduling.py │ ├── loop_blocking_test │ │ ├── __init__.py │ │ ├── test_loop_blocking.py │ │ ├── test_loop_blocking_fixture.py │ │ ├── test_loop_blocking_partition.py │ │ ├── test_loop_blocking_scheme.py │ │ └── test_loop_blocking_solver.py │ ├── map_strategy_test │ │ ├── __init__.py │ │ ├── test_map_strategy.py │ │ ├── test_map_strategy_eyeriss.py │ │ └── test_map_strategy_fixture.py │ ├── nns_test │ │ ├── __init__.py │ │ └── test_nns.py │ ├── partition_test │ │ ├── __init__.py │ │ ├── test_gen_partition.py │ │ ├── test_partition_fixture.py │ │ ├── test_proc_data_range.py │ │ └── test_unit_nhops_to_proc_region.py │ ├── pipeline_test │ │ ├── __init__.py │ │ ├── test_inter_layer_pipeline.py │ │ ├── test_pipeline_fixture.py │ │ ├── test_pipeline_segment.py │ │ └── 
test_pipeline_segment_timing.py │ ├── tool_test │ │ ├── __init__.py │ │ ├── test_nn_dataflow_search.py │ │ └── test_nn_layer_stats.py │ └── unit_test │ │ ├── __init__.py │ │ ├── test_buf_shr_scheme.py │ │ ├── test_cost.py │ │ ├── test_data_dim_loops.py │ │ ├── test_data_layout.py │ │ ├── test_fmap_range.py │ │ ├── test_int_range.py │ │ ├── test_layer.py │ │ ├── test_nested_loop_desc.py │ │ ├── test_network.py │ │ ├── test_nn_dataflow_scheme.py │ │ ├── test_node_region.py │ │ ├── test_option.py │ │ ├── test_partition_scheme.py │ │ ├── test_phy_dim2.py │ │ ├── test_resource.py │ │ ├── test_scheduling_condition.py │ │ ├── test_scheduling_constraint.py │ │ ├── test_scheduling_result.py │ │ ├── test_util.py │ │ └── test_version.py ├── tools │ ├── __init__.py │ ├── nn_dataflow_search.py │ └── nn_layer_stats.py ├── util.py └── version.py ├── requirements.txt └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[cod] 2 | *$py.class 3 | 4 | eggs/ 5 | .eggs/ 6 | *.egg-info/ 7 | *.egg 8 | 9 | # Editor related. 
10 | settings.json -------------------------------------------------------------------------------- /.pylintrc: -------------------------------------------------------------------------------- 1 | # pylint configuration file 2 | 3 | [MASTER] 4 | extension-pkg-whitelist=numpy 5 | 6 | [MESSAGES CONTROL] 7 | disable= 8 | I0011, # locally-disabled, 9 | C0305, # trailing-newlines, 10 | C0325, # superfluous-parens, 11 | C0415, # import-outside-toplevel, 12 | W0105, # pointless-string-statement, 13 | W0141, # bad-builtin, 14 | 15 | [BASIC] 16 | # Allow single-char and two-char variable names 17 | variable-rgx=[a-z_][a-z0-9_]*$ 18 | 19 | [DESIGN] 20 | max-args=25 21 | max-attributes=15 22 | max-branches=25 23 | max-locals=50 24 | max-module-lines=2000 25 | max-statements=200 26 | 27 | [SIMILARITIES] 28 | ignore-imports=yes 29 | 30 | [TYPECHECK] 31 | ignored-modules=numpy 32 | ignored-classes=numpy 33 | 34 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | python: 4 | - 3.6 5 | 6 | install: 7 | - pip install . 8 | - pip install -r requirements.txt 9 | - pip install pylint 10 | - pip install coveralls 11 | 12 | script: 13 | - pytest -n 12 --cov=nn_dataflow 14 | 15 | after_success: 16 | - pylint --disable=R0801 nn_dataflow/ 17 | - coveralls 18 | 19 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2020, Tsinghua IDEAL, Stanford MAST 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 
11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include *.txt *.md *.rst 2 | include LICENSE 3 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | .. image:: https://travis-ci.org/stanford-mast/nn_dataflow.svg?branch=master 2 | :target: https://travis-ci.org/stanford-mast/nn_dataflow 3 | .. 
image:: https://coveralls.io/repos/github/stanford-mast/nn_dataflow/badge.svg?branch=master 4 | :target: https://coveralls.io/github/stanford-mast/nn_dataflow?branch=master 5 | 6 | 7 | Neural Network Dataflow Scheduling 8 | ================================== 9 | 10 | This Python tool allows you to explore the energy-efficient dataflow scheduling 11 | for neural networks (NNs), including array mapping, loop blocking and 12 | reordering, and (coarse-grained) parallel processing within and across layers. 13 | 14 | For hardware, we assume an Eyeriss-style NN accelerator [Chen16]_, i.e., a 2D 15 | array of processing elements (PEs) with a local register file in each PE, and a 16 | global SRAM buffer shared by all PEs. We further support a tiled architecture 17 | with multiple nodes that can partition and process the NN computations in 18 | parallel. Each node is an Eyeriss-style engine as above. 19 | 20 | In software, we decouple the dataflow scheduling into three subproblems: 21 | 22 | - Array mapping, which deals with mapping one 2D convolution computation (one 23 | 2D ifmap convolves with one 2D filter to get one 2D ofmap) onto the hardware 24 | PE array. We support row stationary mapping [Chen16]_. 25 | - Loop blocking and reordering, which decides the order between all 2D 26 | convolutions by blocking and reordering the nested loops. We support 27 | exhaustive search over all blocking and reordering schemes [Yang16]_, and 28 | analytical bypass solvers [Gao17]_. 29 | - Parallel processing, which partitions the NN computations across the multiple 30 | tiled engines. We support both intra-layer and inter-layer parallelism. For 31 | intra-layer, we support batch partitioning, fmap partitioning, output 32 | partitioning, input partitioning, and the combination between them (hybrid) 33 | [Gao17]_. We also explore various dataflow optimizations including access 34 | forwarding and buffer sharing [Gao19]_. We use exhaustive search within each 35 | layer. 
For inter-layer, we support spatial pipelining (inter-layer 36 | pipelining) and temporal pipelining (time multiplexing without writing back 37 | intermediate data) as well as their optimized scheduling [Gao19]_. We use 38 | layer-wise greedy beam search across layers. 39 | 40 | See the details in our ASPLOS'17 [Gao17]_ and ASPLOS'19 [Gao19]_ papers. 41 | 42 | If you use this tool in your work, we kindly request that you reference our 43 | paper(s) below, and send us a citation of your work. 44 | 45 | - Gao et al., "TETRIS: Scalable and Efficient Neural Network Acceleration with 46 | 3D Memory", in ASPLOS, April 2017. 47 | 48 | - Gao et al., "TANGRAM: Optimized Coarse-Grained Dataflow for Scalable NN 49 | Accelerators", in ASPLOS. April 2019. 50 | 51 | 52 | Install 53 | ------- 54 | 55 | ``nn_dataflow`` supports Python 3.6 and above. 56 | 57 | ``nn_dataflow`` can be directly used without installation if you have first 58 | defined the environment variable ``PYTHONPATH`` to include the top directory path. 59 | See the Usage section below for details. 60 | 61 | ``nn_dataflow`` has been registered on `PyPI 62 | `_, so it can be installed through 63 | ``pip`` as:: 64 | 65 | > pip install nn-dataflow 66 | 67 | And ``pip`` will take care of all dependencies. 68 | 69 | To only install ``nn_dataflow`` in local user install directory (without 70 | ``sudo``), and/or to install in editable mode, at the top directory do:: 71 | 72 | > pip install --user -e . 73 | 74 | 75 | Usage 76 | ----- 77 | 78 | First, define the NN structure in ``nn_dataflow/nns``. We already defined 79 | several popular NNs for you, including AlexNet, VGG-16, GoogLeNet, ResNet-152, 80 | etc. 81 | 82 | Then, use ``nn_dataflow/tools/nn_dataflow_search.py`` to search for the optimal 83 | dataflow for the NN. 
For detailed options, type:: 84 | 85 | > python ./nn_dataflow/tools/nn_dataflow_search.py -h 86 | 87 | You can specify NN batch size and word size, PE array dimensions, number of 88 | tile nodes, register file and global buffer capacity, and the energy cost of 89 | all components. Note that, the energy cost of array bus should be the average 90 | energy of transferring the data from the buffer to one PE, *not* local neighbor 91 | transfer; the unit static energy cost should be the static energy of *all* 92 | nodes in one clock cycle. 93 | 94 | Other options include: 95 | 96 | - ``-g``, ``--goal``: ``E``, ``D``, or ``ED``. the optimization goal, e(nergy), 97 | d(elay), or ED product. 98 | - ``--mem-type``: ``2D`` or ``3D``. With 2D memory, memory channels are only on 99 | the four corners of the chip; with 3D memory, memory channels are on the top 100 | of all tile nodes (one per each). 101 | - ``--bus-width``: the multicast bus bit width in the PE array for one data 102 | type. Set to 0 to ignore multicast overheads. 103 | - ``--dram-bw``: ``float`` or ``inf``. Total DRAM bandwidth for all tile nodes, 104 | in bytes per cycle. 105 | - ``--disable-bypass``: a combination of ``i``, ``o``, ``f``, whether to 106 | disallow global buffer bypass for ifmaps, ofmaps, and weights. 107 | - ``--solve-loopblocking``: whether to use analytical bypass solvers for loop 108 | blocking and reordering. See [Gao17]_. 109 | - ``--hybrid-partitioning``: whether to use hybrid partitioning in [Gao17]_. 110 | If not enabled, use naive partitioning, i.e., fmap partitioning for CONV 111 | layers, and output partitioning for FC layers. 112 | - ``--batch-partitioning`` and ``--ifmap-partitioning``: whether the hybrid 113 | partitioning also explores batch and input partitioning. 114 | - ``--enable-access-forwarding``: access forwarding, where the nodes fetch 115 | disjoint subsets of data and forward them to other nodes. See [Gao19]_. 
116 | - ``--enable-gbuf-sharing``: buffer sharing, where the global buffer capacity is 117 | shared across nodes through NoC. See [Gao19]_. 118 | - ``--enable-save-writeback``: allow to elide the intermediate data writeback to 119 | memory when switching between layers if it is possible to store the entire 120 | data set in on-chip buffers. 121 | - ``--interlayer-partition``: whether to use inter-layer pipelining to 122 | partition resources across multiple layers and process them simultaneously. 123 | - ``--layer-pipeline-time-overhead``, ``--layer-pipeline-max-degree``: 124 | constrain the configuration space of inter-layer pipelining, by specifying 125 | the maximum execution time overhead, or the maximum pipelining degree. 126 | - ``--disable-interlayer-opt``: disable optimizations and only allow basic 127 | inter-layer pipelining. 128 | 129 | 130 | Code Structure 131 | -------------- 132 | 133 | - ``nn_dataflow`` 134 | - ``core`` 135 | - Top-level dataflow exploration: ``nn_dataflow``, 136 | ``nn_dataflow_scheme``. 137 | - Layer scheduling: ``scheduling``. 138 | - Array mapping: ``map_strategy``. 139 | - Loop blocking and reordering: ``loop_blocking``, 140 | ``loop_blocking_scheme``, ``loop_blocking_solver``. 141 | - Intra-layer partitioning: ``partition``, ``partition_scheme``, 142 | ``buf_shr_scheme``. 143 | - Inter-layer pipelining: ``inter_layer_pipeline``, 144 | ``pipeline_segment``. 145 | - Network and layer: ``network``, ``layer``. 146 | - ``nns``: example NN definitions. 147 | - ``tests``: unit tests. 148 | - ``tools``: executables. 149 | 150 | 151 | Verification and Testing 152 | ------------------------ 153 | 154 | To verify the tool against the Eyeriss result [Chen16]_, see 155 | ``nn_dataflow/tests/dataflow_test/test_nn_dataflow.py``. 
156 | 157 | To run (unit) tests, do one of the following:: 158 | 159 | > python -m unittest discover 160 | 161 | > python -m pytest 162 | 163 | > pytest 164 | 165 | To check code coverage with ``pytest-cov`` plug-in:: 166 | 167 | > pytest --cov=nn_dataflow 168 | 169 | 170 | Copyright & License 171 | ------------------- 172 | 173 | ``nn_dataflow`` is free software; you can redistribute it and/or modify it 174 | under the terms of the `BSD License `__ as published by the Open 175 | Source Initiative, revised version. 176 | 177 | ``nn_dataflow`` was originally written by Mingyu Gao at Stanford University, 178 | and per Stanford University policy, the copyright of this original code remains 179 | with the Board of Trustees of Leland Stanford Junior University. 180 | 181 | 182 | References 183 | ---------- 184 | 185 | .. [Gao19] Gao, Yang, Pu, Horowitz, and Kozyrakis, `TANGRAM: Optimized 186 | Coarse-Grained Dataflow for Scalable NN Accelerators 187 | `__, in ASPLOS. April, 2019. 188 | 189 | .. [Gao17] Gao, Pu, Yang, Horowitz, and Kozyrakis, `TETRIS: Scalable and 190 | Efficient Neural Network Acceleration with 3D Memory 191 | `__, in ASPLOS. April, 2017. 192 | 193 | .. [Chen16] Chen, Emer, and Sze, `Eyeriss: A Spatial Architecture for 194 | Energy-Efficient Dataflow for Convolutional Neural Networks 195 | `__, in ISCA. June, 2016. 196 | 197 | .. [Yang16] Yang, Pu, Rister, Bhagdikar, Richardson, Kvatinsky, 198 | Ragan-Kelley, Pedram, and Horowitz, `A Systematic Approach to Blocking 199 | Convolutional Neural Networks `__, arXiv 200 | preprint, 2016. 
201 | 202 | -------------------------------------------------------------------------------- /nn_dataflow/__init__.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | __version__ = '2.1' 18 | 19 | -------------------------------------------------------------------------------- /nn_dataflow/core/__init__.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | from . import loop_blocking 18 | from . import loop_blocking_solver 19 | from . import partition 20 | from . import data_category_enum as DataCategoryEnum 21 | from . import loop_enum as LoopEnum 22 | from . 
import mem_hier_enum as MemHierEnum 23 | from . import parallel_enum as ParallelEnum 24 | from .buf_shr_scheme import BufShrScheme 25 | from .cost import Cost 26 | from .data_dim_loops import DataDimLoops 27 | from .data_layout import DataLayout 28 | from .fmap_range import FmapPosition, FmapRange, FmapRangeMap 29 | from .int_range import IntRange 30 | from .inter_layer_pipeline import InterLayerPipeline 31 | from .layer import Layer, InputLayer, ConvLayer, FCLayer, \ 32 | LocalRegionLayer, PoolingLayer, EltwiseLayer 33 | from .loop_blocking_scheme import LoopBlockingScheme 34 | from .map_strategy import MapStrategy, MapStrategyEyeriss 35 | from .nested_loop_desc import NestedLoopDesc 36 | from .network import Network 37 | from .node_region import NodeRegion 38 | from .nn_dataflow_scheme import NNDataflowScheme 39 | from .option import Option 40 | from .partition_scheme import PartitionScheme 41 | from .phy_dim2 import PhyDim2 42 | from .pipeline_segment import PipelineSegment 43 | from .pipeline_segment_timing import PipelineSegmentTiming 44 | from .resource import Resource 45 | from .scheduling import SchedulingCondition, SchedulingResult, Scheduling 46 | from .scheduling_constraint import SchedulingConstraint, \ 47 | SchedulingConstraintLayerPipeline 48 | 49 | from .nn_dataflow import NNDataflow 50 | 51 | -------------------------------------------------------------------------------- /nn_dataflow/core/cost.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 
8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | from collections import namedtuple 18 | 19 | from . import mem_hier_enum as me 20 | 21 | COST_LIST = ['mac_op', 22 | 'mem_hier', 23 | 'noc_hop', 24 | 'idl_unit', 25 | ] 26 | 27 | class Cost(namedtuple('Cost', COST_LIST)): 28 | ''' 29 | Cost specification, including MAC operation cost, memory hierarchy cost, 30 | NoC hop cost, and idle unit-time cost. 31 | ''' 32 | 33 | def __new__(cls, *args, **kwargs): 34 | ntp = super(Cost, cls).__new__(cls, *args, **kwargs) 35 | 36 | if hasattr(ntp.mac_op, '__len__'): 37 | raise TypeError('Cost: mac_op must be a scalar') 38 | if not isinstance(ntp.mem_hier, tuple): 39 | raise TypeError('Cost: mem_hier must be a tuple') 40 | if len(ntp.mem_hier) != me.NUM: 41 | raise ValueError('Cost: mem_hier must have length {}' 42 | .format(me.NUM)) 43 | if hasattr(ntp.noc_hop, '__len__'): 44 | raise TypeError('Cost: noc_hop must be a scalar') 45 | if hasattr(ntp.idl_unit, '__len__'): 46 | raise TypeError('Cost: idl_unit must be a scalar') 47 | 48 | return ntp 49 | 50 | def mem_hier_at(self, mhe): 51 | ''' 52 | Return cost of memory hierarchy level `mhe`. 
53 | ''' 54 | try: 55 | return self.mem_hier[mhe] 56 | except (IndexError, TypeError): 57 | return None 58 | 59 | -------------------------------------------------------------------------------- /nn_dataflow/core/data_category_enum.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | ''' 18 | Enum for data types. 19 | ''' 20 | FIL = 0 21 | IFM = 1 22 | OFM = 2 23 | NUM = 3 24 | 25 | -------------------------------------------------------------------------------- /nn_dataflow/core/data_dim_loops.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | from . import loop_enum as le 18 | from .. 
import util 19 | 20 | class DataDimLoops(util.ContentHashClass): 21 | ''' 22 | A tuple of loops that are the dimensions of the data. 23 | ''' 24 | 25 | def __init__(self, *lpe_list): 26 | for lpe in lpe_list: 27 | if lpe not in range(le.NUM): 28 | raise ValueError('DataDimLoops: arguments must be LoopEnum.') 29 | 30 | self.lpe_tuple = tuple(sorted(set(lpe_list))) 31 | 32 | def loops(self): 33 | ''' 34 | Get the loops that are the dimensions of the data. 35 | ''' 36 | return self.lpe_tuple 37 | 38 | def take(self, lpe_indexed): 39 | ''' 40 | Get the elements in `lpe_indexed` that correspond to the loops of the 41 | data. 42 | ''' 43 | return [lpe_indexed[lpe] for lpe in self.lpe_tuple] 44 | 45 | def drop(self, lpe_indexed): 46 | ''' 47 | Get the elements in `lpe_indexed` that do not correspond to the loops 48 | of the data. 49 | ''' 50 | return [lpe_indexed[lpe] for lpe in range(le.NUM) 51 | if lpe not in self.lpe_tuple] 52 | 53 | def __repr__(self): 54 | return '{}({})'.format( 55 | self.__class__.__name__, 56 | ', '.join([repr(lpe) for lpe in self.lpe_tuple])) 57 | 58 | -------------------------------------------------------------------------------- /nn_dataflow/core/data_layout.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 
15 | """ 16 | 17 | from collections import namedtuple 18 | import itertools 19 | 20 | from .fmap_range import FmapPosition, FmapRange, FmapRangeMap 21 | from .node_region import NodeRegion 22 | from .partition_scheme import PartitionScheme 23 | 24 | DATA_LAYOUT_LIST = ['frngs', 25 | 'regions', 26 | 'parts', 27 | ] 28 | 29 | class DataLayout(namedtuple('DataLayout', DATA_LAYOUT_LIST)): 30 | ''' 31 | The data layout for batched i/ofmap. 32 | ''' 33 | 34 | def __new__(cls, *args, **kwargs): 35 | ntp = super(DataLayout, cls).__new__(cls, *args, **kwargs) 36 | 37 | if not isinstance(ntp.frngs, tuple): 38 | raise TypeError('DataLayout: frngs must be a tuple.') 39 | for fr in ntp.frngs: 40 | if not isinstance(fr, FmapRange): 41 | raise TypeError('DataLayout: elements in frngs must be a ' 42 | 'FmapRange object.') 43 | if not isinstance(ntp.regions, tuple): 44 | raise TypeError('DataLayout: regions must be a tuple.') 45 | for nr in ntp.regions: 46 | if not isinstance(nr, NodeRegion): 47 | raise TypeError('DataLayout: elements in regions must be a ' 48 | 'NodeRegion object.') 49 | if not isinstance(ntp.parts, tuple): 50 | raise TypeError('DataLayout: parts must be a tuple.') 51 | for p in ntp.parts: 52 | if not isinstance(p, PartitionScheme): 53 | raise TypeError('DataLayout: elements in parts must be a ' 54 | 'PartitionScheme object.') 55 | 56 | cls._validate_frngs(ntp.frngs) 57 | cls._validate_parts(ntp.parts, ntp.regions) 58 | 59 | if not len(ntp.frngs) == len(ntp.regions) == len(ntp.parts): 60 | raise ValueError('DataLayout: {} must have the same length.' 61 | .format(', '.join(DATA_LAYOUT_LIST))) 62 | 63 | return ntp 64 | 65 | def complete_fmap_range(self): 66 | ''' 67 | Get the complete FmapRange, i.e., a perfect hyper cube starting from 68 | origin point (0, ..., 0) with no holes. 
69 | ''' 70 | return FmapRange(self.frngs[0].fp_beg, self.frngs[-1].fp_end) 71 | 72 | def fmap_range_map(self): 73 | ''' 74 | Get an `FmapRangeMap` instance, mapping from fmap range to absolute 75 | node coordinate. 76 | ''' 77 | frmap = FmapRangeMap() 78 | 79 | for frng, region, part in zip(self.frngs, self.regions, self.parts): 80 | 81 | for pidx in part.gen_pidx(): 82 | pcoord = part.coordinate(region, pidx) 83 | pfrng = part.fmap_range(frng, pidx) 84 | 85 | frmap.add(pfrng, pcoord) 86 | 87 | return frmap 88 | 89 | def nhops_to(self, fmap_range, *dest_list, **kwargs): 90 | ''' 91 | Get the total number of hops to transfer the FmapRange `fmap_range` to 92 | destinations `dest_list` given as a list of absolute coordinates. 93 | 94 | If `forwarding` is True, the data can be forwarded between destinations 95 | rather than all from the source. 96 | ''' 97 | forwarding = kwargs.pop('forwarding', False) 98 | if kwargs: 99 | raise ValueError('DataLayout: method nhops_to() got an unexpected ' 100 | 'keyword argument: {}.' 101 | .format(kwargs.popitem()[0])) 102 | 103 | # The number of hops to transfer data to each destination individually. 104 | nhops_list = [0] * len(dest_list) 105 | 106 | for frng, region, part in zip(self.frngs, self.regions, self.parts): 107 | 108 | # Skip non-overlapped fmap range. 109 | if fmap_range.overlap_size(frng) == 0: 110 | continue 111 | 112 | for pidx in part.gen_pidx(): 113 | psrc = part.coordinate(region, pidx) 114 | pfrng = part.fmap_range(frng, pidx) 115 | size = fmap_range.overlap_size(pfrng) 116 | 117 | nhops_list = [n + size * d.hop_dist(psrc) 118 | for n, d in zip(nhops_list, dest_list)] 119 | 120 | if forwarding: 121 | # The number of hops to the first node and its coordinate. 122 | nhops, coord = min(zip(nhops_list, dest_list)) 123 | 124 | # Size of all data. 125 | total_size = self.complete_fmap_range().overlap_size(fmap_range) 126 | 127 | # Data can be forwarded from all sources to any destination. 
128 | src_set = {coord} 129 | dst_set = set(dest_list) - src_set 130 | 131 | while dst_set: 132 | # Each forward step, get the min-distance pair of source and 133 | # destination. 134 | src, dst = min(itertools.product(src_set, dst_set), 135 | key=lambda sd: sd[1].hop_dist(sd[0])) 136 | dst_set.remove(dst) 137 | src_set.add(dst) 138 | nhops += total_size * dst.hop_dist(src) 139 | 140 | else: 141 | nhops = sum(nhops_list) 142 | 143 | return nhops 144 | 145 | def is_in(self, *regions): 146 | ''' 147 | Whether the layout is completely in the given NodeRegion's `regions`. 148 | Region types must match. Each fmap range can be split into multiple 149 | given regions. 150 | ''' 151 | return all(any(region.type == r.type and r.contains_node(coord) 152 | for r in regions) 153 | for region in self.regions for coord in region.iter_node()) 154 | 155 | @classmethod 156 | def concat(cls, *data_layout_list): 157 | ''' 158 | Concatenate multiple `DataLayout` objects along the channel dimension. 159 | ''' 160 | frngs = [] 161 | regions = [] 162 | parts = [] 163 | 164 | n_offset = 0 165 | 166 | for dl in data_layout_list: 167 | 168 | # Check type. 169 | if not isinstance(dl, DataLayout): 170 | raise TypeError('DataLayout: only DataLayout object can be ' 171 | 'concatenated.') 172 | 173 | # Concatenate frngs along n dimension. 174 | for frng in dl.frngs: 175 | fpb = frng.fp_beg 176 | fpe = frng.fp_end 177 | frng2 = FmapRange(FmapPosition(b=fpb.b, n=fpb.n + n_offset, 178 | h=fpb.h, w=fpb.w), 179 | FmapPosition(b=fpe.b, n=fpe.n + n_offset, 180 | h=fpe.h, w=fpe.w)) 181 | frngs.append(frng2) 182 | n_offset += frng.size('n') 183 | 184 | # Regions and partitions are the same. 185 | regions += dl.regions 186 | parts += dl.parts 187 | 188 | return DataLayout(frngs=tuple(frngs), regions=tuple(regions), 189 | parts=tuple(parts)) 190 | 191 | @classmethod 192 | def _validate_frngs(cls, frngs): 193 | ''' 194 | Validate the fmap ranges. 
195 | ''' 196 | if not frngs: 197 | raise ValueError('DataLayout: no frngs.') 198 | 199 | _, n_end = frngs[0].beg_end('n') 200 | bhw_beg_end = frngs[0].beg_end('b', 'h', 'w') 201 | 202 | if frngs[0].fp_beg != FmapPosition(0, 0, 0, 0): 203 | raise ValueError('DataLayout: frngs must begin at 0.') 204 | 205 | for frng in frngs[1:]: 206 | if frng.beg_end('b', 'h', 'w') != bhw_beg_end: 207 | raise ValueError('DataLayout: frng dim b, h, w mismatch.') 208 | nb, ne = frng.beg_end('n') 209 | if nb != n_end: 210 | raise ValueError('DataLayout: frng dim n is discontinuous.') 211 | n_end = ne 212 | 213 | @classmethod 214 | def _validate_parts(cls, parts, regions): 215 | ''' 216 | Validate the partitioning schemes. 217 | ''' 218 | for region, part in zip(regions, parts): 219 | if not part.is_applicable_to_fmap_range(): 220 | raise ValueError('DataLayout: invalid partitioning scheme for ' 221 | 'fmap range.') 222 | 223 | if any(pd > rd for pd, rd in zip(part.dim(), region.dim)): 224 | raise ValueError('DataLayout: partitioning scheme does not fit ' 225 | 'in node region.') 226 | 227 | -------------------------------------------------------------------------------- /nn_dataflow/core/int_range.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 
15 | """ 16 | 17 | from collections import namedtuple 18 | import numbers 19 | 20 | class IntRange(namedtuple('IntRange', ['beg', 'end'])): 21 | ''' 22 | A range of integer numbers. 23 | ''' 24 | 25 | def __new__(cls, *args, **kwargs): 26 | ntp = super(IntRange, cls).__new__(cls, *args, **kwargs) 27 | 28 | if not isinstance(ntp.beg, numbers.Integral): 29 | raise TypeError('IntRange: begin value must be an integer.') 30 | if not isinstance(ntp.end, numbers.Integral): 31 | raise TypeError('IntRange: end value must be an integer.') 32 | if ntp.beg > ntp.end: 33 | raise ValueError('IntRange: begin value {} > end value {}?' 34 | .format(ntp.beg, ntp.end)) 35 | 36 | return ntp 37 | 38 | def size(self): 39 | ''' 40 | Get the size of the range. 41 | ''' 42 | return self.end - self.beg 43 | 44 | def empty(self): 45 | ''' 46 | Whether the range is empty. 47 | ''' 48 | return self.beg == self.end 49 | 50 | def range(self): 51 | ''' 52 | Generator for the range. 53 | ''' 54 | for v in range(self.beg, self.end): 55 | yield v 56 | 57 | def overlap(self, other): 58 | ''' 59 | Get the overlapped IntRange of the two. 60 | ''' 61 | if not isinstance(other, IntRange): 62 | raise TypeError('IntRange: an IntRange object is required.') 63 | try: 64 | return IntRange(max(self.beg, other.beg), min(self.end, other.end)) 65 | except ValueError: 66 | # Non-overlapped. 67 | return IntRange(0, 0) 68 | 69 | def offset(self, val): 70 | ''' 71 | Get a new IntRange by offseting `val`. 
72 | ''' 73 | return IntRange(self.beg + val, self.end + val) 74 | 75 | -------------------------------------------------------------------------------- /nn_dataflow/core/loop_blocking.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | import heapq 18 | import itertools 19 | from multiprocessing.pool import Pool 20 | 21 | from . import loop_blocking_solver 22 | from . import loop_enum as le 23 | from .. import util 24 | from .buf_shr_scheme import BufShrScheme 25 | from .layer import ConvLayer 26 | from .loop_blocking_scheme import LoopBlockingScheme 27 | 28 | ''' 29 | Loop blocking optimization. 30 | 31 | Include loop blocking and reordering. 32 | 33 | For our problem, only deal with nifm, nofm, and batch loops. 34 | ''' 35 | 36 | def skip_conv(bl_ts, bl_ords): 37 | ''' 38 | Skip the given loop blocking scheme for CONV layer, if it has regularized 39 | equivalent, or it is suboptimal. 40 | 41 | Equivalence of loop blocking schemes: 42 | 43 | - changing the position of a trivial loop (with blocking factor 1) makes no 44 | difference to the access pattern. 45 | - reorder non-innermost non-trivial loops has no effect on reuse, although 46 | the access pattern changes. 
47 | 48 | Therefore a scheme is regularized if: 49 | 50 | - all the trivial loops (with blocking factor 1) are at the outermost of 51 | this level, and are in order, i.e., smaller LoopEnum at inner. 52 | - the non-innermost non-trivial loops are in order, i.e., smaller LoopEnum 53 | at inner. 54 | 55 | A scheme is suboptimal if the closest innermost non-trivial loop of an 56 | outer level (skipping the levels with all trivial loops) is the same type 57 | (i.e., has the same LoopEnum value) as one of the non-innermost non-trivial 58 | loops of this level. For the last (innermost) level, all non-trivial loops 59 | should be considered, i.e., no innermost non-trivial loop. 60 | 61 | This is because an equivalent scheme can reorder the non-innermost loops to 62 | put the one loop adjacent to the outer-level innermost loop. Then this loop 63 | can be merged to the outer level, which results in the same access pattern 64 | but has smaller data size for this level. 65 | ''' 66 | 67 | outer_level_innermost_nt_loop = None 68 | 69 | for t_, ord_ in itertools.zip_longest(bl_ts, bl_ords, fillvalue=None): 70 | 71 | # Non-trivial loops. 72 | nt_loops = [lpe for lpe in range(le.NUM) if t_[lpe] > 1] 73 | 74 | # Innermost non-trivial loops. 75 | try: 76 | innermost_nt_loop = min(nt_loops, key=lambda lpe, o=ord_: o[lpe]) 77 | except (ValueError, TypeError): 78 | # All trivial loops, or order is None type (last level). 79 | innermost_nt_loop = None 80 | 81 | # Scheme is suboptimal if the outer-level innermost non-trivial loop is 82 | # a non-innermost non-trivial loops at this level. 83 | if outer_level_innermost_nt_loop != innermost_nt_loop \ 84 | and outer_level_innermost_nt_loop in nt_loops: 85 | return True 86 | if innermost_nt_loop is not None: 87 | outer_level_innermost_nt_loop = innermost_nt_loop 88 | 89 | if ord_: 90 | # Order the LoopEnum values, from innermost to outermost. 
91 | # The sort key is a three-tuple: 92 | # - innermost non-trivial loop should be kept at the innermost. 93 | # - non-trivial loops should be inside trivial loops. 94 | # - within each part, order by LoopEnum value. 95 | lp_ord = sorted(range(le.NUM), 96 | key=lambda lpe, inl=innermost_nt_loop, nls=nt_loops: 97 | (lpe != inl, lpe not in nls, lpe)) 98 | 99 | if any(lp_ord[ord_[lpe]] != lpe for lpe in range(le.NUM)): 100 | return True 101 | 102 | return False 103 | 104 | 105 | def _loop_blocking_cmp_key(options, cost): 106 | if options.opt_goal == 'ed': 107 | return lambda lbs: lbs.get_access_cost(cost) * lbs.time 108 | if options.opt_goal == 'd': 109 | return lambda lbs: (lbs.time, lbs.get_access_cost(cost)) 110 | assert options.opt_goal == 'e' 111 | return lambda lbs: (lbs.get_access_cost(cost), lbs.time) 112 | 113 | 114 | def _gen_loopblocking_perprocess( 115 | nested_loop_desc, resource, bufshr, constraint, cost, options, 116 | gen_tifm, gen_tofm, gen_tbat, gen_ords): 117 | 118 | def _gen_bl_ts(): 119 | ''' 120 | Generator for blocking factors. 121 | 122 | Transpose LoopEnum-major to BL-major. 123 | ''' 124 | gen_lp_ts = [None] * le.NUM 125 | gen_lp_ts[le.IFM], gen_lp_ts[le.OFM], gen_lp_ts[le.BAT] = \ 126 | constraint.filter_gen_ts(gen_tifm, gen_tofm, gen_tbat) 127 | for lp_ts in itertools.product(*gen_lp_ts): 128 | bl_ts = tuple(zip(*lp_ts)) 129 | yield bl_ts 130 | 131 | def _sweep(): 132 | ''' Sweep all. 
''' 133 | is_conv_loops = (nested_loop_desc.data_loops == ConvLayer.data_loops()) 134 | for bl_ts, bl_ords in itertools.product(_gen_bl_ts(), gen_ords): 135 | if is_conv_loops and skip_conv(bl_ts, bl_ords): 136 | continue 137 | if not constraint.is_valid_top_bl(bl_ts[0], bl_ords[0]): 138 | continue 139 | lbs = LoopBlockingScheme( 140 | nested_loop_desc, bl_ts, bl_ords, resource, bufshr, 141 | options) 142 | yield lbs 143 | 144 | return heapq.nsmallest(options.ntops, _sweep(), 145 | key=_loop_blocking_cmp_key(options, cost)) 146 | 147 | 148 | def gen_loopblocking(nested_loop_desc, resource, part, constraint, cost, 149 | options): 150 | ''' 151 | Generator for loop blocking. 152 | ''' 153 | 154 | # Buffer sharing scheme. 155 | bufshr = BufShrScheme(resource.proc_region, part, 156 | nested_loop_desc.data_loops) 157 | 158 | # Solver only works for CONV layer. 159 | if options.sw_solve_loopblocking \ 160 | and nested_loop_desc.data_loops == ConvLayer.data_loops(): 161 | gen = loop_blocking_solver.gen_loopblocking_gbuf_reside 162 | 163 | for bl_ts, bl_ords in gen(nested_loop_desc, resource, options): 164 | lbs = LoopBlockingScheme(nested_loop_desc, bl_ts, bl_ords, 165 | resource, bufshr, options) 166 | if constraint.is_valid_top_bl(lbs.bl_ts[0], lbs.bl_ords[0]): 167 | yield lbs 168 | return 169 | 170 | ## Exhaustive search. 171 | 172 | results = [] 173 | 174 | def retrieve_result(): 175 | ''' Retrieve results from multiprocessing.Pool. ''' 176 | for r in results: 177 | for t in r.get(timeout=3600): 178 | yield t 179 | 180 | def retrieve_result_st(): 181 | ''' Retrieve results from single-process processing. ''' 182 | for r in results: 183 | for t in r: 184 | yield t 185 | 186 | if options.nprocesses > 1: 187 | pool = Pool(processes=options.nprocesses) 188 | apply_func = pool.apply_async 189 | retrieve_func = retrieve_result() 190 | else: 191 | pool = None 192 | apply_func = util.apply 193 | retrieve_func = retrieve_result_st() 194 | 195 | # Exhaustive generators. 
196 | gen_tifm = util.factorize(nested_loop_desc.loopcnt[le.IFM], 3) 197 | gen_tofm = util.factorize(nested_loop_desc.loopcnt[le.OFM], 3) 198 | gen_tbat = util.factorize(nested_loop_desc.loopcnt[le.BAT], 3) 199 | gen_ords = itertools.product(itertools.permutations(range(le.NUM)), 200 | itertools.permutations(range(le.NUM))) 201 | 202 | # Split the design space for multiprocessing. 203 | # Let each process factorize tbat and orders, which constantly have many 204 | # factors that can amortize the multiprocessing overhead. 205 | # Note that we must materialize them into lists, since generators cannot be 206 | # pickled. See 207 | # http://peadrop.com/blog/2009/12/29/why-you-cannot-pickle-generators/ 208 | list_tbat = list(gen_tbat) 209 | list_ords = list(gen_ords) 210 | for tifm, tofm in itertools.product(gen_tifm, gen_tofm): 211 | r = apply_func(_gen_loopblocking_perprocess, 212 | (nested_loop_desc, resource, bufshr, constraint, cost, 213 | options, [tifm], [tofm], list_tbat, list_ords)) 214 | results.append(r) 215 | 216 | for lbs in heapq.nsmallest(options.ntops, retrieve_func, 217 | key=_loop_blocking_cmp_key(options, cost)): 218 | yield lbs 219 | 220 | if pool is not None: 221 | pool.close() 222 | pool.join() 223 | 224 | -------------------------------------------------------------------------------- /nn_dataflow/core/loop_blocking_solver.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 
12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | import math 18 | 19 | from . import data_category_enum as de 20 | from . import loop_enum as le 21 | from .. import util 22 | from .layer import ConvLayer 23 | 24 | ''' 25 | Analytical solvers for loop blocking. 26 | ''' 27 | 28 | def _solve_gbuf_reside(nested_loop_desc, resource, reside_dce): 29 | ''' 30 | Solve the analytical optimal loop blocking scheme, with the given data 31 | category `reside_dce` is the only one in GBUF; all the other data 32 | categories bypass GBUF. 33 | 34 | At the GBUF blocking level, the loops for the reside data category are at 35 | the outer, meaning it is only accessed once into GBUF. The others bypass 36 | GBUF and are streamed multiple times from DRAM to REGF. 37 | 38 | Let x, y, z be the three LoopEnum values, and x, y are for `reside_dce`, 39 | then the nested loop is: 40 | 41 | tx0/ty0, tz0, (tz1 = 1), tx1/ty1 (= 1, ?), tx2/ty2/tz2 42 | 43 | Note that tz1 = 1 is required since tz0 is the innermost of the outer level 44 | (otherwise tz1 can merge into tz0). The REGF level can only allow one 45 | non-trivial loop, so either tx1 or ty1 must also be 1. 46 | 47 | Opt I. 48 | 49 | min accesses to DRAM = 50 | (Nx * Ny * sgxy) * fxy + (Ny * Nz * sgyz) * fyz * tx0 51 | + (Nx * Nz * sgxz) * fxz * ty0 52 | s.t. 53 | 1 <= tx0 <= Nx 54 | 1 <= ty0 <= Ny 55 | (Nx // tx0) * (Ny // ty0) * sgxy <= Sgbuf 56 | min{(srxy + srxz) * (Nx // tx0) + sryz, 57 | (srxy + sryz) * (Ny // ty0) + srxz} <= Sregf 58 | 59 | Nx, Ny, Nz are the total loop factors. 60 | sgxy, sgyz, sgxz are the data unit sizes in GBUF. 61 | srxy, sryz, srxz are the data unit sizes in REGF. 62 | 63 | The last constraint is for the feasibility of REGF capacity. Note that tz2 64 | could be 1. If ty2 is minimized to 1 (so ty1 is not 1), tx1 must be 1; 65 | similarly, if tx2 is minimized to 1 (so tx1 is not 1), ty1 must be 1. 
At 66 | least one of these two cases must be feasible for REGF capacity. 67 | 68 | Although opt I is a convex optimization, we need to further require tx0 and 69 | ty0 to be factors of Nx and Ny, respectively. So we use exhaustive search 70 | to solve opt I. 71 | 72 | Opt II. 73 | 74 | min fetch to GBUF for `reside_dce` = 75 | 1 if tx1 = ty1 = 1 76 | tz0 elsewise 77 | s.t. 78 | tx2 * ty2 * srxy + ty2 * tz2 * sryz + tx2 * tz2 * srxz <= Sregf 79 | 80 | If tx1 and ty1 could be 1, which means the reside data category could put 81 | all GBUF data Nx // tx0 and Ny // ty0 directly into REGF, then it is the 82 | optimal case. 83 | 84 | Otherwise, since tz1 = 1, min tz0 is equivalent to 85 | 86 | max tz2 = 87 | (Sregf - tx2 * ty2 * srxy) / (ty2 * sryz + tx2 * srxz) 88 | 89 | Special adjustment. 90 | 91 | The above model assumes tz0 is a non-trivial loop. If the final solution 92 | has tz0 = 1, the bypass data categories may not bypass. For example, if ty0 93 | is the innermost loop of the top level, data xz will have 1 fetch to DRAM, 94 | but ty0 fetch to GBUF. So we have to adjust the scheme by merging tx1 or 95 | ty1 into tx0 or ty0, and ensure it to be the inner loop at the top level. 96 | ''' 97 | 98 | ldce = [reside_dce] # xy, yz, xz 99 | llpe = [] # x, y, z 100 | lfacc = [] # xy, yz, xz 101 | 102 | if ldce[0] == de.FIL: 103 | llpe += [le.IFM, le.OFM, le.BAT] 104 | ldce += [de.OFM, de.IFM] 105 | lfacc += [1., 2., 1.] 106 | elif ldce[0] == de.IFM: 107 | llpe += [le.IFM, le.BAT, le.OFM] 108 | ldce += [de.OFM, de.FIL] 109 | lfacc += [1., 2., 1.] 110 | else: 111 | assert ldce[0] == de.OFM 112 | llpe += [le.OFM, le.BAT, le.IFM] 113 | ldce += [de.IFM, de.FIL] 114 | lfacc += [2., 1., 1.] 
115 | 116 | lnum = [nested_loop_desc.loopcnt[lpe] for lpe in llpe] # x, y, z 117 | lsgbuf = [nested_loop_desc.usize_gbuf_of(dce) for dce in ldce] # xy, yz, xz 118 | lsregf = [nested_loop_desc.usize_regf_of(dce) for dce in ldce] # xy, yz, xz 119 | 120 | size_gbuf, size_regf = resource.size_gbuf, resource.size_regf 121 | 122 | def goal_opt1(tx0, ty0): 123 | ''' Opt I goal function. min goal(). ''' 124 | lnumloops = [lnum[0] * lnum[1], lnum[1] * lnum[2], lnum[0] * lnum[2]] 125 | ltloops = [1, tx0, ty0] 126 | return sum(util.prod(tpl) for tpl 127 | in zip(lnumloops, lsgbuf, lfacc, ltloops)) 128 | 129 | def constraints_opt1(tx0, ty0): 130 | ''' Opt I constraints. s.t. constraints(). ''' 131 | if (lnum[0] // tx0) * (lnum[1] // ty0) * lsgbuf[0] > size_gbuf: 132 | return False 133 | if min(lnum[0] // tx0 * (lsregf[0] + lsregf[2]) + lsregf[1], 134 | lnum[1] // ty0 * (lsregf[0] + lsregf[1]) + lsregf[2]) \ 135 | > size_regf: 136 | return False 137 | return True 138 | 139 | # Exhaustive search for opt I. 140 | min_goal = float('inf') 141 | for tx0_, _ in util.factorize(lnum[0], 2): 142 | for ty0_, _ in util.factorize(lnum[1], 2): 143 | # Satisfy constraints. 144 | if not constraints_opt1(tx0_, ty0_): 145 | continue 146 | # Minimize goal. 147 | goal = goal_opt1(tx0_, ty0_) 148 | if goal < min_goal: 149 | min_goal = goal 150 | tx0, ty0 = tx0_, ty0_ 151 | 152 | def goal_opt2(tx2, ty2): 153 | ''' Opt II goal function. max goal(). ''' 154 | tz2 = (size_regf - tx2 * ty2 * lsregf[0]) * 1. \ 155 | / (ty2 * lsregf[1] + tx2 * lsregf[2]) 156 | if tz2 < 0: 157 | return -float('inf') 158 | tz2_adj = util.closest_factor(lnum[2], tz2) 159 | if tz2_adj[0] <= tz2: 160 | return tz2_adj[0] 161 | return -float('inf') 162 | 163 | # Try tx1 = ty1 = 1. 164 | tx2, ty2 = lnum[0] // tx0, lnum[1] // ty0 165 | tz2 = goal_opt2(tx2, ty2) 166 | 167 | if math.isinf(tz2): 168 | # Candidates of tx2, ty2. 169 | txy2_cands = [(1, lnum[1] // ty0), (lnum[0] // tx0, 1)] 170 | 171 | # Select. 
172 | tx2, ty2 = max(txy2_cands, key=lambda txy2: goal_opt2(*txy2)) 173 | tz2 = goal_opt2(tx2, ty2) 174 | 175 | assert not math.isinf(tz2) 176 | tz0 = lnum[2] // tz2 177 | tx1 = lnum[0] // tx0 // tx2 178 | ty1 = lnum[1] // ty0 // ty2 179 | 180 | # Loop orders. 181 | # Loop z is at the innermost of the top level. Do not care x, y. 182 | bl_ord_0 = [0] * le.NUM 183 | bl_ord_0[llpe[0]] = 2 184 | bl_ord_0[llpe[1]] = 1 185 | bl_ord_0[llpe[2]] = 0 186 | # The non-1 loop x or y is at the innermost of the middle level. 187 | bl_ord_1 = [0] * le.NUM 188 | bl_ord_1[llpe[0]] = 0 if tx1 > 1 else 1 189 | bl_ord_1[llpe[1]] = 1 if tx1 > 1 else 0 190 | bl_ord_1[llpe[2]] = 2 191 | 192 | # Special adjustment when tz0 = 1: merge tx1/ty1 into tx0/ty0. 193 | if tz0 == 1: 194 | tx0 *= tx1 195 | tx1 = 1 196 | ty0 *= ty1 197 | ty1 = 1 198 | # Also maintain the order. 199 | bl_ord_0 = bl_ord_1 200 | 201 | # Compose return values. 202 | lp_ts = [None] * le.NUM 203 | lp_ts[llpe[0]] = (tx0, tx1, tx2) 204 | lp_ts[llpe[1]] = (ty0, ty1, ty2) 205 | lp_ts[llpe[2]] = (tz0, 1, tz2) 206 | bl_ts = tuple(zip(*lp_ts)) 207 | 208 | bl_ords = (tuple(bl_ord_0), tuple(bl_ord_1)) 209 | 210 | return bl_ts, bl_ords 211 | 212 | 213 | def gen_loopblocking_gbuf_reside(nested_loop_desc, resource, options): 214 | ''' 215 | Generator for loop blocking schemes that are solved from gbuf reside 216 | analytical models. 217 | ''' 218 | if nested_loop_desc.data_loops != ConvLayer.data_loops(): 219 | raise ValueError('loop_blocking_solver: solver only applies to ' 220 | 'CONV layer nested loops') 221 | 222 | reside_dce_list = [] 223 | # reside_dce_list is a list of DataCategoryEnum, each element is a config 224 | # with only that data category in gbuf, i.e., the others are all bypassed. 
225 | for reside_dce in range(de.NUM): 226 | if all(options.sw_gbuf_bypass[dce] for dce in range(de.NUM) 227 | if dce != reside_dce): 228 | reside_dce_list.append(reside_dce) 229 | 230 | for reside_dce in reside_dce_list: 231 | yield _solve_gbuf_reside(nested_loop_desc, resource, reside_dce) 232 | 233 | -------------------------------------------------------------------------------- /nn_dataflow/core/loop_enum.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | ''' 18 | Enum for loop types. 19 | ''' 20 | IFM = 0 21 | OFM = 1 22 | BAT = 2 23 | NUM = 3 24 | 25 | -------------------------------------------------------------------------------- /nn_dataflow/core/mem_hier_enum.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. 
See the BSD-3 License for more details.

You should have received a copy of the Modified BSD-3 License along with this
program. If not, see .
"""

'''
Enum for memory hierarchy.
'''
DRAM = 0
GBUF = 1
ITCN = 2
REGF = 3
NUM = 4

-------------------------------------------------------------------------------- /nn_dataflow/core/nested_loop_desc.py: --------------------------------------------------------------------------------
""" $lic$
Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of
Stanford University

This program is free software: you can redistribute it and/or modify it under
the terms of the Modified BSD-3 License as published by the Open Source
Initiative.

This program is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE. See the BSD-3 License for more details.

You should have received a copy of the Modified BSD-3 License along with this
program. If not, see .
"""

from collections import namedtuple

from . import data_category_enum as de
from . import loop_enum as le
from . import mem_hier_enum as me
from .. import util
from .data_dim_loops import DataDimLoops

# Field names of the NestedLoopDesc namedtuple.
NESTED_LOOP_DESC_LIST = ['loopcnt',
                         'usize_gbuf',
                         'usize_regf',
                         'unit_access',
                         'unit_ops',
                         'unit_time',
                         'data_loops',
                        ]

class NestedLoopDesc(namedtuple('NestedLoopDesc', NESTED_LOOP_DESC_LIST)):
    '''
    Naive nested loop description.

    For our problem, only deal with the loops given by `LoopEnum`.
    '''

    def __new__(cls, *args, **kwargs):
        ntp = super(NestedLoopDesc, cls).__new__(cls, *args, **kwargs)

        # loopcnt: one total loop count per LoopEnum.
        if not isinstance(ntp.loopcnt, tuple):
            raise TypeError('NestedLoopDesc: loopcnt must be a tuple.')
        if len(ntp.loopcnt) != le.NUM:
            raise ValueError('NestedLoopDesc: loopcnt must have length {}.'
                             .format(le.NUM))

        # usize_gbuf / usize_regf: one unit size per DataCategoryEnum.
        if not isinstance(ntp.usize_gbuf, tuple):
            raise TypeError('NestedLoopDesc: usize_gbuf must be a tuple.')
        if not isinstance(ntp.usize_regf, tuple):
            raise TypeError('NestedLoopDesc: usize_regf must be a tuple.')
        if len(ntp.usize_gbuf) != de.NUM:
            raise ValueError('NestedLoopDesc: usize_gbuf must have length {}.'
                             .format(de.NUM))
        if len(ntp.usize_regf) != de.NUM:
            raise ValueError('NestedLoopDesc: usize_regf must have length {}.'
                             .format(de.NUM))

        # unit_access: a MemHierEnum-indexed tuple of DataCategoryEnum-indexed
        # tuples of per-loop-body access counts.
        if not isinstance(ntp.unit_access, tuple):
            raise TypeError('NestedLoopDesc: unit_access must be a tuple.')
        if len(ntp.unit_access) != me.NUM:
            raise ValueError('NestedLoopDesc: unit_access must have length {}.'
                             .format(me.NUM))
        for ua in ntp.unit_access:
            if not isinstance(ua, tuple):
                raise TypeError('NestedLoopDesc: element in unit_access '
                                'must be a tuple.')
            if len(ua) != de.NUM:
                raise ValueError('NestedLoopDesc: element in unit_access '
                                 'must have length {}.'.format(de.NUM))

        # data_loops: which loop dims each data category depends on.
        # NOTE(review): unit_ops and unit_time are not validated here.
        if not isinstance(ntp.data_loops, tuple):
            raise TypeError('NestedLoopDesc: data_loops must be a tuple.')
        if len(ntp.data_loops) != de.NUM:
            raise ValueError('NestedLoopDesc: data_loops must have length {}.'
                             .format(de.NUM))
        for dls in ntp.data_loops:
            if not isinstance(dls, DataDimLoops):
                raise TypeError('NestedLoopDesc: element in data_loops '
                                'must be a DataDimLoops instance.')

        return ntp

    def usize_gbuf_of(self, dce):
        '''
        Get the occupied gbuf size by data category `dce` for one loop body.
        '''
        return self.usize_gbuf[dce]

    def usize_regf_of(self, dce):
        '''
        Get the occupied regf size by data category `dce` for one loop body.
        '''
        return self.usize_regf[dce]

    def unit_access_at_of(self, mhe, dce=None):
        '''
        Get the number of accesses for one loop body at memory hierarchy `mhe`
        of data category `dce`.

        If `dce` is None, return total accesses of all data.
        '''
        if dce is None:
            return sum(self.unit_access[mhe])
        return self.unit_access[mhe][dce]

    def total_ops(self):
        '''
        Get the total number of ops for all loops.
        '''
        return self.unit_ops * util.prod(self.loopcnt)

    def total_access_at_of(self, mhe, dce=None):
        '''
        Get the total number of accesses, i.e., accessing all data once, at
        memory hierarchy `mhe` of data category `dce`.

        If `dce` is None, return total accesses of all data.
        '''
        if dce is None:
            return sum(self.total_access_at_of(mhe, dce2)
                       for dce2 in range(de.NUM))

        # Scale per-body accesses by only the loop dims this data depends on.
        return self.unit_access_at_of(mhe, dce) \
            * util.prod(self.data_loops[dce].take(self.loopcnt))

-------------------------------------------------------------------------------- /nn_dataflow/core/node_region.py: --------------------------------------------------------------------------------
""" $lic$
Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of
Stanford University

This program is free software: you can redistribute it and/or modify it under
the terms of the Modified BSD-3 License as published by the Open Source
Initiative.

This program is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE. See the BSD-3 License for more details.

You should have received a copy of the Modified BSD-3 License along with this
program. If not, see .
"""

import itertools
from collections import namedtuple

from .. import util
from .phy_dim2 import PhyDim2

# Field names of the NodeRegion namedtuple.
NODE_REGION_LIST = ['dim',
                    'origin',
                    'dist',
                    'type',
                    'wtot',
                    'wbeg',
                   ]

class NodeRegion(namedtuple('NodeRegion', NODE_REGION_LIST)):
    '''
    A node region defined by the dimension and origin offset.

    The `type` attribute specifies the region type, which could be `PROC` for
    computation processing nodes or 'DRAM' for off-chip data storage nodes.

    The node region can be optionally folded along the w dimension in a zig-zag
    manner. The folding scheme is defined by (wtot, wbeg). `wtot` is always
    positive, representing the number of nodes between two turns (total width).
    `wbeg` is the number of nodes before reaching the first turning boundary,
    with its sign representing the direction. E.g.,

    ...
    ******************
              ********
              | wbeg |

    or

    ...
    ******************
    *********
    | -wbeg |

    With folded region, `origin` points to the first node.

    NOTE: we cannot overload __contains__ and __iter__ as a node container,
    because the base namedtuple already defines them.
    '''

    # Type enums.
    PROC = 0
    DRAM = 1
    NUM = 2

    def __new__(cls, *args, **kwargs):

        # Set default values.
        kwargs2 = kwargs.copy()
        if len(args) <= NODE_REGION_LIST.index('dist'):
            kwargs2.setdefault('dist', PhyDim2(1, 1))
        if len(args) <= NODE_REGION_LIST.index('wtot'):
            # Default to dim.w but we haven't checked dim yet. Replace later.
            kwargs2.setdefault('wtot', None)
        if len(args) <= NODE_REGION_LIST.index('wbeg'):
            # Default to wtot. Also replace later.
            kwargs2.setdefault('wbeg', None)

        ntp = super(NodeRegion, cls).__new__(cls, *args, **kwargs2)

        if not isinstance(ntp.dim, PhyDim2):
            raise TypeError('NodeRegion: dim must be a PhyDim2 object.')
        if not isinstance(ntp.origin, PhyDim2):
            raise TypeError('NodeRegion: origin must be a PhyDim2 object.')
        if not isinstance(ntp.dist, PhyDim2):
            raise TypeError('NodeRegion: dist must be a PhyDim2 object.')

        if ntp.type not in range(cls.NUM):
            raise ValueError('NodeRegion: type must be a valid type enum.')

        # Now dim is validated; resolve the deferred wtot/wbeg defaults.
        if ntp.wtot is None:
            ntp = ntp._replace(wtot=ntp.dim.w)
        if ntp.wbeg is None:
            ntp = ntp._replace(wbeg=ntp.wtot)

        if not isinstance(ntp.wtot, int):
            raise TypeError('NodeRegion: wtot must be an int.')
        if not isinstance(ntp.wbeg, int):
            raise TypeError('NodeRegion: wbeg must be an int.')

        # Empty regions skip the wbeg range check.
        if not (0 < abs(ntp.wbeg) <= ntp.wtot) and ntp.dim.size() > 0:
            raise ValueError('NodeRegion: |wbeg| must be in (0, wtot].')

        return ntp

    def contains_node(self, coordinate):
        ''' Whether the region contains the given absolute node coordinate. '''
        # Linear scan over all nodes in the (folded) region.
        return coordinate in self.iter_node()

    def iter_node(self):
        ''' Iterate through all absolute node coordinates in the region. '''
        for rel_coord in itertools.product(*[range(d) for d in self.dim]):
            yield self.rel2abs(PhyDim2(*rel_coord))

    def rel2abs(self, rel_coordinate):
        ''' Convert relative node coordinate to absolute node coordinate. '''
        if not isinstance(rel_coordinate, PhyDim2):
            raise TypeError('NodeRegion: relative coordinate must be '
                            'a PhyDim2 object.')
        if not all(0 <= c < d for c, d in zip(rel_coordinate, self.dim)):
            raise ValueError('NodeRegion: relative coordinate {} is not in '
                             'node region {}.'.format(rel_coordinate, self))

        # Add starting offset to start from the boundary before the first node,
        # then modulo wtot to get the delta h and w to this boundary point.
        h, w = divmod(rel_coordinate.w + self.wtot - abs(self.wbeg), self.wtot)
        # Direction for w, changing every time when h increments.
        direction = (-1 if self.wbeg < 0 else 1) * (-1 if h % 2 else 1)
        # Make w relative to the left boundary.
        w = w if direction > 0 else self.wtot - 1 - w

        abs_coordinate = self.origin \
                + PhyDim2(h=h * self.dim.h + rel_coordinate.h,
                          w=w - (self.wtot - self.wbeg if self.wbeg > 0
                                 else -self.wbeg - 1)) \
                * self.dist

        return abs_coordinate

    def allocate(self, request_list):
        '''
        Allocate node subregions spatially within the node region according to
        the given `request_list` which is a list of numbers of nodes requested.

        Return a list of NodeRegion instances, whose origins are absolute
        offset (not relative to the origin of self). The allocation may fail if
        and only if the total number of nodes requested is larger than the
        number of nodes in the region, in which case an empty list is returned.

        The strategy is to allocate stripe-wise in a zig-zag order, allowing
        for folding in width. We first determine a stripe height as the
        greatest common divisor of the requested numbers of nodes. Then
        allocate each request as (stripe height, request size / stripe height)
        to fill in the stripe, and move to the next stripe after the current
        one is filled. If the width of a request is larger than the remaining
        width of the current stripe, we use up the remaining width, and fold
        the request width to the next stripe.
        '''

        if sum(request_list) > self.dim.size():
            return []

        # Stripe height divides every request size, so each request tiles
        # exactly (hstrp x width) nodes.
        hstrp = util.gcd(self.dim.h, *request_list)
        subregions = []

        wtot = self.dim.w
        ofs_h, ofs_w = 0, 0
        move_right = True

        for req in request_list:

            # Subregion.
            assert req % hstrp == 0
            width = req // hstrp

            subdim = PhyDim2(hstrp, width)
            if move_right:
                origin = PhyDim2(ofs_h, ofs_w)
                wbeg = min(wtot - ofs_w, width)
                assert wbeg > 0
            else:
                # Zig-zag: odd stripes start from the right edge and move left.
                origin = PhyDim2(ofs_h, self.dim.w - ofs_w - 1)
                wbeg = -min(wtot - ofs_w, width)
                assert wbeg < 0

            subregions.append(NodeRegion(dim=subdim,
                                         origin=self.origin \
                                                 + origin * self.dist,
                                         dist=self.dist,
                                         type=self.type,
                                         wtot=wtot,
                                         wbeg=wbeg))

            # Move the offset
            ofs_w += width
            while ofs_w >= self.dim.w:
                # Overflow, fold to the next stripe.
                ofs_w -= self.dim.w
                ofs_h += hstrp
                move_right = not move_right

        # Not moved outside the region.
        assert ofs_h + hstrp <= self.dim.h or ofs_w == 0

        return subregions

-------------------------------------------------------------------------------- /nn_dataflow/core/option.py: --------------------------------------------------------------------------------
""" $lic$
Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of
Stanford University

This program is free software: you can redistribute it and/or modify it under
the terms of the Modified BSD-3 License as published by the Open Source
Initiative.
class Option(namedtuple('Option', OPTION_LIST)):
    '''
    Schedule options.

    A namedtuple over OPTION_LIST with keyword defaults emulated in __new__:
    positional and keyword arguments are merged, missing options get their
    defaults, and cross-option consistency is validated.
    '''

    def __new__(cls, *args, **kwargs):

        # Reject more positional arguments than there are options.
        if len(args) > len(OPTION_LIST):
            raise TypeError('Option: can take at most {} arguments ({} given).'
                            .format(len(OPTION_LIST), len(args)))

        # Reject unknown keyword arguments, naming the first offender.
        if not set(kwargs).issubset(OPTION_LIST):
            raise TypeError('Option: got an unexpected keyword argument {}.'
                            .format(next(k for k in kwargs
                                         if k not in OPTION_LIST)))

        # Combine args and kwargs.
        kwdict = kwargs.copy()
        for k, v in zip(OPTION_LIST, args):
            if k in kwdict:
                raise TypeError('Option: got multiple values for '
                                'keyword argument {}.'
                                .format(k))
            kwdict[k] = v

        # Defaults for any options not given.
        kwdict.setdefault('sw_gbuf_bypass', (False,) * de.NUM)
        kwdict.setdefault('sw_solve_loopblocking', False)
        kwdict.setdefault('hw_access_forwarding', False)
        kwdict.setdefault('hw_gbuf_sharing', False)
        kwdict.setdefault('hw_gbuf_save_writeback', False)
        kwdict.setdefault('partition_hybrid', False)
        kwdict.setdefault('partition_batch', False)
        kwdict.setdefault('partition_ifmaps', False)
        kwdict.setdefault('partition_interlayer', False)
        kwdict.setdefault('layer_pipeline_time_ovhd', float('inf'))
        kwdict.setdefault('layer_pipeline_max_degree', float('inf'))
        kwdict.setdefault('layer_pipeline_opt', True)
        kwdict.setdefault('opt_goal', 'e')
        kwdict.setdefault('ntops', 1)
        kwdict.setdefault('nprocesses', 1)
        kwdict.setdefault('verbose', False)

        assert set(kwdict) == set(OPTION_LIST)

        ntp = super(Option, cls).__new__(cls, **kwdict)

        # sw_gbuf_bypass is a per-data-category tuple of booleans.
        if not isinstance(ntp.sw_gbuf_bypass, tuple):
            raise TypeError('Option: sw_gbuf_bypass must be a tuple')
        if len(ntp.sw_gbuf_bypass) != de.NUM:
            raise ValueError('Option: sw_gbuf_bypass must have length {}'
                             .format(de.NUM))

        # Mutually exclusive / implied option combinations.
        if ntp.sw_solve_loopblocking and ntp.hw_gbuf_sharing:
            raise ValueError('Option: sw_solve_loopblocking and '
                             'hw_gbuf_sharing cannot be simultaneously '
                             'enabled.')

        if ntp.hw_access_forwarding and ntp.hw_gbuf_sharing:
            raise ValueError('Option: hw_access_forwarding is implied by '
                             'hw_gbuf_sharing, thus cannot be both enabled.')

        if ntp.sw_solve_loopblocking and ntp.hw_gbuf_save_writeback:
            raise ValueError('Option: sw_solve_loopblocking and '
                             'hw_gbuf_save_writeback cannot be simultaneously '
                             'enabled.')

        if ntp.partition_ifmaps and not ntp.partition_hybrid:
            raise ValueError('Option: partition_ifmaps requires '
                             'partition_hybrid to be set.')

        # NOTE(review): KeyError is an odd choice for a type check (TypeError
        # would fit better), but callers/tests may depend on it — confirm
        # before changing.
        if not isinstance(ntp.layer_pipeline_time_ovhd, (int, float)):
            raise KeyError('Option: layer_pipeline_time_ovhd must be a '
                           'number.')
        # NOTE(review): the check allows 0 although the message says
        # "positive"; presumably "non-negative" is meant — verify intent.
        if ntp.layer_pipeline_time_ovhd < 0:
            raise ValueError('Option: layer_pipeline_time_ovhd must be '
                             'positive.')

        if not isinstance(ntp.layer_pipeline_max_degree, (int, float)):
            raise KeyError('Option: layer_pipeline_max_degree must be a '
                           'number.')
        if ntp.layer_pipeline_max_degree < 0:
            raise ValueError('Option: layer_pipeline_max_degree must be '
                             'positive.')

        # Optimization goal: 'e'nergy, 'd'elay, or 'ed' product.
        if ntp.opt_goal not in ['e', 'd', 'ed']:
            raise ValueError('Option: opt_goal is invalid, must be one of '
                             '\'e\', \'d\', and \'ed\'.')

        return ntp

    @staticmethod
    def option_list():
        ''' List of options. '''
        return OPTION_LIST
class PhyDim2(namedtuple('PhyDim2', ['h', 'w'])):
    '''
    Denote a physical 2D dimension (height `h` x width `w`).

    Arithmetic operators are element-wise; a scalar operand is broadcast to
    both dimensions.
    '''

    def size(self):
        ''' Total size, i.e., h * w. '''
        return int(reduce(mul, self, 1))

    def hop_dist(self, other):
        ''' Hop (Manhattan) distance between two coordinates. '''
        if not isinstance(other, PhyDim2):
            raise TypeError('PhyDim2: hop_dist only applies on two PhyDim2 '
                            'instances.')
        return abs(self.h - other.h) + abs(self.w - other.w)

    def __add__(self, other):
        ''' Return element-wise `self + other`; scalars broadcast. '''
        if not isinstance(other, PhyDim2):
            other = PhyDim2(other, other)
        return PhyDim2(*map(add, self, other))

    def __sub__(self, other):
        ''' Return element-wise `self - other`; scalars broadcast. '''
        if not isinstance(other, PhyDim2):
            other = PhyDim2(other, other)
        return PhyDim2(*map(sub, self, other))

    def __neg__(self):
        ''' Return element-wise negative. '''
        return PhyDim2(*map(neg, self))

    def __mul__(self, other):
        ''' Return element-wise `self * other`; scalars broadcast. '''
        if not isinstance(other, PhyDim2):
            other = PhyDim2(other, other)
        return PhyDim2(*map(mul, self, other))

    # Scalar-on-the-left multiplication behaves identically.
    __rmul__ = __mul__
40 | ''' 41 | 42 | def __new__(cls, *args, **kwargs): 43 | ntp = super(Resource, cls).__new__(cls, *args, **kwargs) 44 | 45 | if not isinstance(ntp.proc_region, NodeRegion): 46 | raise TypeError('Resource: proc_region must be ' 47 | 'a NodeRegion instance.') 48 | if ntp.proc_region.type != NodeRegion.PROC: 49 | raise ValueError('Resource: proc_region must have type PROC.') 50 | 51 | if not isinstance(ntp.dram_region, NodeRegion): 52 | raise TypeError('Resource: dram_region must be ' 53 | 'a NodeRegion instance.') 54 | if ntp.dram_region.type != NodeRegion.DRAM: 55 | raise ValueError('Resource: dram_region must have type DRAM.') 56 | 57 | if not isinstance(ntp.src_data_region, NodeRegion): 58 | raise TypeError('Resource: src_data_region must be ' 59 | 'a NodeRegion instance.') 60 | if not isinstance(ntp.dst_data_region, NodeRegion): 61 | raise TypeError('Resource: dst_data_region must be ' 62 | 'a NodeRegion instance.') 63 | 64 | if not isinstance(ntp.dim_array, PhyDim2): 65 | raise TypeError('Resource: dim_array must be a PhyDim2 object.') 66 | 67 | if hasattr(ntp.size_gbuf, '__len__'): 68 | raise TypeError('Resource: size_gbuf must be a scalar') 69 | if hasattr(ntp.size_regf, '__len__'): 70 | raise TypeError('Resource: size_regf must be a scalar') 71 | 72 | if not isinstance(ntp.array_bus_width, int) \ 73 | and not math.isinf(ntp.array_bus_width): 74 | raise TypeError('Resource: array_bus_width must be an integer ' 75 | 'or infinity.') 76 | if ntp.array_bus_width <= 0: 77 | raise ValueError('Resource: array_bus_width must be positive.') 78 | 79 | if not isinstance(ntp.dram_bandwidth, (float, int)): 80 | raise TypeError('Resource: dram_bandwidth must be a number') 81 | if ntp.dram_bandwidth <= 0: 82 | raise ValueError('Resource: dram_bandwidth must be positive.') 83 | 84 | if not isinstance(ntp.no_time_mux, bool): 85 | raise TypeError('Resource: no_time_mux must be boolean') 86 | 87 | return ntp 88 | 89 | 
-------------------------------------------------------------------------------- /nn_dataflow/core/scheduling_constraint.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | import numbers 18 | 19 | from . import loop_enum as le 20 | from .. import util 21 | from .loop_blocking_scheme import LoopBlockingScheme 22 | 23 | class SchedulingConstraint(util.ContentHashClass): 24 | ''' 25 | Layer scheduling constraint, which constrains top loop blocking factors. 26 | ''' 27 | 28 | def __init__(self, topbat=0, topifm=0, topofm=0, update_dict=None): 29 | ''' 30 | `topbat`, `topifm`, `topofm` specify the top-level loop blocking 31 | factors. 32 | 33 | `update_dict` specifies lazily updated rules to refine the constraint 34 | with previous scheduling results. It should be a mapping, from previous 35 | layer name to a function which takes two arguments: self, and the 36 | SchedulingResult instance of that layer. 
37 | ''' 38 | if any(n < 0 or not isinstance(n, numbers.Integral) 39 | for n in [topbat, topifm, topofm]): 40 | raise ValueError('SchedulingConstraint: ' 41 | 'constrained factors must be positive integers.') 42 | 43 | if not update_dict: 44 | update_dict = {} 45 | if not isinstance(update_dict, dict): 46 | raise TypeError('SchedulingConstraint: ' 47 | 'update_dict must be a dict instance.') 48 | update_dict = util.HashableDict.fromdict(update_dict) 49 | for val in update_dict.values(): 50 | if not callable(val): 51 | raise TypeError('SchedulingConstraint: ' 52 | 'values in update_dict must be callable.') 53 | 54 | self.topbat = topbat 55 | self.topifm = topifm 56 | self.topofm = topofm 57 | self.update_dict = update_dict 58 | 59 | def is_valid_top_bl(self, top_bl_t, top_bl_ord): 60 | ''' 61 | Whether the given `top_bl_t` and `top_bl_lpe` are valid with the 62 | constraint. 63 | ''' 64 | if self.update_dict: 65 | raise ValueError('SchedulingConstraint: update_dict is not empty, ' 66 | 'rules have not been updated.') 67 | 68 | if self.topbat and self.topbat != top_bl_t[le.BAT]: 69 | return False 70 | if self.topifm and self.topifm != top_bl_t[le.IFM]: 71 | return False 72 | if self.topofm and self.topofm != top_bl_t[le.OFM]: 73 | return False 74 | 75 | del top_bl_ord 76 | 77 | return True 78 | 79 | def is_valid_part(self, part): 80 | ''' 81 | Whether the given `part` is valid with the constraint. 82 | ''' 83 | # pylint: disable=unused-argument 84 | if self.update_dict: 85 | raise ValueError('SchedulingConstraint: update_dict is not empty, ' 86 | 'rules have not been updated.') 87 | 88 | return True 89 | 90 | def filter_gen_ts(self, gen_tifm, gen_tofm, gen_tbat): 91 | ''' Get the filtered generators for loop blocking factors. 
''' 92 | return self._filter_gen(gen_tifm, self.topifm), \ 93 | self._filter_gen(gen_tofm, self.topofm), \ 94 | self._filter_gen(gen_tbat, self.topbat) 95 | 96 | def update_by_prev(self, prev_results): 97 | ''' 98 | Based on the previous layer scheduling results `prev_results` as a 99 | mapping from previous layer name to SchedulingResult instance, use the 100 | rules specified by `update_dict` to update the constraint. 101 | ''' 102 | for layer_name in self.update_dict: 103 | self.update_dict[layer_name](self, prev_results[layer_name]) 104 | self.update_dict = util.HashableDict() # clear updated rules. 105 | 106 | @staticmethod 107 | def _filter_gen(gen, topt=0): 108 | ''' Get a new generator which filters the top factor. ''' 109 | for tpl in gen: 110 | if topt in (0, tpl[0]): 111 | yield tpl 112 | 113 | def __repr__(self): 114 | return '{}({})'.format( 115 | self.__class__.__name__, 116 | ', '.join(['{}={}'.format(k, repr(v)) 117 | for k, v in self.__dict__.items()])) 118 | 119 | 120 | class SchedulingConstraintLayerPipeline(SchedulingConstraint): 121 | ''' 122 | Layer scheduling constraint for inter-layer pipelining. 123 | 124 | Constraint includes: 125 | - topbat: top BAT loop blocking factor, which decides the number of groups 126 | for batch pipelining. It must match between all layers in a pipeline 127 | segment. 128 | - topifm/topofm: top IFM/OFM blocking factor, which decides the number of 129 | groups for fmap data forwarding between adjacent spatial scheduled layers 130 | in a pipeline segment. It must match between forwarding 131 | source/destination layers. 132 | - fbifm/fbofm: whether to fully buffer the fmap data of the layer on-chip. 133 | It indicates the baseline double-buffering between pipelined layers. 134 | 135 | For loop orders, the BAT loop must be at the outermost for batch 136 | pipelining. Then the loop associated with the forwarded data (IFM or OFM) 137 | must follow at the second outermost. 
If a data category (IFM or OFM) is 138 | fully buffered, then the corresponding loop is a trivial loop, which can be 139 | at any where. 140 | ''' 141 | 142 | def __init__(self, topbat=0, topifm=0, topofm=0, fbifm=False, fbofm=False, 143 | update_dict=None): 144 | 145 | if fbifm: 146 | # Fully-buffered IFM <=> topifm = 1. 147 | if topifm not in (0, 1): 148 | raise ValueError('SchedulingConstraintLayerPipeline: ' 149 | 'fully-buffered IFM implies topifm = 1.') 150 | topifm = 1 151 | 152 | if fbofm: 153 | # Fully-buffered OFM <=> topofm = 1. 154 | if topofm not in (0, 1): 155 | raise ValueError('SchedulingConstraintLayerPipeline: ' 156 | 'fully-buffered OFM implies topofm = 1.') 157 | topofm = 1 158 | 159 | if topifm > 1 and topofm > 1: 160 | raise ValueError('SchedulingConstraintLayerPipeline: ' 161 | 'impossible to have both topifm and topofm > 1, ' 162 | 'at least one of IFM and OFM must be a trivial ' 163 | 'loop (= 1) or not constrained (= 0).') 164 | 165 | super(SchedulingConstraintLayerPipeline, self).__init__( 166 | topbat=topbat, topifm=topifm, topofm=topofm, 167 | update_dict=update_dict) 168 | 169 | def is_valid_top_bl(self, top_bl_t, top_bl_ord): 170 | 171 | if not super(SchedulingConstraintLayerPipeline, self).is_valid_top_bl( 172 | top_bl_t, top_bl_ord): 173 | return False 174 | 175 | # Loop orders. 176 | # Ordered loops from outer to inner. 177 | ord_lpe = LoopBlockingScheme.ordered_loops(top_bl_t, top_bl_ord, 178 | lpe_only=True) 179 | if self.topbat > 1: 180 | if ord_lpe.pop(0) != le.BAT: 181 | return False 182 | # topifm and topofm cannot trigger together. 
183 | if self.topifm > 1: 184 | if ord_lpe.pop(0) != le.IFM: 185 | return False 186 | if self.topofm > 1: 187 | if ord_lpe.pop(0) != le.OFM: 188 | return False 189 | 190 | return True 191 | 192 | -------------------------------------------------------------------------------- /nn_dataflow/nns/__init__.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | def import_network(name): 18 | ''' 19 | Import an example network. 20 | ''' 21 | import importlib 22 | 23 | if name not in all_networks(): 24 | raise ImportError('nns: NN {} has not been defined!'.format(name)) 25 | netmod = importlib.import_module('.' + name, 'nn_dataflow.nns') 26 | network = netmod.NN 27 | return network 28 | 29 | 30 | def all_networks(): 31 | ''' 32 | Get all defined networks. 33 | ''' 34 | import os 35 | 36 | nns_dir = os.path.dirname(os.path.abspath(__file__)) 37 | nns = [f[:-len('.py')] for f in os.listdir(nns_dir) 38 | if f.endswith('.py') and not f.startswith('__')] 39 | return list(sorted(nns)) 40 | 41 | 42 | def add_lstm_cell(network, name, size, xin, cin=None, hin=None): 43 | ''' 44 | Add a LSTM cell named `name` to the `network`, with the dimension `size`. 45 | `xin`, `cin`, `hin` are the layers' names whose outputs are x_t, C_{t-1}, 46 | h_{t-1}, respectively. 
Return the layers' names whose outputs are C_t, h_t. 47 | ''' 48 | from nn_dataflow.core import Network 49 | from nn_dataflow.core import InputLayer, FCLayer, EltwiseLayer 50 | 51 | if not isinstance(network, Network): 52 | raise TypeError('add_lstm_cell: network must be a Network instance.') 53 | 54 | if cin is None: 55 | cin = '{}_cinit'.format(name) 56 | network.add_ext(cin, InputLayer(size, 1)) 57 | if hin is None: 58 | hin = '{}_hinit'.format(name) 59 | network.add_ext(hin, InputLayer(size, 1)) 60 | 61 | if (cin not in network) or (hin not in network) or (xin not in network): 62 | raise ValueError('add_lstm_cell: cin {}, hin {}, xin {} must all be ' 63 | 'in the network.'.format(cin, hin, xin)) 64 | 65 | def gate_name(gate): 66 | ''' Name of a gate. ''' 67 | return '{}_{}gate'.format(name, gate) 68 | 69 | # Candidate. 70 | cand_name = '{}_cand'.format(name) 71 | prevs = (hin, xin) if hin else (xin,) 72 | network.add(cand_name, FCLayer(len(prevs) * size, size), prevs=prevs) 73 | 74 | # Three gates. 75 | prevs = (hin, xin) if hin else (xin,) 76 | for g in ['i', 'f', 'o']: 77 | network.add(gate_name(g), FCLayer(len(prevs) * size, size), prevs=prevs) 78 | 79 | # C_t. 80 | cout_name = '{}_cout'.format(name) 81 | cout_f_name = cout_name + '_f' 82 | prevs = (cin, gate_name('f')) if cin else (gate_name('f'),) 83 | network.add(cout_f_name, EltwiseLayer(size, 1, len(prevs)), prevs=prevs) 84 | cout_i_name = cout_name + '_i' 85 | prevs = (cand_name, gate_name('i')) 86 | network.add(cout_i_name, EltwiseLayer(size, 1, 2), prevs=prevs) 87 | prevs = (cout_i_name, cout_f_name) 88 | network.add(cout_name, EltwiseLayer(size, 1, 2), prevs=prevs) 89 | 90 | # h_t. 
91 | hout_name = '{}_hout'.format(name) 92 | prevs = (cout_name, gate_name('o')) 93 | network.add(hout_name, EltwiseLayer(size, 1, 2), prevs=prevs) 94 | 95 | return cout_name, hout_name 96 | 97 | -------------------------------------------------------------------------------- /nn_dataflow/nns/alex_net.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | from nn_dataflow.core import Network 18 | from nn_dataflow.core import InputLayer, ConvLayer, FCLayer, PoolingLayer 19 | 20 | ''' 21 | AlexNet 22 | 23 | Krizhevsky, Sutskever, and Hinton, 2012 24 | ''' 25 | 26 | NN = Network('AlexNet') 27 | 28 | NN.set_input_layer(InputLayer(3, 224)) 29 | 30 | NN.add('conv1_a', ConvLayer(3, 48, 55, 11, 4), prevs=(NN.INPUT_LAYER_KEY,)) 31 | NN.add('conv1_b', ConvLayer(3, 48, 55, 11, 4), prevs=(NN.INPUT_LAYER_KEY,)) 32 | NN.add('pool1_a', PoolingLayer(48, 27, 3, strd=2), prevs=('conv1_a',)) 33 | NN.add('pool1_b', PoolingLayer(48, 27, 3, strd=2), prevs=('conv1_b',)) 34 | # Norm layer is ignored. 
35 | 36 | NN.add('conv2_a', ConvLayer(48, 128, 27, 5), prevs=('pool1_a',)) 37 | NN.add('conv2_b', ConvLayer(48, 128, 27, 5), prevs=('pool1_b',)) 38 | NN.add('pool2_a', PoolingLayer(128, 13, 3, strd=2), prevs=('conv2_a',)) 39 | NN.add('pool2_b', PoolingLayer(128, 13, 3, strd=2), prevs=('conv2_b',)) 40 | # Norm layer is ignored. 41 | 42 | NN.add('conv3_a', ConvLayer(256, 192, 13, 3), prevs=('pool2_a', 'pool2_b')) 43 | NN.add('conv3_b', ConvLayer(256, 192, 13, 3), prevs=('pool2_a', 'pool2_b')) 44 | NN.add('conv4_a', ConvLayer(192, 192, 13, 3), prevs=('conv3_a',)) 45 | NN.add('conv4_b', ConvLayer(192, 192, 13, 3), prevs=('conv3_b',)) 46 | NN.add('conv5_a', ConvLayer(192, 128, 13, 3), prevs=('conv4_a',)) 47 | NN.add('conv5_b', ConvLayer(192, 128, 13, 3), prevs=('conv4_b',)) 48 | NN.add('pool3_a', PoolingLayer(128, 6, 3, strd=2), prevs=('conv5_a',)) 49 | NN.add('pool3_b', PoolingLayer(128, 6, 3, strd=2), prevs=('conv5_b',)) 50 | 51 | NN.add('fc1', FCLayer(256, 4096, 6), prevs=('pool3_a', 'pool3_b')) 52 | NN.add('fc2', FCLayer(4096, 4096)) 53 | NN.add('fc3', FCLayer(4096, 1000)) 54 | 55 | -------------------------------------------------------------------------------- /nn_dataflow/nns/googlenet.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 
15 | """ 16 | 17 | from nn_dataflow.core import Network 18 | from nn_dataflow.core import InputLayer, ConvLayer, FCLayer, PoolingLayer 19 | 20 | ''' 21 | GoogLeNet 22 | 23 | ILSVRC 2014 24 | ''' 25 | 26 | NN = Network('GoogLeNet') 27 | 28 | NN.set_input_layer(InputLayer(3, 224)) 29 | 30 | NN.add('conv1', ConvLayer(3, 64, 112, 7, 2)) 31 | NN.add('pool1', PoolingLayer(64, 56, 3, strd=2)) 32 | # Norm layer is ignored. 33 | 34 | NN.add('conv2_3x3_reduce', ConvLayer(64, 64, 56, 1)) 35 | NN.add('conv2_3x3', ConvLayer(64, 192, 56, 3)) 36 | # Norm layer is ignored. 37 | NN.add('pool2', PoolingLayer(192, 28, 3, strd=2)) 38 | 39 | 40 | def add_inception(network, incp_id, sfmap, nfmaps_in, nfmaps_1, nfmaps_3r, 41 | nfmaps_3, nfmaps_5r, nfmaps_5, nfmaps_pool, prevs): 42 | ''' Add an inception module to the network. ''' 43 | pfx = 'inception_{}_'.format(incp_id) 44 | # 1x1 branch. 45 | network.add(pfx + '1x1', ConvLayer(nfmaps_in, nfmaps_1, sfmap, 1), 46 | prevs=prevs) 47 | # 3x3 branch. 48 | network.add(pfx + '3x3_reduce', ConvLayer(nfmaps_in, nfmaps_3r, sfmap, 1), 49 | prevs=prevs) 50 | network.add(pfx + '3x3', ConvLayer(nfmaps_3r, nfmaps_3, sfmap, 3)) 51 | # 5x5 branch. 52 | network.add(pfx + '5x5_reduce', ConvLayer(nfmaps_in, nfmaps_5r, sfmap, 1), 53 | prevs=prevs) 54 | network.add(pfx + '5x5', ConvLayer(nfmaps_5r, nfmaps_5, sfmap, 5)) 55 | # Pooling branch. 56 | network.add(pfx + 'pool_proj', ConvLayer(nfmaps_in, nfmaps_pool, sfmap, 1), 57 | prevs=prevs) 58 | # Merge branches. 59 | return (pfx + '1x1', pfx + '3x3', pfx + '5x5', pfx + 'pool_proj') 60 | 61 | 62 | _PREVS = ('pool2',) 63 | 64 | # Inception 3. 65 | _PREVS = add_inception(NN, '3a', 28, 192, 64, 96, 128, 16, 32, 32, 66 | prevs=_PREVS) 67 | _PREVS = add_inception(NN, '3b', 28, 256, 128, 128, 192, 32, 96, 64, 68 | prevs=_PREVS) 69 | 70 | NN.add('pool3', PoolingLayer(480, 14, 3, strd=2), prevs=_PREVS) 71 | _PREVS = ('pool3',) 72 | 73 | # Inception 4. 
74 | _PREVS = add_inception(NN, '4a', 14, 480, 192, 96, 208, 16, 48, 64, 75 | prevs=_PREVS) 76 | _PREVS = add_inception(NN, '4b', 14, 512, 160, 112, 224, 24, 64, 64, 77 | prevs=_PREVS) 78 | _PREVS = add_inception(NN, '4c', 14, 512, 128, 128, 256, 24, 64, 64, 79 | prevs=_PREVS) 80 | _PREVS = add_inception(NN, '4d', 14, 512, 112, 144, 288, 32, 64, 64, 81 | prevs=_PREVS) 82 | _PREVS = add_inception(NN, '4e', 14, 528, 256, 160, 320, 32, 128, 128, 83 | prevs=_PREVS) 84 | 85 | NN.add('pool4', PoolingLayer(832, 7, 3, strd=2), prevs=_PREVS) 86 | _PREVS = ('pool4',) 87 | 88 | # Inception 5. 89 | _PREVS = add_inception(NN, '5a', 7, 832, 256, 160, 320, 32, 128, 128, 90 | prevs=_PREVS) 91 | _PREVS = add_inception(NN, '5b', 7, 832, 384, 192, 384, 48, 128, 128, 92 | prevs=_PREVS) 93 | 94 | NN.add('pool5', PoolingLayer(1024, 1, 7), prevs=_PREVS) 95 | 96 | NN.add('fc', FCLayer(1024, 1000)) 97 | 98 | -------------------------------------------------------------------------------- /nn_dataflow/nns/lstm_gnmt.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | from nn_dataflow.core import Network 18 | from nn_dataflow.core import InputLayer, EltwiseLayer 19 | 20 | from nn_dataflow.nns import add_lstm_cell 21 | 22 | ''' 23 | LSTM from GNMT. 
24 | 25 | Sutskever, Vinyals, Le, Google, NIPS 2014 26 | ''' 27 | 28 | NN = Network('GNMT') 29 | 30 | NN.set_input_layer(InputLayer(1000, 1)) 31 | 32 | NL = 4 33 | 34 | # Word embedding is a simple lookup. 35 | # Exclude or ignore embedding processing. 36 | WE = NN.INPUT_LAYER_KEY 37 | 38 | # layered LSTM. 39 | X = WE 40 | for l in range(NL): 41 | cell = 'cell_l{}'.format(l) 42 | C, H = add_lstm_cell(NN, cell, 1000, X) 43 | X = H 44 | 45 | # log(p), softmax. 46 | NN.add('Wd', EltwiseLayer(1000, 1, 1), prevs=(X,)) 47 | 48 | -------------------------------------------------------------------------------- /nn_dataflow/nns/lstm_phoneme.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | from nn_dataflow.core import Network 18 | from nn_dataflow.core import InputLayer, FCLayer 19 | 20 | from nn_dataflow.nns import add_lstm_cell 21 | 22 | ''' 23 | LSTM for phoneme classification. 24 | 25 | Graves and Schmidhuber, 2005 26 | ''' 27 | 28 | NN = Network('PHONEME') 29 | 30 | NN.set_input_layer(InputLayer(26, 1)) 31 | 32 | # Input. 33 | NN.add('We', FCLayer(26, 140), prevs=(NN.INPUT_LAYER_KEY,)) 34 | 35 | # LSTM. 36 | C, H = add_lstm_cell(NN, 'cell', 140, 'We') 37 | 38 | # Output. 
39 | NN.add('Wd', FCLayer(140, 61), prevs=(H,)) 40 | 41 | -------------------------------------------------------------------------------- /nn_dataflow/nns/lstm_showtell.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | from nn_dataflow.core import Network 18 | from nn_dataflow.core import InputLayer, EltwiseLayer 19 | 20 | from nn_dataflow.nns import add_lstm_cell 21 | 22 | ''' 23 | LSTM from Show and Tell. 24 | 25 | Vinyals et al., Google, CVPR 2015 26 | ''' 27 | 28 | NN = Network('ShowTell') 29 | 30 | NN.set_input_layer(InputLayer(512, 1)) 31 | 32 | # Word embedding is a simple lookup. 33 | # Exclude or ignore embedding processing. 34 | WE = NN.INPUT_LAYER_KEY 35 | 36 | # LSTM. 37 | C, H = add_lstm_cell(NN, 'cell', 512, WE) 38 | 39 | # log(p), softmax. 
40 | NN.add('Wd', EltwiseLayer(512, 1, 1), prevs=(H,)) 41 | 42 | -------------------------------------------------------------------------------- /nn_dataflow/nns/mlp_l.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | from nn_dataflow.core import Network 18 | from nn_dataflow.core import InputLayer, FCLayer 19 | 20 | ''' 21 | MLP-L 22 | 23 | PRIME, 2016 24 | ''' 25 | 26 | NN = Network('MLP-L') 27 | 28 | NN.set_input_layer(InputLayer(784, 1)) 29 | 30 | NN.add('fc1', FCLayer(784, 1500)) 31 | NN.add('fc2', FCLayer(1500, 1000)) 32 | NN.add('fc3', FCLayer(1000, 500)) 33 | NN.add('fc4', FCLayer(500, 10)) 34 | 35 | -------------------------------------------------------------------------------- /nn_dataflow/nns/mlp_m.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 
8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | from nn_dataflow.core import Network 18 | from nn_dataflow.core import InputLayer, FCLayer 19 | 20 | ''' 21 | MLP-M 22 | 23 | PRIME, 2016 24 | ''' 25 | 26 | NN = Network('MLP-M') 27 | 28 | NN.set_input_layer(InputLayer(784, 1)) 29 | 30 | NN.add('fc1', FCLayer(784, 1000)) 31 | NN.add('fc2', FCLayer(1000, 500)) 32 | NN.add('fc3', FCLayer(500, 250)) 33 | NN.add('fc4', FCLayer(250, 10)) 34 | 35 | -------------------------------------------------------------------------------- /nn_dataflow/nns/mlp_s.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 
15 | """ 16 | 17 | from nn_dataflow.core import Network 18 | from nn_dataflow.core import InputLayer, FCLayer 19 | 20 | ''' 21 | MLP-S 22 | 23 | PRIME, 2016 24 | ''' 25 | 26 | NN = Network('MLP-S') 27 | 28 | NN.set_input_layer(InputLayer(784, 1)) 29 | 30 | NN.add('fc1', FCLayer(784, 500)) 31 | NN.add('fc2', FCLayer(500, 250)) 32 | NN.add('fc3', FCLayer(250, 10)) 33 | 34 | -------------------------------------------------------------------------------- /nn_dataflow/nns/resnet152.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | from nn_dataflow.core import Network 18 | from nn_dataflow.core import InputLayer, ConvLayer, FCLayer, \ 19 | PoolingLayer, EltwiseLayer 20 | 21 | ''' 22 | ResNet-152 23 | 24 | He, Zhang, Ren, and Sun, 2015 25 | ''' 26 | 27 | NN = Network('ResNet') 28 | 29 | NN.set_input_layer(InputLayer(3, 224)) 30 | 31 | NN.add('conv1', ConvLayer(3, 64, 112, 7, 2)) 32 | NN.add('pool1', PoolingLayer(64, 56, 3, 2)) 33 | 34 | RES_PREV = 'pool1' 35 | 36 | for i in range(3): 37 | NN.add('conv2_{}_a'.format(i), ConvLayer(64 if i == 0 else 256, 64, 56, 1)) 38 | NN.add('conv2_{}_b'.format(i), ConvLayer(64, 64, 56, 3)) 39 | NN.add('conv2_{}_c'.format(i), ConvLayer(64, 256, 56, 1)) 40 | 41 | # With residual shortcut. 
42 | if i == 0: 43 | NN.add('conv2_br', ConvLayer(64, 256, 56, 1), prevs=(RES_PREV,)) 44 | RES_PREV = 'conv2_br' 45 | NN.add('conv2_{}_res'.format(i), EltwiseLayer(256, 56, 2), 46 | prevs=(RES_PREV, 'conv2_{}_c'.format(i))) 47 | RES_PREV = 'conv2_{}_res'.format(i) 48 | 49 | for i in range(8): 50 | NN.add('conv3_{}_a'.format(i), 51 | ConvLayer(256, 128, 28, 1, 2) if i == 0 52 | else ConvLayer(512, 128, 28, 1)) 53 | NN.add('conv3_{}_b'.format(i), ConvLayer(128, 128, 28, 3)) 54 | NN.add('conv3_{}_c'.format(i), ConvLayer(128, 512, 28, 1)) 55 | 56 | # With residual shortcut. 57 | if i == 0: 58 | NN.add('conv3_br', ConvLayer(256, 512, 28, 1, 2), prevs=(RES_PREV,)) 59 | RES_PREV = 'conv3_br' 60 | NN.add('conv3_{}_res'.format(i), EltwiseLayer(512, 28, 2), 61 | prevs=(RES_PREV, 'conv3_{}_c'.format(i))) 62 | RES_PREV = 'conv3_{}_res'.format(i) 63 | 64 | for i in range(36): 65 | NN.add('conv4_{}_a'.format(i), 66 | ConvLayer(512, 256, 14, 1, 2) if i == 0 67 | else ConvLayer(1024, 256, 14, 1)) 68 | NN.add('conv4_{}_b'.format(i), ConvLayer(256, 256, 14, 3)) 69 | NN.add('conv4_{}_c'.format(i), ConvLayer(256, 1024, 14, 1)) 70 | 71 | # With residual shortcut. 72 | if i == 0: 73 | NN.add('conv4_br', ConvLayer(512, 1024, 14, 1, 2), prevs=(RES_PREV,)) 74 | RES_PREV = 'conv4_br' 75 | NN.add('conv4_{}_res'.format(i), EltwiseLayer(1024, 14, 2), 76 | prevs=(RES_PREV, 'conv4_{}_c'.format(i))) 77 | RES_PREV = 'conv4_{}_res'.format(i) 78 | 79 | for i in range(3): 80 | NN.add('conv5_{}_a'.format(i), 81 | ConvLayer(1024, 512, 7, 1, 2) if i == 0 82 | else ConvLayer(2048, 512, 7, 1)) 83 | NN.add('conv5_{}_b'.format(i), ConvLayer(512, 512, 7, 3)) 84 | NN.add('conv5_{}_c'.format(i), ConvLayer(512, 2048, 7, 1)) 85 | 86 | # With residual shortcut. 
87 | if i == 0: 88 | NN.add('conv5_br', ConvLayer(1024, 2048, 7, 1, 2), prevs=(RES_PREV,)) 89 | RES_PREV = 'conv5_br' 90 | NN.add('conv5_{}_res'.format(i), EltwiseLayer(2048, 7, 2), 91 | prevs=(RES_PREV, 'conv5_{}_c'.format(i))) 92 | RES_PREV = 'conv5_{}_res'.format(i) 93 | 94 | NN.add('pool5', PoolingLayer(2048, 1, 7)) 95 | 96 | NN.add('fc', FCLayer(2048, 1000)) 97 | 98 | -------------------------------------------------------------------------------- /nn_dataflow/nns/resnet50.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | from nn_dataflow.core import Network 18 | from nn_dataflow.core import InputLayer, ConvLayer, FCLayer, \ 19 | PoolingLayer, EltwiseLayer 20 | 21 | ''' 22 | ResNet-50 23 | 24 | He, Zhang, Ren, and Sun, 2015 25 | ''' 26 | 27 | NN = Network('ResNet') 28 | 29 | NN.set_input_layer(InputLayer(3, 224)) 30 | 31 | NN.add('conv1', ConvLayer(3, 64, 112, 7, 2)) 32 | NN.add('pool1', PoolingLayer(64, 56, 3, 2)) 33 | 34 | RES_PREV = 'pool1' 35 | 36 | for i in range(3): 37 | NN.add('conv2_{}_a'.format(i), ConvLayer(64 if i == 0 else 256, 64, 56, 1)) 38 | NN.add('conv2_{}_b'.format(i), ConvLayer(64, 64, 56, 3)) 39 | NN.add('conv2_{}_c'.format(i), ConvLayer(64, 256, 56, 1)) 40 | 41 | # With residual shortcut. 
42 | if i == 0: 43 | NN.add('conv2_br', ConvLayer(64, 256, 56, 1), prevs=(RES_PREV,)) 44 | RES_PREV = 'conv2_br' 45 | NN.add('conv2_{}_res'.format(i), EltwiseLayer(256, 56, 2), 46 | prevs=(RES_PREV, 'conv2_{}_c'.format(i))) 47 | RES_PREV = 'conv2_{}_res'.format(i) 48 | 49 | for i in range(4): 50 | NN.add('conv3_{}_a'.format(i), 51 | ConvLayer(256, 128, 28, 1, 2) if i == 0 52 | else ConvLayer(512, 128, 28, 1)) 53 | NN.add('conv3_{}_b'.format(i), ConvLayer(128, 128, 28, 3)) 54 | NN.add('conv3_{}_c'.format(i), ConvLayer(128, 512, 28, 1)) 55 | 56 | # With residual shortcut. 57 | if i == 0: 58 | NN.add('conv3_br', ConvLayer(256, 512, 28, 1, 2), prevs=(RES_PREV,)) 59 | RES_PREV = 'conv3_br' 60 | NN.add('conv3_{}_res'.format(i), EltwiseLayer(512, 28, 2), 61 | prevs=(RES_PREV, 'conv3_{}_c'.format(i))) 62 | RES_PREV = 'conv3_{}_res'.format(i) 63 | 64 | for i in range(6): 65 | NN.add('conv4_{}_a'.format(i), 66 | ConvLayer(512, 256, 14, 1, 2) if i == 0 67 | else ConvLayer(1024, 256, 14, 1)) 68 | NN.add('conv4_{}_b'.format(i), ConvLayer(256, 256, 14, 3)) 69 | NN.add('conv4_{}_c'.format(i), ConvLayer(256, 1024, 14, 1)) 70 | 71 | # With residual shortcut. 72 | if i == 0: 73 | NN.add('conv4_br', ConvLayer(512, 1024, 14, 1, 2), prevs=(RES_PREV,)) 74 | RES_PREV = 'conv4_br' 75 | NN.add('conv4_{}_res'.format(i), EltwiseLayer(1024, 14, 2), 76 | prevs=(RES_PREV, 'conv4_{}_c'.format(i))) 77 | RES_PREV = 'conv4_{}_res'.format(i) 78 | 79 | for i in range(3): 80 | NN.add('conv5_{}_a'.format(i), 81 | ConvLayer(1024, 512, 7, 1, 2) if i == 0 82 | else ConvLayer(2048, 512, 7, 1)) 83 | NN.add('conv5_{}_b'.format(i), ConvLayer(512, 512, 7, 3)) 84 | NN.add('conv5_{}_c'.format(i), ConvLayer(512, 2048, 7, 1)) 85 | 86 | # With residual shortcut. 
87 | if i == 0: 88 | NN.add('conv5_br', ConvLayer(1024, 2048, 7, 1, 2), prevs=(RES_PREV,)) 89 | RES_PREV = 'conv5_br' 90 | NN.add('conv5_{}_res'.format(i), EltwiseLayer(2048, 7, 2), 91 | prevs=(RES_PREV, 'conv5_{}_c'.format(i))) 92 | RES_PREV = 'conv5_{}_res'.format(i) 93 | 94 | NN.add('pool5', PoolingLayer(2048, 1, 7)) 95 | 96 | NN.add('fc', FCLayer(2048, 1000)) 97 | -------------------------------------------------------------------------------- /nn_dataflow/nns/vgg19_net.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 
15 | """ 16 | 17 | from nn_dataflow.core import Network 18 | from nn_dataflow.core import InputLayer, ConvLayer, FCLayer, PoolingLayer 19 | 20 | ''' 21 | VGGNet-19 22 | 23 | Simonyan and Zisserman, 2014 24 | ''' 25 | 26 | NN = Network('VGG19') 27 | 28 | NN.set_input_layer(InputLayer(3, 224)) 29 | 30 | NN.add('conv1', ConvLayer(3, 64, 224, 3)) 31 | NN.add('conv2', ConvLayer(64, 64, 224, 3)) 32 | NN.add('pool1', PoolingLayer(64, 112, 2)) 33 | 34 | NN.add('conv3', ConvLayer(64, 128, 112, 3)) 35 | NN.add('conv4', ConvLayer(128, 128, 112, 3)) 36 | NN.add('pool2', PoolingLayer(128, 56, 2)) 37 | 38 | NN.add('conv5', ConvLayer(128, 256, 56, 3)) 39 | NN.add('conv6', ConvLayer(256, 256, 56, 3)) 40 | NN.add('conv7', ConvLayer(256, 256, 56, 3)) 41 | NN.add('conv8', ConvLayer(256, 256, 56, 3)) 42 | NN.add('pool3', PoolingLayer(256, 28, 2)) 43 | 44 | NN.add('conv9', ConvLayer(256, 512, 28, 3)) 45 | NN.add('conv10', ConvLayer(512, 512, 28, 3)) 46 | NN.add('conv11', ConvLayer(512, 512, 28, 3)) 47 | NN.add('conv12', ConvLayer(512, 512, 28, 3)) 48 | NN.add('pool4', PoolingLayer(512, 14, 2)) 49 | 50 | NN.add('conv13', ConvLayer(512, 512, 14, 3)) 51 | NN.add('conv14', ConvLayer(512, 512, 14, 3)) 52 | NN.add('conv15', ConvLayer(512, 512, 14, 3)) 53 | NN.add('conv16', ConvLayer(512, 512, 14, 3)) 54 | NN.add('pool5', PoolingLayer(512, 7, 2)) 55 | 56 | NN.add('fc1', FCLayer(512, 4096, 7)) 57 | NN.add('fc2', FCLayer(4096, 4096)) 58 | NN.add('fc3', FCLayer(4096, 1000)) 59 | 60 | -------------------------------------------------------------------------------- /nn_dataflow/nns/vgg_net.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 
8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | from nn_dataflow.core import Network 18 | from nn_dataflow.core import InputLayer, ConvLayer, FCLayer, PoolingLayer 19 | 20 | ''' 21 | VGGNet-16 22 | 23 | Simonyan and Zisserman, 2014 24 | ''' 25 | 26 | NN = Network('VGG') 27 | 28 | NN.set_input_layer(InputLayer(3, 224)) 29 | 30 | NN.add('conv1', ConvLayer(3, 64, 224, 3)) 31 | NN.add('conv2', ConvLayer(64, 64, 224, 3)) 32 | NN.add('pool1', PoolingLayer(64, 112, 2)) 33 | 34 | NN.add('conv3', ConvLayer(64, 128, 112, 3)) 35 | NN.add('conv4', ConvLayer(128, 128, 112, 3)) 36 | NN.add('pool2', PoolingLayer(128, 56, 2)) 37 | 38 | NN.add('conv5', ConvLayer(128, 256, 56, 3)) 39 | NN.add('conv6', ConvLayer(256, 256, 56, 3)) 40 | NN.add('conv7', ConvLayer(256, 256, 56, 3)) 41 | NN.add('pool3', PoolingLayer(256, 28, 2)) 42 | 43 | NN.add('conv8', ConvLayer(256, 512, 28, 3)) 44 | NN.add('conv9', ConvLayer(512, 512, 28, 3)) 45 | NN.add('conv10', ConvLayer(512, 512, 28, 3)) 46 | NN.add('pool4', PoolingLayer(512, 14, 2)) 47 | 48 | NN.add('conv11', ConvLayer(512, 512, 14, 3)) 49 | NN.add('conv12', ConvLayer(512, 512, 14, 3)) 50 | NN.add('conv13', ConvLayer(512, 512, 14, 3)) 51 | NN.add('pool5', PoolingLayer(512, 7, 2)) 52 | 53 | NN.add('fc1', FCLayer(512, 4096, 7)) 54 | NN.add('fc2', FCLayer(4096, 4096)) 55 | NN.add('fc3', FCLayer(4096, 1000)) 56 | 57 | -------------------------------------------------------------------------------- /nn_dataflow/nns/zfnet.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This 
program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | from nn_dataflow.core import Network 18 | from nn_dataflow.core import InputLayer, ConvLayer, FCLayer, PoolingLayer 19 | 20 | ''' 21 | ZFNet 22 | 23 | Zeiler and Fergus, 2013 24 | ''' 25 | 26 | NN = Network('ZFNet') 27 | 28 | NN.set_input_layer(InputLayer(3, 224)) 29 | 30 | NN.add('conv1', ConvLayer(3, 96, 110, 7, 2)) 31 | NN.add('pool1', PoolingLayer(96, 55, 3, strd=2)) 32 | # Norm layer is ignored. 33 | NN.add('conv2', ConvLayer(96, 256, 26, 5, 2)) 34 | NN.add('pool2', PoolingLayer(256, 13, 3, strd=2)) 35 | # Norm layer is ignored. 36 | NN.add('conv3', ConvLayer(256, 512, 13, 3)) 37 | NN.add('conv4', ConvLayer(512, 1024, 13, 3)) 38 | NN.add('conv5', ConvLayer(1024, 512, 13, 3)) 39 | NN.add('pool3', PoolingLayer(512, 6, 3, strd=2)) 40 | NN.add('fc1', FCLayer(512, 4096, 6)) 41 | NN.add('fc2', FCLayer(4096, 4096)) 42 | NN.add('fc3', FCLayer(4096, 1000)) 43 | 44 | -------------------------------------------------------------------------------- /nn_dataflow/tests/__init__.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 
8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | -------------------------------------------------------------------------------- /nn_dataflow/tests/dataflow_test/__init__.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | -------------------------------------------------------------------------------- /nn_dataflow/tests/loop_blocking_test/__init__.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 
12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | from .test_loop_blocking_fixture import TestLoopBlockingFixture 18 | 19 | -------------------------------------------------------------------------------- /nn_dataflow/tests/loop_blocking_test/test_loop_blocking.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | from nn_dataflow.core import loop_blocking 18 | from nn_dataflow.core import DataCategoryEnum as de 19 | 20 | from . import TestLoopBlockingFixture 21 | 22 | class TestLoopBlocking(TestLoopBlockingFixture): 23 | ''' Tests for loop_blocking module. ''' 24 | 25 | def test_skip_not_reg(self): 26 | ''' skip non-regularized. ''' 27 | 28 | for sch in self._gen_loopblocking_all(): 29 | 30 | skip = loop_blocking.skip_conv(*sch) 31 | reg_sch = self._regularized_scheme(*sch) 32 | 33 | if not skip: 34 | self.assertEqual(reg_sch, sch, 35 | 'test_skip_not_reg: non-skipped {} should be ' 36 | 'regularized to {}' 37 | .format(sch, reg_sch)) 38 | continue 39 | 40 | lbs = self._lbs(*sch, rsrckey='LG') 41 | reg_lbs = self._lbs(*reg_sch, rsrckey='LG') 42 | 43 | self.assertFalse(loop_blocking.skip_conv(*reg_sch), 44 | 'test_skip_not_reg: regularized {} is skipped.' 
45 | .format(reg_sch)) 46 | self.assertAlmostEqual(lbs.get_access_cost(self.cost), 47 | reg_lbs.get_access_cost(self.cost), 48 | msg=('test_skip_not_reg: cost mismatch. ' 49 | 'orig {}, reg {}.' 50 | .format(sch, reg_sch))) 51 | self.assertListEqual(lbs.get_access(), reg_lbs.get_access(), 52 | msg=('test_skip_not_reg: access mismatch. ' 53 | 'orig {}, reg {}.' 54 | .format(sch, reg_sch))) 55 | size = self._get_lbs_size(lbs) 56 | reg_size = self._get_lbs_size(reg_lbs) 57 | self.assertTrue(all(all(ss1 >= ss2 for ss1, ss2 in zip(s1, s2)) 58 | for s1, s2 in zip(size, reg_size)), 59 | 'test_skip_not_reg: reg size is larger than eqv.\n' 60 | 'org {} has size {}\nreg {} has size {}' 61 | .format(sch, size, reg_sch, reg_size)) 62 | 63 | def test_skip_ratio(self): 64 | ''' skip ratio. ''' 65 | 66 | cnts = [0, 0] 67 | 68 | for bl_ts, bl_ords in self._gen_loopblocking_all(): 69 | 70 | skip = loop_blocking.skip_conv(bl_ts, bl_ords) 71 | cnts[skip] += 1 72 | 73 | skip_ratio = 1. * cnts[True] / sum(cnts) 74 | self.assertGreater(skip_ratio, 0.95, 75 | 'test_skip_ratio: skip ratio {} too low.' 76 | .format(skip_ratio)) 77 | 78 | def test_gen_loopblocking_all(self): 79 | ''' gen_loopblocking cover all. ''' 80 | 81 | exp_cnt = 0 82 | for bl_ts, bl_ords in self._gen_loopblocking_all(): 83 | exp_cnt += 1 if not loop_blocking.skip_conv(bl_ts, bl_ords) else 0 84 | 85 | cnt = 0 86 | for _ in self._gen_loopblocking(rsrckey='LG'): 87 | cnt += 1 88 | 89 | self.assertEqual(cnt, exp_cnt) 90 | 91 | def test_gen_loopblocking_mp(self): 92 | ''' gen_loopblocking multiprocessing. ''' 93 | 94 | cnt1 = 0 95 | for _ in self._gen_loopblocking(rsrckey='LG'): 96 | cnt1 += 1 97 | 98 | cnt8 = 0 99 | for _ in self._gen_loopblocking(rsrckey='LG', optkey='MP'): 100 | cnt8 += 1 101 | 102 | self.assertEqual(cnt1, cnt8) 103 | 104 | def test_gen_loopblocking_no_eqv(self): 105 | ''' gen_loopblocking no equivalent. 
''' 106 | 107 | acc_dict = {} 108 | 109 | for lbs in self._gen_loopblocking(rsrckey='LG', skip_invalid=True): 110 | 111 | # Make the keys hashable (list -> tuple). 112 | size = tuple(tuple(ss for ss in s) for s in self._get_lbs_size(lbs)) 113 | access = tuple(tuple(int(aa) for aa in a) for a in lbs.access) 114 | keys = (size, access) 115 | 116 | self.assertNotIn(keys, acc_dict, 117 | 'test_gen_loopblocking_no_eqv: found equivalents. ' 118 | 'keys: access {} size {}' 119 | .format(access, size)) 120 | acc_dict[keys] = lbs 121 | 122 | def test_gen_loopblocking_ntops(self): 123 | ''' gen_loopblocking ntops. ''' 124 | 125 | tops = list(self._gen_loopblocking(rsrckey='LG', optkey='NTOPS')) 126 | 127 | cost_prev = -float('inf') 128 | 129 | for lbs in self._gen_loopblocking(rsrckey='LG', skip_invalid=True): 130 | 131 | cost_curr = lbs.get_access_cost(self.cost) 132 | self.assertLessEqual(cost_prev, cost_curr) 133 | cost_prev = cost_curr 134 | 135 | if tops: 136 | top_lbs = tops.pop(0) 137 | self.assertAlmostEqual(cost_curr, 138 | top_lbs.get_access_cost(self.cost)) 139 | 140 | def test_gen_loopblocking_byp_sol(self): 141 | ''' gen_loopblocking using bypass solvers. ''' 142 | 143 | cnt = 0 144 | 145 | for lbs in self._gen_loopblocking(optkey='BYPSOL'): 146 | 147 | self.assertTrue(lbs.is_valid()) 148 | 149 | cnt += 1 150 | 151 | self.assertLessEqual(cnt, 8) 152 | 153 | def test_gen_loopblocking_cstr(self): 154 | ''' gen_loopblocking with constraint. ''' 155 | 156 | for lbs in self._gen_loopblocking(rsrckey='LG', cstr=self.cstr): 157 | 158 | self.assertTrue(self.cstr.is_valid_top_bl(lbs.bl_ts[0], 159 | lbs.bl_ords[0])) 160 | 161 | def test_gen_loopblocking_cstr_sol(self): 162 | ''' gen_loopblocking using bypass solvers with constraint. 
''' 163 | 164 | cnt1 = len(list(self._gen_loopblocking(optkey='BYPSOL'))) 165 | 166 | lbs_list = list(self._gen_loopblocking(optkey='BYPSOL', cstr=self.cstr)) 167 | self.assertTrue(all( 168 | self.cstr.is_valid_top_bl(lbs.bl_ts[0], lbs.bl_ords[0]) 169 | for lbs in lbs_list)) 170 | cnt2 = len(lbs_list) 171 | 172 | self.assertLessEqual(cnt2, cnt1) 173 | 174 | def _gen_loopblocking(self, wlkey='BASE', rsrckey='BASE', 175 | optkey='BASE', cstr=None, skip_invalid=False): 176 | ''' gen_loopblocking trampoline. ''' 177 | if cstr is None: 178 | cstr = self.none_cstr 179 | for lbs in loop_blocking.gen_loopblocking( 180 | self.nld[wlkey], self.resource[rsrckey], self.part, cstr, 181 | self.cost, self.options[optkey]): 182 | if not skip_invalid or lbs.is_valid(): 183 | yield lbs 184 | 185 | @staticmethod 186 | def _get_lbs_size(lbs): 187 | ''' Get the size info. ''' 188 | assert lbs.is_valid() 189 | return [[lbs.data_size(bl, dce) for dce in range(de.NUM)] 190 | for bl in range(lbs.BL.NUM)] 191 | 192 | -------------------------------------------------------------------------------- /nn_dataflow/tests/loop_blocking_test/test_loop_blocking_solver.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 
15 | """ 16 | 17 | from nn_dataflow.core import DataCategoryEnum as de 18 | from nn_dataflow.core import loop_blocking_solver 19 | from nn_dataflow.core import MemHierEnum as me 20 | from nn_dataflow.core import Option 21 | 22 | from . import TestLoopBlockingFixture 23 | 24 | class TestLoopBlockingSolver(TestLoopBlockingFixture): 25 | ''' Tests for loop_blocking_solver module. ''' 26 | 27 | def setUp(self): 28 | 29 | super(TestLoopBlockingSolver, self).setUp() 30 | 31 | # Bypass solver for each reside data category. 32 | self.optkeys_bypsol = ['BYPSOL_{}'.format(dce) for dce in range(de.NUM)] 33 | 34 | for reside_dce in range(de.NUM): 35 | opt_dict = self.options['BYPSOL']._asdict() 36 | byp = [True] * de.NUM 37 | byp[reside_dce] = False 38 | opt_dict['sw_gbuf_bypass'] = tuple(byp) 39 | 40 | self.options[self.optkeys_bypsol[reside_dce]] = Option(**opt_dict) 41 | 42 | def test_reside_sol(self): 43 | ''' Data reside solution. ''' 44 | 45 | for reside_dce in range(de.NUM): 46 | 47 | optkey = self.optkeys_bypsol[reside_dce] 48 | 49 | for bl_ts, bl_ords \ 50 | in loop_blocking_solver.gen_loopblocking_gbuf_reside( 51 | self.nld['BASE'], self.resource['BASE'], 52 | self.options[optkey]): 53 | 54 | lbs = self._lbs(bl_ts, bl_ords, optkey=optkey) 55 | 56 | self.assertTrue(lbs.stored_in_gbuf[reside_dce]) 57 | self.assertFalse(any(lbs.stored_in_gbuf[dce] 58 | for dce in range(de.NUM) 59 | if dce != reside_dce)) 60 | 61 | def test_reside_sol_opt(self, rsrckey='BASE', wlkey='BASE'): 62 | ''' Data reside solution optimal. ''' 63 | 64 | def _cost(lbs): 65 | access = lbs.get_access() 66 | return [int(sum(access[me.DRAM])), int(sum(access[me.GBUF]))] 67 | 68 | min_sch_dict = {} 69 | sol_sch_dict = {} 70 | 71 | # Among all schemes that bypass all non-reside data categories. 
72 | for bl_ts, bl_ords in self._gen_loopblocking_all(wlkey=wlkey): 73 | 74 | lbs = self._lbs(bl_ts, bl_ords, wlkey=wlkey, rsrckey=rsrckey, 75 | optkey='BYP') 76 | if not lbs.is_valid(): 77 | continue 78 | 79 | all_reside_dce = [dce for dce in range(de.NUM) 80 | if lbs.stored_in_gbuf[dce]] 81 | # Only look at the cases with one or none reside data category. 82 | if not all_reside_dce: 83 | min_sch = min_sch_dict.get(None, None) 84 | if not min_sch or _cost(lbs) < min_sch: 85 | min_sch_dict[None] = _cost(lbs) 86 | elif len(all_reside_dce) == 1: 87 | dce, = all_reside_dce 88 | min_sch = min_sch_dict.get(dce, None) 89 | if not min_sch or _cost(lbs) < min_sch: 90 | min_sch_dict[dce] = _cost(lbs) 91 | 92 | # Solve each reside data category. 93 | for reside_dce in range(de.NUM): 94 | 95 | optkey = self.optkeys_bypsol[reside_dce] 96 | 97 | for bl_ts, bl_ords \ 98 | in loop_blocking_solver.gen_loopblocking_gbuf_reside( 99 | self.nld[wlkey], self.resource[rsrckey], 100 | self.options[optkey]): 101 | 102 | lbs = self._lbs(bl_ts, bl_ords, wlkey=wlkey, rsrckey=rsrckey, 103 | optkey='BYP') 104 | self.assertTrue(lbs.is_valid()) 105 | self.assertFalse(any(lbs.stored_in_gbuf[dce] 106 | for dce in range(de.NUM) 107 | if dce != reside_dce)) 108 | 109 | true_reside_dce = reside_dce \ 110 | if lbs.stored_in_gbuf[reside_dce] else None 111 | 112 | sol_sch = sol_sch_dict.get(true_reside_dce, None) 113 | if not sol_sch or _cost(lbs) < sol_sch: 114 | sol_sch_dict[true_reside_dce] = _cost(lbs) 115 | 116 | self.assertTrue(sol_sch_dict.items() <= min_sch_dict.items(), 117 | 'test_reside_sol_opt: wlkey {} rsrckey {}: ' 118 | 'solutions do not cover all optimal ones. ' 119 | 'sol {} opt {}.' 120 | .format(wlkey, rsrckey, sol_sch_dict, min_sch_dict)) 121 | 122 | self.assertListEqual( 123 | min(sol_sch_dict.values()), min(min_sch_dict.values()), 124 | 'test_reside_sol_opt: wlkey {} rsrckey {}: ' 125 | 'solutions do not cover the optimal one. sol {} opt {}.' 
126 | .format(wlkey, rsrckey, sol_sch_dict, min_sch_dict)) 127 | 128 | def test_reside_sol_opt_resource(self): 129 | ''' Data reside solution optimal with different resources. ''' 130 | 131 | for rsrckey in ['LG', 'SM']: 132 | 133 | self.test_reside_sol_opt(rsrckey=rsrckey) 134 | 135 | def test_reside_sol_opt_pool(self): 136 | ''' Data reside solution optimal with PoolingLayer. ''' 137 | 138 | with self.assertRaisesRegex(ValueError, 'loop_blocking_solver: .*'): 139 | self.test_reside_sol_opt(wlkey='POOL') 140 | 141 | def test_reside_sol_opt_zero(self): 142 | ''' Data reside solution optimal with zero size. ''' 143 | 144 | for wlkey in ['ZERO_FIL', 'ZERO_IFM']: 145 | 146 | self.test_reside_sol_opt(wlkey=wlkey) 147 | 148 | def test_reside_sol_cnt(self): 149 | ''' Data reside solution count. ''' 150 | 151 | all_set = set(loop_blocking_solver.gen_loopblocking_gbuf_reside( 152 | self.nld['BASE'], self.resource['BASE'], self.options['BYPSOL'])) 153 | 154 | union_set = set() 155 | reside_set_list = [] 156 | 157 | for reside_dce in range(de.NUM): 158 | 159 | optkey = self.optkeys_bypsol[reside_dce] 160 | 161 | s = set(loop_blocking_solver.gen_loopblocking_gbuf_reside( 162 | self.nld['BASE'], self.resource['BASE'], self.options[optkey])) 163 | 164 | reside_set_list.append(s) 165 | union_set |= s 166 | 167 | self.assertSetEqual(all_set, union_set) 168 | self.assertEqual(len(union_set), sum(len(s) for s in reside_set_list)) 169 | 170 | -------------------------------------------------------------------------------- /nn_dataflow/tests/map_strategy_test/__init__.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 
8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | from .test_map_strategy_fixture import TestMapStrategyFixture 18 | 19 | -------------------------------------------------------------------------------- /nn_dataflow/tests/map_strategy_test/test_map_strategy.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | from nn_dataflow.core import MapStrategy 18 | 19 | from . import TestMapStrategyFixture 20 | 21 | class TestMapStrategy(TestMapStrategyFixture): 22 | ''' Tests for basic MapStrategy class. ''' 23 | 24 | def setUp(self): 25 | 26 | super(TestMapStrategy, self).setUp() 27 | 28 | self.layer = self.convlayers['conv1'] 29 | self.dim_array = self.resource['BASE'].dim_array 30 | 31 | def test_args(self): 32 | ''' Constructor arguments. 
''' 33 | ms = MapStrategy(self.layer, 4, 1, self.dim_array) 34 | 35 | self.assertEqual(ms.layer, self.layer) 36 | self.assertEqual(ms.batch_size, 4) 37 | self.assertEqual(ms.dim_array, self.dim_array) 38 | 39 | def test_inv_args(self): 40 | ''' Constructor arguments invalid. ''' 41 | with self.assertRaisesRegex(TypeError, 'MapStrategy: .*layer.*'): 42 | _ = MapStrategy(None, 4, 1, self.dim_array) 43 | 44 | with self.assertRaisesRegex(ValueError, 'MapStrategy: .*occupancy.*'): 45 | _ = MapStrategy(self.layer, 4, -.1, self.dim_array) 46 | with self.assertRaisesRegex(ValueError, 'MapStrategy: .*occupancy.*'): 47 | _ = MapStrategy(self.layer, 4, 1.1, self.dim_array) 48 | 49 | with self.assertRaisesRegex(TypeError, 'MapStrategy: .*dim_array.*'): 50 | _ = MapStrategy(self.layer, 4, 1, None) 51 | 52 | def test_utilization(self): 53 | ''' Accessor utilization. ''' 54 | ms = MapStrategy(self.layer, 4, 1, self.dim_array) 55 | 56 | with self.assertRaisesRegex(NotImplementedError, 'MapStrategy: .*'): 57 | _ = ms.utilization() 58 | 59 | def test_gen_nested_loop_desc(self): 60 | ''' Generator gen_nested_loop_desc. ''' 61 | ms = MapStrategy(self.layer, 4, 1, self.dim_array) 62 | 63 | with self.assertRaisesRegex(NotImplementedError, 'MapStrategy: .*'): 64 | _ = ms.gen_nested_loop_desc() 65 | 66 | -------------------------------------------------------------------------------- /nn_dataflow/tests/map_strategy_test/test_map_strategy_fixture.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 

This program is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE. See the BSD-3 License for more details.

You should have received a copy of the Modified BSD-3 License along with this
program. If not, see .
"""

import unittest
from collections import OrderedDict

from nn_dataflow.core import ConvLayer, FCLayer, LocalRegionLayer, PoolingLayer
from nn_dataflow.core import NodeRegion
from nn_dataflow.core import PhyDim2
from nn_dataflow.core import Resource

class TestMapStrategyFixture(unittest.TestCase):
    ''' Base fixture class for MapStrategy tests. '''

    def setUp(self):

        # AlexNet.
        # Conv/FC layer shapes follow the AlexNet topology -- TODO confirm
        # the (nifm, nofm, fmap, filter, stride) argument meanings against
        # the ConvLayer definition.
        self.convlayers = OrderedDict()
        self.convlayers['conv1'] = ConvLayer(3, 96, 55, 11, 4)
        self.convlayers['conv2'] = ConvLayer(48, 256, 27, 5)
        self.convlayers['conv3'] = ConvLayer(256, 384, 13, 3)
        self.convlayers['conv4'] = ConvLayer(192, 384, 13, 3)
        self.convlayers['conv5'] = ConvLayer(192, 256, 13, 3)
        self.fclayers = {}
        self.fclayers['fc1'] = FCLayer(256, 4096, 6)
        self.fclayers['fc2'] = FCLayer(4096, 4096)
        self.fclayers['fc3'] = FCLayer(4096, 1000)

        # LocalRegionLayer.
        self.lrlayers = {}
        self.lrlayers['pool1'] = PoolingLayer(64, 7, 2)
        self.lrlayers['pool2'] = PoolingLayer(29, 13, 3)
        self.lrlayers['pool3'] = PoolingLayer(32, 7, 2, strd=3)
        self.lrlayers['lr1'] = LocalRegionLayer(32, 7, nreg=5, sreg=1)
        self.lrlayers['lr2'] = LocalRegionLayer(32, 7, nreg=5, sreg=1, strd=2)

        # Fake layers.
        self.fake_layers = {}
        # With irregular nifm/nofm.
        self.fake_layers['IRR'] = ConvLayer(255, 383, 13, 3)
        # With small numbers of fmaps.
        self.fake_layers['SM'] = ConvLayer(5, 3, 13, 3)
        # With large FIL height.
        self.fake_layers['LGFIL'] = ConvLayer(64, 64, 13, 22)

        # Resource.
        # Single-node system: one 1x1 PROC region; the same 1x1 DRAM region
        # serves as both source and destination data regions.
        self.resource = {}
        proc_region = NodeRegion(origin=PhyDim2(0, 0), dim=PhyDim2(1, 1),
                                 type=NodeRegion.PROC)
        data_region = NodeRegion(origin=PhyDim2(0, 0), dim=PhyDim2(1, 1),
                                 type=NodeRegion.DRAM)
        # Eyeriss, ISSCC'16, JSSC'17.
        # Infinite bus width and DRAM bandwidth so tests are not
        # bandwidth-bound.
        self.resource['BASE'] = Resource(
            proc_region=proc_region, dram_region=data_region,
            src_data_region=data_region, dst_data_region=data_region,
            dim_array=PhyDim2(12, 14), size_gbuf=108*1024, size_regf=520,
            array_bus_width=float('inf'), dram_bandwidth=float('inf'),
            no_time_mux=False)


--------------------------------------------------------------------------------
/nn_dataflow/tests/nns_test/__init__.py:
--------------------------------------------------------------------------------
""" $lic$
Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of
Stanford University

This program is free software: you can redistribute it and/or modify it under
the terms of the Modified BSD-3 License as published by the Open Source
Initiative.

This program is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE. See the BSD-3 License for more details.

You should have received a copy of the Modified BSD-3 License along with this
program. If not, see .
15 | """ 16 | 17 | -------------------------------------------------------------------------------- /nn_dataflow/tests/nns_test/test_nns.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | import unittest 18 | 19 | from nn_dataflow.core import Network 20 | from nn_dataflow.core import InputLayer 21 | 22 | import nn_dataflow.nns as nns 23 | 24 | class TestNNs(unittest.TestCase): 25 | ''' Tests for NN definitions. ''' 26 | 27 | def test_all_networks(self): 28 | ''' Get all_networks. ''' 29 | self.assertIn('alex_net', nns.all_networks()) 30 | self.assertIn('vgg_net', nns.all_networks()) 31 | self.assertGreater(len(nns.all_networks()), 5) 32 | 33 | def test_import_network(self): 34 | ''' Get import_network. ''' 35 | for name in nns.all_networks(): 36 | network = nns.import_network(name) 37 | self.assertIsInstance(network, Network) 38 | 39 | def test_import_network_invalid(self): 40 | ''' Get import_network invalid. ''' 41 | with self.assertRaisesRegex(ImportError, 'nns: .*defined.*'): 42 | _ = nns.import_network('aaa') 43 | 44 | def test_add_lstm_cell(self): 45 | ''' Add LSTM cell. 
''' 46 | net = Network('LSTM') 47 | net.set_input_layer(InputLayer(512, 1)) 48 | c, h = nns.add_lstm_cell(net, 'cell0', 512, 49 | net.INPUT_LAYER_KEY, net.INPUT_LAYER_KEY, 50 | net.INPUT_LAYER_KEY) 51 | c, h = nns.add_lstm_cell(net, 'cell1', 512, 52 | net.INPUT_LAYER_KEY, c, h) 53 | c, h = nns.add_lstm_cell(net, 'cell2', 512, 54 | net.INPUT_LAYER_KEY, c, h) 55 | num_weights = 0 56 | for layer in net: 57 | try: 58 | num_weights += net[layer].total_filter_size() 59 | except AttributeError: 60 | pass 61 | self.assertEqual(num_weights, 512 * 512 * 2 * 4 * 3) 62 | 63 | def test_add_lstm_cell_invalid_type(self): 64 | ''' Add LSTM cell with invalid type. ''' 65 | with self.assertRaisesRegex(TypeError, 'add_lstm_cell: .*network.*'): 66 | _ = nns.add_lstm_cell(InputLayer(512, 1), 'cell0', 512, 67 | None, None, None) 68 | 69 | def test_add_lstm_cell_not_in(self): 70 | ''' Add LSTM cell input not in. ''' 71 | net = Network('LSTM') 72 | net.set_input_layer(InputLayer(512, 1)) 73 | with self.assertRaisesRegex(ValueError, 'add_lstm_cell: .*in.*'): 74 | _ = nns.add_lstm_cell(net, 'cell0', 512, 75 | 'a', net.INPUT_LAYER_KEY, 76 | net.INPUT_LAYER_KEY) 77 | 78 | net = Network('LSTM') 79 | net.set_input_layer(InputLayer(512, 1)) 80 | with self.assertRaisesRegex(ValueError, 'add_lstm_cell: .*in.*'): 81 | _ = nns.add_lstm_cell(net, 'cell0', 512, 82 | net.INPUT_LAYER_KEY, 'a', 83 | net.INPUT_LAYER_KEY) 84 | 85 | net = Network('LSTM') 86 | net.set_input_layer(InputLayer(512, 1)) 87 | with self.assertRaisesRegex(ValueError, 'add_lstm_cell: .*in.*'): 88 | _ = nns.add_lstm_cell(net, 'cell0', 512, 89 | net.INPUT_LAYER_KEY, net.INPUT_LAYER_KEY, 90 | 'a') 91 | 92 | -------------------------------------------------------------------------------- /nn_dataflow/tests/partition_test/__init__.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | 
This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | from .test_partition_fixture import TestPartitionFixture 18 | 19 | -------------------------------------------------------------------------------- /nn_dataflow/tests/partition_test/test_partition_fixture.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | import itertools 18 | import unittest 19 | 20 | from nn_dataflow.core import partition 21 | from nn_dataflow.core import ConvLayer, FCLayer, LocalRegionLayer, PoolingLayer 22 | from nn_dataflow.core import Option 23 | from nn_dataflow.core import PartitionScheme 24 | from nn_dataflow.core import ParallelEnum as pe 25 | from nn_dataflow.core import PhyDim2 26 | from nn_dataflow import util 27 | 28 | class TestPartitionFixture(unittest.TestCase): 29 | ''' Base fixture class for Partition tests. 
''' 30 | 31 | def setUp(self): 32 | 33 | self.layers = {} 34 | self.layers['BASE'] = ConvLayer(64, 64, 28, 3) 35 | self.layers['FC'] = FCLayer(4096, 1000, 6) 36 | self.layers['POOL'] = PoolingLayer(32, 7, 3, strd=2) 37 | self.layers['LR'] = LocalRegionLayer(32, 7, nreg=5, sreg=1) 38 | # With irregular nifm/nofm. 39 | self.layers['IRR'] = ConvLayer(255, 383, 13, 3) 40 | # With small numbers of fmaps. 41 | self.layers['SM'] = ConvLayer(5, 3, 13, 3) 42 | # Super small networks. No partitioning schemes. 43 | self.layers['SSM1'] = ConvLayer(1, 1, 2, 3) 44 | self.layers['SSM2'] = FCLayer(2, 2) 45 | self.layers['SSM3'] = PoolingLayer(1, 2, 2) 46 | 47 | self.batch_size = 8 48 | 49 | self.dim_nodes = {} 50 | self.dim_nodes['BASE'] = PhyDim2(4, 4) 51 | self.dim_nodes['LG'] = PhyDim2(10, 10) 52 | self.dim_nodes['PRIME'] = PhyDim2(3, 3) 53 | 54 | self.options = {} 55 | # Irrelevant options. 56 | optdict = {'ntops': 10000} 57 | self.options['BASE'] = Option(partition_hybrid=True, 58 | partition_batch=True, 59 | partition_ifmaps=True, 60 | **optdict) 61 | self.options['NOBATP'] = Option(partition_hybrid=True, 62 | partition_batch=False, 63 | partition_ifmaps=True, 64 | **optdict) 65 | self.options['NOINPP'] = Option(partition_hybrid=True, 66 | partition_batch=True, 67 | partition_ifmaps=False, 68 | **optdict) 69 | self.options['NOHYB'] = Option(partition_hybrid=False, 70 | partition_batch=True, 71 | partition_ifmaps=False, 72 | **optdict) 73 | self.options['ACCFWD'] = Option(partition_hybrid=True, 74 | partition_batch=True, 75 | partition_ifmaps=True, 76 | hw_access_forwarding=True, 77 | **optdict) 78 | self.options['BUFSHR'] = Option(partition_hybrid=True, 79 | partition_batch=True, 80 | partition_ifmaps=True, 81 | hw_gbuf_sharing=True, 82 | **optdict) 83 | 84 | def _gen_partition(self, wlkey='BASE', dnkey='BASE', optkey='BASE', 85 | guaranteed=False): 86 | ''' Generate PartitionScheme. 
''' 87 | for part in partition.gen_partition(self.layers[wlkey], 88 | self.batch_size, 89 | self.dim_nodes[dnkey], 90 | self.options[optkey], 91 | guaranteed=guaranteed): 92 | yield part 93 | 94 | def _gen_partition_full(self, wlkey='BASE', dnkey='BASE'): 95 | ''' Generate all PartitionScheme regardless of equivalence. ''' 96 | 97 | layer = self.layers[wlkey] 98 | dim_nodes = self.dim_nodes[dnkey] 99 | 100 | for ph, pw in itertools.product(util.factorize(dim_nodes.h, pe.NUM), 101 | util.factorize(dim_nodes.w, pe.NUM)): 102 | 103 | pdims = [PhyDim2(h, w) for h, w in zip(ph, pw)] 104 | 105 | # BATP. 106 | if self.batch_size % pdims[pe.BATP].size() != 0: 107 | continue 108 | 109 | # OUTP. 110 | if not util.approx_dividable(layer.nofm, pdims[pe.OUTP].size()): 111 | continue 112 | 113 | # OFMP. 114 | if not util.approx_dividable(layer.hofm, pdims[pe.OFMP].h) \ 115 | or not util.approx_dividable(layer.wofm, pdims[pe.OFMP].w): 116 | continue 117 | 118 | # INPP. 119 | if isinstance(layer, ConvLayer): 120 | if not util.approx_dividable(layer.nifm, 121 | pdims[pe.INPP].size()): 122 | continue 123 | elif isinstance(layer, LocalRegionLayer): 124 | if pdims[pe.INPP].size() > 1: 125 | continue 126 | 127 | # Fully utilize one dimension. 128 | pdims_no_ofmp = pdims[:pe.OFMP] + pdims[pe.OFMP + 1:] 129 | if any(pd.h != 1 and pd.h != dim_nodes.h 130 | and pd.w != 1 and pd.w != dim_nodes.w 131 | for pd in pdims_no_ofmp): 132 | continue 133 | 134 | for order in itertools.permutations(range(pe.NUM)): 135 | 136 | # Batch parallelism should be at the top. 
137 | filtered_order = [pae for pae in order 138 | if pdims[pae].size() > 1] 139 | if pe.BATP in filtered_order and filtered_order[0] != pe.BATP: 140 | continue 141 | 142 | yield PartitionScheme(order=order, pdims=pdims) 143 | 144 | -------------------------------------------------------------------------------- /nn_dataflow/tests/pipeline_test/__init__.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | from .test_pipeline_fixture import TestPipelineFixture 18 | 19 | -------------------------------------------------------------------------------- /nn_dataflow/tests/tool_test/__init__.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 
12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | -------------------------------------------------------------------------------- /nn_dataflow/tests/tool_test/test_nn_dataflow_search.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | import unittest 18 | 19 | import os 20 | import subprocess 21 | 22 | class TestNNDataflowSearch(unittest.TestCase): 23 | ''' Tests for NN dataflow search tool. ''' 24 | 25 | def setUp(self): 26 | cwd = os.path.dirname(os.path.abspath(__file__)) 27 | self.cwd = os.path.join(cwd, '..', '..', '..') 28 | self.assertTrue(os.path.isdir(self.cwd)) 29 | self.assertTrue(os.path.isdir( 30 | os.path.join(self.cwd, 'nn_dataflow', 'tools'))) 31 | 32 | self.args = ['python3', '-m', 'nn_dataflow.tools.nn_dataflow_search', 33 | 'alex_net', '--batch', '1', 34 | '--node', '1', '1', '--array', '16', '16', 35 | '--regf', '512', '--gbuf', '131072'] 36 | 37 | def test_default_invoke(self): 38 | ''' Default invoke. ''' 39 | ret = self._call(self.args) 40 | self.assertEqual(ret, 0) 41 | 42 | def test_3d_mem(self): 43 | ''' With 3D memory. 
''' 44 | ret = self._call(self.args + ['--mem-type', '3D']) 45 | self.assertEqual(ret, 0) 46 | 47 | def test_no_dataflow(self): 48 | ''' No dataflow scheme found. ''' 49 | args = self.args[:] 50 | args[args.index('--gbuf') + 1] = '2' 51 | args += ['--disable-bypass', 'i', 'o', 'f'] 52 | ret = self._call(args) 53 | self.assertEqual(ret, 2) 54 | 55 | def _call(self, args): 56 | with open(os.devnull, 'w') as output: 57 | result = subprocess.call(args, cwd=self.cwd, 58 | stderr=subprocess.STDOUT, 59 | stdout=output) 60 | 61 | return result 62 | 63 | -------------------------------------------------------------------------------- /nn_dataflow/tests/tool_test/test_nn_layer_stats.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | import unittest 18 | 19 | import os 20 | import subprocess 21 | 22 | class TestNNLayerStats(unittest.TestCase): 23 | ''' Tests for NN layer stats tool. 
''' 24 | 25 | def setUp(self): 26 | cwd = os.path.dirname(os.path.abspath(__file__)) 27 | self.cwd = os.path.join(cwd, '..', '..', '..') 28 | self.assertTrue(os.path.isdir(self.cwd)) 29 | self.assertTrue(os.path.isdir( 30 | os.path.join(self.cwd, 'nn_dataflow', 'tools'))) 31 | 32 | self.args = ['python3', '-m', 'nn_dataflow.tools.nn_layer_stats', 33 | 'alex_net', '-b', '16'] 34 | 35 | def test_default_invoke(self): 36 | ''' Default invoke. ''' 37 | ret = self._call(self.args) 38 | self.assertEqual(ret, 0) 39 | 40 | def _call(self, args): 41 | with open(os.devnull, 'w') as output: 42 | result = subprocess.call(args, cwd=self.cwd, 43 | stderr=subprocess.STDOUT, 44 | stdout=output) 45 | return result 46 | 47 | -------------------------------------------------------------------------------- /nn_dataflow/tests/unit_test/__init__.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 
15 | """ 16 | 17 | -------------------------------------------------------------------------------- /nn_dataflow/tests/unit_test/test_cost.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | import unittest 18 | 19 | from nn_dataflow.core import Cost 20 | from nn_dataflow.core import MemHierEnum as me 21 | 22 | class TestCost(unittest.TestCase): 23 | ''' Tests for Cost. ''' 24 | 25 | def test_valid_args(self): 26 | ''' Valid arguments. ''' 27 | cost = Cost(mac_op=1, 28 | mem_hier=(200, 6, 2, 1), 29 | noc_hop=10, 30 | idl_unit=0, 31 | ) 32 | self.assertEqual(cost.mac_op, 1, 'mac_op') 33 | self.assertEqual(cost.mem_hier, (200, 6, 2, 1), 'mem_hier') 34 | self.assertEqual(cost.noc_hop, 10, 'noc_hop') 35 | self.assertEqual(cost.idl_unit, 0, 'idl_unit') 36 | 37 | def test_invalid_mac_op(self): 38 | ''' Invalid mac_op. ''' 39 | with self.assertRaisesRegex(TypeError, 'Cost: .*mac_op.*'): 40 | _ = Cost(mac_op=(1, 2), 41 | mem_hier=(200, 6, 2, 1), 42 | noc_hop=10, 43 | idl_unit=0, 44 | ) 45 | 46 | def test_invalid_mem_hier_type(self): 47 | ''' Invalid mem_hier type. 
''' 48 | with self.assertRaisesRegex(TypeError, 'Cost: .*mem_hier.*'): 49 | _ = Cost(mac_op=1, 50 | mem_hier=200, 51 | noc_hop=10, 52 | idl_unit=0, 53 | ) 54 | with self.assertRaisesRegex(TypeError, 'Cost: .*mem_hier.*'): 55 | _ = Cost(mac_op=1, 56 | mem_hier=[200, 6, 2, 1], 57 | noc_hop=10, 58 | idl_unit=0, 59 | ) 60 | 61 | def test_invalid_mem_hier_len(self): 62 | ''' Invalid mem_hier len. ''' 63 | with self.assertRaisesRegex(ValueError, 'Cost: .*mem_hier.*'): 64 | _ = Cost(mac_op=1, 65 | mem_hier=(200, 6), 66 | noc_hop=10, 67 | idl_unit=0, 68 | ) 69 | 70 | def test_invalid_noc_hop(self): 71 | ''' Invalid noc_hop. ''' 72 | with self.assertRaisesRegex(TypeError, 'Cost: .*noc_hop.*'): 73 | _ = Cost(mac_op=1, 74 | mem_hier=(200, 6, 2, 1), 75 | noc_hop=[10, 10], 76 | idl_unit=0, 77 | ) 78 | 79 | def test_invalid_idl_unit(self): 80 | ''' Invalid idl_unit. ''' 81 | with self.assertRaisesRegex(TypeError, 'Cost: .*idl_unit.*'): 82 | _ = Cost(mac_op=1, 83 | mem_hier=(200, 6, 2, 1), 84 | noc_hop=10, 85 | idl_unit=set([1, 2]), 86 | ) 87 | 88 | def test_mem_hier_at(self): 89 | ''' Accessor mem_hier. ''' 90 | cost = Cost(mac_op=1, 91 | mem_hier=(200, 6, 2, 1), 92 | noc_hop=10, 93 | idl_unit=0, 94 | ) 95 | self.assertEqual(cost.mem_hier_at(me.DRAM), 200, 'mem_hier: DRAM') 96 | self.assertEqual(cost.mem_hier_at(me.GBUF), 6, 'mem_hier: GBUF') 97 | self.assertEqual(cost.mem_hier_at(me.ITCN), 2, 'mem_hier: ITCN') 98 | self.assertEqual(cost.mem_hier_at(me.REGF), 1, 'mem_hier: REGF') 99 | 100 | def test_mem_hier_at_error(self): 101 | ''' Accessor mem_hier error. 
''' 102 | cost = Cost(mac_op=1, 103 | mem_hier=(200, 6, 2, 1), 104 | noc_hop=10, 105 | idl_unit=0, 106 | ) 107 | self.assertIsNone(cost.mem_hier_at(me.NUM)) 108 | self.assertIsNone(cost.mem_hier_at(None)) 109 | 110 | -------------------------------------------------------------------------------- /nn_dataflow/tests/unit_test/test_data_dim_loops.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | import itertools 18 | import unittest 19 | 20 | from nn_dataflow.core import DataDimLoops 21 | from nn_dataflow.core import LoopEnum as le 22 | 23 | class TestDataDimLoops(unittest.TestCase): 24 | ''' Tests for DataDimLoops. ''' 25 | 26 | def test_valid_args(self): 27 | ''' Valid arguments. ''' 28 | ddls = DataDimLoops(le.IFM, le.OFM) 29 | self.assertTupleEqual(ddls.loops(), (le.IFM, le.OFM)) 30 | 31 | ddls = DataDimLoops(le.BAT, le.IFM, le.OFM) 32 | self.assertTupleEqual(ddls.loops(), (le.IFM, le.OFM, le.BAT)) 33 | 34 | def test_valid_repeated_args(self): 35 | ''' Valid repeated arguments. ''' 36 | ddls = DataDimLoops(le.IFM, le.OFM, le.IFM, le.IFM) 37 | self.assertTupleEqual(ddls.loops(), (le.IFM, le.OFM)) 38 | 39 | ddls = DataDimLoops(*([le.BAT] * 10)) 40 | self.assertTupleEqual(ddls.loops(), (le.BAT,)) 41 | 42 | def test_invalid_args(self): 43 | ''' Invalid arguments. 
        '''
        with self.assertRaisesRegex(ValueError,
                                    'DataDimLoops: .*LoopEnum.*'):
            _ = DataDimLoops(le.NUM + 1)

        with self.assertRaisesRegex(ValueError,
                                    'DataDimLoops: .*LoopEnum.*'):
            _ = DataDimLoops(le.IFM, le.NUM)

    def test_loops(self):
        ''' Get loops. '''
        for loops in self._gen_loop_combs():
            ddls = DataDimLoops(*loops)
            self.assertTupleEqual(ddls.loops(), loops)

    def test_take(self):
        ''' take. '''
        lst = [str(lpe) for lpe in range(le.NUM)]

        # take() keeps exactly the list entries at the loop dimensions.
        for loops in self._gen_loop_combs():
            ddls = DataDimLoops(*loops)
            sublst = ddls.take(lst)

            self.assertEqual(len(sublst), len(loops))
            self.assertListEqual(sublst, [str(lpe) for lpe in loops])

    def test_drop(self):
        ''' drop. '''
        lst = [str(lpe) for lpe in range(le.NUM)]

        # drop() keeps the complementary entries.
        for loops in self._gen_loop_combs():
            ddls = DataDimLoops(*loops)
            sublst = ddls.drop(lst)

            self.assertEqual(len(sublst), le.NUM - len(loops))

    def test_take_and_drop(self):
        ''' take and drop. '''
        lst = [str(lpe) for lpe in range(le.NUM)]

        # take() and drop() must partition the list: disjoint and covering.
        for loops in self._gen_loop_combs():
            ddls = DataDimLoops(*loops)
            takelst = ddls.take(lst)
            droplst = ddls.drop(lst)

            self.assertEqual(len(takelst) + len(droplst), le.NUM)
            self.assertTrue(set(takelst).isdisjoint(set(droplst)))
            self.assertSetEqual(set(takelst) | set(droplst), set(lst))

    def test_repr(self):
        ''' __repr__. '''
        # repr() must round-trip through eval() back to an equal object.
        # pylint: disable=eval-used
        for loops in self._gen_loop_combs():
            ddls = DataDimLoops(*loops)
            self.assertEqual(eval(repr(ddls)), ddls)

    @staticmethod
    def _gen_loop_combs():
        ''' Generate all combinations of LoopEnum with all lengths. '''
        for num in range(1, le.NUM + 1):
            for comb in itertools.combinations(range(le.NUM), num):
                yield comb


--------------------------------------------------------------------------------
/nn_dataflow/tests/unit_test/test_int_range.py:
--------------------------------------------------------------------------------
""" $lic$
Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of
Stanford University

This program is free software: you can redistribute it and/or modify it under
the terms of the Modified BSD-3 License as published by the Open Source
Initiative.

This program is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE. See the BSD-3 License for more details.

You should have received a copy of the Modified BSD-3 License along with this
program. If not, see .
"""

import unittest

from nn_dataflow.core import IntRange

class TestIntRange(unittest.TestCase):
    ''' Tests for IntRange. '''

    def test_valid_args(self):
        ''' Valid arguments. '''
        ir1 = IntRange(1, 7)
        self.assertEqual(ir1.beg, 1)
        self.assertEqual(ir1.end, 7)
        ir2 = IntRange(-3, 0)
        self.assertEqual(ir2.beg, -3)
        self.assertEqual(ir2.end, 0)
        # beg == end is allowed and denotes an empty range.
        ir3 = IntRange(4, 4)
        self.assertEqual(ir3.beg, 4)
        self.assertEqual(ir3.end, 4)

    def test_invalid_args(self):
        ''' Invalid arguments.
        '''
        # Both bounds must be integers.
        with self.assertRaisesRegex(TypeError, 'IntRange: .*beg.*'):
            _ = IntRange(7.2, 3)
        with self.assertRaisesRegex(TypeError, 'IntRange: .*end.*'):
            _ = IntRange(7, None)

        # beg must not exceed end.
        with self.assertRaisesRegex(ValueError, 'IntRange: .*beg.*end.*'):
            _ = IntRange(7, 3)
        with self.assertRaisesRegex(ValueError, 'IntRange: .*beg.*end.*'):
            _ = IntRange(-3, -7)

    def test_size(self):
        ''' Get size. '''
        # size == end - beg.
        ir1 = IntRange(1, 7)
        self.assertEqual(ir1.size(), 6)
        ir2 = IntRange(-3, 0)
        self.assertEqual(ir2.size(), 3)
        ir3 = IntRange(4, 4)
        self.assertEqual(ir3.size(), 0)

    def test_empty(self):
        ''' Get empty. '''
        ir1 = IntRange(1, 7)
        self.assertFalse(ir1.empty())
        ir2 = IntRange(-3, 0)
        self.assertFalse(ir2.empty())
        ir3 = IntRange(4, 4)
        self.assertTrue(ir3.empty())

    def test_range(self):
        ''' Get range. '''
        # range() iterates beg inclusive to end exclusive.
        ir1 = IntRange(1, 7)
        self.assertEqual(len(set(ir1.range())), ir1.size())
        ir2 = IntRange(-3, 0)
        self.assertListEqual(list(ir2.range()), [-3, -2, -1])
        ir3 = IntRange(4, 4)
        self.assertEqual(len(list(ir3.range())), 0)

    def test_overlap(self):
        ''' Get overlap. '''
        # overlap() intersects two ranges and is commutative.
        ir1 = IntRange(-11, 5)
        ir2 = IntRange(3, 8)
        ir_ovlp = ir1.overlap(ir2)
        self.assertEqual(ir_ovlp, IntRange(3, 5))
        self.assertEqual(ir1.overlap(ir2), ir2.overlap(ir1))

        ir3 = IntRange(-3, 3)
        ir_ovlp = ir1.overlap(ir3)
        self.assertEqual(ir_ovlp, IntRange(-3, 3))

        # Disjoint ranges overlap into an empty range.
        ir4 = IntRange(8, 10)
        ir_ovlp = ir1.overlap(ir4)
        self.assertTrue(ir_ovlp.empty())

    def test_overlap_error(self):
        ''' Get overlap error. '''
        # Only another IntRange is accepted, not a bare tuple.
        ir = IntRange(-11, 5)
        with self.assertRaisesRegex(TypeError, 'IntRange: .*'):
            ir.overlap((0, 1))

    def test_offset(self):
        ''' Get offset. '''
        # offset() shifts both bounds by the given amount.
        ir1 = IntRange(1, 7)
        self.assertEqual(ir1.offset(3), IntRange(4, 10))
        ir2 = IntRange(-3, 0)
        self.assertEqual(ir2.offset(-2), IntRange(-5, -2))


--------------------------------------------------------------------------------
/nn_dataflow/tests/unit_test/test_option.py:
--------------------------------------------------------------------------------
""" $lic$
Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of
Stanford University

This program is free software: you can redistribute it and/or modify it under
the terms of the Modified BSD-3 License as published by the Open Source
Initiative.

This program is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE. See the BSD-3 License for more details.

You should have received a copy of the Modified BSD-3 License along with this
program. If not, see .
"""

import unittest

from nn_dataflow.core import Option

class TestOption(unittest.TestCase):
    ''' Tests for Option. '''

    def test_valid_kwargs(self):
        ''' Valid keyword arguments. '''
        options = Option(sw_gbuf_bypass=(False, False, False),
                         sw_solve_loopblocking=False,
                         hw_access_forwarding=False,
                         hw_gbuf_sharing=False,
                         partition_hybrid=True,
                         partition_batch=False,
                         partition_ifmaps=False,
                         partition_interlayer=False,
                         opt_goal='ed',
                         ntops=10,
                         nprocesses=16,
                         verbose=False
                         )
        self.assertEqual(options.sw_gbuf_bypass, (False, False, False),
                         'sw_gbuf_bypass')
        self.assertEqual(options.sw_solve_loopblocking, False,
                         'sw_solve_loopblocking')
        self.assertEqual(options.hw_access_forwarding, False,
                         'hw_access_forwarding')
        self.assertEqual(options.hw_gbuf_sharing, False,
                         'hw_gbuf_sharing')
        self.assertEqual(options.partition_hybrid, True,
                         'partition_hybrid')
        self.assertEqual(options.partition_batch, False,
                         'partition_batch')
        self.assertEqual(options.partition_ifmaps, False,
                         'partition_ifmaps')
        self.assertEqual(options.partition_interlayer, False,
                         'partition_interlayer')
        self.assertEqual(options.opt_goal, 'ed', 'opt_goal')
        self.assertEqual(options.ntops, 10, 'ntops')
        self.assertEqual(options.nprocesses, 16, 'nprocesses')
        self.assertEqual(options.verbose, False, 'verbose')

    def test_valid_args(self):
        ''' Valid arguments. '''
        # The first two fields can also be given positionally.
        options = Option((False, True, False), True)
        self.assertEqual(options.sw_gbuf_bypass, (False, True, False),
                         'sw_gbuf_bypass')
        self.assertEqual(options.sw_solve_loopblocking, True,
                         'sw_solve_loopblocking')

    def test_default_args(self):
        ''' Default arguments.
''' 70 | options = Option() 71 | self.assertTupleEqual(options.sw_gbuf_bypass, (False, False, False)) 72 | self.assertEqual(options.sw_solve_loopblocking, False) 73 | self.assertEqual(options.partition_hybrid, False) 74 | self.assertEqual(options.partition_batch, False) 75 | self.assertEqual(options.partition_ifmaps, False) 76 | self.assertEqual(options.opt_goal, 'e') 77 | self.assertEqual(options.ntops, 1) 78 | self.assertEqual(options.nprocesses, 1) 79 | self.assertEqual(options.verbose, False) 80 | 81 | def test_invalid_args(self): 82 | ''' Invalid args. ''' 83 | with self.assertRaisesRegex(TypeError, 'Option: .*at most.*100'): 84 | _ = Option(*[None] * 100) 85 | 86 | def test_invalid_kwargs(self): 87 | ''' Invalid kwargs. ''' 88 | with self.assertRaisesRegex(TypeError, 'Option: .*bad.*'): 89 | _ = Option(bad='') 90 | 91 | def test_invalid_both_args_kwargs(self): 92 | ''' Invalid both args and kwargs are given. ''' 93 | with self.assertRaisesRegex(TypeError, 'Option: .*sw_gbuf_bypass.*'): 94 | _ = Option((False,) * 3, sw_gbuf_bypass=(False,) * 3) 95 | 96 | def test_invalid_swgbyp_type(self): 97 | ''' Invalid sw_gbuf_bypass type. ''' 98 | with self.assertRaisesRegex(TypeError, 'Option: .*sw_gbuf_bypass.*'): 99 | _ = Option(sw_gbuf_bypass=[False, False, False]) 100 | 101 | def test_invalid_swgbyp_len(self): 102 | ''' Invalid sw_gbuf_bypass len. ''' 103 | with self.assertRaisesRegex(ValueError, 'Option: .*sw_gbuf_bypass.*'): 104 | _ = Option(sw_gbuf_bypass=(False, False)) 105 | 106 | def test_invalid_swsol_hwbufshr(self): 107 | ''' Invalid sw_solve_loopblocking and hw_gbuf_sharing comb. ''' 108 | with self.assertRaisesRegex(ValueError, 109 | 'Option: .*sw_solve_loopblocking.*' 110 | 'hw_gbuf_sharing.*'): 111 | _ = Option(sw_solve_loopblocking=True, hw_gbuf_sharing=True) 112 | 113 | def test_invalid_hwaccfwd_hwbufshr(self): 114 | ''' Invalid hw_access_forwarding and hw_gbuf_sharing comb. 
''' 115 | with self.assertRaisesRegex(ValueError, 116 | 'Option: .*hw_access_forwarding.*' 117 | 'hw_gbuf_sharing.*'): 118 | _ = Option(hw_access_forwarding=True, hw_gbuf_sharing=True) 119 | 120 | def test_invalid_swsol_hwswb(self): 121 | ''' Invalid sw_solve_loopblocking and hw_gbuf_save_writeback comb. ''' 122 | with self.assertRaisesRegex(ValueError, 123 | 'Option: .*sw_solve_loopblocking.*' 124 | 'hw_gbuf_save_writeback.*'): 125 | _ = Option(sw_solve_loopblocking=True, hw_gbuf_save_writeback=True) 126 | 127 | def test_invalid_part_hybrid_ifmaps(self): 128 | ''' Invalid partition_hybrid and partition_ifmaps comb. ''' 129 | with self.assertRaisesRegex(ValueError, 130 | 'Option: .*partition_ifmaps.*' 131 | 'partition_hybrid.*'): 132 | _ = Option(partition_hybrid=False, partition_ifmaps=True) 133 | 134 | def test_invalid_time_ovhd(self): 135 | ''' Invalid layer_pipeline_time_ovhd. ''' 136 | with self.assertRaisesRegex(KeyError, 137 | 'Option: .*layer_pipeline_time_ovhd.*'): 138 | _ = Option(layer_pipeline_time_ovhd=None) 139 | 140 | with self.assertRaisesRegex(ValueError, 141 | 'Option: .*layer_pipeline_time_ovhd.*'): 142 | _ = Option(layer_pipeline_time_ovhd=-1) 143 | 144 | def test_invalid_max_degree(self): 145 | ''' Invalid layer_pipeline_max_degree. ''' 146 | with self.assertRaisesRegex(KeyError, 147 | 'Option: .*layer_pipeline_max_degree.*'): 148 | _ = Option(layer_pipeline_max_degree=None) 149 | 150 | with self.assertRaisesRegex(ValueError, 151 | 'Option: .*layer_pipeline_max_degree.*'): 152 | _ = Option(layer_pipeline_max_degree=-1) 153 | 154 | def test_invalid_opt_goal(self): 155 | ''' Invalid opt_goal. ''' 156 | with self.assertRaisesRegex(ValueError, 'Option: .*opt_goal.*'): 157 | _ = Option(opt_goal='o') 158 | with self.assertRaisesRegex(ValueError, 'Option: .*opt_goal.*'): 159 | _ = Option(opt_goal='E') 160 | 161 | def test_option_list(self): 162 | ''' Accessor option_list. 
''' 163 | options = Option() 164 | self.assertCountEqual(options.option_list(), options._fields) 165 | 166 | -------------------------------------------------------------------------------- /nn_dataflow/tests/unit_test/test_phy_dim2.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | import unittest 18 | 19 | from nn_dataflow.core import PhyDim2 20 | 21 | class TestPhyDim2(unittest.TestCase): 22 | ''' Tests for PhyDim2. ''' 23 | 24 | def test_valid_args(self): 25 | ''' Valid arguments. ''' 26 | dim = PhyDim2(14, 12) 27 | self.assertEqual(dim.h, 14, 'h') 28 | self.assertEqual(dim.w, 12, 'w') 29 | 30 | def test_size(self): 31 | ''' Get size. ''' 32 | dim = PhyDim2(14, 12) 33 | self.assertEqual(dim.size(), 14 * 12, 'size') 34 | 35 | def test_add(self): 36 | ''' Operation add. ''' 37 | dim1 = PhyDim2(14, 12) 38 | dim2 = PhyDim2(5, 3) 39 | self.assertTupleEqual(dim1 + dim2, (19, 15), 'add') 40 | self.assertTupleEqual(dim1 + 3, (17, 15), 'add') 41 | 42 | def test_sub(self): 43 | ''' Operation sub. ''' 44 | dim1 = PhyDim2(14, 12) 45 | dim2 = PhyDim2(5, 3) 46 | self.assertTupleEqual(dim1 - dim2, (9, 9), 'sub') 47 | self.assertTupleEqual(dim1 - 3, (11, 9), 'sub') 48 | 49 | def test_neg(self): 50 | ''' Operation neg. 
''' 51 | dim1 = PhyDim2(14, 12) 52 | dim2 = PhyDim2(5, 3) 53 | self.assertTupleEqual(-dim1, (-14, -12), 'neg') 54 | self.assertTupleEqual(-dim2, (-5, -3), 'neg') 55 | 56 | def test_mul(self): 57 | ''' Operation mul. ''' 58 | dim1 = PhyDim2(14, 12) 59 | dim2 = PhyDim2(5, 3) 60 | self.assertTupleEqual(dim1 * dim2, (70, 36), 'mul') 61 | self.assertTupleEqual(dim1 * 2, (28, 24), 'mul') 62 | self.assertTupleEqual(2 * dim1, (28, 24), 'rmul') 63 | 64 | def test_hop_dist(self): 65 | ''' Get hop distance. ''' 66 | dim1 = PhyDim2(14, 12) 67 | dim2 = PhyDim2(5, 20) 68 | self.assertEqual(dim1.hop_dist(dim2), 9 + 8, 'hop_dist') 69 | self.assertEqual(dim2.hop_dist(dim1), 9 + 8, 'hop_dist') 70 | 71 | def test_hop_dist_error(self): 72 | ''' Get hop distance. ''' 73 | dim1 = PhyDim2(14, 12) 74 | with self.assertRaisesRegex(TypeError, 'hop_dist'): 75 | _ = dim1.hop_dist((5, 20)) 76 | 77 | -------------------------------------------------------------------------------- /nn_dataflow/tests/unit_test/test_scheduling_condition.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 
15 | """ 16 | 17 | import unittest 18 | 19 | from nn_dataflow.core import DataLayout 20 | from nn_dataflow.core import FmapRange 21 | from nn_dataflow.core import NodeRegion 22 | from nn_dataflow.core import ParallelEnum as pe 23 | from nn_dataflow.core import PartitionScheme 24 | from nn_dataflow.core import PhyDim2 25 | from nn_dataflow.core import Resource 26 | from nn_dataflow.core import SchedulingCondition 27 | from nn_dataflow.core import SchedulingConstraint 28 | 29 | class TestSchedulingCondition(unittest.TestCase): 30 | ''' Tests for SchedulingCondition. ''' 31 | 32 | def setUp(self): 33 | 34 | self.resource = Resource( 35 | proc_region=NodeRegion(origin=PhyDim2(0, 0), dim=PhyDim2(1, 1), 36 | type=NodeRegion.PROC), 37 | dram_region=NodeRegion(origin=PhyDim2(0, 0), dim=PhyDim2(1, 1), 38 | type=NodeRegion.DRAM), 39 | src_data_region=NodeRegion(origin=PhyDim2(0, 0), dim=PhyDim2(1, 1), 40 | type=NodeRegion.DRAM), 41 | dst_data_region=NodeRegion(origin=PhyDim2(0, 0), dim=PhyDim2(1, 1), 42 | type=NodeRegion.DRAM), 43 | dim_array=PhyDim2(16, 16), size_gbuf=65536, size_regf=64, 44 | array_bus_width=float('inf'), dram_bandwidth=float('inf'), 45 | no_time_mux=False) 46 | 47 | self.none_cstr = SchedulingConstraint() 48 | 49 | part = PartitionScheme(order=range(pe.NUM), pdims=[(1, 1)] * pe.NUM) 50 | self.ifmap_layout = DataLayout(frngs=(FmapRange((0, 0, 0, 0), 51 | (2, 4, 16, 16)),), 52 | regions=(self.resource.src_data_region,), 53 | parts=(part,)) 54 | 55 | self.sched_seq = (2, 0, 0) 56 | 57 | def test_valid_args(self): 58 | ''' Valid arguments. 
''' 59 | condition = SchedulingCondition(resource=self.resource, 60 | constraint=self.none_cstr, 61 | ifmap_layout=self.ifmap_layout, 62 | sched_seq=self.sched_seq) 63 | self.assertEqual(condition.resource, self.resource) 64 | self.assertEqual(condition.constraint, self.none_cstr) 65 | self.assertEqual(condition.ifmap_layout, self.ifmap_layout) 66 | self.assertTupleEqual(condition.sched_seq, self.sched_seq) 67 | 68 | def test_invalid_resource(self): 69 | ''' Invalid resource. ''' 70 | with self.assertRaisesRegex(TypeError, 71 | 'SchedulingCondition: .*resource.*'): 72 | _ = SchedulingCondition(resource=None, 73 | constraint=self.none_cstr, 74 | ifmap_layout=self.ifmap_layout, 75 | sched_seq=self.sched_seq) 76 | 77 | def test_invalid_constraint(self): 78 | ''' Invalid constraint. ''' 79 | with self.assertRaisesRegex(TypeError, 80 | 'SchedulingCondition: .*constraint.*'): 81 | _ = SchedulingCondition(resource=self.resource, 82 | constraint=None, 83 | ifmap_layout=self.ifmap_layout, 84 | sched_seq=self.sched_seq) 85 | 86 | def test_invalid_ifmap_layout(self): 87 | ''' Invalid ifmap_layout. ''' 88 | with self.assertRaisesRegex(TypeError, 89 | 'SchedulingCondition: .*ifmap_layout.*'): 90 | _ = SchedulingCondition(resource=self.resource, 91 | constraint=self.none_cstr, 92 | ifmap_layout=None, 93 | sched_seq=self.sched_seq) 94 | 95 | def test_invalid_sched_seq(self): 96 | ''' Invalid sched_seq. 
''' 97 | with self.assertRaisesRegex(TypeError, 98 | 'SchedulingCondition: .*sched_seq.*'): 99 | _ = SchedulingCondition(resource=self.resource, 100 | constraint=self.none_cstr, 101 | ifmap_layout=self.ifmap_layout, 102 | sched_seq=list(self.sched_seq)) 103 | 104 | with self.assertRaisesRegex(ValueError, 105 | 'SchedulingCondition: .*sched_seq.*'): 106 | _ = SchedulingCondition(resource=self.resource, 107 | constraint=self.none_cstr, 108 | ifmap_layout=self.ifmap_layout, 109 | sched_seq=self.sched_seq[:-1]) 110 | 111 | -------------------------------------------------------------------------------- /nn_dataflow/tests/unit_test/test_scheduling_result.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | import unittest 18 | from collections import OrderedDict 19 | 20 | from nn_dataflow.core import DataLayout 21 | from nn_dataflow.core import FmapRange 22 | from nn_dataflow.core import NodeRegion 23 | from nn_dataflow.core import ParallelEnum as pe 24 | from nn_dataflow.core import PartitionScheme 25 | from nn_dataflow.core import PhyDim2 26 | from nn_dataflow.core import SchedulingResult 27 | 28 | class TestSchedulingResult(unittest.TestCase): 29 | ''' Tests for SchedulingResult. 
''' 30 | 31 | def setUp(self): 32 | 33 | self.scheme = OrderedDict([('cost', 9.876 + 1.234), 34 | ('time', 123.4), 35 | ('ops', 1234), 36 | ('num_nodes', 4), 37 | ('cost_op', 1.234), 38 | ('cost_access', 9.876), 39 | ('cost_noc', 0), 40 | ('cost_static', 0), 41 | ('proc_time', 59), 42 | ('bus_time', 40), 43 | ('dram_time', 120), 44 | ('access', [[2, 3, 4], 45 | [30, 40, 50], 46 | [400, 500, 600], 47 | [5000, 6000, 7000]]), 48 | ('remote_gbuf_access', [0, 0, 0]), 49 | ('total_nhops', [123, 456, 789]), 50 | ('fetch', [[1, 2, 1], [3, 4, 5]]), 51 | ]) 52 | 53 | part = PartitionScheme(order=range(pe.NUM), pdims=[(1, 1)] * pe.NUM) 54 | self.ofmap_layout = DataLayout( 55 | frngs=(FmapRange((0, 0, 0, 0), (2, 4, 16, 16)),), 56 | regions=(NodeRegion(origin=PhyDim2(0, 0), dim=PhyDim2(1, 1), 57 | type=NodeRegion.DRAM),), 58 | parts=(part,)) 59 | 60 | self.sched_seq = (2, 0, 0) 61 | 62 | def test_valid_args(self): 63 | ''' Valid arguments. ''' 64 | result = SchedulingResult(scheme=self.scheme, 65 | ofmap_layout=self.ofmap_layout, 66 | sched_seq=self.sched_seq) 67 | self.assertIn('ops', result.scheme) 68 | self.assertIn('total_nhops', result.scheme) 69 | self.assertEqual(result.ofmap_layout, self.ofmap_layout) 70 | self.assertTupleEqual(result.sched_seq, self.sched_seq) 71 | 72 | def test_invalid_scheme(self): 73 | ''' Invalid scheme. ''' 74 | with self.assertRaisesRegex(TypeError, 75 | 'SchedulingResult: .*scheme.*'): 76 | _ = SchedulingResult(scheme={}, 77 | ofmap_layout=self.ofmap_layout, 78 | sched_seq=self.sched_seq) 79 | 80 | def test_invalid_ofmap_layout(self): 81 | ''' Invalid ofmap_layout. ''' 82 | with self.assertRaisesRegex(TypeError, 83 | 'SchedulingResult: .*ofmap_layout.*'): 84 | _ = SchedulingResult(scheme=self.scheme, 85 | ofmap_layout=None, 86 | sched_seq=self.sched_seq) 87 | 88 | def test_invalid_sched_seq(self): 89 | ''' Invalid sched_seq. 
''' 90 | with self.assertRaisesRegex(TypeError, 91 | 'SchedulingResult: .*sched_seq.*'): 92 | _ = SchedulingResult(scheme=self.scheme, 93 | ofmap_layout=self.ofmap_layout, 94 | sched_seq=list(self.sched_seq)) 95 | 96 | with self.assertRaisesRegex(ValueError, 97 | 'SchedulingResult: .*sched_seq.*'): 98 | _ = SchedulingResult(scheme=self.scheme, 99 | ofmap_layout=self.ofmap_layout, 100 | sched_seq=self.sched_seq[:-1]) 101 | 102 | def test_total_cost(self): 103 | ''' Accessor total_cost. ''' 104 | result = SchedulingResult(scheme=self.scheme, 105 | ofmap_layout=self.ofmap_layout, 106 | sched_seq=self.sched_seq) 107 | self.assertAlmostEqual(result.total_cost, 1.234 + 9.876) 108 | 109 | def test_total_time(self): 110 | ''' Accessor total_time. ''' 111 | result = SchedulingResult(scheme=self.scheme, 112 | ofmap_layout=self.ofmap_layout, 113 | sched_seq=self.sched_seq) 114 | self.assertAlmostEqual(result.total_time, 123.4) 115 | 116 | self.assertGreaterEqual(result.total_time, result.total_node_time) 117 | self.assertGreaterEqual(result.total_time, result.total_dram_time) 118 | 119 | def test_total_node_time(self): 120 | ''' Accessor total_node_time. ''' 121 | result = SchedulingResult(scheme=self.scheme, 122 | ofmap_layout=self.ofmap_layout, 123 | sched_seq=self.sched_seq) 124 | self.assertAlmostEqual(result.total_node_time, max(59, 40)) 125 | 126 | scheme = self.scheme 127 | scheme['bus_time'] = 100 128 | result = SchedulingResult(scheme=scheme, 129 | ofmap_layout=self.ofmap_layout, 130 | sched_seq=self.sched_seq) 131 | self.assertAlmostEqual(result.total_node_time, max(59, 100)) 132 | 133 | def test_total_dram_time(self): 134 | ''' Accessor total_dram_time. ''' 135 | result = SchedulingResult(scheme=self.scheme, 136 | ofmap_layout=self.ofmap_layout, 137 | sched_seq=self.sched_seq) 138 | self.assertAlmostEqual(result.total_dram_time, 120) 139 | 140 | def test_total_proc_time(self): 141 | ''' Accessor total_proc_time. 
''' 142 | result = SchedulingResult(scheme=self.scheme, 143 | ofmap_layout=self.ofmap_layout, 144 | sched_seq=self.sched_seq) 145 | self.assertAlmostEqual(result.total_proc_time, 59) 146 | 147 | scheme = self.scheme 148 | scheme['bus_time'] = 100 149 | result = SchedulingResult(scheme=scheme, 150 | ofmap_layout=self.ofmap_layout, 151 | sched_seq=self.sched_seq) 152 | self.assertAlmostEqual(result.total_proc_time, 59) 153 | 154 | def test_total_ops(self): 155 | ''' Accessor total_ops. ''' 156 | result = SchedulingResult(scheme=self.scheme, 157 | ofmap_layout=self.ofmap_layout, 158 | sched_seq=self.sched_seq) 159 | self.assertEqual(result.total_ops, 1234) 160 | 161 | def test_total_accesses(self): 162 | ''' Accessor total_cost. ''' 163 | result = SchedulingResult(scheme=self.scheme, 164 | ofmap_layout=self.ofmap_layout, 165 | sched_seq=self.sched_seq) 166 | self.assertSequenceEqual(result.total_accesses, 167 | [9, 120, 1500, 18000]) 168 | 169 | def test_total_accesses_rgbuf(self): 170 | ''' Accessor total_accesses remote gbuf. ''' 171 | scheme = self.scheme.copy() 172 | scheme['remote_gbuf_access'] = [10, 20, 30] 173 | result = SchedulingResult(scheme=scheme, 174 | ofmap_layout=self.ofmap_layout, 175 | sched_seq=self.sched_seq) 176 | self.assertSequenceEqual(result.total_accesses, 177 | [9, 120 + 60, 1500, 18000]) 178 | 179 | def test_total_noc_hops(self): 180 | ''' Accessor total_noc_hops. ''' 181 | result = SchedulingResult(scheme=self.scheme, 182 | ofmap_layout=self.ofmap_layout, 183 | sched_seq=self.sched_seq) 184 | self.assertEqual(result.total_noc_hops, 1368) 185 | 186 | def test_num_nodes(self): 187 | ''' Accessor num_nodes. 
''' 188 | result = SchedulingResult(scheme=self.scheme, 189 | ofmap_layout=self.ofmap_layout, 190 | sched_seq=self.sched_seq) 191 | self.assertEqual(result.num_nodes, 4) 192 | 193 | -------------------------------------------------------------------------------- /nn_dataflow/tests/unit_test/test_version.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | import unittest 18 | 19 | from nn_dataflow import version 20 | 21 | class TestVersion(unittest.TestCase): 22 | ''' Tests for version. ''' 23 | 24 | def test_get_version(self): 25 | ''' get_version. ''' 26 | ver_raw = version.get_version() 27 | ver_lcl = version.get_version(with_local=True) 28 | self.assertIn(ver_raw, ver_lcl) 29 | 30 | -------------------------------------------------------------------------------- /nn_dataflow/tools/__init__.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 
8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | -------------------------------------------------------------------------------- /nn_dataflow/tools/nn_layer_stats.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | import argparse 18 | import sys 19 | 20 | from nn_dataflow.core import ConvLayer, FCLayer 21 | 22 | from nn_dataflow.nns import import_network 23 | 24 | KILO = 1024. 25 | MILLION = 1024.*1024. 26 | 27 | STR_FMT_NAME_LEN = '30' 28 | STR_FMT_NUMB_LEN = '12' 29 | STR_FMT_NUMB_PCS = '2' 30 | 31 | STR_FMT_NAME = '{:' + STR_FMT_NAME_LEN + 's}' 32 | STR_FMT_NUMB_HDER = '{:>' + STR_FMT_NUMB_LEN + '}' 33 | STR_FMT_NUMB = '{:' + STR_FMT_NUMB_LEN + '.' + STR_FMT_NUMB_PCS + 'f}' 34 | 35 | def layer_stats(args): 36 | ''' Print stats of layers in the network. 
''' 37 | 38 | network = import_network(args.net) 39 | word_bytes = (args.word + 7) // 8 40 | batch = args.batch 41 | 42 | hder_fmt = ','.join([STR_FMT_NAME] + [STR_FMT_NUMB_HDER] * 5) + '\n' 43 | line_fmt = ','.join([STR_FMT_NAME] + [STR_FMT_NUMB] * 5) + '\n' 44 | line_sep = '-' * int(STR_FMT_NAME_LEN) + '\n' 45 | 46 | # Header. 47 | sys.stdout.write(hder_fmt 48 | .format('Layer', 49 | 'Ifmap/kB', 'Ofmap/kB', 'Weight/kB', 50 | 'MACs/M', 'MinOptBuf/kB')) 51 | 52 | # Aggregate stats. 53 | max_fmaps = 0 54 | max_filters = 0 55 | max_ops = 0 56 | sum_fmaps = 0 57 | sum_filters = 0 58 | sum_ops = 0 59 | convs = 0 60 | fcs = 0 61 | 62 | for name in network: 63 | 64 | layer = network[name] 65 | 66 | if isinstance(layer, FCLayer): 67 | fcs += 1 68 | elif isinstance(layer, ConvLayer): 69 | convs += 1 70 | 71 | ifmap_size = layer.total_ifmap_size(batch, word_bytes) / KILO 72 | ofmap_size = layer.total_ofmap_size(batch, word_bytes) / KILO 73 | try: 74 | filter_size = layer.total_filter_size(word_bytes) / KILO 75 | except AttributeError: 76 | filter_size = 0 77 | 78 | ops = layer.total_ops(batch) / MILLION 79 | 80 | # The minimum optimal buffer size is the sum of the full size (two 81 | # dimensions) for one data category, the size of one dimension for the 82 | # second, and the size of one point for the third. 
83 | min_opt_buf_size = min( 84 | filter_size + (ifmap_size + ofmap_size / layer.nofm) / batch, 85 | filter_size + (ifmap_size / layer.nifm + ofmap_size) / batch, 86 | ifmap_size + (ofmap_size + filter_size / layer.nifm) / layer.nofm, 87 | ifmap_size + (ofmap_size / batch + filter_size) / layer.nofm, 88 | ofmap_size + (ifmap_size + filter_size / layer.nofm) / layer.nifm, 89 | ofmap_size + (ifmap_size / batch + filter_size) / layer.nifm) 90 | 91 | sys.stdout.write(line_fmt 92 | .format(name, 93 | ifmap_size, ofmap_size, filter_size, 94 | ops, min_opt_buf_size)) 95 | 96 | max_fmaps = max(max_fmaps, ofmap_size) 97 | max_filters = max(max_filters, filter_size) 98 | max_ops = max(max_ops, ops) 99 | sum_fmaps += ofmap_size 100 | sum_filters += filter_size 101 | sum_ops += ops 102 | 103 | sys.stdout.write(line_sep) 104 | 105 | sys.stdout.write(line_fmt 106 | .format('MAX', 107 | float('nan'), max_fmaps, max_filters, 108 | max_ops, float('nan'))) 109 | sys.stdout.write(line_fmt 110 | .format('SUM', 111 | float('nan'), sum_fmaps, sum_filters, 112 | sum_ops, float('nan'))) 113 | 114 | sys.stdout.write(line_sep) 115 | 116 | sys.stdout.write('# CONV layers = {}, # FC layers = {}\n' 117 | .format(convs, fcs)) 118 | 119 | 120 | def argparser(): 121 | ''' Argument parser. 
''' 122 | 123 | ap = argparse.ArgumentParser() 124 | 125 | ap.add_argument('net', 126 | help='network name, should be a .py file under examples') 127 | 128 | ap.add_argument('-b', '--batch', type=int, default=1, 129 | help='batch size') 130 | ap.add_argument('-w', '--word', type=int, default=16, 131 | help='word size in bits') 132 | 133 | return ap 134 | 135 | 136 | if __name__ == '__main__': 137 | layer_stats(argparser().parse_args()) 138 | 139 | -------------------------------------------------------------------------------- /nn_dataflow/util.py: -------------------------------------------------------------------------------- 1 | """ $lic$ 2 | Copyright (C) 2016-2020 by Tsinghua University and The Board of Trustees of 3 | Stanford University 4 | 5 | This program is free software: you can redistribute it and/or modify it under 6 | the terms of the Modified BSD-3 License as published by the Open Source 7 | Initiative. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT ANY 10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 11 | PARTICULAR PURPOSE. See the BSD-3 License for more details. 12 | 13 | You should have received a copy of the Modified BSD-3 License along with this 14 | program. If not, see . 15 | """ 16 | 17 | from functools import reduce 18 | import math 19 | from operator import mul 20 | 21 | ''' 22 | Utilities. 23 | ''' 24 | 25 | class ContentHashClass(): 26 | ''' 27 | Class using the content instead of the object ID for hash. 28 | 29 | Such class instance can be used as key in dictionary. 30 | ''' 31 | # pylint: disable=too-few-public-methods 32 | 33 | def __eq__(self, other): 34 | if isinstance(other, self.__class__): 35 | return self.__dict__ == other.__dict__ 36 | return NotImplemented 37 | 38 | def __ne__(self, other): 39 | r = self.__eq__(other) 40 | if r is NotImplemented: 41 | # "not" NotImplemented will be True. 
42 | return r 43 | return not r 44 | 45 | def __hash__(self): 46 | return hash(frozenset(self.__dict__.items())) 47 | 48 | 49 | class HashableDict(dict): 50 | ''' Hashable dict. ''' 51 | def __eq__(self, other): 52 | if isinstance(other, self.__class__): 53 | return (frozenset(self), frozenset(self.values())) \ 54 | == (frozenset(other), frozenset(other.values())) 55 | return NotImplemented 56 | 57 | def __ne__(self, other): 58 | r = self.__eq__(other) 59 | if r is NotImplemented: 60 | # "not" NotImplemented will be True. 61 | return r 62 | return not r 63 | 64 | def __hash__(self): 65 | return hash((frozenset(self), frozenset(self.values()))) 66 | 67 | def copy(self): 68 | return self.__class__.fromdict(self) 69 | 70 | def __setitem__(self, key, val): 71 | raise KeyError('Cannot insert items to HashableDict.') 72 | 73 | def __delitem__(self, key): 74 | raise KeyError('Cannot delete items from HashableDict.') 75 | 76 | def setdefault(self, key, default=None): 77 | del key, default 78 | raise KeyError('Cannot insert items to HashableDict.') 79 | 80 | def update(self, other): 81 | del other 82 | raise KeyError('Cannot insert items to HashableDict.') 83 | 84 | def pop(self, key, default=None): 85 | del key, default 86 | raise KeyError('Cannot delete items from HashableDict.') 87 | 88 | def popitem(self): 89 | raise KeyError('Cannot delete items from HashableDict.') 90 | 91 | def clear(self): 92 | raise KeyError('Cannot delete items from HashableDict.') 93 | 94 | @classmethod 95 | def fromdict(cls, other, keyfunc=None, valfunc=None): 96 | ''' 97 | Construct a HashableDict from a normal dict instance. 98 | 99 | The keys and values can be modified during the translation. 
100 | ''' 101 | if not isinstance(other, dict): 102 | raise TypeError('HashableDict: fromdict expects a dict argument.') 103 | 104 | keyfunc = keyfunc if keyfunc else lambda x: x 105 | valfunc = valfunc if valfunc else lambda x: x 106 | 107 | return cls((keyfunc(k), valfunc(v)) for k, v in other.items()) 108 | 109 | 110 | def idivc(valx, valy): 111 | ''' 112 | Integer division and ceiling. 113 | 114 | Return the min integer that is no less than `valx / valy`. 115 | ''' 116 | if math.isinf(valy): 117 | if math.isinf(valx): 118 | return float('nan') 119 | return 0 120 | return (valx + valy - 1) // valy 121 | 122 | 123 | def prod(lst): 124 | ''' Get the product of a list. ''' 125 | return reduce(mul, lst, 1) 126 | 127 | 128 | def approx_dividable(total, num, rel_overhead=0.1, abs_overhead=1): 129 | ''' Whether it is reasonable to divide `total` into `num` parts. 130 | `rel_overhead` is the allowed max padding overhead measured 131 | relatively; `abs_overhead` is the allowed max padding 132 | overhead measured by absolute value.''' 133 | return total >= num and isclose( 134 | idivc(total, num) * num, total, 135 | rel_tol=rel_overhead, abs_tol=abs_overhead) 136 | 137 | 138 | def factorize(value, num, limits=None): 139 | ''' 140 | Factorize given `value` into `num` numbers. Return a tuple of length 141 | `num`. 142 | 143 | Iterate over factor combinations of which the product is `value`. 144 | 145 | `limits` is a (num-1)-length tuple, specifying the upper limits for the 146 | first num-1 factors. 147 | ''' 148 | if limits is None: 149 | limits = [float('inf')] * (num - 1) 150 | assert len(limits) >= num - 1 151 | limits = list(limits[:num-1]) + [float('inf')] 152 | 153 | factors = [1] * num 154 | while True: 155 | # Calculate the last factor. 
def closest_factor(value, factor):
    '''
    Return, as a tuple, the largest factor of `value` that is no larger
    than `factor` (if any), followed by the smallest factor of `value`
    that is no smaller than `factor` (if any).
    '''
    if not isinstance(value, int):
        raise TypeError('value must be integers.')

    if value < 0 or factor < 0:
        raise ValueError('arguments must not be negative.')

    res = tuple()

    # Largest no-larger factor: search downwards from floor(factor).
    if factor >= 1:
        cand = math.floor(factor)
        while cand == 0 or value % cand != 0:
            cand -= 1
        assert cand <= factor and value % cand == 0
        res += (cand,)

    # Smallest no-smaller factor: search upwards from ceil(factor).
    if factor <= abs(value):
        cand = math.ceil(factor)
        while cand == 0 or value % cand != 0:
            cand += 1
        assert cand >= factor and value % cand == 0
        res += (cand,)

    return res


def get_ith_range(rng, idx, num):
    '''
    Evenly divide the full range `rng`, a (begin, end) pair, into `num`
    parts, and return the `idx`-th part as a (begin, end) pair.
    '''
    span = rng[1] - rng[0]
    beg = rng[0] + idx * span // num
    end = rng[0] + (idx + 1) * span // num
    assert end <= rng[1]
    return beg, end


def gcd(*values):
    '''
    The greatest common divisor of the given positive integers.
    '''
    if any(not isinstance(v, int) for v in values):
        raise TypeError('value must be integers.')
    if any(v <= 0 for v in values):
        raise ValueError('arguments must be positive.')
    if not values:
        raise ValueError('must give at least 1 value.')

    if len(values) != 2:
        # reduce() returns a single value unchanged and folds gcd()
        # pairwise over three or more values.
        return reduce(gcd, values)

    a, b = values
    # Euclid's algorithm.
    while b:
        a, b = b, a % b
    return a


def lcm(*values):
    '''
    The least common multiple of the given positive integers.
    '''
    if any(not isinstance(v, int) for v in values):
        raise TypeError('value must be integers.')
    if any(v <= 0 for v in values):
        raise ValueError('arguments must be positive.')
    if not values:
        raise ValueError('must give at least 1 value.')

    if len(values) != 2:
        # Same folding scheme as gcd().
        return reduce(lcm, values)

    a, b = values
    return a * b // gcd(a, b)


def isclose(vala, valb, rel_tol=1e-9, abs_tol=0.0):
    '''
    Whether two values are close to each other.

    Same semantics as math.isclose() in Python 3.5 (PEP 485).
    '''
    diff = abs(vala - valb)
    return diff <= rel_tol * max(abs(vala), abs(valb)) or diff <= abs_tol


def assert_float_eq_int(vfloat, vint, message=''):
    '''
    Check the given float value is within 1 of the given int value; raise
    AssertionError carrying the optional message otherwise.
    '''
    if abs(vfloat - vint) > 1:
        raise AssertionError(f'{message} {vfloat} != {vint}')


def apply(func, argv):
    '''
    Call `func` with the argument sequence `argv`, like the Python 2
    built-in apply().
    '''
    return func(*argv)
import hashlib
import os
import string
import subprocess

from . import __version__

def _command_output(args, cwd):
    ''' Run command `args` in directory `cwd`; return stripped stdout bytes. '''
    return subprocess.check_output(args, cwd=cwd).strip()

def get_version(with_local=False):
    ''' Get the version number, optionally with the local version number. '''

    version = __version__

    if with_local:
        # Directory of this file; used as the working dir for git commands.
        cwd = os.path.dirname(os.path.abspath(__file__))

        # `git rev-parse` exits nonzero when not inside a git work tree;
        # stdout/stderr are discarded, only the exit code matters.
        with open(os.devnull, 'w') as devnull:
            result = subprocess.call(['git', 'rev-parse'], cwd=cwd,
                                     stderr=subprocess.STDOUT,
                                     stdout=devnull)
            if result != 0:
                # Not in git repo.
                return version # pragma: no cover

        # Dirty summary.
        # Abbreviate `git diff --shortstat` output, e.g.
        # "2 files changed, 3 insertions(+), 1 deletion(-)" -> "2fc3a1d".
        # The mixed leading spaces in the patterns are harmless: all
        # remaining spaces and commas are stripped by the final replaces.
        short_stat = _command_output(['git', 'diff', 'HEAD', '--shortstat'],
                                     cwd).decode() \
                .replace('files changed', 'fc').replace('file changed', 'fc') \
                .replace('insertions(+)', 'a').replace(' insertion(+)', 'a') \
                .replace('deletions(-)', 'd').replace(' deletion(-)', 'd') \
                .replace(',', '').replace(' ', '')
        # Short digest of the uncommitted diff, so different dirty states
        # of the same commit yield different local versions.
        diff_hash = hashlib.md5(_command_output(['git', 'diff', 'HEAD'], cwd)) \
                .hexdigest()[:8]
        # Empty when the work tree is clean.
        dirty = '' if not short_stat else '-' + short_stat + '-' + diff_hash

        # Git describe.
        # Nearest tag (or commit hash with --always), with the dirty
        # summary appended via --dirty when the work tree is modified.
        desc = _command_output(['git', 'describe', '--tags', '--always',
                                '--dirty={}'.format(dirty)],
                               cwd).decode()
        # PEP 440 local version separator.
        version += '+' + desc

    # A version string must not contain whitespace.
    assert not any(w in version for w in string.whitespace)
    return version
import os
import re
import setuptools

# Package name and one-line description used in the setup metadata below.
PACKAGE = 'nn_dataflow'
DESC = 'Explore the energy-efficient dataflow scheduling for neural networks.'

def _get_version():
    '''
    Parse __version__ out of the package __init__.py without importing the
    package, so no package code runs at build time.
    '''
    here = os.path.abspath(os.path.dirname(__file__))
    with open(os.path.join(here, PACKAGE, '__init__.py'), 'r') as fh:
        matches = re.findall(r'^\s*__version__\s*=\s*[\'"]([^\'"]+)[\'"]',
                             fh.read(), re.M)
        # Use the last assignment if there are multiple.
        if matches:
            return matches[-1]
    # Fallback placeholder when no assignment is found.
    return '0.0.0'

def _readme():
    ''' Read the README file, used as the long description. '''
    here = os.path.abspath(os.path.dirname(__file__))
    with open(os.path.join(here, 'README.rst'), 'r') as fh:
        return fh.read()

setuptools.setup(
    name=PACKAGE,
    version=_get_version(),
    description=DESC,

    author='Mingyu Gao',
    author_email='mgao12@stanford.edu',
    long_description=_readme(),
    url='https://github.com/stanford-mast/nn_dataflow',
    license='BSD 3-clause',

    packages=setuptools.find_packages(),

    # NOTE(review): test-only tools (coverage, pytest*) and the stdlib
    # argparse are listed as runtime requirements -- consider moving them
    # to extras_require/tests_require; kept as-is to preserve behavior.
    install_requires=[
        'argparse',
        'coverage>=4',
        'fastcache>=1',
        'pytest>=3',
        'pytest-cov>=2',
        'pytest-xdist>=1',
        'sympy>=1',
    ],

    entry_points={
        'console_scripts': [
            'nn_dataflow_search=nn_dataflow.tools.nn_dataflow_search:main',
        ]
    },

    keywords='neural-network scheduling dataflow optimizer',
    classifiers=[
        'Development Status :: 3 - Alpha',
        'Intended Audience :: Developers',
        'Intended Audience :: Science/Research',
        'License :: OSI Approved :: BSD License',
        'Programming Language :: Python :: 3.6',
        'Programming Language :: Python :: 3.7',
        'Topic :: Scientific/Engineering :: Artificial Intelligence',
        'Topic :: System :: Hardware',
    ],
)