├── JSON_files ├── JSON.md ├── NVDLA_lenet.json └── NVDLA_lenet_readable.json ├── JSON_format.md ├── README.md ├── TVM-notes.md ├── compilerOverview.png └── files ├── detailed_design.jpg ├── project_presentation.pdf └── project_report.pdf /JSON_files/JSON.md: -------------------------------------------------------------------------------- 1 | Contains various JSON files. 2 | -------------------------------------------------------------------------------- /JSON_format.md: -------------------------------------------------------------------------------- 1 | # Relay -> NVDLA JSON Format 2 | 3 | NVDLA JSON format consists of two parts: "input" and "op". Here, "op" refers to the operators such as Conv2D, MaxPool, ReLu etc. to be offloaded on NVDLA architecture. 4 | JSON consists of network inputs followed by layer-wise operations. One can look at the core tensor operator primitives available in Relay [here](https://tvm.apache.org/docs/langref/relay_op.html). 5 | 6 | ## JSON Format 7 | ```yaml 8 | { 9 | "input": { 10 | "dtype": [ 11 | [ 12 | "float32" 13 | ] 14 | ], 15 | "shape": [ 16 | [ 17 | [ 18 | 1, 19 | 14, 20 | 14, 21 | 512 22 | ] 23 | ] 24 | ] 25 | }, 26 | "op": {attrs} 27 | } 28 | ``` 29 | 30 | ### Syntax for different operator primitives: 31 | 32 | ```yaml 33 | "dense": { 34 | "dtype": [ 35 | [ 36 | "float32" 37 | ] 38 | ], 39 | "num_inputs": "2", 40 | "num_outputs": "1", 41 | "out_dtype": [ 42 | [ 43 | "" 44 | ] 45 | ], 46 | "shape": [ 47 | [ 48 | [ 49 | 1, 50 | 128 51 | ] 52 | ] 53 | ], 54 | "units": [ 55 | [ 56 | "128" 57 | ] 58 | ] 59 | } 60 | ``` 61 | 62 | ```yaml 63 | "relu": { 64 | "dtype": [ 65 | [ 66 | "float32" 67 | ] 68 | ], 69 | "num_inputs": "1", 70 | "num_outputs": "1", 71 | "shape": [ 72 | [ 73 | [ 74 | 1, 75 | 128 76 | ] 77 | ] 78 | ] 79 | } 80 | ``` 81 | 82 | ```yaml 83 | "softmax": { 84 | "axis": [ 85 | [ 86 | "-1" 87 | ] 88 | ], 89 | "dtype": [ 90 | [ 91 | "float32" 92 | ] 93 | ], 94 | "num_inputs": "1", 95 | "num_outputs": "1", 96 | "shape": [ 
97 | [ 98 | [ 99 | 1, 100 | 10 101 | ] 102 | ] 103 | ] 104 | } 105 | 106 | ``` 107 | 108 | ```yaml 109 | "bias_add": { 110 | "axis": [ 111 | [ 112 | "-1" 113 | ] 114 | ], 115 | "dtype": [ 116 | [ 117 | "float32" 118 | ] 119 | ], 120 | "num_inputs": "2", 121 | "num_outputs": "1", 122 | "shape": [ 123 | [ 124 | [ 125 | 1, 126 | 128 127 | ] 128 | ] 129 | ] 130 | } 131 | ``` 132 | 133 | ```yaml 134 | "batch_flatten": { 135 | "dtype": [ 136 | [ 137 | "float32" 138 | ] 139 | ], 140 | "num_inputs": "1", 141 | "num_outputs": "1", 142 | "shape": [ 143 | [ 144 | [ 145 | 1, 146 | 784 147 | ] 148 | ] 149 | ] 150 | } 151 | ``` 152 | 153 | 154 | 155 | 156 | ## Example 1 157 | 158 | ### Relay IR: 159 | ```yaml 160 | def @main(%data: Tensor[(1, 14, 14, 512), float32]) -> Tensor[(1, 7, 7, 512), float32] { 161 | nn.max_pool2d(%data, pool_size=[2, 2], strides=[2, 2], padding=[0, 0, 0, 0], 162 | layout="NHWC") /* ty=Tensor[(1, 7, 7, 512), float32] */ 163 | } 164 | 165 | ``` 166 | 167 | ### JSON: 168 | ```yaml 169 | { 170 | "input": { 171 | "dtype": [ 172 | [ 173 | "float32" 174 | ] 175 | ], 176 | "shape": [ 177 | [ 178 | [ 179 | 1, 180 | 14, 181 | 14, 182 | 512 183 | ] 184 | ] 185 | ] 186 | }, 187 | "max_pool2d_0": { 188 | "ceil_mode": [ 189 | [ 190 | "0" 191 | ] 192 | ], 193 | "dtype": [ 194 | [ 195 | "float32" 196 | ] 197 | ], 198 | "layout": [ 199 | [ 200 | "NHWC" 201 | ] 202 | ], 203 | "num_inputs": "1", 204 | "num_outputs": "1", 205 | "padding": [ 206 | [ 207 | "0", 208 | "0", 209 | "0", 210 | "0" 211 | ] 212 | ], 213 | "pool_size": [ 214 | [ 215 | "2", 216 | "2" 217 | ] 218 | ], 219 | "shape": [ 220 | [ 221 | [ 222 | 1, 223 | 7, 224 | 7, 225 | 512 226 | ] 227 | ] 228 | ], 229 | "strides": [ 230 | [ 231 | "2", 232 | "2" 233 | ] 234 | ] 235 | } 236 | } 237 | ``` 238 | 239 | ## Example 2 240 | 241 | ### Relay IR: 242 | 243 | ```yaml 244 | def @main(%data: Tensor[(1, 1, 28, 28), float32], %fc1_weight: Tensor[(128, 784), float32], %fc1_bias: Tensor[(128), float32], %fc2_weight: 
Tensor[(64, 128), float32], %fc2_bias: Tensor[(64), float32], %fc3_weight: Tensor[(10, 64), float32], %fc3_bias: Tensor[(10), float32]) -> Tensor[(1, 10), float32] { 245 | %0 = nn.batch_flatten(%data) /* ty=Tensor[(1, 784), float32] */; 246 | %1 = nn.dense(%0, %fc1_weight, units=128) /* ty=Tensor[(1, 128), float32] */; 247 | %2 = nn.bias_add(%1, %fc1_bias, axis=-1) /* ty=Tensor[(1, 128), float32] */; 248 | %3 = nn.relu(%2) /* ty=Tensor[(1, 128), float32] */; 249 | %4 = nn.dense(%3, %fc2_weight, units=64) /* ty=Tensor[(1, 64), float32] */; 250 | %5 = nn.bias_add(%4, %fc2_bias, axis=-1) /* ty=Tensor[(1, 64), float32] */; 251 | %6 = nn.relu(%5) /* ty=Tensor[(1, 64), float32] */; 252 | %7 = nn.dense(%6, %fc3_weight, units=10) /* ty=Tensor[(1, 10), float32] */; 253 | %8 = nn.bias_add(%7, %fc3_bias, axis=-1) /* ty=Tensor[(1, 10), float32] */; 254 | nn.softmax(%8) /* ty=Tensor[(1, 10), float32] */ 255 | } 256 | ``` 257 | 258 | ### JSON: 259 | ```yaml 260 | {'input': {'dtype': [['float32']], 'shape': [[[1, 1, 28, 28]]]}, 'batch_flatten_0': {'num_outputs': '1', 'num_inputs': '1', 'dtype': [['float32']], 'shape': [[[1, 784]]]}, 'dense_0': {'num_outputs': '1', 'num_inputs': '2', 'out_dtype': [['']], 'dtype': [['float32']], 'units': [['128']], 'shape': [[[1, 128]]]}, 'bias_add_0': {'num_outputs': '1', 'axis': [['-1']], 'shape': [[[1, 128]]], 'dtype': [['float32']], 'num_inputs': '2'}, 'relu_0': {'num_outputs': '1', 'num_inputs': '1', 'dtype': [['float32']], 'shape': [[[1, 128]]]}, 'dense_1': {'num_outputs': '1', 'num_inputs': '2', 'out_dtype': [['']], 'dtype': [['float32']], 'units': [['64']], 'shape': [[[1, 64]]]}, 'bias_add_1': {'num_outputs': '1', 'axis': [['-1']], 'shape': [[[1, 64]]], 'dtype': [['float32']], 'num_inputs': '2'}, 'relu_1': {'num_outputs': '1', 'num_inputs': '1', 'dtype': [['float32']], 'shape': [[[1, 64]]]}, 'dense_2': {'num_outputs': '1', 'num_inputs': '2', 'out_dtype': [['']], 'dtype': [['float32']], 'units': [['10']], 'shape': [[[1, 10]]]}, 'bias_add_2': 
{'num_outputs': '1', 'axis': [['-1']], 'shape': [[[1, 10]]], 'dtype': [['float32']], 'num_inputs': '2'}, 'softmax_2': {'num_outputs': '1', 'axis': [['-1']], 'shape': [[[1, 10]]], 'dtype': [['float32']], 'num_inputs': '1'}} 261 | ``` 262 | 263 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Integrate-NVDLA-and-TVM (no longer maintained) 2 | 3 | Official code for NVDLA software [sw](https://github.com/nvdla/sw). Official document [sw doc](http://nvdla.org/sw/contents.html). 4 | 5 | ## Designed Workflow 6 | 7 | ![alt text](files/detailed_design.jpg) 8 | 9 | First, TVM performs frontend compilation to translate frontend languages such as Caffe to intermediate representation using the existing Relay compiler. Then, TVM’s Bring Your Own Codegen (BYOC) framework infrastructure is used to convert the Relay IR into a json file containing the neural network information. Then, the modified and rebuilt NVDLA compiler accepts the json file as input and outputs a loadable file. Finally, the loadable file and test image are fed to the NVDLA runtime for model inference. 10 | 11 | For more information, refer to the [complete project report](files/project_report.pdf) in the folder. 12 | 13 | ## Current Progress: 14 | 1. Generate NVDLA-specific JSON using TVM frontend compilation tool. 15 | 2. Successfully run LeNet network architecture (in Relay) on the NVDLA hardware simulator. 16 | 17 | 18 | ## How to use 19 | \[TVM part\] 20 | 1. Install TVM from source using [modified GitHub source code](https://github.com/shivmgg/tvm). 21 | ``` 22 | git clone --recursive https://github.com/shivmgg/tvm 23 | ``` 24 | 2. Follow instructions given [here](https://tvm.apache.org/docs/install/from_source.html#developers-get-source-from-github) to complete the installation. 25 | 3. 
Run one of the example files in the `examples` folder to generate an NVDLA-specific JSON file. 26 | 4. To generate a JSON for LeNet architecture using Relay, run 27 | ``` 28 | python3.6 examples/run_LeNet_Relay.py 29 | ``` 30 | 31 | \[NVDLA part\] 32 | 1. Build NVDLA hardware simulator. Follow the [NVDLA official document](http://nvdla.org/vp.html). 33 | 2. Get and build the [modified NVDLA compiler code](https://github.com/WuDan0399/nvdla_sw/) 34 | ``` 35 | cd {sw-repo-root}/umd 36 | export TOP={sw-repo-root}/umd 37 | make compiler 38 | ``` 39 | The compiler is in `./out/apps/compiler/nvdla_compiler`, copy libnvdla_compiler.so to the same folder to use the compiler: 40 | `cp /sw/umd/out/core/src/compiler/libnvdla_compiler/libnvdla_compiler.so /sw/umd/out/apps/compiler/nvdla_compiler/` 41 | 42 | ## How to Run the Whole Process for Model Inference 43 | 1. Generate a JSON for LeNet architecture using Relay, run 44 | ``` 45 | python3.6 examples/run_LeNet_Relay.py 46 | ``` 47 | 2. Use the JSON file as input and generate a loadable file. 48 | ``` 49 | cd /sw/umd/out/apps/compiler/nvdla_compiler/ 50 | ./nvdla_compiler --json_file 51 | ``` 52 | -------------------------------------------------------------------------------- /TVM-notes.md: -------------------------------------------------------------------------------- 1 | # Working with TVM codebase 2 | 3 | ## Reading: 4 | 1. TVM's BYOC: https://tvm.apache.org/2020/07/15/how-to-bring-your-own-codegen-to-tvm 5 | 2. Deploy and Integration: https://tvm.apache.org/docs/deploy/index.html 6 | 3. Contribute to TVM: https://tvm.apache.org/docs/contribute/pull_request.html 7 | 8 | 9 | ## Using BYOC 10 | 1. https://tvm.apache.org/docs/dev/relay_bring_your_own_codegen.html 11 | 2. Relay Arm ® Compute Library Integration: 12 | 1. https://tvm.apache.org/docs/deploy/arm_compute_lib.html 13 | 2. https://discuss.tvm.apache.org/t/rfc-byoc-arm-compute-library-integration/7082 14 | 3. 
Codebase: https://github.com/apache/incubator-tvm/pull/5915/files 15 | 4. Integrating "add" operation: https://github.com/apache/incubator-tvm/pull/6532/files 16 | 17 | 18 | ## Testing code: 19 | 20 | We want to test and validate our code using unittests. In general, we can use the following code to test our implementation: 21 | https://tvm.apache.org/docs/contribute/pull_request.html#testing 22 | 23 | Dependency: 24 | ```pip install --user pytest Cython``` 25 | 26 | To run all tests: 27 | ``` 28 | # build tvm 29 | make 30 | # change Python version in the script accordingly 31 | ./tests/scripts/task_python_unittest.sh 32 | ``` 33 | To run any particular tests: 34 | ``` 35 | # build tvm 36 | make 37 | # replace testfile name with your target file 38 | # All tests reside in tests folder 39 | TVM_FFI=ctypes python3.6 -m pytest -v tests/python/unittest/test_pass_storage_rewrite.py 40 | ``` 41 | ## To generate JSON file using ARM Compute Library: 42 | 43 | ### Rebuild TVM compiler 44 | 45 | Change set(USE_ARM_COMPUTE_LIB OFF) to set(USE_ARM_COMPUTE_LIB ON) to enable the ARM Compute Library backend in the build/config.cmake file. Use the following commands to rebuild the TVM stack. 46 | 47 | ``` 48 | cd build 49 | cmake .. 
50 | make -j4 51 | ``` 52 | 53 | ### Code to dump JSON file 54 | 55 | ``` 56 | import tvm 57 | from tvm import relay 58 | from tvm.contrib import util 59 | from tvm.relay.op.contrib import arm_compute_lib 60 | 61 | from itertools import zip_longest, combinations 62 | import json 63 | import os 64 | import warnings 65 | 66 | import numpy as np 67 | 68 | 69 | data_type = "float32" 70 | data_shape = (1, 14, 14, 512) 71 | strides = (2, 2) 72 | padding = (0, 0, 0, 0) 73 | pool_size = (2, 2) 74 | layout = "NHWC" 75 | output_shape = (1, 7, 7, 512) 76 | 77 | data = relay.var('data', shape=data_shape, dtype=data_type) 78 | out = relay.nn.max_pool2d(data, pool_size=pool_size, strides=strides, layout=layout, padding=padding) 79 | module = tvm.IRModule.from_expr(out) 80 | 81 | def extract_acl_modules(module): 82 | """Get the ACL module(s) from llvm module.""" 83 | return list(filter(lambda mod: mod.type_key == "arm_compute_lib", 84 | module.get_lib().imported_modules)) 85 | 86 | target = "llvm -mtriple=aarch64-linux-gnu -mattr=+neon" 87 | enable_acl = True 88 | params=None 89 | tvm_ops=0 90 | acl_partitions=1 91 | 92 | with tvm.transform.PassContext(opt_level=3, disabled_pass=["AlterOpLayout"]): 93 | if enable_acl: 94 | module = arm_compute_lib.partition_for_arm_compute_lib(module, params) 95 | lib = relay.build(module, target=target, params=params) 96 | acl_modules = extract_acl_modules(lib) 97 | for mod in acl_modules: 98 | source = mod.get_source("json") 99 | codegen = json.loads(source)["nodes"] 100 | codegen_str = json.dumps(codegen, sort_keys=True, indent=2) 101 | with open('./tvm/JSON_dump/max_pool2d_readable.json', 'w') as outfile: 102 | outfile.write(json.dumps(codegen, sort_keys=True, indent=2)) 103 | with open('./tvm/JSON_dump/max_pool2d.json', 'w') as outfile: 104 | json.dump(codegen, outfile) 105 | ``` 106 | 107 | ### Standard format of JSON in ARM's Compute Library 108 | 109 | JSON representation has two parts: input and node. 
Input represents the input for the operation while node represents the attributes of the operations. 110 | 111 | ``` 112 | Example: Pooling 113 | { 114 | input = { 115 | "op": "input", 116 | "name": "", 117 | "attrs": {"shape": [[list(shape)]], "dtype": [[dtype]]}}, 118 | node = { 119 | "op": "kernel", 120 | "name": typef, 121 | "inputs": [[0, 0, 0]], 122 | "attrs": { 123 | "num_inputs": "1", 124 | "num_outputs": "1", 125 | "layout": [["NHWC"]], 126 | "shape": [[list(output_shape)]], 127 | "dtype": [[dtype]], 128 | "padding": [[str(p) for p in padding]], 129 | "strides": [[str(s) for s in strides]], 130 | "pool_size": [[str(s) for s in sizes]], 131 | "ceil_mode": [[str(1 if ceil_mode else 0)]] 132 | }, 133 | } 134 | ``` 135 | 136 | ## Storing network parameters 137 | We will use Numpy's np.savez command to save dict of numpy arrays. We can refer to https://github.com/rogersce/cnpy to load parameter file in NVDLA. 138 | 139 | ## TODO: 140 | 1. To define Annotation Rules to describe the supported operators and patterns for NVDLA. 141 | 2. To implement NVDLA Codegen to serialize a Relay graph to a JSON representation. 142 | 3. ~~To implement an NVDLA JSON runtime to interpret and execute the serialized JSON graph.~~ 143 | 4. To combine the necessary NVDLA APIs (including default compiler and runtime) with the TVM default code. 144 | 5. To test functional verification of the complete flow and target operators. 145 | 146 | 147 | ## Queries: 148 | 1. Can we run single operators like maxpool, conv2d in NVDLA? What does the JSON representation look like for such an operator? 149 | 2. How can we integrate the JSON runtime with the NVDLA compiler? How does memory allocation take place for input/output/weights? 
150 | See this: https://tvm.apache.org/docs/dev/relay_bring_your_own_codegen.html#implement-a-customized-runtime 151 | -------------------------------------------------------------------------------- /compilerOverview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WuDan0399/Integrate-NVDLA-and-TVM/3f3d41371c8cc54cf18e58bc50ebfb6e6bb45f0b/compilerOverview.png -------------------------------------------------------------------------------- /files/detailed_design.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WuDan0399/Integrate-NVDLA-and-TVM/3f3d41371c8cc54cf18e58bc50ebfb6e6bb45f0b/files/detailed_design.jpg -------------------------------------------------------------------------------- /files/project_presentation.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WuDan0399/Integrate-NVDLA-and-TVM/3f3d41371c8cc54cf18e58bc50ebfb6e6bb45f0b/files/project_presentation.pdf -------------------------------------------------------------------------------- /files/project_report.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WuDan0399/Integrate-NVDLA-and-TVM/3f3d41371c8cc54cf18e58bc50ebfb6e6bb45f0b/files/project_report.pdf --------------------------------------------------------------------------------