├── JSON_files ├── JSON.md ├── NVDLA_lenet.json └── NVDLA_lenet_readable.json ├── JSON_format.md ├── README.md ├── TVM-notes.md ├── compilerOverview.png └── files ├── detailed_design.jpg ├── project_presentation.pdf └── project_report.pdf /JSON_files/JSON.md: -------------------------------------------------------------------------------- 1 | Contains various JSON files. 2 | -------------------------------------------------------------------------------- /JSON_format.md: -------------------------------------------------------------------------------- 1 | # Relay -> NVDLA JSON Format 2 | 3 | NVDLA JSON format consists of two parts: "input" and "op". Here, "op" refers to the operators such as Conv2D, MaxPool, ReLu etc. to be offloaded on NVDLA architecture. 4 | JSON consists of network inputs followed by layer-wise operations. One can look at the core tensor operator primitives available in Relay [here](https://tvm.apache.org/docs/langref/relay_op.html). 5 | 6 | ## JSON Format 7 | ```yaml 8 | { 9 | "input": { 10 | "dtype": [ 11 | [ 12 | "float32" 13 | ] 14 | ], 15 | "shape": [ 16 | [ 17 | [ 18 | 1, 19 | 14, 20 | 14, 21 | 512 22 | ] 23 | ] 24 | ] 25 | }, 26 | "op": {attrs} 27 | } 28 | ``` 29 | 30 | ### Syntax for different operator primitives: 31 | 32 | ```yaml 33 | "dense": { 34 | "dtype": [ 35 | [ 36 | "float32" 37 | ] 38 | ], 39 | "num_inputs": "2", 40 | "num_outputs": "1", 41 | "out_dtype": [ 42 | [ 43 | "" 44 | ] 45 | ], 46 | "shape": [ 47 | [ 48 | [ 49 | 1, 50 | 128 51 | ] 52 | ] 53 | ], 54 | "units": [ 55 | [ 56 | "128" 57 | ] 58 | ] 59 | } 60 | ``` 61 | 62 | ```yaml 63 | "relu": { 64 | "dtype": [ 65 | [ 66 | "float32" 67 | ] 68 | ], 69 | "num_inputs": "1", 70 | "num_outputs": "1", 71 | "shape": [ 72 | [ 73 | [ 74 | 1, 75 | 128 76 | ] 77 | ] 78 | ] 79 | } 80 | ``` 81 | 82 | ```yaml 83 | "softmax": { 84 | "axis": [ 85 | [ 86 | "-1" 87 | ] 88 | ], 89 | "dtype": [ 90 | [ 91 | "float32" 92 | ] 93 | ], 94 | "num_inputs": "1", 95 | "num_outputs": "1", 96 | "shape": [ 
97 | [ 98 | [ 99 | 1, 100 | 10 101 | ] 102 | ] 103 | ] 104 | } 105 | 106 | ``` 107 | 108 | ```yaml 109 | "bias_add": { 110 | "axis": [ 111 | [ 112 | "-1" 113 | ] 114 | ], 115 | "dtype": [ 116 | [ 117 | "float32" 118 | ] 119 | ], 120 | "num_inputs": "2", 121 | "num_outputs": "1", 122 | "shape": [ 123 | [ 124 | [ 125 | 1, 126 | 128 127 | ] 128 | ] 129 | ] 130 | } 131 | ``` 132 | 133 | ```yaml 134 | "batch_flatten": { 135 | "dtype": [ 136 | [ 137 | "float32" 138 | ] 139 | ], 140 | "num_inputs": "1", 141 | "num_outputs": "1", 142 | "shape": [ 143 | [ 144 | [ 145 | 1, 146 | 784 147 | ] 148 | ] 149 | ] 150 | } 151 | ``` 152 | 153 | 154 | 155 | 156 | ## Example 1 157 | 158 | ### Relay IR: 159 | ```yaml 160 | def @main(%data: Tensor[(1, 14, 14, 512), float32]) -> Tensor[(1, 7, 7, 512), float32] { 161 | nn.max_pool2d(%data, pool_size=[2, 2], strides=[2, 2], padding=[0, 0, 0, 0], 162 | layout="NHWC") /* ty=Tensor[(1, 7, 7, 512), float32] */ 163 | } 164 | 165 | ``` 166 | 167 | ### JSON: 168 | ```yaml 169 | { 170 | "input": { 171 | "dtype": [ 172 | [ 173 | "float32" 174 | ] 175 | ], 176 | "shape": [ 177 | [ 178 | [ 179 | 1, 180 | 14, 181 | 14, 182 | 512 183 | ] 184 | ] 185 | ] 186 | }, 187 | "max_pool2d_0": { 188 | "ceil_mode": [ 189 | [ 190 | "0" 191 | ] 192 | ], 193 | "dtype": [ 194 | [ 195 | "float32" 196 | ] 197 | ], 198 | "layout": [ 199 | [ 200 | "NHWC" 201 | ] 202 | ], 203 | "num_inputs": "1", 204 | "num_outputs": "1", 205 | "padding": [ 206 | [ 207 | "0", 208 | "0", 209 | "0", 210 | "0" 211 | ] 212 | ], 213 | "pool_size": [ 214 | [ 215 | "2", 216 | "2" 217 | ] 218 | ], 219 | "shape": [ 220 | [ 221 | [ 222 | 1, 223 | 7, 224 | 7, 225 | 512 226 | ] 227 | ] 228 | ], 229 | "strides": [ 230 | [ 231 | "2", 232 | "2" 233 | ] 234 | ] 235 | } 236 | } 237 | ``` 238 | 239 | ## Example 2 240 | 241 | ### Relay IR: 242 | 243 | ```yaml 244 | def @main(%data: Tensor[(1, 1, 28, 28), float32], %fc1_weight: Tensor[(128, 784), float32], %fc1_bias: Tensor[(128), float32], %fc2_weight: 
Tensor[(64, 128), float32], %fc2_bias: Tensor[(64), float32], %fc3_weight: Tensor[(10, 64), float32], %fc3_bias: Tensor[(10), float32]) -> Tensor[(1, 10), float32] { 245 | %0 = nn.batch_flatten(%data) /* ty=Tensor[(1, 784), float32] */; 246 | %1 = nn.dense(%0, %fc1_weight, units=128) /* ty=Tensor[(1, 128), float32] */; 247 | %2 = nn.bias_add(%1, %fc1_bias, axis=-1) /* ty=Tensor[(1, 128), float32] */; 248 | %3 = nn.relu(%2) /* ty=Tensor[(1, 128), float32] */; 249 | %4 = nn.dense(%3, %fc2_weight, units=64) /* ty=Tensor[(1, 64), float32] */; 250 | %5 = nn.bias_add(%4, %fc2_bias, axis=-1) /* ty=Tensor[(1, 64), float32] */; 251 | %6 = nn.relu(%5) /* ty=Tensor[(1, 64), float32] */; 252 | %7 = nn.dense(%6, %fc3_weight, units=10) /* ty=Tensor[(1, 10), float32] */; 253 | %8 = nn.bias_add(%7, %fc3_bias, axis=-1) /* ty=Tensor[(1, 10), float32] */; 254 | nn.softmax(%8) /* ty=Tensor[(1, 10), float32] */ 255 | } 256 | ``` 257 | 258 | ### JSON: 259 | ```yaml 260 | {'input': {'dtype': [['float32']], 'shape': [[[1, 1, 28, 28]]]}, 'batch_flatten_0': {'num_outputs': '1', 'num_inputs': '1', 'dtype': [['float32']], 'shape': [[[1, 784]]]}, 'dense_0': {'num_outputs': '1', 'num_inputs': '2', 'out_dtype': [['']], 'dtype': [['float32']], 'units': [['128']], 'shape': [[[1, 128]]]}, 'bias_add_0': {'num_outputs': '1', 'axis': [['-1']], 'shape': [[[1, 128]]], 'dtype': [['float32']], 'num_inputs': '2'}, 'relu_0': {'num_outputs': '1', 'num_inputs': '1', 'dtype': [['float32']], 'shape': [[[1, 128]]]}, 'dense_1': {'num_outputs': '1', 'num_inputs': '2', 'out_dtype': [['']], 'dtype': [['float32']], 'units': [['64']], 'shape': [[[1, 64]]]}, 'bias_add_1': {'num_outputs': '1', 'axis': [['-1']], 'shape': [[[1, 64]]], 'dtype': [['float32']], 'num_inputs': '2'}, 'relu_1': {'num_outputs': '1', 'num_inputs': '1', 'dtype': [['float32']], 'shape': [[[1, 64]]]}, 'dense_2': {'num_outputs': '1', 'num_inputs': '2', 'out_dtype': [['']], 'dtype': [['float32']], 'units': [['10']], 'shape': [[[1, 10]]]}, 'bias_add_2': 
{'num_outputs': '1', 'axis': [['-1']], 'shape': [[[1, 10]]], 'dtype': [['float32']], 'num_inputs': '2'}, 'softmax_2': {'num_outputs': '1', 'axis': [['-1']], 'shape': [[[1, 10]]], 'dtype': [['float32']], 'num_inputs': '1'}} 261 | ``` 262 | 263 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Integrate-NVDLA-and-TVM (no longer maintained) 2 | 3 | Official code for NVDLA software [sw](https://github.com/nvdla/sw). Official document [sw doc](http://nvdla.org/sw/contents.html). 4 | 5 | ## Designed Workflow 6 | 7 | ![alt text](files/detailed_design.jpg) 8 | 9 | First, TVM performs frontend compilation to translate frontend languages such as Caffe to intermediate representation using the existing Relay compiler. Then, TVM’s Bring Your Own Codegen (BYOC) framework infrastructure is used to convert the Relay IR into a json file containing the neural network information. Then, the modified and rebuilt NVDLA compiler accepts the json file as input and outputs a loadable file. Finally, the loadable file and test image are fed to the NVDLA runtime for model inference. 10 | 11 | For more information, refer to the [complete project report](files/project_report.pdf) in the folder. 12 | 13 | ## Current Progress: 14 | 1. Generate NVDLA-specific JSON using TVM frontend compilation tool. 15 | 2. Successfully run LeNet network architecture (in Relay) on the NVDLA hardware simulator. 16 | 17 | 18 | ## How to use 19 | \[TVM part\] 20 | 1. Install TVM from source using [modified GitHub source code](https://github.com/shivmgg/tvm). 21 | ``` 22 | git clone --recursive https://github.com/shivmgg/tvm 23 | ``` 24 | 2. Follow instructions given [here](https://tvm.apache.org/docs/install/from_source.html#developers-get-source-from-github) to complete the installation. 25 | 3. 
Run one of the example files in the `examples` folder to generate an NVDLA-specific JSON file. 26 | 4. To generate a JSON for LeNet architecture using Relay, run 27 | ``` 28 | python3.6 examples/run_LeNet_Relay.py 29 | ``` 30 | 31 | \[NVDLA part\] 32 | 1. Build NVDLA hardware simulator. Follow the [NVDLA official document](http://nvdla.org/vp.html). 33 | 2. Get and build the [modified NVDLA compiler code](https://github.com/WuDan0399/nvdla_sw/) 34 | ``` 35 | cd {sw-repo-root}/umd 36 | export TOP={sw-repo-root}/umd 37 | make compiler 38 | ``` 39 | The compiler is in `./out/apps/compiler/nvdla_compiler`, copy libnvdla_compiler.so to the same folder to use the compiler: 40 | `cp /sw/umd/out/core/src/compiler/libnvdla_compiler/libnvdla_compiler.so /sw/umd/out/apps/compiler/nvdla_compiler/` 41 | 42 | ## How to Run the Whole Process for Model Inference 43 | 1. Generate a JSON for LeNet architecture using Relay, run 44 | ``` 45 | python3.6 examples/run_LeNet_Relay.py 46 | ``` 47 | 2. Use the JSON file as input and generate a loadable file. 48 | ``` 49 | cd /sw/umd/out/apps/compiler/nvdla_compiler/ 50 | ./nvdla_compiler --json_file 51 | ``` 52 | -------------------------------------------------------------------------------- /TVM-notes.md: -------------------------------------------------------------------------------- 1 | # Working with TVM codebase 2 | 3 | ## Reading: 4 | 1. TVM's BYOC: https://tvm.apache.org/2020/07/15/how-to-bring-your-own-codegen-to-tvm 5 | 2. Deploy and Integration: https://tvm.apache.org/docs/deploy/index.html 6 | 3. Contribute to TVM: https://tvm.apache.org/docs/contribute/pull_request.html 7 | 8 | 9 | ## Using BYOC 10 | 1. https://tvm.apache.org/docs/dev/relay_bring_your_own_codegen.html 11 | 2. Relay Arm ® Compute Library Integration: 12 | 1. https://tvm.apache.org/docs/deploy/arm_compute_lib.html 13 | 2. https://discuss.tvm.apache.org/t/rfc-byoc-arm-compute-library-integration/7082 14 | 3. 
Codebase: https://github.com/apache/incubator-tvm/pull/5915/files 15 | 4. Integrating "add" operation: https://github.com/apache/incubator-tvm/pull/6532/files 16 | 17 | 18 | ## Testing code: 19 | 20 | We want to test and validate our code using unittests. In general, we can use the following code to test our implementation: 21 | https://tvm.apache.org/docs/contribute/pull_request.html#testing 22 | 23 | Dependency: 24 | ```pip install --user pytest Cython``` 25 | 26 | To run all tests: 27 | ``` 28 | # build tvm 29 | make 30 | # change Python version in the script accordingly 31 | ./tests/scripts/task_python_unittest.sh 32 | ``` 33 | To run any particular tests: 34 | ``` 35 | # build tvm 36 | make 37 | # replace testfile name with your target file 38 | # All tests reside in tests folder 39 | TVM_FFI=ctypes python3.6 -m pytest -v tests/python/unittest/test_pass_storage_rewrite.py 40 | ``` 41 | ## To generate JSON file using ARM Compute Library: 42 | 43 | ### Rebuild TVM compiler 44 | 45 | Change set(USE_ARM_COMPUTE_LIB OFF) to set(USE_ARM_COMPUTE_LIB ON) to enable the ARM Compute Library backend in the build/config.cmake file. Use the following commands to rebuild the TVM stack. 46 | 47 | ``` 48 | cd build 49 | cmake .. 
50 | make -j4 51 | ``` 52 | 53 | ### Code to dump JSON file 54 | 55 | ``` 56 | import tvm 57 | from tvm import relay 58 | from tvm.contrib import util 59 | from tvm.relay.op.contrib import arm_compute_lib 60 | 61 | from itertools import zip_longest, combinations 62 | import json 63 | import os 64 | import warnings 65 | 66 | import numpy as np 67 | 68 | 69 | data_type = "float32" 70 | data_shape = (1, 14, 14, 512) 71 | strides = (2, 2) 72 | padding = (0, 0, 0, 0) 73 | pool_size = (2, 2) 74 | layout = "NHWC" 75 | output_shape = (1, 7, 7, 512) 76 | 77 | data = relay.var('data', shape=data_shape, dtype=data_type) 78 | out = relay.nn.max_pool2d(data, pool_size=pool_size, strides=strides, layout=layout, padding=padding) 79 | module = tvm.IRModule.from_expr(out) 80 | 81 | def extract_acl_modules(module): 82 | """Get the ACL module(s) from llvm module.""" 83 | return list(filter(lambda mod: mod.type_key == "arm_compute_lib", 84 | module.get_lib().imported_modules)) 85 | 86 | target = "llvm -mtriple=aarch64-linux-gnu -mattr=+neon" 87 | enable_acl = True 88 | params=None 89 | tvm_ops=0 90 | acl_partitions=1 91 | 92 | with tvm.transform.PassContext(opt_level=3, disabled_pass=["AlterOpLayout"]): 93 | if enable_acl: 94 | module = arm_compute_lib.partition_for_arm_compute_lib(module, params) 95 | lib = relay.build(module, target=target, params=params) 96 | acl_modules = extract_acl_modules(lib) 97 | for mod in acl_modules: 98 | source = mod.get_source("json") 99 | codegen = json.loads(source)["nodes"] 100 | codegen_str = json.dumps(codegen, sort_keys=True, indent=2) 101 | with open('./tvm/JSON_dump/max_pool2d_readable.json', 'w') as outfile: 102 | outfile.write(json.dumps(codegen, sort_keys=True, indent=2)) 103 | with open('./tvm/JSON_dump/max_pool2d.json', 'w') as outfile: 104 | json.dump(codegen, outfile) 105 | ``` 106 | 107 | ### Standard format of JSON in ARM's Compute Library 108 | 109 | JSON representation has two parts: input and node. 
Input represents the input for the operation while node represents the attributes of the operations. 110 | 111 | ``` 112 | Example: Pooling 113 | { 114 | input = { 115 | "op": "input", 116 | "name": "", 117 | "attrs": {"shape": [[list(shape)]], "dtype": [[dtype]]}}, 118 | node = { 119 | "op": "kernel", 120 | "name": typef, 121 | "inputs": [[0, 0, 0]], 122 | "attrs": { 123 | "num_inputs": "1", 124 | "num_outputs": "1", 125 | "layout": [["NHWC"]], 126 | "shape": [[list(output_shape)]], 127 | "dtype": [[dtype]], 128 | "padding": [[str(p) for p in padding]], 129 | "strides": [[str(s) for s in strides]], 130 | "pool_size": [[str(s) for s in sizes]], 131 | "ceil_mode": [[str(1 if ceil_mode else 0)]] 132 | }, 133 | } 134 | ``` 135 | 136 | ## Storing network parameters 137 | We will use Numpy's np.savez command to save dict of numpy arrays. We can refer to https://github.com/rogersce/cnpy to load parameter file in NVDLA. 138 | 139 | ## TODO: 140 | 1. To define Annotation Rules to describe the supported operators and patterns for NVDLA. 141 | 2. To implement NVDLA Codegen to serialize a Relay graph to a JSON representation. 142 | 3. ~~To implement an NVDLA JSON runtime to interpret and execute the serialized JSON graph.~~ 143 | 4. To combine the necessary NVDLA APIs (including default compiler and runtime) with the TVM default code. 144 | 5. To test functional verification of the complete flow and target operators. 145 | 146 | 147 | ## Queries: 148 | 1. Can we run single operators like maxpool, conv2d in NVDLA? What does the JSON representation look like for such an operator? 149 | 2. How can we integrate the JSON runtime with the NVDLA compiler? How does memory allocation take place for input/output/weights? 
150 | See this: https://tvm.apache.org/docs/dev/relay_bring_your_own_codegen.html#implement-a-customized-runtime 151 | -------------------------------------------------------------------------------- /compilerOverview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WuDan0399/Integrate-NVDLA-and-TVM/3f3d41371c8cc54cf18e58bc50ebfb6e6bb45f0b/compilerOverview.png -------------------------------------------------------------------------------- /files/detailed_design.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WuDan0399/Integrate-NVDLA-and-TVM/3f3d41371c8cc54cf18e58bc50ebfb6e6bb45f0b/files/detailed_design.jpg -------------------------------------------------------------------------------- /files/project_presentation.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WuDan0399/Integrate-NVDLA-and-TVM/3f3d41371c8cc54cf18e58bc50ebfb6e6bb45f0b/files/project_presentation.pdf -------------------------------------------------------------------------------- /files/project_report.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WuDan0399/Integrate-NVDLA-and-TVM/3f3d41371c8cc54cf18e58bc50ebfb6e6bb45f0b/files/project_report.pdf --------------------------------------------------------------------------------