├── imageslist.txt ├── images │   ├── dog.jpg │   ├── eagle.jpg │   └── horses.jpg ├── requirements.txt ├── coco_labels.txt ├── README.md ├── yolov3-tiny.cfg ├── common.py ├── onnx_to_tensorrt.py ├── data_processing.py └── yolov3_to_onnx.py /imageslist.txt: -------------------------------------------------------------------------------- 1 | dog 2 | eagle 3 | horses 4 | 5 | -------------------------------------------------------------------------------- /images/dog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zombie0117/yolov3-tiny-onnx-TensorRT/HEAD/images/dog.jpg -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.15.1 2 | onnx==1.4.1 3 | pycuda>=2017.1.1 4 | Pillow>=5.2.0 5 | wget>=3.2 6 | -------------------------------------------------------------------------------- /images/eagle.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zombie0117/yolov3-tiny-onnx-TensorRT/HEAD/images/eagle.jpg -------------------------------------------------------------------------------- /images/horses.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zombie0117/yolov3-tiny-onnx-TensorRT/HEAD/images/horses.jpg -------------------------------------------------------------------------------- /coco_labels.txt: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorbike 5 | aeroplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | stop sign 13 | parking meter 14 | bench 15 | bird 16 | cat 17 | dog 18 | horse 19 | sheep 20 | cow 21 | elephant 22 | bear 23 | zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 32 | snowboard 33 | sports ball 34 | kite 35 | baseball bat 36 | baseball glove 37 | skateboard 38 | surfboard 39 | tennis racket 40 | bottle 41 | wine glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | orange 51 | broccoli 52 | carrot 53 | hot dog 54 | pizza 55 | donut 56 | cake 57 | chair 58 | sofa 59 | pottedplant 60 | bed 61 | diningtable 62 | toilet 63 | tvmonitor 64 | laptop 65 | mouse 66 | remote 67 | keyboard 68 | cell phone 69 | microwave 70 | oven 71 | toaster 72 | sink 73 | refrigerator 74 | book 75 | clock 76 | vase 77 | scissors 78 | teddy bear 79 | hair drier 80 | toothbrush -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # yolov3-tiny2onnx2trt 2 | Convert your YOLOv3-tiny model to a TensorRT engine 3 | 4 | # device: NVIDIA Jetson TX2 5 | 6 | 7 | # jetpack version: 4.2 8 | ubuntu18.04 9 | tensorrt5.0.6.3 10 | cuda10.0 11 | cudnn7.3.1 12 | 13 | 14 | # others: 15 | python=2.7 16 | numpy=1.16.1 17 | onnx=1.4.1 (important) 18 | pycuda=2019.1.1 19 | Pillow=6.1.0 20 | wget=3.2 21 | 22 | 23 | # custom settings 24 | 25 | data_processing.py: 26 | line14: LABEL_FILE_PATH = '/home/nvidia/yolov3-tiny2onnx2trt/coco_labels.txt' 27 | line19: CATEGORY_NUM = 80 28 | 29 | yolov3_to_onnx.py: 30 | line778: img_size = 416 31 | line784: cfg_file_path = '/home/nvidia/yolov3-tiny2onnx2trt/yolov3-tiny.cfg' 32 | line811: weights_file_path = '/home/nvidia/yolov3-tiny2onnx2trt/yolov3-tiny.weights' 33 | line826: output_file_path = 'yolov3-tiny.onnx' 34 | 35 | onnx_to_tensorrt.py: 36 | line39: input_size = 416 37 | line40: batch_size = 1 38 | line42~line46: 39 | onnx_file_path = 'yolov3-tiny.onnx' 40 | engine_file_path = 'yolov3-tiny.trt' 41 | input_file_list = '/home/nvidia/yolov3-tiny2onnx2trt/imageslist.txt' 42 | IMAGE_PATH = '/home/nvidia/yolov3-tiny2onnx2trt/images/' 43 | save_path = '/home/nvidia/yolov3-tiny2onnx2trt/' 44 | # notes (very important!): 45 | 0.The onnx version must be 1.4.1. If it is not, run: 46 | pip uninstall onnx 47 | pip install onnx==1.4.1
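 To confirm that the correct version is installed before converting, you can run: python -c "import onnx; print(onnx.__version__)" (it should print 1.4.1).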
48 | 49 | 1.The cfg file must end with a blank line; if it does not, press Enter at the end of the file to add one. 50 | 51 | # steps: 52 | 0.Put your .weights file in the folder: 53 | |-yolov3-tiny2onnx2trt 54 | |-yolov3-tiny.weights 55 | 56 | 1.Change your settings as described in "# custom settings" above. 57 | 58 | 2.Run commands: 59 | cd yolov3-tiny2onnx2trt 60 | python yolov3_to_onnx.py 61 | 62 | You will get a yolov3-tiny.onnx file. 63 | 64 | 3.Run commands: 65 | python onnx_to_tensorrt.py 66 | 67 | You will get a yolov3-tiny.trt file and output images with the detected bounding boxes drawn on them. 68 | 69 | -------------------------------------------------------------------------------- /yolov3-tiny.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=2 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=16 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=32 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=64 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [maxpool] 58 | size=2 59 | stride=2 60 | 61 | [convolutional] 62 | batch_normalize=1 63 | filters=128 64 | size=3 65 | stride=1 66 | pad=1 67 | activation=leaky 68 | 69 | [maxpool] 70 | size=2 71 | stride=2 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=256 76 | size=3 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [maxpool] 82 | size=2 83 | stride=2 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=512 88 | size=3 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [maxpool] 94 | size=2 95 | stride=1 96 | 97 | [convolutional] 98 | batch_normalize=1 99 | filters=1024 100 | size=3 101 | stride=1 102 | pad=1 103 | activation=leaky 104 | 105 | ########### 106 | 107 | [convolutional] 108 | batch_normalize=1 109 | filters=256 110 | size=1 111 | stride=1 112 | pad=1 113 | activation=leaky 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=512 118 | size=3 119 | stride=1 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | size=1 125 | stride=1 126 | pad=1 127 | filters=255 128 | activation=linear 129 | 130 | 131 | 132 | [yolo] 133 | mask = 3,4,5 134 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 135 | classes=80 136 | num=6 137 | jitter=.3 138 | ignore_thresh = .7
139 | truth_thresh = 1 140 | random=1 141 | 142 | [route] 143 | layers = -4 144 | 145 | [convolutional] 146 | batch_normalize=1 147 | filters=128 148 | size=1 149 | stride=1 150 | pad=1 151 | activation=leaky 152 | 153 | [upsample] 154 | stride=2 155 | 156 | [route] 157 | layers = -1, 8 158 | 159 | [convolutional] 160 | batch_normalize=1 161 | filters=256 162 | size=3 163 | stride=1 164 | pad=1 165 | activation=leaky 166 | 167 | [convolutional] 168 | size=1 169 | stride=1 170 | pad=1 171 | filters=255 172 | activation=linear 173 | 174 | [yolo] 175 | mask = 0,1,2 176 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 177 | classes=80 178 | num=6 179 | jitter=.3 180 | ignore_thresh = .7 181 | truth_thresh = 1 182 | random=1 183 | -------------------------------------------------------------------------------- /common.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 1993-2018 NVIDIA Corporation. All rights reserved. 3 | # 4 | # NOTICE TO LICENSEE: 5 | # 6 | # This source code and/or documentation ("Licensed Deliverables") are 7 | # subject to NVIDIA intellectual property rights under U.S. and 8 | # international Copyright laws. 9 | # 10 | # These Licensed Deliverables contained herein is PROPRIETARY and 11 | # CONFIDENTIAL to NVIDIA and is being provided under the terms and 12 | # conditions of a form of NVIDIA software license agreement by and 13 | # between NVIDIA and Licensee ("License Agreement") or electronically 14 | # accepted by Licensee. Notwithstanding any terms or conditions to 15 | # the contrary in the License Agreement, reproduction or disclosure 16 | # of the Licensed Deliverables to any third party without the express 17 | # written consent of NVIDIA is prohibited. 18 | # 19 | # NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE 20 | # LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE 21 | # SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS 22 | # PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND. 23 | # NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED 24 | # DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, 25 | # NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. 26 | # NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE 27 | # LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY 28 | # SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY 29 | # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, 30 | # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS 31 | # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 32 | # OF THESE LICENSED DELIVERABLES. 33 | # 34 | # U.S. Government End Users. These Licensed Deliverables are a 35 | # "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT 36 | # 1995), consisting of "commercial computer software" and "commercial 37 | # computer software documentation" as such terms are used in 48 38 | # C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government 39 | # only as a commercial end item. Consistent with 48 C.F.R.12.212 and 40 | # 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all 41 | # U.S. Government End Users acquire the Licensed Deliverables with 42 | # only those rights set forth herein. 
43 | # 44 | # Any use of the Licensed Deliverables in individual and commercial 45 | # software must include, in the user documentation and internal 46 | # comments to the code, the above Disclaimer and U.S. Government End 47 | # Users Notice. 48 | # 49 | 50 | import os 51 | import argparse 52 | import numpy as np 53 | import pycuda.driver as cuda 54 | import tensorrt as trt 55 | 56 | try: 57 | # Sometimes python2 does not understand FileNotFoundError 58 | FileNotFoundError 59 | except NameError: 60 | FileNotFoundError = IOError 61 | 62 | def GiB(val): 63 | return val * 1 << 30 64 | 65 | def find_sample_data(description="Runs a TensorRT Python sample", subfolder="", find_files=[]): 66 | ''' 67 | Parses sample arguments. 68 | Args: 69 | description (str): Description of the sample. 70 | subfolder (str): The subfolder containing data relevant to this sample 71 | find_files (str): A list of filenames to find. Each filename will be replaced with an absolute path. 72 | Returns: 73 | str: Path of data directory. 74 | Raises: 75 | FileNotFoundError 76 | ''' 77 | kDEFAULT_DATA_ROOT = os.path.abspath("/usr/src/tensorrt/data") 78 | 79 | # Standard command-line arguments for all samples. 80 | parser = argparse.ArgumentParser(description=description) 81 | parser.add_argument("-d", "--datadir", help="Location of the TensorRT sample data directory.") 82 | args, unknown_args = parser.parse_known_args() 83 | 84 | # If data directory is not specified, use the default. 85 | data_root = args.datadir if args.datadir else kDEFAULT_DATA_ROOT 86 | # If the subfolder exists, append it to the path, otherwise use the provided path as-is. 87 | subfolder_path = os.path.join(data_root, subfolder) 88 | if not os.path.exists(subfolder_path): 89 | print("WARNING: " + subfolder_path + " does not exist. Using " + data_root + " instead.") 90 | data_path = subfolder_path if os.path.exists(subfolder_path) else data_root 91 | 92 | # Make sure data directory exists. 93 | if not (os.path.exists(data_path)): 94 | raise FileNotFoundError(data_path + " does not exist. Please provide the correct data path with the -d option.") 95 | 96 | # Find all requested files. 97 | for index, f in enumerate(find_files): 98 | find_files[index] = os.path.abspath(os.path.join(data_path, f)) 99 | if not os.path.exists(find_files[index]): 100 | raise FileNotFoundError(find_files[index] + " does not exist. Please provide the correct data path with the -d option.") 101 | if find_files: 102 | return data_path, find_files 103 | else: 104 | return data_path 105 | 106 | # Simple helper data class that's a little nicer to use than a 2-tuple. 107 | class HostDeviceMem(object): 108 | def __init__(self, host_mem, device_mem): 109 | self.host = host_mem 110 | self.device = device_mem 111 | 112 | def __str__(self): 113 | return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device) 114 | 115 | def __repr__(self): 116 | return self.__str__() 117 | 118 | # Allocates all buffers required for an engine, i.e. host/device inputs/outputs. 119 | def allocate_buffers(engine): 120 | inputs = [] 121 | outputs = [] 122 | bindings = [] 123 | stream = cuda.Stream() 124 | for binding in engine: 125 | size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size 126 | dtype = trt.nptype(engine.get_binding_dtype(binding)) 127 | # Allocate host and device buffers 128 | host_mem = cuda.pagelocked_empty(size, dtype) 129 | device_mem = cuda.mem_alloc(host_mem.nbytes) 130 | # Append the device buffer to device bindings. 
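 Page-locked (pinned) host memory is what lets the asynchronous memcpy calls in do_inference below overlap transfers with kernel execution.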
131 | bindings.append(int(device_mem)) 132 | # Append to the appropriate list. 133 | if engine.binding_is_input(binding): 134 | inputs.append(HostDeviceMem(host_mem, device_mem)) 135 | else: 136 | outputs.append(HostDeviceMem(host_mem, device_mem)) 137 | return inputs, outputs, bindings, stream 138 | 139 | # This function is generalized for multiple inputs/outputs. 140 | # inputs and outputs are expected to be lists of HostDeviceMem objects. 141 | def do_inference(context, bindings, inputs, outputs, stream, batch_size=1): 142 | # Transfer input data to the GPU. 143 | [cuda.memcpy_htod_async(inp.device, inp.host, stream) for inp in inputs] 144 | # Run inference. 145 | context.execute_async(batch_size=batch_size, bindings=bindings, stream_handle=stream.handle) 146 | # Transfer predictions back from the GPU. 147 | [cuda.memcpy_dtoh_async(out.host, out.device, stream) for out in outputs] 148 | # Synchronize the stream 149 | stream.synchronize() 150 | # Return only the host outputs. 151 | return [out.host for out in outputs] 152 | -------------------------------------------------------------------------------- /onnx_to_tensorrt.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | # 3 | # Copyright 1993-2018 NVIDIA Corporation. All rights reserved. 4 | # 5 | # NOTICE TO LICENSEE: 6 | # 7 | # This source code and/or documentation ("Licensed Deliverables") are 8 | # subject to NVIDIA intellectual property rights under U.S. and 9 | # international Copyright laws. 10 | # 11 | # These Licensed Deliverables contained herein is PROPRIETARY and 12 | # CONFIDENTIAL to NVIDIA and is being provided under the terms and 13 | # conditions of a form of NVIDIA software license agreement by and 14 | # between NVIDIA and Licensee ("License Agreement") or electronically 15 | # accepted by Licensee. Notwithstanding any terms or conditions to 16 | # the contrary in the License Agreement, reproduction or disclosure 17 | # of the Licensed Deliverables to any third party without the express 18 | # written consent of NVIDIA is prohibited. 19 | # 20 | # NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE 21 | # LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE 22 | # SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS 23 | # PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND. 24 | # NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED 25 | # DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, 26 | # NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. 27 | # NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE 28 | # LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY 29 | # SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY 30 | # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, 31 | # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS 32 | # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 33 | # OF THESE LICENSED DELIVERABLES. 34 | # 35 | # U.S. Government End Users. These Licensed Deliverables are a 36 | # "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT 37 | # 1995), consisting of "commercial computer software" and "commercial 38 | # computer software documentation" as such terms are used in 48 39 | # C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government 40 | # only as a commercial end item. Consistent with 48 C.F.R.12.212 and 41 | # 48 C.F.R. 
227.7202-1 through 227.7202-4 (JUNE 1995), all 42 | # U.S. Government End Users acquire the Licensed Deliverables with 43 | # only those rights set forth herein. 44 | # 45 | # Any use of the Licensed Deliverables in individual and commercial 46 | # software must include, in the user documentation and internal 47 | # comments to the code, the above Disclaimer and U.S. Government End 48 | # Users Notice. 49 | # 50 | 51 | from __future__ import print_function 52 | 53 | import glob 54 | import time 55 | import numpy as np 56 | import tensorrt as trt 57 | import pycuda.driver as cuda 58 | import pycuda.autoinit 59 | from PIL import ImageDraw 60 | from data_processing import PreprocessYOLO, PostprocessYOLO, ALL_CATEGORIES 61 | 62 | import sys, os 63 | import common 64 | 65 | TRT_LOGGER = trt.Logger() 66 | 67 | 68 | def draw_bboxes(image_raw, bboxes, confidences, categories, all_categories, bbox_color='blue'): 69 | """Draw the bounding boxes on the original input image and return it. 70 | 71 | Keyword arguments: 72 | image_raw -- a raw PIL Image 73 | bboxes -- NumPy array containing the bounding box coordinates of N objects, with shape (N,4). 74 | categories -- NumPy array containing the corresponding category for each object, 75 | with shape (N,) 76 | confidences -- NumPy array containing the corresponding confidence for each object, 77 | with shape (N,) 78 | all_categories -- a list of all categories in the correct order (required for looking up 79 | the category name) 80 | bbox_color -- an optional string specifying the color of the bounding boxes (default: 'blue') 81 | """ 82 | draw = ImageDraw.Draw(image_raw) 83 | print(bboxes, confidences, categories) 84 | for box, score, category in zip(bboxes, confidences, categories): 85 | x_coord, y_coord, width, height = box 86 | left = max(0, np.floor(x_coord + 0.5).astype(int)) 87 | top = max(0, np.floor(y_coord + 0.5).astype(int)) 88 | right = min(image_raw.width, np.floor(x_coord + width + 0.5).astype(int)) 89 | bottom = min(image_raw.height, np.floor(y_coord + height + 0.5).astype(int)) 90 | 91 | draw.rectangle(((left, top), (right, bottom)), outline=bbox_color) 92 | draw.text((left, top - 12), '{0} {1:.2f}'.format(all_categories[category], score), fill=bbox_color) 93 | 94 | return image_raw 95 | 96 | def get_engine(onnx_file_path, max_batch_size, fp16_on, engine_file_path=""): 97 | """Attempts to load a serialized engine if available, otherwise builds a new TensorRT engine and saves it.""" 98 | def build_engine(): 99 | """Takes an ONNX file and creates a TensorRT engine to run inference with""" 100 | with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.OnnxParser(network, TRT_LOGGER) as parser: 101 | builder.max_workspace_size = 1 << 30 # 1GB 102 | builder.max_batch_size = max_batch_size 103 | builder.fp16_mode = fp16_on 104 | # Parse model file 105 | if not os.path.exists(onnx_file_path): 106 | print('ONNX file {} not found, please run yolov3_to_onnx.py first to generate it.'.format(onnx_file_path)) 107 | exit(0) 108 | print('Loading ONNX file from path {}...'.format(onnx_file_path)) 109 | with open(onnx_file_path, 'rb') as model: 110 | print('Beginning ONNX file parsing') 111 | parser.parse(model.read()) 112 | print('Completed parsing of ONNX file') 113 | print('Building an engine from file {}; this may take a while...'.format(onnx_file_path)) 114 | engine = builder.build_cuda_engine(network) 115 | print("Completed creating Engine") 116 | with open(engine_file_path, "wb") as f: 117 | f.write(engine.serialize())
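 # caching the serialized engine on disk lets later runs take the fast deserialize branch below instead of rebuilding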
118 | return engine 119 | 120 | if os.path.exists(engine_file_path): 121 | # If a serialized engine exists, use it instead of building an engine. 122 | print("Reading engine from file {}".format(engine_file_path)) 123 | with open(engine_file_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime: 124 | return runtime.deserialize_cuda_engine(f.read()) 125 | else: 126 | return build_engine() 127 | def download_file(path, link, checksum_reference=None): 128 | if not os.path.exists(path): 129 | import wget  # local import: wget is not used anywhere else in this script 130 | print('downloading'); wget.download(link, path) 131 | print() 132 | if checksum_reference is not None: 133 | raise NotImplementedError('checksum verification is not implemented') 134 | return path 135 | def main(): 136 | """Create a TensorRT engine for the ONNX-based YOLOv3-tiny model and run inference.""" 137 | 138 | # Try to load a previously generated yolov3-tiny network graph in ONNX format: 139 | input_size = 416 140 | batch_size = 1 141 | fp16_on = True 142 | onnx_file_path = 'yolov3-tiny.onnx' 143 | engine_file_path = 'yolov3-tiny.trt' 144 | input_file_list = '/home/nvidia/yolov3-tiny2onnx2trt/imageslist.txt' 145 | IMAGE_PATH = '/home/nvidia/yolov3-tiny2onnx2trt/images/' 146 | save_path = '/home/nvidia/yolov3-tiny2onnx2trt/' 147 | 148 | output_shapes_416 = [(batch_size, 255, 13, 13), (batch_size, 255, 26, 26)]  # 255 = 3 * (4 + 1 + CATEGORY_NUM) channels for the 80-class cfg in this repo 149 | output_shapes_480 = [(batch_size, 255, 15, 15), (batch_size, 255, 30, 30)] 150 | output_shapes_544 = [(batch_size, 255, 17, 17), (batch_size, 255, 34, 34)] 151 | output_shapes_608 = [(batch_size, 255, 19, 19), (batch_size, 255, 38, 38)] 152 | output_shapes_dic = {'416': output_shapes_416, '480': output_shapes_480, '544': output_shapes_544, '608': output_shapes_608} 153 | 154 | with open(input_file_list, 'r') as f: 155 | filenames = [] 156 | for line in f.readlines(): 157 | filenames.append(line.strip()) 158 | 159 | filenames = glob.glob(os.path.join(IMAGE_PATH, '*.jpg'))  # note: this overrides the list read above; every .jpg in IMAGE_PATH is processed 160 | 161 | nums = len(filenames) 162 | # print(filenames) 163 | 164 | input_resolution_yolov3_HW = (input_size, input_size) 165 | 166 | preprocessor = PreprocessYOLO(input_resolution_yolov3_HW) 167 | 168 | output_shapes = output_shapes_dic[str(input_size)] 169 | 170 | postprocessor_args = {"yolo_masks": [(3, 4, 5), (0, 1, 2)], 171 | "yolo_anchors": [(10,14), (23,27), (37,58), (81,82), (135,169), (344,319)], 172 | "obj_threshold": 0.5, 173 | "nms_threshold": 0.35, 174 | "yolo_input_resolution": input_resolution_yolov3_HW} 175 | 176 | postprocessor = PostprocessYOLO(**postprocessor_args) 177 | 178 | # Do inference with TensorRT 179 | filenames_batch = [] 180 | images = [] 181 | images_raw = [] 182 | trt_outputs = [] 183 | index = 0 184 | with get_engine(onnx_file_path, batch_size, fp16_on, engine_file_path) as engine, engine.create_execution_context() as context: 185 | # inputs, outputs, bindings, stream = common.allocate_buffers(engine) 186 | # Do inference 187 | for filename in filenames: 188 | filenames_batch.append(filename) 189 | image_raw, image = preprocessor.process(filename) 190 | images_raw.append(image_raw) 191 | images.append(image) 192 | index += 1 193 | if index != nums and len(images_raw) != batch_size: 194 | continue 195 | inputs, outputs, bindings, stream = common.allocate_buffers(engine) 196 | images_batch = np.concatenate(images, axis=0) 197 | inputs[0].host = images_batch 198 | t1 = time.time() 199 | trt_outputs = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream, batch_size=batch_size) 200 | t2 = time.time() 201 | t_inf = t2 - t1 202 | print("time spent:",t_inf) 203 | print(len(trt_outputs)) 204 | trt_outputs =
[output.reshape(shape) for output, shape in zip(trt_outputs, output_shapes)] 205 | 206 | print('test') 207 | for i in range(len(filenames_batch)): 208 | fname = filenames_batch[i].split('/') 209 | fname = fname[-1].split('.')[0] 210 | img_raw = images_raw[i] 211 | shape_orig_WH = img_raw.size 212 | boxes, classes, scores = postprocessor.process(trt_outputs, (shape_orig_WH), i) 213 | print("boxes size:",len(boxes)) 214 | # Draw the bounding boxes onto the original input image and save it as a PNG file 215 | obj_detected_img = draw_bboxes(img_raw, boxes, scores, classes, ALL_CATEGORIES) 216 | output_image_path = save_path + fname + '_' + str(input_size) + '_bboxes.png' 217 | obj_detected_img.save(output_image_path, 'PNG') 218 | print('Saved image with bounding boxes of detected objects to {}.'.format(output_image_path)) 219 | filenames_batch = [] 220 | images_batch = [] 221 | images = [] 222 | images_raw = [] 223 | trt_outputs = [] 224 | 225 | if __name__ == '__main__': 226 | main() 227 | -------------------------------------------------------------------------------- /data_processing.py: -------------------------------------------------------------------------------- 1 | import math 2 | from PIL import Image 3 | import numpy as np 4 | 5 | 6 | # YOLOv3-608 has been trained with these 80 categories from COCO: 7 | # Lin, Tsung-Yi, et al. "Microsoft COCO: Common Objects in Context." 8 | # European Conference on Computer Vision. Springer, Cham, 2014. 9 | 10 | def load_label_categories(label_file_path): 11 | categories = [line.rstrip('\n') for line in open(label_file_path)] 12 | return categories 13 | 14 | LABEL_FILE_PATH = '/home/nvidia/yolov3-tiny2onnx2trt/coco_labels.txt' 15 | ALL_CATEGORIES = load_label_categories(LABEL_FILE_PATH) 16 | 17 | # Let's make sure that there are 80 classes, as expected for the COCO data set: 18 | CATEGORY_NUM = len(ALL_CATEGORIES) 19 | assert CATEGORY_NUM == 80 20 | 21 | 22 | class PreprocessYOLO(object): 23 | """A simple class for loading images with PIL and reshaping them to the specified 24 | input resolution for YOLOv3-608. 25 | """ 26 | 27 | def __init__(self, yolo_input_resolution): 28 | """Initialize with the input resolution for YOLOv3, which will stay fixed in this sample. 29 | 30 | Keyword arguments: 31 | yolo_input_resolution -- two-dimensional tuple with the target network's (spatial) 32 | input resolution in HW order 33 | """ 34 | self.yolo_input_resolution = yolo_input_resolution 35 | 36 | def process(self, input_image_path): 37 | """Load an image from the specified input path, 38 | and return it together with a pre-processed version required for feeding it into a 39 | YOLOv3 network. 40 | 41 | Keyword arguments: 42 | input_image_path -- string path of the image to be loaded 43 | """ 44 | image_raw, image_resized = self._load_and_resize(input_image_path) 45 | image_preprocessed = self._shuffle_and_normalize(image_resized) 46 | return image_raw, image_preprocessed 47 | 48 | def _load_and_resize(self, input_image_path): 49 | """Load an image from the specified path and resize it to the input resolution. 50 | Return the input image before resizing as a PIL Image (required for visualization), 51 | and the resized image as a NumPy float array. 
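 Note that the image is simply stretched to the network resolution with bicubic resampling; the aspect ratio of the original image is not preserved.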
52 | 53 | Keyword arguments: 54 | input_image_path -- string path of the image to be loaded 55 | """ 56 | 57 | image_raw = Image.open(input_image_path) 58 | # Expecting yolo_input_resolution in (height, width) format, adjusting to the PIL 59 | # convention (width, height): 60 | new_resolution = ( 61 | self.yolo_input_resolution[1], 62 | self.yolo_input_resolution[0]) 63 | print(new_resolution) 64 | image_resized = image_raw.resize( 65 | new_resolution, resample=Image.BICUBIC) 66 | image_resized = np.array(image_resized, dtype=np.float32, order='C') 67 | return image_raw, image_resized 68 | 69 | def _shuffle_and_normalize(self, image): 70 | """Normalize a NumPy array representing an image to the range [0, 1], and 71 | convert it from HWC format ("channels last") to NCHW format ("channels first" 72 | with leading batch dimension). 73 | 74 | Keyword arguments: 75 | image -- image as three-dimensional NumPy float array, in HWC format 76 | """ 77 | image /= 255.0 78 | # HWC to CHW format: 79 | image = np.transpose(image, [2, 0, 1]) 80 | # CHW to NCHW format 81 | image = np.expand_dims(image, axis=0) 82 | # Convert the image to row-major order, also known as "C order": 83 | image = np.array(image, dtype=np.float32, order='C') 84 | return image 85 | 86 | 87 | class PostprocessYOLO(object): 88 | """Class for post-processing the output tensors from YOLOv3 (two for the tiny model, three for YOLOv3-608).""" 89 | 90 | def __init__(self, 91 | yolo_masks, 92 | yolo_anchors, 93 | obj_threshold, 94 | nms_threshold, 95 | yolo_input_resolution): 96 | """Initialize with all values that will be kept when processing several frames. 97 | Assuming 3 outputs of the network in the case of (large) YOLOv3. 98 | 99 | Keyword arguments: 100 | yolo_masks -- a list of three-dimensional tuples for the YOLO masks (one per output) 101 | yolo_anchors -- a list of two-dimensional tuples for the YOLO anchors (9 for YOLOv3-608, 6 for the tiny model) 102 | object_threshold -- threshold for object coverage, float value between 0 and 1 103 | nms_threshold -- threshold for non-max suppression algorithm, 104 | float value between 0 and 1 105 | input_resolution_yolo -- two-dimensional tuple with the target network's (spatial) 106 | input resolution in HW order 107 | """ 108 | self.masks = yolo_masks 109 | self.anchors = yolo_anchors 110 | self.object_threshold = obj_threshold 111 | self.nms_threshold = nms_threshold 112 | self.input_resolution_yolo = yolo_input_resolution 113 | 114 | def process(self, outputs, resolution_raw, i): 115 | """Take the YOLOv3 outputs generated from a TensorRT forward pass, post-process them 116 | and return a list of bounding boxes for detected objects together with their category 117 | and their confidences in separate lists. 118 | 119 | Keyword arguments: 120 | outputs -- outputs from a TensorRT engine in NCHW format 121 | resolution_raw -- the original spatial resolution from the input PIL image in WH order i -- index of this image within the inference batch 122 | """ 123 | outputs_reshaped = list() 124 | for output in outputs: 125 | outputs_reshaped.append(self._reshape_output(output, i)) 126 | 127 | boxes, categories, confidences = self._process_yolo_output( 128 | outputs_reshaped, resolution_raw) 129 | 130 | return boxes, categories, confidences 131 | 132 | def _reshape_output(self, output, i): 133 | """Reshape a TensorRT output from NCHW to NHWC format (with expected C=255), 134 | and then return it in (height,width,3,85) dimensionality after further reshaping.
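 With the 80 COCO categories this corresponds to C = 3 * (4 + 1 + 80) = 255 channels per output tensor.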
135 | 136 | Keyword argument: 137 | output -- an output from a TensorRT engine after inference 138 | """ 139 | output = np.transpose(output, [0, 2, 3, 1]) 140 | _, height, width, _ = output.shape 141 | dim1, dim2 = height, width 142 | dim3 = 3 143 | # There are CATEGORY_NUM=80 object categories: 144 | dim4 = (4 + 1 + CATEGORY_NUM) 145 | return np.reshape(output[i], (dim1, dim2, dim3, dim4)) 146 | 147 | def _process_yolo_output(self, outputs_reshaped, resolution_raw): 148 | """Take in a list of reshaped YOLO outputs in (height,width,3,85) shape and 149 | return a list of bounding boxes for detected objects together with their category and their 150 | confidences in separate lists. 151 | 152 | Keyword arguments: 153 | outputs_reshaped -- list of reshaped YOLO outputs (one per YOLO head) as NumPy arrays 154 | with shape (height,width,3,85) 155 | resolution_raw -- the original spatial resolution from the input PIL image in WH order 156 | """ 157 | 158 | # E.g. in YOLOv3-608, there are three output tensors, which we associate with their 159 | # respective masks. Then we iterate through all output-mask pairs and generate candidates 160 | # for bounding boxes, their corresponding category predictions and their confidences: 161 | boxes, categories, confidences = list(), list(), list() 162 | for output, mask in zip(outputs_reshaped, self.masks): 163 | box, category, confidence = self._process_feats(output, mask) 164 | box, category, confidence = self._filter_boxes(box, category, confidence) 165 | boxes.append(box) 166 | categories.append(category) 167 | confidences.append(confidence) 168 | 169 | boxes = np.concatenate(boxes) 170 | categories = np.concatenate(categories) 171 | confidences = np.concatenate(confidences) 172 | 173 | # Scale boxes back to original image shape: 174 | width, height = resolution_raw 175 | image_dims = [width, height, width, height] 176 | boxes = boxes * image_dims 177 | 178 | # Using the candidates from the previous (loop) step, we apply the non-max suppression 179 | # algorithm that clusters adjacent bounding boxes to a single bounding box: 180 | nms_boxes, nms_categories, nscores = list(), list(), list() 181 | for category in set(categories): 182 | idxs = np.where(categories == category) 183 | box = boxes[idxs] 184 | category = categories[idxs] 185 | confidence = confidences[idxs] 186 | 187 | keep = self._nms_boxes(box, confidence) 188 | 189 | nms_boxes.append(box[keep]) 190 | nms_categories.append(category[keep]) 191 | nscores.append(confidence[keep]) 192 | 193 | if not nms_categories and not nscores: 194 | return None, None, None 195 | 196 | boxes = np.concatenate(nms_boxes) 197 | categories = np.concatenate(nms_categories) 198 | confidences = np.concatenate(nscores) 199 | 200 | return boxes, categories, confidences 201 | 202 | def _process_feats(self, output_reshaped, mask): 203 | """Take in a reshaped YOLO output in height,width,3,85 format together with its 204 | corresponding YOLO mask and return the detected bounding boxes, the confidence, 205 | and the class probability in each cell/pixel.
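 Box centers are obtained by applying a sigmoid to the raw x,y values and adding the grid-cell offsets; widths and heights are exponentials of the raw values scaled by the anchor sizes.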
206 | 207 | Keyword arguments: 208 | output_reshaped -- reshaped YOLO output as NumPy arrays with shape (height,width,3,85) 209 | mask -- 2-dimensional tuple with mask specification for this output 210 | """ 211 | 212 | # Two in-line functions required for calculating the bounding box 213 | # descriptors: 214 | def sigmoid(value): 215 | """Return the sigmoid of the input.""" 216 | return 1.0 / (1.0 + math.exp(-value)) 217 | 218 | def exponential(value): 219 | """Return the exponential of the input.""" 220 | return math.exp(value) 221 | 222 | # Vectorized calculation of above two functions: 223 | sigmoid_v = np.vectorize(sigmoid) 224 | exponential_v = np.vectorize(exponential) 225 | 226 | grid_h, grid_w, _, _ = output_reshaped.shape 227 | 228 | anchors = [self.anchors[i] for i in mask] 229 | 230 | # Reshape to N, height, width, num_anchors, box_params: 231 | anchors_tensor = np.reshape(anchors, [1, 1, len(anchors), 2]) 232 | box_xy = sigmoid_v(output_reshaped[..., :2]) 233 | box_wh = exponential_v(output_reshaped[..., 2:4]) * anchors_tensor 234 | box_confidence = sigmoid_v(output_reshaped[..., 4]) 235 | 236 | box_confidence = np.expand_dims(box_confidence, axis=-1) 237 | box_class_probs = sigmoid_v(output_reshaped[..., 5:]) 238 | 239 | col = np.tile(np.arange(0, grid_w), grid_w).reshape(-1, grid_w) 240 | row = np.tile(np.arange(0, grid_h).reshape(-1, 1), grid_h) 241 | 242 | col = col.reshape(grid_h, grid_w, 1, 1).repeat(3, axis=-2) 243 | row = row.reshape(grid_h, grid_w, 1, 1).repeat(3, axis=-2) 244 | grid = np.concatenate((col, row), axis=-1) 245 | 246 | box_xy += grid 247 | box_xy /= (grid_w, grid_h) 248 | box_wh /= self.input_resolution_yolo 249 | box_xy -= (box_wh / 2.) 250 | boxes = np.concatenate((box_xy, box_wh), axis=-1) 251 | 252 | # boxes: centroids, box_confidence: confidence level, box_class_probs: 253 | # class confidence 254 | return boxes, box_confidence, box_class_probs 255 | 256 | def _filter_boxes(self, boxes, box_confidences, box_class_probs): 257 | """Take in the unfiltered bounding box descriptors and discard each cell 258 | whose score is lower than the object threshold set during class initialization. 259 | 260 | Keyword arguments: 261 | boxes -- bounding box coordinates with shape (height,width,3,4); 4 for 262 | x,y,height,width coordinates of the boxes 263 | box_confidences -- bounding box confidences with shape (height,width,3,1); 1 for as 264 | confidence scalar per element 265 | box_class_probs -- class probabilities with shape (height,width,3,CATEGORY_NUM) 266 | 267 | """ 268 | box_scores = box_confidences * box_class_probs 269 | box_classes = np.argmax(box_scores, axis=-1) 270 | box_class_scores = np.max(box_scores, axis=-1) 271 | pos = np.where(box_class_scores >= self.object_threshold) 272 | 273 | boxes = boxes[pos] 274 | classes = box_classes[pos] 275 | scores = box_class_scores[pos] 276 | 277 | return boxes, classes, scores 278 | 279 | def _nms_boxes(self, boxes, box_confidences): 280 | """Apply the Non-Maximum Suppression (NMS) algorithm on the bounding boxes with their 281 | confidence scores and return an array with the indexes of the bounding boxes we want to 282 | keep (and display later). 
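 Candidates are visited in descending order of confidence, and each kept box suppresses every remaining box whose IoU with it exceeds the NMS threshold.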
283 | 284 | Keyword arguments: 285 | boxes -- a NumPy array containing N bounding-box coordinates that survived filtering, 286 | with shape (N,4); 4 for x,y,height,width coordinates of the boxes 287 | box_confidences -- a Numpy array containing the corresponding confidences with shape N 288 | """ 289 | x_coord = boxes[:, 0] 290 | y_coord = boxes[:, 1] 291 | width = boxes[:, 2] 292 | height = boxes[:, 3] 293 | 294 | areas = width * height 295 | ordered = box_confidences.argsort()[::-1] 296 | 297 | keep = list() 298 | while ordered.size > 0: 299 | # Index of the current element: 300 | i = ordered[0] 301 | keep.append(i) 302 | xx1 = np.maximum(x_coord[i], x_coord[ordered[1:]]) 303 | yy1 = np.maximum(y_coord[i], y_coord[ordered[1:]]) 304 | xx2 = np.minimum(x_coord[i] + width[i], x_coord[ordered[1:]] + width[ordered[1:]]) 305 | yy2 = np.minimum(y_coord[i] + height[i], y_coord[ordered[1:]] + height[ordered[1:]]) 306 | 307 | width1 = np.maximum(0.0, xx2 - xx1 + 1) 308 | height1 = np.maximum(0.0, yy2 - yy1 + 1) 309 | intersection = width1 * height1 310 | union = (areas[i] + areas[ordered[1:]] - intersection) 311 | 312 | # Compute the Intersection over Union (IoU) score: 313 | iou = intersection / union 314 | 315 | # The goal of the NMS algorithm is to reduce the number of adjacent bounding-box 316 | # candidates to a minimum. In this step, we keep only those elements whose overlap 317 | # with the current bounding box is lower than the threshold: 318 | indexes = np.where(iou <= self.nms_threshold)[0] 319 | ordered = ordered[indexes + 1] 320 | 321 | keep = np.array(keep) 322 | return keep 323 | -------------------------------------------------------------------------------- /yolov3_to_onnx.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | # 3 | # Copyright 1993-2019 NVIDIA Corporation. All rights reserved. 4 | # 5 | # NOTICE TO LICENSEE: 6 | # 7 | # This source code and/or documentation ("Licensed Deliverables") are 8 | # subject to NVIDIA intellectual property rights under U.S. and 9 | # international Copyright laws. 10 | # 11 | # These Licensed Deliverables contained herein is PROPRIETARY and 12 | # CONFIDENTIAL to NVIDIA and is being provided under the terms and 13 | # conditions of a form of NVIDIA software license agreement by and 14 | # between NVIDIA and Licensee ("License Agreement") or electronically 15 | # accepted by Licensee. Notwithstanding any terms or conditions to 16 | # the contrary in the License Agreement, reproduction or disclosure 17 | # of the Licensed Deliverables to any third party without the express 18 | # written consent of NVIDIA is prohibited. 19 | # 20 | # NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE 21 | # LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE 22 | # SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS 23 | # PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND. 24 | # NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED 25 | # DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, 26 | # NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. 
27 | # NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE 28 | # LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY 29 | # SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY 30 | # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, 31 | # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS 32 | # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 33 | # OF THESE LICENSED DELIVERABLES. 34 | # 35 | # U.S. Government End Users. These Licensed Deliverables are a 36 | # "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT 37 | # 1995), consisting of "commercial computer software" and "commercial 38 | # computer software documentation" as such terms are used in 48 39 | # C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government 40 | # only as a commercial end item. Consistent with 48 C.F.R.12.212 and 41 | # 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all 42 | # U.S. Government End Users acquire the Licensed Deliverables with 43 | # only those rights set forth herein. 44 | # 45 | # Any use of the Licensed Deliverables in individual and commercial 46 | # software must include, in the user documentation and internal 47 | # comments to the code, the above Disclaimer and U.S. Government End 48 | # Users Notice. 49 | # 50 | 51 | from __future__ import print_function 52 | from collections import OrderedDict 53 | import hashlib 54 | import os.path 55 | 56 | import wget 57 | 58 | import onnx 59 | from onnx import helper 60 | from onnx import TensorProto 61 | import numpy as np 62 | 63 | import sys 64 | 65 | class DarkNetParser(object): 66 | """Definition of a parser for DarkNet-based YOLOv3-608 (only tested for this topology).""" 67 | 68 | def __init__(self, supported_layers): 69 | """Initializes a DarkNetParser object. 70 | 71 | Keyword argument: 72 | supported_layers -- a string list of supported layers in DarkNet naming convention, 73 | parameters are only added to the class dictionary if a parsed layer is included. 74 | """ 75 | 76 | # A list of YOLOv3 layers containing dictionaries with all layer 77 | # parameters: 78 | self.layer_configs = OrderedDict() 79 | self.supported_layers = supported_layers 80 | self.layer_counter = 0 81 | 82 | def parse_cfg_file(self, cfg_file_path): 83 | """Takes the yolov3.cfg file and parses it layer by layer, 84 | appending each layer's parameters as a dictionary to layer_configs. 85 | 86 | Keyword argument: 87 | cfg_file_path -- path to the yolov3.cfg file as string 88 | """ 89 | with open(cfg_file_path, 'rb') as cfg_file: 90 | remainder = cfg_file.read() 91 | while remainder is not None: 92 | layer_dict, layer_name, remainder = self._next_layer(remainder) 93 | if layer_dict is not None: 94 | self.layer_configs[layer_name] = layer_dict 95 | return self.layer_configs 96 | 97 | def _next_layer(self, remainder): 98 | """Takes in a string and segments it by looking for DarkNet delimiters. 99 | Returns the layer parameters and the remaining string after the last delimiter. 100 | Example for the first Conv layer in yolo.cfg ... 101 | 102 | [convolutional] 103 | batch_normalize=1 104 | filters=32 105 | size=3 106 | stride=1 107 | pad=1 108 | activation=leaky 109 | 110 | ... becomes the following layer_dict return value: 111 | {'activation': 'leaky', 'stride': 1, 'pad': 1, 'filters': 32, 112 | 'batch_normalize': 1, 'type': 'convolutional', 'size': 3}. 
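 Blocks are delimited by the bracketed section header and the blank line that terminates the parameter list, which is why the cfg file must end with a blank line (see the README note).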
113 | 114 | '001_convolutional' is returned as layer_name, and all lines that follow in yolo.cfg 115 | are returned as the next remainder. 116 | 117 | Keyword argument: 118 | remainder -- a string with all raw text after the previously parsed layer 119 | """ 120 | remainder = remainder.split('[', 1) 121 | if len(remainder) == 2: 122 | remainder = remainder[1] 123 | else: 124 | return None, None, None 125 | remainder = remainder.split(']', 1) 126 | if len(remainder) == 2: 127 | layer_type, remainder = remainder 128 | else: 129 | return None, None, None 130 | if remainder.replace(' ', '')[0] == '#': 131 | remainder = remainder.split('\n', 1)[1] 132 | 133 | layer_param_block, remainder = remainder.split('\n\n', 1) 134 | layer_param_lines = layer_param_block.split('\n')[1:] 135 | layer_name = str(self.layer_counter).zfill(3) + '_' + layer_type 136 | layer_dict = dict(type=layer_type) 137 | if layer_type in self.supported_layers: 138 | for param_line in layer_param_lines: 139 | if param_line[0] == '#': 140 | continue 141 | param_type, param_value = self._parse_params(param_line) 142 | layer_dict[param_type] = param_value 143 | self.layer_counter += 1 144 | return layer_dict, layer_name, remainder 145 | 146 | def _parse_params(self, param_line): 147 | """Identifies the parameters contained in one of the cfg file and returns 148 | them in the required format for each parameter type, e.g. as a list, an int or a float. 149 | 150 | Keyword argument: 151 | param_line -- one parsed line within a layer block 152 | """ 153 | param_line = param_line.replace(' ', '') 154 | param_type, param_value_raw = param_line.split('=') 155 | param_value = None 156 | if param_type == 'layers': 157 | layer_indexes = list() 158 | for index in param_value_raw.split(','): 159 | layer_indexes.append(int(index)) 160 | param_value = layer_indexes 161 | elif isinstance(param_value_raw, str) and not param_value_raw.isalpha(): 162 | condition_param_value_positive = param_value_raw.isdigit() 163 | condition_param_value_negative = param_value_raw[0] == '-' and \ 164 | param_value_raw[1:].isdigit() 165 | if condition_param_value_positive or condition_param_value_negative: 166 | param_value = int(param_value_raw) 167 | else: 168 | param_value = float(param_value_raw) 169 | else: 170 | param_value = str(param_value_raw) 171 | return param_type, param_value 172 | 173 | 174 | class MajorNodeSpecs(object): 175 | """Helper class used to store the names of ONNX output names, 176 | corresponding to the output of a DarkNet layer and its output channels. 177 | Some DarkNet layers are not created and there is no corresponding ONNX node, 178 | but we still need to track them in order to set up skip connections. 179 | """ 180 | 181 | def __init__(self, name, channels): 182 | """ Initialize a MajorNodeSpecs object. 183 | 184 | Keyword arguments: 185 | name -- name of the ONNX node 186 | channels -- number of output channels of this node 187 | """ 188 | self.name = name 189 | self.channels = channels 190 | self.created_onnx_node = False 191 | if name is not None and isinstance(channels, int) and channels > 0: 192 | self.created_onnx_node = True 193 | 194 | 195 | class ConvParams(object): 196 | """Helper class to store the hyper parameters of a Conv layer, 197 | including its prefix name in the ONNX graph and the expected dimensions 198 | of weights for convolution, bias, and batch normalization. 199 | 200 | Additionally acts as a wrapper for generating safe names for all 201 | weights, checking on feasible combinations. 
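 For example, 'bn'-category names may only be generated when the layer actually uses batch normalization, and a 'conv' bias only when it does not.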
202 | """ 203 | 204 | def __init__(self, node_name, batch_normalize, conv_weight_dims): 205 | """Constructor based on the base node name (e.g. 101_convolutional), the batch 206 | normalization setting, and the convolutional weights shape. 207 | 208 | Keyword arguments: 209 | node_name -- base name of this YOLO convolutional layer 210 | batch_normalize -- bool value if batch normalization is used 211 | conv_weight_dims -- the dimensions of this layer's convolutional weights 212 | """ 213 | self.node_name = node_name 214 | self.batch_normalize = batch_normalize 215 | assert len(conv_weight_dims) == 4 216 | self.conv_weight_dims = conv_weight_dims 217 | 218 | def generate_param_name(self, param_category, suffix): 219 | """Generates a name based on two string inputs, 220 | and checks if the combination is valid.""" 221 | assert suffix 222 | assert param_category in ['bn', 'conv'] 223 | assert(suffix in ['scale', 'mean', 'var', 'weights', 'bias']) 224 | if param_category == 'bn': 225 | assert self.batch_normalize 226 | assert suffix in ['scale', 'bias', 'mean', 'var'] 227 | elif param_category == 'conv': 228 | assert suffix in ['weights', 'bias'] 229 | if suffix == 'bias': 230 | assert not self.batch_normalize 231 | param_name = self.node_name + '_' + param_category + '_' + suffix 232 | return param_name 233 | 234 | class UpsampleParams(object): 235 | #Helper class to store the scale parameter for an Upsample node. 236 | 237 | def __init__(self, node_name, value): 238 | """Constructor based on the base node name (e.g. 86_Upsample), 239 | and the value of the scale input tensor. 240 | 241 | Keyword arguments: 242 | node_name -- base name of this YOLO Upsample layer 243 | value -- the value of the scale input to the Upsample layer as a numpy array 244 | """ 245 | self.node_name = node_name 246 | self.value = value 247 | 248 | def generate_param_name(self): 249 | """Generates the scale parameter name for the Upsample node.""" 250 | param_name = self.node_name + '_' + "scale" 251 | return param_name 252 | 253 | class WeightLoader(object): 254 | """Helper class used for loading the serialized weights of a binary file stream 255 | and returning the initializers and the input tensors required for populating 256 | the ONNX graph with weights. 257 | """ 258 | 259 | def __init__(self, weights_file_path): 260 | """Initialized with a path to the YOLOv3 .weights file. 261 | 262 | Keyword argument: 263 | weights_file_path -- path to the weights file. 264 | """ 265 | self.weights_file = self._open_weights_file(weights_file_path) 266 | 267 | def load_upsample_scales(self, upsample_params): 268 | """Returns the initializers with the value of the scale input 269 | tensor given by upsample_params. 270 | 271 | Keyword argument: 272 | upsample_params -- a UpsampleParams object 273 | """ 274 | initializer = list() 275 | inputs = list() 276 | name = upsample_params.generate_param_name() 277 | shape = upsample_params.value.shape 278 | data = upsample_params.value 279 | scale_init = helper.make_tensor( 280 | name, TensorProto.FLOAT, shape, data) 281 | scale_input = helper.make_tensor_value_info( 282 | name, TensorProto.FLOAT, shape) 283 | initializer.append(scale_init) 284 | inputs.append(scale_input) 285 | return initializer, inputs 286 | 287 | 288 | def load_conv_weights(self, conv_params): 289 | """Returns the initializers with weights from the weights file and 290 | the input tensors of a convolutional layer for all corresponding ONNX nodes. 
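 When batch normalization is used, the convolution itself carries no bias term; the 'bn' bias (beta) is read first, followed by scale, mean and variance, matching the DarkNet serialization order.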
291 | 292 | Keyword argument: 293 | conv_params -- a ConvParams object 294 | """ 295 | initializer = list() 296 | inputs = list() 297 | if conv_params.batch_normalize: 298 | bias_init, bias_input = self._create_param_tensors( 299 | conv_params, 'bn', 'bias') 300 | bn_scale_init, bn_scale_input = self._create_param_tensors( 301 | conv_params, 'bn', 'scale') 302 | bn_mean_init, bn_mean_input = self._create_param_tensors( 303 | conv_params, 'bn', 'mean') 304 | bn_var_init, bn_var_input = self._create_param_tensors( 305 | conv_params, 'bn', 'var') 306 | initializer.extend( 307 | [bn_scale_init, bias_init, bn_mean_init, bn_var_init]) 308 | inputs.extend([bn_scale_input, bias_input, 309 | bn_mean_input, bn_var_input]) 310 | else: 311 | bias_init, bias_input = self._create_param_tensors( 312 | conv_params, 'conv', 'bias') 313 | initializer.append(bias_init) 314 | inputs.append(bias_input) 315 | conv_init, conv_input = self._create_param_tensors( 316 | conv_params, 'conv', 'weights') 317 | initializer.append(conv_init) 318 | inputs.append(conv_input) 319 | return initializer, inputs 320 | 321 | def _open_weights_file(self, weights_file_path): 322 | """Opens a YOLOv3 DarkNet file stream and skips the header. 323 | 324 | Keyword argument: 325 | weights_file_path -- path to the weights file. 326 | """ 327 | weights_file = open(weights_file_path, 'rb') 328 | length_header = 5 329 | np.ndarray( 330 | shape=(length_header, ), dtype='int32', buffer=weights_file.read( 331 | length_header * 4)) 332 | return weights_file 333 | 334 | def _create_param_tensors(self, conv_params, param_category, suffix): 335 | """Creates the initializers with weights from the weights file together with 336 | the input tensors. 337 | 338 | Keyword arguments: 339 | conv_params -- a ConvParams object 340 | param_category -- the category of parameters to be created ('bn' or 'conv') 341 | suffix -- a string determining the sub-type of above param_category (e.g., 342 | 'weights' or 'bias') 343 | """ 344 | param_name, param_data, param_data_shape = self._load_one_param_type( 345 | conv_params, param_category, suffix) 346 | 347 | initializer_tensor = helper.make_tensor( 348 | param_name, TensorProto.FLOAT, param_data_shape, param_data) 349 | input_tensor = helper.make_tensor_value_info( 350 | param_name, TensorProto.FLOAT, param_data_shape) 351 | return initializer_tensor, input_tensor 352 | 353 | def _load_one_param_type(self, conv_params, param_category, suffix): 354 | """Deserializes the weights from a file stream in the DarkNet order. 
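 The stream is read sequentially as raw float32 values with no per-tensor headers, so the file offset implicitly tracks the layer order of the cfg file.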
355 | 356 | Keyword arguments: 357 | conv_params -- a ConvParams object 358 | param_category -- the category of parameters to be created ('bn' or 'conv') 359 | suffix -- a string determining the sub-type of above param_category (e.g., 360 | 'weights' or 'bias') 361 | """ 362 | param_name = conv_params.generate_param_name(param_category, suffix) 363 | channels_out, channels_in, filter_h, filter_w = conv_params.conv_weight_dims 364 | if param_category == 'bn': 365 | param_shape = [channels_out] 366 | elif param_category == 'conv': 367 | if suffix == 'weights': 368 | param_shape = [channels_out, channels_in, filter_h, filter_w] 369 | elif suffix == 'bias': 370 | param_shape = [channels_out] 371 | param_size = np.product(np.array(param_shape)) 372 | param_data = np.ndarray( 373 | shape=param_shape, 374 | dtype='float32', 375 | buffer=self.weights_file.read(param_size * 4)) 376 | param_data = param_data.flatten().astype(float) 377 | return param_name, param_data, param_shape 378 | 379 | 380 | class GraphBuilderONNX(object): 381 | """Class for creating an ONNX graph from a previously generated list of layer dictionaries.""" 382 | 383 | def __init__(self, output_tensors): 384 | """Initialize with all DarkNet default parameters used creating YOLOv3, 385 | and specify the output tensors as an OrderedDict for their output dimensions 386 | with their names as keys. 387 | 388 | Keyword argument: 389 | output_tensors -- the output tensors as an OrderedDict containing the keys' 390 | output dimensions 391 | """ 392 | self.output_tensors = output_tensors 393 | self._nodes = list() 394 | self.graph_def = None 395 | self.input_tensor = None 396 | self.epsilon_bn = 1e-5 397 | self.momentum_bn = 0.99 398 | self.alpha_lrelu = 0.1 399 | self.param_dict = OrderedDict() 400 | self.major_node_specs = list() 401 | self.batch_size = 1 402 | 403 | def build_onnx_graph( 404 | self, 405 | layer_configs, 406 | weights_file_path, 407 | verbose=True): 408 | """Iterate over all layer configs (parsed from the DarkNet representation 409 | of YOLOv3-608), create an ONNX graph, populate it with weights from the weights 410 | file and return the graph definition. 411 | 412 | Keyword arguments: 413 | layer_configs -- an OrderedDict object with all parsed layers' configurations 414 | weights_file_path -- location of the weights file 415 | verbose -- toggles if the graph is printed after creation (default: True) 416 | """ 417 | for layer_name in layer_configs.keys(): 418 | layer_dict = layer_configs[layer_name] 419 | major_node_specs = self._make_onnx_node(layer_name, layer_dict) 420 | if major_node_specs.name is not None: 421 | self.major_node_specs.append(major_node_specs) 422 | outputs = list() 423 | for tensor_name in self.output_tensors.keys(): 424 | output_dims = [self.batch_size, ] + \ 425 | self.output_tensors[tensor_name] 426 | output_tensor = helper.make_tensor_value_info( 427 | tensor_name, TensorProto.FLOAT, output_dims) 428 | outputs.append(output_tensor) 429 | inputs = [self.input_tensor] 430 | weight_loader = WeightLoader(weights_file_path) 431 | initializer = list() 432 | # If a layer has parameters, add them to the initializer and input lists. 
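 In this graph only convolutional and upsample layers carry parameters.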
433 | for layer_name in self.param_dict.keys(): 434 | _, layer_type = layer_name.split('_', 1) 435 | params = self.param_dict[layer_name] 436 | if layer_type == 'convolutional': 437 | initializer_layer, inputs_layer = weight_loader.load_conv_weights( 438 | params) 439 | initializer.extend(initializer_layer) 440 | inputs.extend(inputs_layer) 441 | elif layer_type == "upsample": 442 | initializer_layer, inputs_layer = weight_loader.load_upsample_scales( 443 | params) 444 | initializer.extend(initializer_layer) 445 | inputs.extend(inputs_layer) 446 | del weight_loader 447 | self.graph_def = helper.make_graph( 448 | nodes=self._nodes, 449 | name='YOLOv3-608', 450 | inputs=inputs, 451 | outputs=outputs, 452 | initializer=initializer 453 | ) 454 | if verbose: 455 | print(helper.printable_graph(self.graph_def)) 456 | model_def = helper.make_model(self.graph_def, 457 | producer_name='NVIDIA TensorRT sample') 458 | return model_def 459 | 460 | def _make_onnx_node(self, layer_name, layer_dict): 461 | """Take in a layer parameter dictionary, choose the correct function for 462 | creating an ONNX node and store the information important to graph creation 463 | as a MajorNodeSpec object. 464 | 465 | Keyword arguments: 466 | layer_name -- the layer's name (also the corresponding key in layer_configs) 467 | layer_dict -- a layer parameter dictionary (one element of layer_configs) 468 | """ 469 | layer_type = layer_dict['type'] 470 | if self.input_tensor is None: 471 | if layer_type == 'net': 472 | major_node_output_name, major_node_output_channels = self._make_input_tensor( 473 | layer_name, layer_dict) 474 | major_node_specs = MajorNodeSpecs(major_node_output_name, 475 | major_node_output_channels) 476 | else: 477 | raise ValueError('The first node has to be of type "net".') 478 | else: 479 | node_creators = dict() 480 | node_creators['convolutional'] = self._make_conv_node 481 | node_creators['shortcut'] = self._make_shortcut_node 482 | node_creators['route'] = self._make_route_node 483 | node_creators['upsample'] = self._make_upsample_node 484 | node_creators['maxpool'] = self._make_maxpool_node 485 | 486 | if layer_type in node_creators.keys(): 487 | major_node_output_name, major_node_output_channels = \ 488 | node_creators[layer_type](layer_name, layer_dict) 489 | major_node_specs = MajorNodeSpecs(major_node_output_name, 490 | major_node_output_channels) 491 | else: 492 | print( 493 | 'Layer of type %s not supported, skipping ONNX node generation.' % 494 | layer_type) 495 | major_node_specs = MajorNodeSpecs(layer_name, 496 | None) 497 | return major_node_specs 498 | 499 | def _make_input_tensor(self, layer_name, layer_dict): 500 | """Create an ONNX input tensor from a 'net' layer and store the batch size. 501 | 502 | Keyword arguments: 503 | layer_name -- the layer's name (also the corresponding key in layer_configs) 504 | layer_dict -- a layer parameter dictionary (one element of layer_configs) 505 | """ 506 | batch_size = layer_dict['batch'] 507 | channels = layer_dict['channels'] 508 | height = layer_dict['height'] 509 | width = layer_dict['width'] 510 | self.batch_size = batch_size 511 | input_tensor = helper.make_tensor_value_info( 512 | str(layer_name), TensorProto.FLOAT, [ 513 | batch_size, channels, height, width]) 514 | self.input_tensor = input_tensor 515 | return layer_name, channels 516 | 517 | def _get_previous_node_specs(self, target_index=-1): 518 | """Get a previously generated ONNX node (skip those that were not generated). 
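 It walks backwards through major_node_specs from target_index until it finds a spec whose created_onnx_node flag is set.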
517 |     def _get_previous_node_specs(self, target_index=-1):
518 |         """Get a previously generated ONNX node (skip those that were not generated).
519 |         A target index can be passed to jump to a specific node instead.
520 | 
521 |         Keyword arguments:
522 |         target_index -- optional index for jumping to a specific node (default: -1
523 |             for the previous element)
524 |         """
525 |         previous_node = None
526 |         for node in self.major_node_specs[target_index::-1]:
527 |             if node.created_onnx_node:
528 |                 previous_node = node
529 |                 break
530 |         assert previous_node is not None
531 |         return previous_node
532 | 
533 |     def _make_conv_node(self, layer_name, layer_dict):
534 |         """Create an ONNX Conv node with optional batch normalization and
535 |         activation nodes.
536 | 
537 |         Keyword arguments:
538 |         layer_name -- the layer's name (also the corresponding key in layer_configs)
539 |         layer_dict -- a layer parameter dictionary (one element of layer_configs)
540 |         """
541 |         previous_node_specs = self._get_previous_node_specs()
542 |         inputs = [previous_node_specs.name]
543 |         previous_channels = previous_node_specs.channels
544 |         kernel_size = layer_dict['size']
545 |         stride = layer_dict['stride']
546 |         filters = layer_dict['filters']
547 |         batch_normalize = False
548 |         if 'batch_normalize' in layer_dict.keys(
549 |         ) and layer_dict['batch_normalize'] == 1:
550 |             batch_normalize = True
551 | 
552 |         kernel_shape = [kernel_size, kernel_size]
553 |         weights_shape = [filters, previous_channels] + kernel_shape
554 |         conv_params = ConvParams(layer_name, batch_normalize, weights_shape)
555 | 
556 |         strides = [stride, stride]
557 |         dilations = [1, 1]
558 |         weights_name = conv_params.generate_param_name('conv', 'weights')
559 |         inputs.append(weights_name)
560 |         if not batch_normalize:
561 |             bias_name = conv_params.generate_param_name('conv', 'bias')
562 |             inputs.append(bias_name)
563 | 
564 |         conv_node = helper.make_node(
565 |             'Conv',
566 |             inputs=inputs,
567 |             outputs=[layer_name],
568 |             kernel_shape=kernel_shape,
569 |             strides=strides,
570 |             auto_pad='SAME_LOWER',
571 |             dilations=dilations,
572 |             name=layer_name
573 |         )
574 |         self._nodes.append(conv_node)
575 |         inputs = [layer_name]
576 |         layer_name_output = layer_name
577 | 
578 |         if batch_normalize:
579 |             layer_name_bn = layer_name + '_bn'
580 |             bn_param_suffixes = ['scale', 'bias', 'mean', 'var']
581 |             for suffix in bn_param_suffixes:
582 |                 bn_param_name = conv_params.generate_param_name('bn', suffix)
583 |                 inputs.append(bn_param_name)
584 |             batchnorm_node = helper.make_node(
585 |                 'BatchNormalization',
586 |                 inputs=inputs,
587 |                 outputs=[layer_name_bn],
588 |                 epsilon=self.epsilon_bn,
589 |                 momentum=self.momentum_bn,
590 |                 name=layer_name_bn
591 |             )
592 |             self._nodes.append(batchnorm_node)
593 |             inputs = [layer_name_bn]
594 |             layer_name_output = layer_name_bn
595 | 
596 |         if layer_dict['activation'] == 'leaky':
597 |             layer_name_lrelu = layer_name + '_lrelu'
598 | 
599 |             lrelu_node = helper.make_node(
600 |                 'LeakyRelu',
601 |                 inputs=inputs,
602 |                 outputs=[layer_name_lrelu],
603 |                 name=layer_name_lrelu,
604 |                 alpha=self.alpha_lrelu
605 |             )
606 |             self._nodes.append(lrelu_node)
607 |             inputs = [layer_name_lrelu]
608 |             layer_name_output = layer_name_lrelu
609 |         elif layer_dict['activation'] == 'linear':
610 |             pass
611 |         else:
612 |             print('Activation not supported.')
613 | 
614 |         self.param_dict[layer_name] = conv_params
615 |         return layer_name_output, filters
616 | 
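    # Illustrative note (not part of the original sample): a DarkNet layer
    # such as '001_convolutional' with batch_normalize=1 and activation=leaky
    # is expanded by _make_conv_node into three chained ONNX nodes:
    #
    #     Conv ('001_convolutional')
    #       -> BatchNormalization ('001_convolutional_bn')
    #         -> LeakyRelu ('001_convolutional_lrelu')
    #
    # and the '_lrelu' name is returned, so the next layer consumes the
    # activated tensor. Because batch normalization is emitted as its own
    # node, the Conv node gets no bias input in that case.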
617 |     def _make_shortcut_node(self, layer_name, layer_dict):
618 |         """Create an ONNX Add node with the shortcut properties from
619 |         the DarkNet-based graph.
620 | 
621 |         Keyword arguments:
622 |         layer_name -- the layer's name (also the corresponding key in layer_configs)
623 |         layer_dict -- a layer parameter dictionary (one element of layer_configs)
624 |         """
625 |         shortcut_index = layer_dict['from']
626 |         activation = layer_dict['activation']
627 |         assert activation == 'linear'
628 | 
629 |         first_node_specs = self._get_previous_node_specs()
630 |         second_node_specs = self._get_previous_node_specs(
631 |             target_index=shortcut_index)
632 |         assert first_node_specs.channels == second_node_specs.channels
633 |         channels = first_node_specs.channels
634 |         inputs = [first_node_specs.name, second_node_specs.name]
635 |         shortcut_node = helper.make_node(
636 |             'Add',
637 |             inputs=inputs,
638 |             outputs=[layer_name],
639 |             name=layer_name,
640 |         )
641 |         self._nodes.append(shortcut_node)
642 |         return layer_name, channels
643 | 
644 |     def _make_route_node(self, layer_name, layer_dict):
645 |         """If the 'layers' parameter from the DarkNet configuration is only one index, continue
646 |         node creation at the indicated (negative) index. Otherwise, create an ONNX Concat node
647 |         with the route properties from the DarkNet-based graph.
648 | 
649 |         Keyword arguments:
650 |         layer_name -- the layer's name (also the corresponding key in layer_configs)
651 |         layer_dict -- a layer parameter dictionary (one element of layer_configs)
652 |         """
653 |         route_node_indexes = layer_dict['layers']
654 |         if len(route_node_indexes) == 1:
655 |             split_index = route_node_indexes[0]
656 |             assert split_index < 0
657 |             # Increment by one because we skipped the YOLO layer:
658 |             split_index += 1
659 |             self.major_node_specs = self.major_node_specs[:split_index]
660 |             layer_name = None
661 |             channels = None
662 |         else:
663 |             inputs = list()
664 |             channels = 0
665 |             for index in route_node_indexes:
666 |                 if index > 0:
667 |                     # Increment by one because we count the input as a node (DarkNet
668 |                     # does not):
669 |                     index += 1
670 |                 route_node_specs = self._get_previous_node_specs(
671 |                     target_index=index)
672 |                 inputs.append(route_node_specs.name)
673 |                 channels += route_node_specs.channels
674 |             assert inputs
675 |             assert channels > 0
676 | 
677 |             route_node = helper.make_node(
678 |                 'Concat',
679 |                 axis=1,
680 |                 inputs=inputs,
681 |                 outputs=[layer_name],
682 |                 name=layer_name,
683 |             )
684 |             self._nodes.append(route_node)
685 |         return layer_name, channels
686 | 
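    # Illustrative note (not part of the original sample): in the standard
    # yolov3-tiny.cfg, a route with a single negative index (e.g. 'layers = -4')
    # only rewinds graph construction to an earlier node and emits no ONNX node,
    # while a route with several indices (e.g. 'layers = -1, 8') becomes a
    # Concat over axis=1 (the channel axis), so the inputs' channel counts add
    # up in the returned channel value.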
687 |     def _make_upsample_node(self, layer_name, layer_dict):
688 |         """Create an ONNX Upsample node with the properties from
689 |         the DarkNet-based graph.
690 | 
691 |         Keyword arguments:
692 |         layer_name -- the layer's name (also the corresponding key in layer_configs)
693 |         layer_dict -- a layer parameter dictionary (one element of layer_configs)
694 |         """
695 |         upsample_factor = float(layer_dict['stride'])
696 |         # Create the scales array with node parameters:
697 |         scales = np.array([1.0, 1.0, upsample_factor, upsample_factor]).astype(np.float32)
698 |         previous_node_specs = self._get_previous_node_specs()
699 |         inputs = [previous_node_specs.name]
700 | 
701 |         channels = previous_node_specs.channels
702 |         assert channels > 0
703 |         upsample_params = UpsampleParams(layer_name, scales)
704 |         scales_name = upsample_params.generate_param_name()
705 |         # For ONNX opset >= 9, the Upsample node takes the scales array as an input:
706 |         inputs.append(scales_name)
707 | 
708 |         upsample_node = helper.make_node(
709 |             'Upsample',
710 |             mode='nearest',
711 |             inputs=inputs,
712 |             outputs=[layer_name],
713 |             name=layer_name,
714 |         )
715 |         self._nodes.append(upsample_node)
716 |         self.param_dict[layer_name] = upsample_params
717 |         return layer_name, channels
718 | 
719 |     def _make_maxpool_node(self, layer_name, layer_dict):  # Create an ONNX MaxPool node from a DarkNet 'maxpool' layer.
720 |         stride = layer_dict['stride']
721 |         kernel_size = layer_dict['size']
722 |         previous_node_specs = self._get_previous_node_specs()
723 |         inputs = [previous_node_specs.name]
724 |         channels = previous_node_specs.channels
725 |         kernel_shape = [kernel_size, kernel_size]
726 |         strides = [stride, stride]
727 |         assert channels > 0
728 |         maxpool_node = helper.make_node(
729 |             'MaxPool',
730 |             inputs=inputs,
731 |             outputs=[layer_name],
732 |             kernel_shape=kernel_shape,
733 |             strides=strides,
734 |             auto_pad='SAME_UPPER',
735 |             name=layer_name,
736 |         )
737 |         self._nodes.append(maxpool_node)
738 |         return layer_name, channels
739 | 
740 | 
741 | def generate_md5_checksum(local_path):
742 |     """Return the MD5 checksum of a local file.
743 | 
744 |     Keyword argument:
745 |     local_path -- path of the file whose checksum shall be generated
746 |     """
747 |     with open(local_path, 'rb') as local_file:  # binary mode, so hashlib gets bytes on any platform
748 |         data = local_file.read()
749 |         return hashlib.md5(data).hexdigest()
750 | 
751 | 
752 | def download_file(local_path, link, checksum_reference=None):
753 |     """Check if a local file is present and download it from the specified link otherwise.
754 |     If checksum_reference is specified, the file's MD5 checksum is compared against the
755 |     expected value.
756 | 
757 |     Keyword arguments:
758 |     local_path -- path under which the file shall be stored locally
759 |     link -- link where the file shall be downloaded from if it is not found locally
760 |     checksum_reference -- expected MD5 checksum of the file
761 |     """
762 |     if not os.path.exists(local_path):
763 |         print('Downloading from %s, this may take a while...' % link)
764 |         wget.download(link, local_path)
765 |         print()
766 |     if checksum_reference is not None:
767 |         checksum = generate_md5_checksum(local_path)
768 |         if checksum != checksum_reference:
769 |             raise ValueError(
770 |                 'The MD5 checksum of local file %s differs from %s, please manually remove \
771 |                 the file and try again.' %
772 |                 (local_path, checksum_reference))
773 |     return local_path
774 | 
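# Illustrative usage (not part of the original sample): main() below reads the
# weights from a fixed local path, but download_file() could fetch them, e.g.
# from the widely used upstream location (URL given as an example only):
#
#     weights_path = download_file(
#         'yolov3-tiny.weights',
#         'https://pjreddie.com/media/files/yolov3-tiny.weights')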
775 | 
776 | def main():
777 |     """Run the DarkNet-to-ONNX conversion for YOLOv3-tiny-416."""
778 |     img_size = 416
779 |     # Have to use python 2 due to hashlib compatibility
780 |     if sys.version_info[0] > 2:
781 |         raise Exception("This script is only compatible with python2, please re-run this script with python2. The rest of this sample can be run with either version of python.")
782 | 
783 |     # Path to the DarkNet config for YOLOv3-tiny (see "custom settings" in the README):
784 |     cfg_file_path = '/home/nvidia/yolov3-tiny2onnx2trt/yolov3-tiny.cfg'
785 | 
786 |     # These are the only layers DarkNetParser will extract parameters from. The two layers of
787 |     # type 'yolo' are not parsed in detail because they are included in the post-processing later:
788 |     supported_layers = ['net', 'convolutional', 'shortcut',
789 |                         'route', 'upsample', 'maxpool']
790 | 
791 |     # Create a DarkNetParser object and then use it to generate an OrderedDict with all
792 |     # layers' configs from the cfg file:
793 |     parser = DarkNetParser(supported_layers)
794 |     layer_configs = parser.parse_cfg_file(cfg_file_path)
795 |     # We do not need the parser anymore after we got layer_configs:
796 |     del parser
797 | 
798 |     # In the above layer_configs, there are two output tensors whose dimensions we
799 |     # need to know (in CHW format):
800 |     output_tensor_dims = OrderedDict()
801 |     kernel_size_1 = img_size // 32
802 |     kernel_size_2 = img_size // 16
803 |     output_tensor_dims['016_convolutional'] = [255, kernel_size_1, kernel_size_1]  # 255 = 3 * (80 + 5); must match 'filters=' in the cfg
804 |     output_tensor_dims['023_convolutional'] = [255, kernel_size_2, kernel_size_2]
805 | 
806 |     # Create a GraphBuilderONNX object with the known output tensor dimensions:
807 |     builder = GraphBuilderONNX(output_tensor_dims)
808 | 
809 |     # Populate the network with the pretrained weights; the weights file has to be
810 |     # present locally at this path (this script does not download it):
811 |     weights_file_path = '/home/nvidia/yolov3-tiny2onnx2trt/yolov3-tiny.weights'
812 | 
813 |     # Now generate an ONNX graph with weights from the previously parsed layer configurations
814 |     # and the weights file:
815 |     yolov3_model_def = builder.build_onnx_graph(
816 |         layer_configs=layer_configs,
817 |         weights_file_path=weights_file_path,
818 |         verbose=True)
819 |     # Once we have the model definition, we do not need the builder anymore:
820 |     del builder
821 | 
822 |     # Perform a sanity check on the ONNX model definition:
823 |     onnx.checker.check_model(yolov3_model_def)
824 | 
825 |     # Serialize the generated ONNX graph to this file:
826 |     output_file_path = 'yolov3-tiny.onnx'
827 |     onnx.save(yolov3_model_def, output_file_path)
828 | 
829 | if __name__ == '__main__':
830 |     main()
831 | 
--------------------------------------------------------------------------------
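A quick, optional sanity check of the generated yolov3-tiny.onnx before the TensorRT step -- an illustrative sketch using only the public onnx API; the expected shapes assume the stock 416x416, 80-class configuration above:

    import onnx

    model = onnx.load('yolov3-tiny.onnx')
    onnx.checker.check_model(model)
    # Print the graph's declared outputs; for this configuration they should be
    # 016_convolutional [1, 255, 13, 13] and 023_convolutional [1, 255, 26, 26].
    for output in model.graph.output:
        dims = [d.dim_value for d in output.type.tensor_type.shape.dim]
        print('%s %s' % (output.name, dims))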