├── imageslist.txt ├── images │   ├── dog.jpg │   ├── eagle.jpg │   └── horses.jpg ├── requirements.txt ├── coco_labels.txt ├── README.md ├── yolov3-tiny.cfg ├── common.py ├── onnx_to_tensorrt.py ├── data_processing.py └── yolov3_to_onnx.py /imageslist.txt: -------------------------------------------------------------------------------- 1 | dog 2 | eagle 3 | horses 4 | 5 | -------------------------------------------------------------------------------- /images/dog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zombie0117/yolov3-tiny-onnx-TensorRT/HEAD/images/dog.jpg -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.15.1 2 | onnx==1.4.1 3 | pycuda>=2017.1.1 4 | Pillow>=5.2.0 5 | wget>=3.2 6 | -------------------------------------------------------------------------------- /images/eagle.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zombie0117/yolov3-tiny-onnx-TensorRT/HEAD/images/eagle.jpg -------------------------------------------------------------------------------- /images/horses.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zombie0117/yolov3-tiny-onnx-TensorRT/HEAD/images/horses.jpg -------------------------------------------------------------------------------- /coco_labels.txt: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorbike 5 | aeroplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | stop sign 13 | parking meter 14 | bench 15 | bird 16 | cat 17 | dog 18 | horse 19 | sheep 20 | cow 21 | elephant 22 | bear 23 | zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 32 | snowboard 33 | sports ball 34 | kite 35 | baseball bat 36 | baseball glove 37 | skateboard 38 | surfboard 39 | tennis racket 40 | bottle 41 | wine glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | orange 51 | broccoli 52 | carrot 53 | hot dog 54 | pizza 55 | donut 56 | cake 57 | chair 58 | sofa 59 | pottedplant 60 | bed 61 | diningtable 62 | toilet 63 | tvmonitor 64 | laptop 65 | mouse 66 | remote 67 | keyboard 68 | cell phone 69 | microwave 70 | oven 71 | toaster 72 | sink 73 | refrigerator 74 | book 75 | clock 76 | vase 77 | scissors 78 | teddy bear 79 | hair drier 80 | toothbrush -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # yolov3-tiny2onnx2trt 2 | Convert your YOLOv3-tiny model to a TensorRT engine 3 | 4 | # device: NVIDIA Jetson TX2 5 | 6 | 7 | # jetpack version: 4.2 8 | ubuntu18.04 9 | tensorrt5.0.6.3 10 | cuda10.0 11 | cudnn7.3.1 12 | 13 | 14 | # others: 15 | python=2.7 16 | numpy=1.16.1 17 | onnx=1.4.1 (important) 18 | pycuda=2019.1.1 19 | Pillow=6.1.0 20 | wget=3.2 21 | 22 | 23 | # custom settings 24 | 25 | data_processing.py: 26 | line14: LABEL_FILE_PATH = '/home/nvidia/yolov3-tiny2onnx2trt/coco_labels.txt' 27 | line19: CATEGORY_NUM = 80 28 | 29 | yolov3_to_onnx.py: 30 | line778: img_size = 416 31 | line784: cfg_file_path = '/home/nvidia/yolov3-tiny2onnx2trt/yolov3-tiny.cfg' 32 | line811: weights_file_path = '/home/nvidia/yolov3-tiny2onnx2trt/yolov3-tiny.weights' 33 | line826: output_file_path = 'yolov3-tiny.onnx' 34 | 35 | onnx_to_tensorrt.py: 36 | line39: input_size = 416 37 | line40: batch_size = 1 38 | line42~line46: 39 | onnx_file_path = 'yolov3-tiny.onnx' 40 | engine_file_path = 'yolov3-tiny.trt' 41 | input_file_list = '/home/nvidia/yolov3-tiny2onnx2trt/imageslist.txt' 42 | IMAGE_PATH = '/home/nvidia/yolov3-tiny2onnx2trt/images/' 43 | save_path = '/home/nvidia/yolov3-tiny2onnx2trt/' 44 | # notes (very important!): 45 | 0.The onnx version must be 1.4.1. If it is not, run: 46 | pip uninstall onnx 47 | pip install onnx==1.4.1
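 To confirm that the correct version is installed before converting, you can run: python -c "import onnx; print(onnx.__version__)" (it should print 1.4.1).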
48 | 49 | 1.The cfg file must end with a blank line; if it does not, press Enter at the end of the file to add one. 50 | 51 | # steps: 52 | 0.Put your .weights file in the folder: 53 | |-yolov3-tiny2onnx2trt 54 | |-yolov3-tiny.weights 55 | 56 | 1.Change your settings as described in "# custom settings" above. 57 | 58 | 2.Run commands: 59 | cd yolov3-tiny2onnx2trt 60 | python yolov3_to_onnx.py 61 | 62 | You will get a yolov3-tiny.onnx file. 63 | 64 | 3.Run commands: 65 | python onnx_to_tensorrt.py 66 | 67 | You will get a yolov3-tiny.trt file and output images with the detected bounding boxes drawn on them. 68 | 69 | -------------------------------------------------------------------------------- /yolov3-tiny.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=2 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=16 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=32 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=64 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [maxpool] 58 | size=2 59 | stride=2 60 | 61 | [convolutional] 62 | batch_normalize=1 63 | filters=128 64 | size=3 65 | stride=1 66 | pad=1 67 | activation=leaky 68 | 69 | [maxpool] 70 | size=2 71 | stride=2 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=256 76 | size=3 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [maxpool] 82 | size=2 83 | stride=2 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=512 88 | size=3 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [maxpool] 94 | size=2 95 | stride=1 96 | 97 | [convolutional] 98 | batch_normalize=1 99 | filters=1024 100 | size=3 101 | stride=1 102 | pad=1 103 | activation=leaky 104 | 105 | ########### 106 | 107 | [convolutional] 108 | batch_normalize=1 109 | filters=256 110 | size=1 111 | stride=1 112 | pad=1 113 | activation=leaky 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=512 118 | size=3 119 | stride=1 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | size=1 125 | stride=1 126 | pad=1 127 | filters=255 128 | activation=linear 129 | 130 | 131 | 132 | [yolo] 133 | mask = 3,4,5 134 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 135 | classes=80 136 | num=6 137 | jitter=.3 138 | ignore_thresh = .7
139 | truth_thresh = 1 140 | random=1 141 | 142 | [route] 143 | layers = -4 144 | 145 | [convolutional] 146 | batch_normalize=1 147 | filters=128 148 | size=1 149 | stride=1 150 | pad=1 151 | activation=leaky 152 | 153 | [upsample] 154 | stride=2 155 | 156 | [route] 157 | layers = -1, 8 158 | 159 | [convolutional] 160 | batch_normalize=1 161 | filters=256 162 | size=3 163 | stride=1 164 | pad=1 165 | activation=leaky 166 | 167 | [convolutional] 168 | size=1 169 | stride=1 170 | pad=1 171 | filters=255 172 | activation=linear 173 | 174 | [yolo] 175 | mask = 0,1,2 176 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 177 | classes=80 178 | num=6 179 | jitter=.3 180 | ignore_thresh = .7 181 | truth_thresh = 1 182 | random=1 183 | -------------------------------------------------------------------------------- /common.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 1993-2018 NVIDIA Corporation. All rights reserved. 3 | # 4 | # NOTICE TO LICENSEE: 5 | # 6 | # This source code and/or documentation ("Licensed Deliverables") are 7 | # subject to NVIDIA intellectual property rights under U.S. and 8 | # international Copyright laws. 9 | # 10 | # These Licensed Deliverables contained herein is PROPRIETARY and 11 | # CONFIDENTIAL to NVIDIA and is being provided under the terms and 12 | # conditions of a form of NVIDIA software license agreement by and 13 | # between NVIDIA and Licensee ("License Agreement") or electronically 14 | # accepted by Licensee. Notwithstanding any terms or conditions to 15 | # the contrary in the License Agreement, reproduction or disclosure 16 | # of the Licensed Deliverables to any third party without the express 17 | # written consent of NVIDIA is prohibited. 18 | # 19 | # NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE 20 | # LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE 21 | # SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS 22 | # PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND. 23 | # NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED 24 | # DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, 25 | # NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. 26 | # NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE 27 | # LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY 28 | # SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY 29 | # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, 30 | # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS 31 | # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 32 | # OF THESE LICENSED DELIVERABLES. 33 | # 34 | # U.S. Government End Users. These Licensed Deliverables are a 35 | # "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT 36 | # 1995), consisting of "commercial computer software" and "commercial 37 | # computer software documentation" as such terms are used in 48 38 | # C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government 39 | # only as a commercial end item. Consistent with 48 C.F.R.12.212 and 40 | # 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all 41 | # U.S. Government End Users acquire the Licensed Deliverables with 42 | # only those rights set forth herein. 
43 | # 44 | # Any use of the Licensed Deliverables in individual and commercial 45 | # software must include, in the user documentation and internal 46 | # comments to the code, the above Disclaimer and U.S. Government End 47 | # Users Notice. 48 | # 49 | 50 | import os 51 | import argparse 52 | import numpy as np 53 | import pycuda.driver as cuda 54 | import tensorrt as trt 55 | 56 | try: 57 | # Sometimes python2 does not understand FileNotFoundError 58 | FileNotFoundError 59 | except NameError: 60 | FileNotFoundError = IOError 61 | 62 | def GiB(val): 63 | return val * 1 << 30 64 | 65 | def find_sample_data(description="Runs a TensorRT Python sample", subfolder="", find_files=[]): 66 | ''' 67 | Parses sample arguments. 68 | Args: 69 | description (str): Description of the sample. 70 | subfolder (str): The subfolder containing data relevant to this sample 71 | find_files (str): A list of filenames to find. Each filename will be replaced with an absolute path. 72 | Returns: 73 | str: Path of data directory. 74 | Raises: 75 | FileNotFoundError 76 | ''' 77 | kDEFAULT_DATA_ROOT = os.path.abspath("/usr/src/tensorrt/data") 78 | 79 | # Standard command-line arguments for all samples. 80 | parser = argparse.ArgumentParser(description=description) 81 | parser.add_argument("-d", "--datadir", help="Location of the TensorRT sample data directory.") 82 | args, unknown_args = parser.parse_known_args() 83 | 84 | # If data directory is not specified, use the default. 85 | data_root = args.datadir if args.datadir else kDEFAULT_DATA_ROOT 86 | # If the subfolder exists, append it to the path, otherwise use the provided path as-is. 87 | subfolder_path = os.path.join(data_root, subfolder) 88 | if not os.path.exists(subfolder_path): 89 | print("WARNING: " + subfolder_path + " does not exist. Using " + data_root + " instead.") 90 | data_path = subfolder_path if os.path.exists(subfolder_path) else data_root 91 | 92 | # Make sure data directory exists. 93 | if not (os.path.exists(data_path)): 94 | raise FileNotFoundError(data_path + " does not exist. Please provide the correct data path with the -d option.") 95 | 96 | # Find all requested files. 97 | for index, f in enumerate(find_files): 98 | find_files[index] = os.path.abspath(os.path.join(data_path, f)) 99 | if not os.path.exists(find_files[index]): 100 | raise FileNotFoundError(find_files[index] + " does not exist. Please provide the correct data path with the -d option.") 101 | if find_files: 102 | return data_path, find_files 103 | else: 104 | return data_path 105 | 106 | # Simple helper data class that's a little nicer to use than a 2-tuple. 107 | class HostDeviceMem(object): 108 | def __init__(self, host_mem, device_mem): 109 | self.host = host_mem 110 | self.device = device_mem 111 | 112 | def __str__(self): 113 | return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device) 114 | 115 | def __repr__(self): 116 | return self.__str__() 117 | 118 | # Allocates all buffers required for an engine, i.e. host/device inputs/outputs. 119 | def allocate_buffers(engine): 120 | inputs = [] 121 | outputs = [] 122 | bindings = [] 123 | stream = cuda.Stream() 124 | for binding in engine: 125 | size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size 126 | dtype = trt.nptype(engine.get_binding_dtype(binding)) 127 | # Allocate host and device buffers 128 | host_mem = cuda.pagelocked_empty(size, dtype) 129 | device_mem = cuda.mem_alloc(host_mem.nbytes) 130 | # Append the device buffer to device bindings. 
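 Page-locked (pinned) host memory is what lets the asynchronous memcpy calls in do_inference below overlap transfers with kernel execution.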
131 | bindings.append(int(device_mem)) 132 | # Append to the appropriate list. 133 | if engine.binding_is_input(binding): 134 | inputs.append(HostDeviceMem(host_mem, device_mem)) 135 | else: 136 | outputs.append(HostDeviceMem(host_mem, device_mem)) 137 | return inputs, outputs, bindings, stream 138 | 139 | # This function is generalized for multiple inputs/outputs. 140 | # inputs and outputs are expected to be lists of HostDeviceMem objects. 141 | def do_inference(context, bindings, inputs, outputs, stream, batch_size=1): 142 | # Transfer input data to the GPU. 143 | [cuda.memcpy_htod_async(inp.device, inp.host, stream) for inp in inputs] 144 | # Run inference. 145 | context.execute_async(batch_size=batch_size, bindings=bindings, stream_handle=stream.handle) 146 | # Transfer predictions back from the GPU. 147 | [cuda.memcpy_dtoh_async(out.host, out.device, stream) for out in outputs] 148 | # Synchronize the stream 149 | stream.synchronize() 150 | # Return only the host outputs. 151 | return [out.host for out in outputs] 152 | -------------------------------------------------------------------------------- /onnx_to_tensorrt.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | # 3 | # Copyright 1993-2018 NVIDIA Corporation. All rights reserved. 4 | # 5 | # NOTICE TO LICENSEE: 6 | # 7 | # This source code and/or documentation ("Licensed Deliverables") are 8 | # subject to NVIDIA intellectual property rights under U.S. and 9 | # international Copyright laws. 10 | # 11 | # These Licensed Deliverables contained herein is PROPRIETARY and 12 | # CONFIDENTIAL to NVIDIA and is being provided under the terms and 13 | # conditions of a form of NVIDIA software license agreement by and 14 | # between NVIDIA and Licensee ("License Agreement") or electronically 15 | # accepted by Licensee. Notwithstanding any terms or conditions to 16 | # the contrary in the License Agreement, reproduction or disclosure 17 | # of the Licensed Deliverables to any third party without the express 18 | # written consent of NVIDIA is prohibited. 19 | # 20 | # NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE 21 | # LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE 22 | # SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS 23 | # PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND. 24 | # NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED 25 | # DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, 26 | # NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. 27 | # NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE 28 | # LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY 29 | # SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY 30 | # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, 31 | # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS 32 | # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 33 | # OF THESE LICENSED DELIVERABLES. 34 | # 35 | # U.S. Government End Users. These Licensed Deliverables are a 36 | # "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT 37 | # 1995), consisting of "commercial computer software" and "commercial 38 | # computer software documentation" as such terms are used in 48 39 | # C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government 40 | # only as a commercial end item. Consistent with 48 C.F.R.12.212 and 41 | # 48 C.F.R. 
227.7202-1 through 227.7202-4 (JUNE 1995), all 42 | # U.S. Government End Users acquire the Licensed Deliverables with 43 | # only those rights set forth herein. 44 | # 45 | # Any use of the Licensed Deliverables in individual and commercial 46 | # software must include, in the user documentation and internal 47 | # comments to the code, the above Disclaimer and U.S. Government End 48 | # Users Notice. 49 | # 50 | 51 | from __future__ import print_function 52 | 53 | import glob 54 | import time 55 | import numpy as np 56 | import tensorrt as trt 57 | import pycuda.driver as cuda 58 | import pycuda.autoinit 59 | from PIL import ImageDraw 60 | from data_processing import PreprocessYOLO, PostprocessYOLO, ALL_CATEGORIES 61 | 62 | import sys, os 63 | import common 64 | 65 | TRT_LOGGER = trt.Logger() 66 | 67 | 68 | def draw_bboxes(image_raw, bboxes, confidences, categories, all_categories, bbox_color='blue'): 69 | """Draw the bounding boxes on the original input image and return it. 70 | 71 | Keyword arguments: 72 | image_raw -- a raw PIL Image 73 | bboxes -- NumPy array containing the bounding box coordinates of N objects, with shape (N,4). 74 | categories -- NumPy array containing the corresponding category for each object, 75 | with shape (N,) 76 | confidences -- NumPy array containing the corresponding confidence for each object, 77 | with shape (N,) 78 | all_categories -- a list of all categories in the correct order (required for looking up 79 | the category name) 80 | bbox_color -- an optional string specifying the color of the bounding boxes (default: 'blue') 81 | """ 82 | draw = ImageDraw.Draw(image_raw) 83 | print(bboxes, confidences, categories) 84 | for box, score, category in zip(bboxes, confidences, categories): 85 | x_coord, y_coord, width, height = box 86 | left = max(0, np.floor(x_coord + 0.5).astype(int)) 87 | top = max(0, np.floor(y_coord + 0.5).astype(int)) 88 | right = min(image_raw.width, np.floor(x_coord + width + 0.5).astype(int)) 89 | bottom = min(image_raw.height, np.floor(y_coord + height + 0.5).astype(int)) 90 | 91 | draw.rectangle(((left, top), (right, bottom)), outline=bbox_color) 92 | draw.text((left, top - 12), '{0} {1:.2f}'.format(all_categories[category], score), fill=bbox_color) 93 | 94 | return image_raw 95 | 96 | def get_engine(onnx_file_path, max_batch_size, fp16_on, engine_file_path=""): 97 | """Attempts to load a serialized engine if available, otherwise builds a new TensorRT engine and saves it.""" 98 | def build_engine(): 99 | """Takes an ONNX file and creates a TensorRT engine to run inference with""" 100 | with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.OnnxParser(network, TRT_LOGGER) as parser: 101 | builder.max_workspace_size = 1 << 30 # 1GB 102 | builder.max_batch_size = max_batch_size 103 | builder.fp16_mode = fp16_on 104 | # Parse model file 105 | if not os.path.exists(onnx_file_path): 106 | print('ONNX file {} not found, please run yolov3_to_onnx.py first to generate it.'.format(onnx_file_path)) 107 | exit(0) 108 | print('Loading ONNX file from path {}...'.format(onnx_file_path)) 109 | with open(onnx_file_path, 'rb') as model: 110 | print('Beginning ONNX file parsing') 111 | parser.parse(model.read()) 112 | print('Completed parsing of ONNX file') 113 | print('Building an engine from file {}; this may take a while...'.format(onnx_file_path)) 114 | engine = builder.build_cuda_engine(network) 115 | print("Completed creating Engine") 116 | with open(engine_file_path, "wb") as f: 117 | f.write(engine.serialize())
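 # caching the serialized engine on disk lets later runs take the fast deserialize branch below instead of rebuilding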
118 | return engine 119 | 120 | if os.path.exists(engine_file_path): 121 | # If a serialized engine exists, use it instead of building an engine. 122 | print("Reading engine from file {}".format(engine_file_path)) 123 | with open(engine_file_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime: 124 | return runtime.deserialize_cuda_engine(f.read()) 125 | else: 126 | return build_engine() 127 | def download_file(path, link, checksum_reference=None): 128 | if not os.path.exists(path): 129 | import wget  # local import: wget is not used anywhere else in this script 130 | print('downloading'); wget.download(link, path) 131 | print() 132 | if checksum_reference is not None: 133 | raise NotImplementedError('checksum verification is not implemented') 134 | return path 135 | def main(): 136 | """Create a TensorRT engine for the ONNX-based YOLOv3-tiny model and run inference.""" 137 | 138 | # Try to load a previously generated yolov3-tiny network graph in ONNX format: 139 | input_size = 416 140 | batch_size = 1 141 | fp16_on = True 142 | onnx_file_path = 'yolov3-tiny.onnx' 143 | engine_file_path = 'yolov3-tiny.trt' 144 | input_file_list = '/home/nvidia/yolov3-tiny2onnx2trt/imageslist.txt' 145 | IMAGE_PATH = '/home/nvidia/yolov3-tiny2onnx2trt/images/' 146 | save_path = '/home/nvidia/yolov3-tiny2onnx2trt/' 147 | 148 | output_shapes_416 = [(batch_size, 255, 13, 13), (batch_size, 255, 26, 26)]  # 255 = 3 * (4 + 1 + CATEGORY_NUM) channels for the 80-class cfg in this repo 149 | output_shapes_480 = [(batch_size, 255, 15, 15), (batch_size, 255, 30, 30)] 150 | output_shapes_544 = [(batch_size, 255, 17, 17), (batch_size, 255, 34, 34)] 151 | output_shapes_608 = [(batch_size, 255, 19, 19), (batch_size, 255, 38, 38)] 152 | output_shapes_dic = {'416': output_shapes_416, '480': output_shapes_480, '544': output_shapes_544, '608': output_shapes_608} 153 | 154 | with open(input_file_list, 'r') as f: 155 | filenames = [] 156 | for line in f.readlines(): 157 | filenames.append(line.strip()) 158 | 159 | filenames = glob.glob(os.path.join(IMAGE_PATH, '*.jpg'))  # note: this overrides the list read above; every .jpg in IMAGE_PATH is processed 160 | 161 | nums = len(filenames) 162 | # print(filenames) 163 | 164 | input_resolution_yolov3_HW = (input_size, input_size) 165 | 166 | preprocessor = PreprocessYOLO(input_resolution_yolov3_HW) 167 | 168 | output_shapes = output_shapes_dic[str(input_size)] 169 | 170 | postprocessor_args = {"yolo_masks": [(3, 4, 5), (0, 1, 2)], 171 | "yolo_anchors": [(10,14), (23,27), (37,58), (81,82), (135,169), (344,319)], 172 | "obj_threshold": 0.5, 173 | "nms_threshold": 0.35, 174 | "yolo_input_resolution": input_resolution_yolov3_HW} 175 | 176 | postprocessor = PostprocessYOLO(**postprocessor_args) 177 | 178 | # Do inference with TensorRT 179 | filenames_batch = [] 180 | images = [] 181 | images_raw = [] 182 | trt_outputs = [] 183 | index = 0 184 | with get_engine(onnx_file_path, batch_size, fp16_on, engine_file_path) as engine, engine.create_execution_context() as context: 185 | # inputs, outputs, bindings, stream = common.allocate_buffers(engine) 186 | # Do inference 187 | for filename in filenames: 188 | filenames_batch.append(filename) 189 | image_raw, image = preprocessor.process(filename) 190 | images_raw.append(image_raw) 191 | images.append(image) 192 | index += 1 193 | if index != nums and len(images_raw) != batch_size: 194 | continue 195 | inputs, outputs, bindings, stream = common.allocate_buffers(engine) 196 | images_batch = np.concatenate(images, axis=0) 197 | inputs[0].host = images_batch 198 | t1 = time.time() 199 | trt_outputs = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream, batch_size=batch_size) 200 | t2 = time.time() 201 | t_inf = t2 - t1 202 | print("time spent:",t_inf) 203 | print(len(trt_outputs)) 204 | trt_outputs =
[output.reshape(shape) for output, shape in zip(trt_outputs, output_shapes)] 205 | 206 | print('test') 207 | for i in range(len(filenames_batch)): 208 | fname = filenames_batch[i].split('/') 209 | fname = fname[-1].split('.')[0] 210 | img_raw = images_raw[i] 211 | shape_orig_WH = img_raw.size 212 | boxes, classes, scores = postprocessor.process(trt_outputs, (shape_orig_WH), i) 213 | print("boxes size:",len(boxes)) 214 | # Draw the bounding boxes onto the original input image and save it as a PNG file 215 | obj_detected_img = draw_bboxes(img_raw, boxes, scores, classes, ALL_CATEGORIES) 216 | output_image_path = save_path + fname + '_' + str(input_size) + '_bboxes.png' 217 | obj_detected_img.save(output_image_path, 'PNG') 218 | print('Saved image with bounding boxes of detected objects to {}.'.format(output_image_path)) 219 | filenames_batch = [] 220 | images_batch = [] 221 | images = [] 222 | images_raw = [] 223 | trt_outputs = [] 224 | 225 | if __name__ == '__main__': 226 | main() 227 | -------------------------------------------------------------------------------- /data_processing.py: -------------------------------------------------------------------------------- 1 | import math 2 | from PIL import Image 3 | import numpy as np 4 | 5 | 6 | # YOLOv3-608 has been trained with these 80 categories from COCO: 7 | # Lin, Tsung-Yi, et al. "Microsoft COCO: Common Objects in Context." 8 | # European Conference on Computer Vision. Springer, Cham, 2014. 9 | 10 | def load_label_categories(label_file_path): 11 | categories = [line.rstrip('\n') for line in open(label_file_path)] 12 | return categories 13 | 14 | LABEL_FILE_PATH = '/home/nvidia/yolov3-tiny2onnx2trt/coco_labels.txt' 15 | ALL_CATEGORIES = load_label_categories(LABEL_FILE_PATH) 16 | 17 | # Let's make sure that there are 80 classes, as expected for the COCO data set: 18 | CATEGORY_NUM = len(ALL_CATEGORIES) 19 | assert CATEGORY_NUM == 80 20 | 21 | 22 | class PreprocessYOLO(object): 23 | """A simple class for loading images with PIL and reshaping them to the specified 24 | input resolution for YOLOv3-608. 25 | """ 26 | 27 | def __init__(self, yolo_input_resolution): 28 | """Initialize with the input resolution for YOLOv3, which will stay fixed in this sample. 29 | 30 | Keyword arguments: 31 | yolo_input_resolution -- two-dimensional tuple with the target network's (spatial) 32 | input resolution in HW order 33 | """ 34 | self.yolo_input_resolution = yolo_input_resolution 35 | 36 | def process(self, input_image_path): 37 | """Load an image from the specified input path, 38 | and return it together with a pre-processed version required for feeding it into a 39 | YOLOv3 network. 40 | 41 | Keyword arguments: 42 | input_image_path -- string path of the image to be loaded 43 | """ 44 | image_raw, image_resized = self._load_and_resize(input_image_path) 45 | image_preprocessed = self._shuffle_and_normalize(image_resized) 46 | return image_raw, image_preprocessed 47 | 48 | def _load_and_resize(self, input_image_path): 49 | """Load an image from the specified path and resize it to the input resolution. 50 | Return the input image before resizing as a PIL Image (required for visualization), 51 | and the resized image as a NumPy float array. 
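 Note that the image is simply stretched to the network resolution with bicubic resampling; the aspect ratio of the original image is not preserved.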
52 | 53 | Keyword arguments: 54 | input_image_path -- string path of the image to be loaded 55 | """ 56 | 57 | image_raw = Image.open(input_image_path) 58 | # Expecting yolo_input_resolution in (height, width) format, adjusting to the PIL 59 | # convention (width, height): 60 | new_resolution = ( 61 | self.yolo_input_resolution[1], 62 | self.yolo_input_resolution[0]) 63 | print(new_resolution) 64 | image_resized = image_raw.resize( 65 | new_resolution, resample=Image.BICUBIC) 66 | image_resized = np.array(image_resized, dtype=np.float32, order='C') 67 | return image_raw, image_resized 68 | 69 | def _shuffle_and_normalize(self, image): 70 | """Normalize a NumPy array representing an image to the range [0, 1], and 71 | convert it from HWC format ("channels last") to NCHW format ("channels first" 72 | with leading batch dimension). 73 | 74 | Keyword arguments: 75 | image -- image as three-dimensional NumPy float array, in HWC format 76 | """ 77 | image /= 255.0 78 | # HWC to CHW format: 79 | image = np.transpose(image, [2, 0, 1]) 80 | # CHW to NCHW format 81 | image = np.expand_dims(image, axis=0) 82 | # Convert the image to row-major order, also known as "C order": 83 | image = np.array(image, dtype=np.float32, order='C') 84 | return image 85 | 86 | 87 | class PostprocessYOLO(object): 88 | """Class for post-processing the output tensors from YOLOv3 (two for the tiny model, three for YOLOv3-608).""" 89 | 90 | def __init__(self, 91 | yolo_masks, 92 | yolo_anchors, 93 | obj_threshold, 94 | nms_threshold, 95 | yolo_input_resolution): 96 | """Initialize with all values that will be kept when processing several frames. 97 | Assuming 3 outputs of the network in the case of (large) YOLOv3. 98 | 99 | Keyword arguments: 100 | yolo_masks -- a list of three-dimensional tuples for the YOLO masks (one per output) 101 | yolo_anchors -- a list of two-dimensional tuples for the YOLO anchors (9 for YOLOv3-608, 6 for the tiny model) 102 | object_threshold -- threshold for object coverage, float value between 0 and 1 103 | nms_threshold -- threshold for non-max suppression algorithm, 104 | float value between 0 and 1 105 | input_resolution_yolo -- two-dimensional tuple with the target network's (spatial) 106 | input resolution in HW order 107 | """ 108 | self.masks = yolo_masks 109 | self.anchors = yolo_anchors 110 | self.object_threshold = obj_threshold 111 | self.nms_threshold = nms_threshold 112 | self.input_resolution_yolo = yolo_input_resolution 113 | 114 | def process(self, outputs, resolution_raw, i): 115 | """Take the YOLOv3 outputs generated from a TensorRT forward pass, post-process them 116 | and return a list of bounding boxes for detected objects together with their category 117 | and their confidences in separate lists. 118 | 119 | Keyword arguments: 120 | outputs -- outputs from a TensorRT engine in NCHW format 121 | resolution_raw -- the original spatial resolution from the input PIL image in WH order i -- index of this image within the inference batch 122 | """ 123 | outputs_reshaped = list() 124 | for output in outputs: 125 | outputs_reshaped.append(self._reshape_output(output, i)) 126 | 127 | boxes, categories, confidences = self._process_yolo_output( 128 | outputs_reshaped, resolution_raw) 129 | 130 | return boxes, categories, confidences 131 | 132 | def _reshape_output(self, output, i): 133 | """Reshape a TensorRT output from NCHW to NHWC format (with expected C=255), 134 | and then return it in (height,width,3,85) dimensionality after further reshaping.
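 With the 80 COCO categories this corresponds to C = 3 * (4 + 1 + 80) = 255 channels per output tensor.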
135 | 136 | Keyword argument: 137 | output -- an output from a TensorRT engine after inference 138 | """ 139 | output = np.transpose(output, [0, 2, 3, 1]) 140 | _, height, width, _ = output.shape 141 | dim1, dim2 = height, width 142 | dim3 = 3 143 | # There are CATEGORY_NUM=80 object categories: 144 | dim4 = (4 + 1 + CATEGORY_NUM) 145 | return np.reshape(output[i], (dim1, dim2, dim3, dim4)) 146 | 147 | def _process_yolo_output(self, outputs_reshaped, resolution_raw): 148 | """Take in a list of reshaped YOLO outputs in (height,width,3,85) shape and 149 | return a list of bounding boxes for detected objects together with their category and their 150 | confidences in separate lists. 151 | 152 | Keyword arguments: 153 | outputs_reshaped -- list of reshaped YOLO outputs (one per YOLO head) as NumPy arrays 154 | with shape (height,width,3,85) 155 | resolution_raw -- the original spatial resolution from the input PIL image in WH order 156 | """ 157 | 158 | # E.g. in YOLOv3-608, there are three output tensors, which we associate with their 159 | # respective masks. Then we iterate through all output-mask pairs and generate candidates 160 | # for bounding boxes, their corresponding category predictions and their confidences: 161 | boxes, categories, confidences = list(), list(), list() 162 | for output, mask in zip(outputs_reshaped, self.masks): 163 | box, category, confidence = self._process_feats(output, mask) 164 | box, category, confidence = self._filter_boxes(box, category, confidence) 165 | boxes.append(box) 166 | categories.append(category) 167 | confidences.append(confidence) 168 | 169 | boxes = np.concatenate(boxes) 170 | categories = np.concatenate(categories) 171 | confidences = np.concatenate(confidences) 172 | 173 | # Scale boxes back to original image shape: 174 | width, height = resolution_raw 175 | image_dims = [width, height, width, height] 176 | boxes = boxes * image_dims 177 | 178 | # Using the candidates from the previous (loop) step, we apply the non-max suppression 179 | # algorithm that clusters adjacent bounding boxes to a single bounding box: 180 | nms_boxes, nms_categories, nscores = list(), list(), list() 181 | for category in set(categories): 182 | idxs = np.where(categories == category) 183 | box = boxes[idxs] 184 | category = categories[idxs] 185 | confidence = confidences[idxs] 186 | 187 | keep = self._nms_boxes(box, confidence) 188 | 189 | nms_boxes.append(box[keep]) 190 | nms_categories.append(category[keep]) 191 | nscores.append(confidence[keep]) 192 | 193 | if not nms_categories and not nscores: 194 | return None, None, None 195 | 196 | boxes = np.concatenate(nms_boxes) 197 | categories = np.concatenate(nms_categories) 198 | confidences = np.concatenate(nscores) 199 | 200 | return boxes, categories, confidences 201 | 202 | def _process_feats(self, output_reshaped, mask): 203 | """Take in a reshaped YOLO output in height,width,3,85 format together with its 204 | corresponding YOLO mask and return the detected bounding boxes, the confidence, 205 | and the class probability in each cell/pixel.
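 Box centers are obtained by applying a sigmoid to the raw x,y values and adding the grid-cell offsets; widths and heights are exponentials of the raw values scaled by the anchor sizes.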
206 | 207 | Keyword arguments: 208 | output_reshaped -- reshaped YOLO output as NumPy arrays with shape (height,width,3,85) 209 | mask -- 2-dimensional tuple with mask specification for this output 210 | """ 211 | 212 | # Two in-line functions required for calculating the bounding box 213 | # descriptors: 214 | def sigmoid(value): 215 | """Return the sigmoid of the input.""" 216 | return 1.0 / (1.0 + math.exp(-value)) 217 | 218 | def exponential(value): 219 | """Return the exponential of the input.""" 220 | return math.exp(value) 221 | 222 | # Vectorized calculation of above two functions: 223 | sigmoid_v = np.vectorize(sigmoid) 224 | exponential_v = np.vectorize(exponential) 225 | 226 | grid_h, grid_w, _, _ = output_reshaped.shape 227 | 228 | anchors = [self.anchors[i] for i in mask] 229 | 230 | # Reshape to N, height, width, num_anchors, box_params: 231 | anchors_tensor = np.reshape(anchors, [1, 1, len(anchors), 2]) 232 | box_xy = sigmoid_v(output_reshaped[..., :2]) 233 | box_wh = exponential_v(output_reshaped[..., 2:4]) * anchors_tensor 234 | box_confidence = sigmoid_v(output_reshaped[..., 4]) 235 | 236 | box_confidence = np.expand_dims(box_confidence, axis=-1) 237 | box_class_probs = sigmoid_v(output_reshaped[..., 5:]) 238 | 239 | col = np.tile(np.arange(0, grid_w), grid_w).reshape(-1, grid_w) 240 | row = np.tile(np.arange(0, grid_h).reshape(-1, 1), grid_h) 241 | 242 | col = col.reshape(grid_h, grid_w, 1, 1).repeat(3, axis=-2) 243 | row = row.reshape(grid_h, grid_w, 1, 1).repeat(3, axis=-2) 244 | grid = np.concatenate((col, row), axis=-1) 245 | 246 | box_xy += grid 247 | box_xy /= (grid_w, grid_h) 248 | box_wh /= self.input_resolution_yolo 249 | box_xy -= (box_wh / 2.) 250 | boxes = np.concatenate((box_xy, box_wh), axis=-1) 251 | 252 | # boxes: centroids, box_confidence: confidence level, box_class_probs: 253 | # class confidence 254 | return boxes, box_confidence, box_class_probs 255 | 256 | def _filter_boxes(self, boxes, box_confidences, box_class_probs): 257 | """Take in the unfiltered bounding box descriptors and discard each cell 258 | whose score is lower than the object threshold set during class initialization. 259 | 260 | Keyword arguments: 261 | boxes -- bounding box coordinates with shape (height,width,3,4); 4 for 262 | x,y,height,width coordinates of the boxes 263 | box_confidences -- bounding box confidences with shape (height,width,3,1); 1 for as 264 | confidence scalar per element 265 | box_class_probs -- class probabilities with shape (height,width,3,CATEGORY_NUM) 266 | 267 | """ 268 | box_scores = box_confidences * box_class_probs 269 | box_classes = np.argmax(box_scores, axis=-1) 270 | box_class_scores = np.max(box_scores, axis=-1) 271 | pos = np.where(box_class_scores >= self.object_threshold) 272 | 273 | boxes = boxes[pos] 274 | classes = box_classes[pos] 275 | scores = box_class_scores[pos] 276 | 277 | return boxes, classes, scores 278 | 279 | def _nms_boxes(self, boxes, box_confidences): 280 | """Apply the Non-Maximum Suppression (NMS) algorithm on the bounding boxes with their 281 | confidence scores and return an array with the indexes of the bounding boxes we want to 282 | keep (and display later). 
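 Candidates are visited in descending order of confidence, and each kept box suppresses every remaining box whose IoU with it exceeds the NMS threshold.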
283 | 284 | Keyword arguments: 285 | boxes -- a NumPy array containing N bounding-box coordinates that survived filtering, 286 | with shape (N,4); 4 for x,y,height,width coordinates of the boxes 287 | box_confidences -- a Numpy array containing the corresponding confidences with shape N 288 | """ 289 | x_coord = boxes[:, 0] 290 | y_coord = boxes[:, 1] 291 | width = boxes[:, 2] 292 | height = boxes[:, 3] 293 | 294 | areas = width * height 295 | ordered = box_confidences.argsort()[::-1] 296 | 297 | keep = list() 298 | while ordered.size > 0: 299 | # Index of the current element: 300 | i = ordered[0] 301 | keep.append(i) 302 | xx1 = np.maximum(x_coord[i], x_coord[ordered[1:]]) 303 | yy1 = np.maximum(y_coord[i], y_coord[ordered[1:]]) 304 | xx2 = np.minimum(x_coord[i] + width[i], x_coord[ordered[1:]] + width[ordered[1:]]) 305 | yy2 = np.minimum(y_coord[i] + height[i], y_coord[ordered[1:]] + height[ordered[1:]]) 306 | 307 | width1 = np.maximum(0.0, xx2 - xx1 + 1) 308 | height1 = np.maximum(0.0, yy2 - yy1 + 1) 309 | intersection = width1 * height1 310 | union = (areas[i] + areas[ordered[1:]] - intersection) 311 | 312 | # Compute the Intersection over Union (IoU) score: 313 | iou = intersection / union 314 | 315 | # The goal of the NMS algorithm is to reduce the number of adjacent bounding-box 316 | # candidates to a minimum. In this step, we keep only those elements whose overlap 317 | # with the current bounding box is lower than the threshold: 318 | indexes = np.where(iou <= self.nms_threshold)[0] 319 | ordered = ordered[indexes + 1] 320 | 321 | keep = np.array(keep) 322 | return keep 323 | -------------------------------------------------------------------------------- /yolov3_to_onnx.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | # 3 | # Copyright 1993-2019 NVIDIA Corporation. All rights reserved. 4 | # 5 | # NOTICE TO LICENSEE: 6 | # 7 | # This source code and/or documentation ("Licensed Deliverables") are 8 | # subject to NVIDIA intellectual property rights under U.S. and 9 | # international Copyright laws. 10 | # 11 | # These Licensed Deliverables contained herein is PROPRIETARY and 12 | # CONFIDENTIAL to NVIDIA and is being provided under the terms and 13 | # conditions of a form of NVIDIA software license agreement by and 14 | # between NVIDIA and Licensee ("License Agreement") or electronically 15 | # accepted by Licensee. Notwithstanding any terms or conditions to 16 | # the contrary in the License Agreement, reproduction or disclosure 17 | # of the Licensed Deliverables to any third party without the express 18 | # written consent of NVIDIA is prohibited. 19 | # 20 | # NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE 21 | # LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE 22 | # SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS 23 | # PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND. 24 | # NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED 25 | # DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, 26 | # NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. 
27 | # NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE 28 | # LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY 29 | # SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY 30 | # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, 31 | # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS 32 | # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 33 | # OF THESE LICENSED DELIVERABLES. 34 | # 35 | # U.S. Government End Users. These Licensed Deliverables are a 36 | # "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT 37 | # 1995), consisting of "commercial computer software" and "commercial 38 | # computer software documentation" as such terms are used in 48 39 | # C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government 40 | # only as a commercial end item. Consistent with 48 C.F.R.12.212 and 41 | # 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all 42 | # U.S. Government End Users acquire the Licensed Deliverables with 43 | # only those rights set forth herein. 44 | # 45 | # Any use of the Licensed Deliverables in individual and commercial 46 | # software must include, in the user documentation and internal 47 | # comments to the code, the above Disclaimer and U.S. Government End 48 | # Users Notice. 49 | # 50 | 51 | from __future__ import print_function 52 | from collections import OrderedDict 53 | import hashlib 54 | import os.path 55 | 56 | import wget 57 | 58 | import onnx 59 | from onnx import helper 60 | from onnx import TensorProto 61 | import numpy as np 62 | 63 | import sys 64 | 65 | class DarkNetParser(object): 66 | """Definition of a parser for DarkNet-based YOLOv3-608 (only tested for this topology).""" 67 | 68 | def __init__(self, supported_layers): 69 | """Initializes a DarkNetParser object. 70 | 71 | Keyword argument: 72 | supported_layers -- a string list of supported layers in DarkNet naming convention, 73 | parameters are only added to the class dictionary if a parsed layer is included. 74 | """ 75 | 76 | # A list of YOLOv3 layers containing dictionaries with all layer 77 | # parameters: 78 | self.layer_configs = OrderedDict() 79 | self.supported_layers = supported_layers 80 | self.layer_counter = 0 81 | 82 | def parse_cfg_file(self, cfg_file_path): 83 | """Takes the yolov3.cfg file and parses it layer by layer, 84 | appending each layer's parameters as a dictionary to layer_configs. 85 | 86 | Keyword argument: 87 | cfg_file_path -- path to the yolov3.cfg file as string 88 | """ 89 | with open(cfg_file_path, 'rb') as cfg_file: 90 | remainder = cfg_file.read() 91 | while remainder is not None: 92 | layer_dict, layer_name, remainder = self._next_layer(remainder) 93 | if layer_dict is not None: 94 | self.layer_configs[layer_name] = layer_dict 95 | return self.layer_configs 96 | 97 | def _next_layer(self, remainder): 98 | """Takes in a string and segments it by looking for DarkNet delimiters. 99 | Returns the layer parameters and the remaining string after the last delimiter. 100 | Example for the first Conv layer in yolo.cfg ... 101 | 102 | [convolutional] 103 | batch_normalize=1 104 | filters=32 105 | size=3 106 | stride=1 107 | pad=1 108 | activation=leaky 109 | 110 | ... becomes the following layer_dict return value: 111 | {'activation': 'leaky', 'stride': 1, 'pad': 1, 'filters': 32, 112 | 'batch_normalize': 1, 'type': 'convolutional', 'size': 3}. 
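 Blocks are delimited by the bracketed section header and the blank line that terminates the parameter list, which is why the cfg file must end with a blank line (see the README note).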
113 | 114 | '001_convolutional' is returned as layer_name, and all lines that follow in yolo.cfg 115 | are returned as the next remainder. 116 | 117 | Keyword argument: 118 | remainder -- a string with all raw text after the previously parsed layer 119 | """ 120 | remainder = remainder.split('[', 1) 121 | if len(remainder) == 2: 122 | remainder = remainder[1] 123 | else: 124 | return None, None, None 125 | remainder = remainder.split(']', 1) 126 | if len(remainder) == 2: 127 | layer_type, remainder = remainder 128 | else: 129 | return None, None, None 130 | if remainder.replace(' ', '')[0] == '#': 131 | remainder = remainder.split('\n', 1)[1] 132 | 133 | layer_param_block, remainder = remainder.split('\n\n', 1) 134 | layer_param_lines = layer_param_block.split('\n')[1:] 135 | layer_name = str(self.layer_counter).zfill(3) + '_' + layer_type 136 | layer_dict = dict(type=layer_type) 137 | if layer_type in self.supported_layers: 138 | for param_line in layer_param_lines: 139 | if param_line[0] == '#': 140 | continue 141 | param_type, param_value = self._parse_params(param_line) 142 | layer_dict[param_type] = param_value 143 | self.layer_counter += 1 144 | return layer_dict, layer_name, remainder 145 | 146 | def _parse_params(self, param_line): 147 | """Identifies the parameters contained in one of the cfg file and returns 148 | them in the required format for each parameter type, e.g. as a list, an int or a float. 149 | 150 | Keyword argument: 151 | param_line -- one parsed line within a layer block 152 | """ 153 | param_line = param_line.replace(' ', '') 154 | param_type, param_value_raw = param_line.split('=') 155 | param_value = None 156 | if param_type == 'layers': 157 | layer_indexes = list() 158 | for index in param_value_raw.split(','): 159 | layer_indexes.append(int(index)) 160 | param_value = layer_indexes 161 | elif isinstance(param_value_raw, str) and not param_value_raw.isalpha(): 162 | condition_param_value_positive = param_value_raw.isdigit() 163 | condition_param_value_negative = param_value_raw[0] == '-' and \ 164 | param_value_raw[1:].isdigit() 165 | if condition_param_value_positive or condition_param_value_negative: 166 | param_value = int(param_value_raw) 167 | else: 168 | param_value = float(param_value_raw) 169 | else: 170 | param_value = str(param_value_raw) 171 | return param_type, param_value 172 | 173 | 174 | class MajorNodeSpecs(object): 175 | """Helper class used to store the names of ONNX output names, 176 | corresponding to the output of a DarkNet layer and its output channels. 177 | Some DarkNet layers are not created and there is no corresponding ONNX node, 178 | but we still need to track them in order to set up skip connections. 179 | """ 180 | 181 | def __init__(self, name, channels): 182 | """ Initialize a MajorNodeSpecs object. 183 | 184 | Keyword arguments: 185 | name -- name of the ONNX node 186 | channels -- number of output channels of this node 187 | """ 188 | self.name = name 189 | self.channels = channels 190 | self.created_onnx_node = False 191 | if name is not None and isinstance(channels, int) and channels > 0: 192 | self.created_onnx_node = True 193 | 194 | 195 | class ConvParams(object): 196 | """Helper class to store the hyper parameters of a Conv layer, 197 | including its prefix name in the ONNX graph and the expected dimensions 198 | of weights for convolution, bias, and batch normalization. 199 | 200 | Additionally acts as a wrapper for generating safe names for all 201 | weights, checking on feasible combinations. 
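 For example, 'bn'-category names may only be generated when the layer actually uses batch normalization, and a 'conv' bias only when it does not.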
202 | """ 203 | 204 | def __init__(self, node_name, batch_normalize, conv_weight_dims): 205 | """Constructor based on the base node name (e.g. 101_convolutional), the batch 206 | normalization setting, and the convolutional weights shape. 207 | 208 | Keyword arguments: 209 | node_name -- base name of this YOLO convolutional layer 210 | batch_normalize -- bool value if batch normalization is used 211 | conv_weight_dims -- the dimensions of this layer's convolutional weights 212 | """ 213 | self.node_name = node_name 214 | self.batch_normalize = batch_normalize 215 | assert len(conv_weight_dims) == 4 216 | self.conv_weight_dims = conv_weight_dims 217 | 218 | def generate_param_name(self, param_category, suffix): 219 | """Generates a name based on two string inputs, 220 | and checks if the combination is valid.""" 221 | assert suffix 222 | assert param_category in ['bn', 'conv'] 223 | assert(suffix in ['scale', 'mean', 'var', 'weights', 'bias']) 224 | if param_category == 'bn': 225 | assert self.batch_normalize 226 | assert suffix in ['scale', 'bias', 'mean', 'var'] 227 | elif param_category == 'conv': 228 | assert suffix in ['weights', 'bias'] 229 | if suffix == 'bias': 230 | assert not self.batch_normalize 231 | param_name = self.node_name + '_' + param_category + '_' + suffix 232 | return param_name 233 | 234 | class UpsampleParams(object): 235 | #Helper class to store the scale parameter for an Upsample node. 236 | 237 | def __init__(self, node_name, value): 238 | """Constructor based on the base node name (e.g. 86_Upsample), 239 | and the value of the scale input tensor. 240 | 241 | Keyword arguments: 242 | node_name -- base name of this YOLO Upsample layer 243 | value -- the value of the scale input to the Upsample layer as a numpy array 244 | """ 245 | self.node_name = node_name 246 | self.value = value 247 | 248 | def generate_param_name(self): 249 | """Generates the scale parameter name for the Upsample node.""" 250 | param_name = self.node_name + '_' + "scale" 251 | return param_name 252 | 253 | class WeightLoader(object): 254 | """Helper class used for loading the serialized weights of a binary file stream 255 | and returning the initializers and the input tensors required for populating 256 | the ONNX graph with weights. 257 | """ 258 | 259 | def __init__(self, weights_file_path): 260 | """Initialized with a path to the YOLOv3 .weights file. 261 | 262 | Keyword argument: 263 | weights_file_path -- path to the weights file. 264 | """ 265 | self.weights_file = self._open_weights_file(weights_file_path) 266 | 267 | def load_upsample_scales(self, upsample_params): 268 | """Returns the initializers with the value of the scale input 269 | tensor given by upsample_params. 270 | 271 | Keyword argument: 272 | upsample_params -- a UpsampleParams object 273 | """ 274 | initializer = list() 275 | inputs = list() 276 | name = upsample_params.generate_param_name() 277 | shape = upsample_params.value.shape 278 | data = upsample_params.value 279 | scale_init = helper.make_tensor( 280 | name, TensorProto.FLOAT, shape, data) 281 | scale_input = helper.make_tensor_value_info( 282 | name, TensorProto.FLOAT, shape) 283 | initializer.append(scale_init) 284 | inputs.append(scale_input) 285 | return initializer, inputs 286 | 287 | 288 | def load_conv_weights(self, conv_params): 289 | """Returns the initializers with weights from the weights file and 290 | the input tensors of a convolutional layer for all corresponding ONNX nodes. 
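 When batch normalization is used, the convolution itself carries no bias term; the 'bn' bias (beta) is read first, followed by scale, mean and variance, matching the DarkNet serialization order.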
291 | 292 | Keyword argument: 293 | conv_params -- a ConvParams object 294 | """ 295 | initializer = list() 296 | inputs = list() 297 | if conv_params.batch_normalize: 298 | bias_init, bias_input = self._create_param_tensors( 299 | conv_params, 'bn', 'bias') 300 | bn_scale_init, bn_scale_input = self._create_param_tensors( 301 | conv_params, 'bn', 'scale') 302 | bn_mean_init, bn_mean_input = self._create_param_tensors( 303 | conv_params, 'bn', 'mean') 304 | bn_var_init, bn_var_input = self._create_param_tensors( 305 | conv_params, 'bn', 'var') 306 | initializer.extend( 307 | [bn_scale_init, bias_init, bn_mean_init, bn_var_init]) 308 | inputs.extend([bn_scale_input, bias_input, 309 | bn_mean_input, bn_var_input]) 310 | else: 311 | bias_init, bias_input = self._create_param_tensors( 312 | conv_params, 'conv', 'bias') 313 | initializer.append(bias_init) 314 | inputs.append(bias_input) 315 | conv_init, conv_input = self._create_param_tensors( 316 | conv_params, 'conv', 'weights') 317 | initializer.append(conv_init) 318 | inputs.append(conv_input) 319 | return initializer, inputs 320 | 321 | def _open_weights_file(self, weights_file_path): 322 | """Opens a YOLOv3 DarkNet file stream and skips the header. 323 | 324 | Keyword argument: 325 | weights_file_path -- path to the weights file. 326 | """ 327 | weights_file = open(weights_file_path, 'rb') 328 | length_header = 5 329 | np.ndarray( 330 | shape=(length_header, ), dtype='int32', buffer=weights_file.read( 331 | length_header * 4)) 332 | return weights_file 333 | 334 | def _create_param_tensors(self, conv_params, param_category, suffix): 335 | """Creates the initializers with weights from the weights file together with 336 | the input tensors. 337 | 338 | Keyword arguments: 339 | conv_params -- a ConvParams object 340 | param_category -- the category of parameters to be created ('bn' or 'conv') 341 | suffix -- a string determining the sub-type of above param_category (e.g., 342 | 'weights' or 'bias') 343 | """ 344 | param_name, param_data, param_data_shape = self._load_one_param_type( 345 | conv_params, param_category, suffix) 346 | 347 | initializer_tensor = helper.make_tensor( 348 | param_name, TensorProto.FLOAT, param_data_shape, param_data) 349 | input_tensor = helper.make_tensor_value_info( 350 | param_name, TensorProto.FLOAT, param_data_shape) 351 | return initializer_tensor, input_tensor 352 | 353 | def _load_one_param_type(self, conv_params, param_category, suffix): 354 | """Deserializes the weights from a file stream in the DarkNet order. 
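 The stream is read sequentially as raw float32 values with no per-tensor headers, so the file offset implicitly tracks the layer order of the cfg file.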
355 | 356 | Keyword arguments: 357 | conv_params -- a ConvParams object 358 | param_category -- the category of parameters to be created ('bn' or 'conv') 359 | suffix -- a string determining the sub-type of above param_category (e.g., 360 | 'weights' or 'bias') 361 | """ 362 | param_name = conv_params.generate_param_name(param_category, suffix) 363 | channels_out, channels_in, filter_h, filter_w = conv_params.conv_weight_dims 364 | if param_category == 'bn': 365 | param_shape = [channels_out] 366 | elif param_category == 'conv': 367 | if suffix == 'weights': 368 | param_shape = [channels_out, channels_in, filter_h, filter_w] 369 | elif suffix == 'bias': 370 | param_shape = [channels_out] 371 | param_size = np.product(np.array(param_shape)) 372 | param_data = np.ndarray( 373 | shape=param_shape, 374 | dtype='float32', 375 | buffer=self.weights_file.read(param_size * 4)) 376 | param_data = param_data.flatten().astype(float) 377 | return param_name, param_data, param_shape 378 | 379 | 380 | class GraphBuilderONNX(object): 381 | """Class for creating an ONNX graph from a previously generated list of layer dictionaries.""" 382 | 383 | def __init__(self, output_tensors): 384 | """Initialize with all DarkNet default parameters used creating YOLOv3, 385 | and specify the output tensors as an OrderedDict for their output dimensions 386 | with their names as keys. 387 | 388 | Keyword argument: 389 | output_tensors -- the output tensors as an OrderedDict containing the keys' 390 | output dimensions 391 | """ 392 | self.output_tensors = output_tensors 393 | self._nodes = list() 394 | self.graph_def = None 395 | self.input_tensor = None 396 | self.epsilon_bn = 1e-5 397 | self.momentum_bn = 0.99 398 | self.alpha_lrelu = 0.1 399 | self.param_dict = OrderedDict() 400 | self.major_node_specs = list() 401 | self.batch_size = 1 402 | 403 | def build_onnx_graph( 404 | self, 405 | layer_configs, 406 | weights_file_path, 407 | verbose=True): 408 | """Iterate over all layer configs (parsed from the DarkNet representation 409 | of YOLOv3-608), create an ONNX graph, populate it with weights from the weights 410 | file and return the graph definition. 411 | 412 | Keyword arguments: 413 | layer_configs -- an OrderedDict object with all parsed layers' configurations 414 | weights_file_path -- location of the weights file 415 | verbose -- toggles if the graph is printed after creation (default: True) 416 | """ 417 | for layer_name in layer_configs.keys(): 418 | layer_dict = layer_configs[layer_name] 419 | major_node_specs = self._make_onnx_node(layer_name, layer_dict) 420 | if major_node_specs.name is not None: 421 | self.major_node_specs.append(major_node_specs) 422 | outputs = list() 423 | for tensor_name in self.output_tensors.keys(): 424 | output_dims = [self.batch_size, ] + \ 425 | self.output_tensors[tensor_name] 426 | output_tensor = helper.make_tensor_value_info( 427 | tensor_name, TensorProto.FLOAT, output_dims) 428 | outputs.append(output_tensor) 429 | inputs = [self.input_tensor] 430 | weight_loader = WeightLoader(weights_file_path) 431 | initializer = list() 432 | # If a layer has parameters, add them to the initializer and input lists. 
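 In this graph only convolutional and upsample layers carry parameters.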
433 | for layer_name in self.param_dict.keys(): 434 | _, layer_type = layer_name.split('_', 1) 435 | params = self.param_dict[layer_name] 436 | if layer_type == 'convolutional': 437 | initializer_layer, inputs_layer = weight_loader.load_conv_weights( 438 | params) 439 | initializer.extend(initializer_layer) 440 | inputs.extend(inputs_layer) 441 | elif layer_type == "upsample": 442 | initializer_layer, inputs_layer = weight_loader.load_upsample_scales( 443 | params) 444 | initializer.extend(initializer_layer) 445 | inputs.extend(inputs_layer) 446 | del weight_loader 447 | self.graph_def = helper.make_graph( 448 | nodes=self._nodes, 449 | name='YOLOv3-608', 450 | inputs=inputs, 451 | outputs=outputs, 452 | initializer=initializer 453 | ) 454 | if verbose: 455 | print(helper.printable_graph(self.graph_def)) 456 | model_def = helper.make_model(self.graph_def, 457 | producer_name='NVIDIA TensorRT sample') 458 | return model_def 459 | 460 | def _make_onnx_node(self, layer_name, layer_dict): 461 | """Take in a layer parameter dictionary, choose the correct function for 462 | creating an ONNX node and store the information important to graph creation 463 | as a MajorNodeSpec object. 464 | 465 | Keyword arguments: 466 | layer_name -- the layer's name (also the corresponding key in layer_configs) 467 | layer_dict -- a layer parameter dictionary (one element of layer_configs) 468 | """ 469 | layer_type = layer_dict['type'] 470 | if self.input_tensor is None: 471 | if layer_type == 'net': 472 | major_node_output_name, major_node_output_channels = self._make_input_tensor( 473 | layer_name, layer_dict) 474 | major_node_specs = MajorNodeSpecs(major_node_output_name, 475 | major_node_output_channels) 476 | else: 477 | raise ValueError('The first node has to be of type "net".') 478 | else: 479 | node_creators = dict() 480 | node_creators['convolutional'] = self._make_conv_node 481 | node_creators['shortcut'] = self._make_shortcut_node 482 | node_creators['route'] = self._make_route_node 483 | node_creators['upsample'] = self._make_upsample_node 484 | node_creators['maxpool'] = self._make_maxpool_node 485 | 486 | if layer_type in node_creators.keys(): 487 | major_node_output_name, major_node_output_channels = \ 488 | node_creators[layer_type](layer_name, layer_dict) 489 | major_node_specs = MajorNodeSpecs(major_node_output_name, 490 | major_node_output_channels) 491 | else: 492 | print( 493 | 'Layer of type %s not supported, skipping ONNX node generation.' % 494 | layer_type) 495 | major_node_specs = MajorNodeSpecs(layer_name, 496 | None) 497 | return major_node_specs 498 | 499 | def _make_input_tensor(self, layer_name, layer_dict): 500 | """Create an ONNX input tensor from a 'net' layer and store the batch size. 501 | 502 | Keyword arguments: 503 | layer_name -- the layer's name (also the corresponding key in layer_configs) 504 | layer_dict -- a layer parameter dictionary (one element of layer_configs) 505 | """ 506 | batch_size = layer_dict['batch'] 507 | channels = layer_dict['channels'] 508 | height = layer_dict['height'] 509 | width = layer_dict['width'] 510 | self.batch_size = batch_size 511 | input_tensor = helper.make_tensor_value_info( 512 | str(layer_name), TensorProto.FLOAT, [ 513 | batch_size, channels, height, width]) 514 | self.input_tensor = input_tensor 515 | return layer_name, channels 516 | 517 | def _get_previous_node_specs(self, target_index=-1): 518 | """Get a previously generated ONNX node (skip those that were not generated). 
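 It walks backwards through major_node_specs from target_index until it finds a spec whose created_onnx_node flag is set.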
517 |     def _get_previous_node_specs(self, target_index=-1):
518 |         """Get a previously generated ONNX node (skip those that were not generated).
519 |         A target index can be passed to jump to a specific node instead.
520 | 
521 |         Keyword arguments:
522 |         target_index -- optional index for jumping to a specific node (default: -1
523 |             for the previous element)
524 |         """
525 |         previous_node = None
526 |         for node in self.major_node_specs[target_index::-1]:
527 |             if node.created_onnx_node:
528 |                 previous_node = node
529 |                 break
530 |         assert previous_node is not None
531 |         return previous_node
532 | 
533 |     def _make_conv_node(self, layer_name, layer_dict):
534 |         """Create an ONNX Conv node with optional batch normalization and
535 |         activation nodes.
536 | 
537 |         Keyword arguments:
538 |         layer_name -- the layer's name (also the corresponding key in layer_configs)
539 |         layer_dict -- a layer parameter dictionary (one element of layer_configs)
540 |         """
541 |         previous_node_specs = self._get_previous_node_specs()
542 |         inputs = [previous_node_specs.name]
543 |         previous_channels = previous_node_specs.channels
544 |         kernel_size = layer_dict['size']
545 |         stride = layer_dict['stride']
546 |         filters = layer_dict['filters']
547 |         batch_normalize = False
548 |         if 'batch_normalize' in layer_dict.keys(
549 |         ) and layer_dict['batch_normalize'] == 1:
550 |             batch_normalize = True
551 | 
552 |         kernel_shape = [kernel_size, kernel_size]
553 |         weights_shape = [filters, previous_channels] + kernel_shape
554 |         conv_params = ConvParams(layer_name, batch_normalize, weights_shape)
555 | 
556 |         strides = [stride, stride]
557 |         dilations = [1, 1]
558 |         weights_name = conv_params.generate_param_name('conv', 'weights')
559 |         inputs.append(weights_name)
560 |         if not batch_normalize:
561 |             bias_name = conv_params.generate_param_name('conv', 'bias')
562 |             inputs.append(bias_name)
563 | 
564 |         conv_node = helper.make_node(
565 |             'Conv',
566 |             inputs=inputs,
567 |             outputs=[layer_name],
568 |             kernel_shape=kernel_shape,
569 |             strides=strides,
570 |             auto_pad='SAME_LOWER',
571 |             dilations=dilations,
572 |             name=layer_name
573 |         )
574 |         self._nodes.append(conv_node)
575 |         inputs = [layer_name]
576 |         layer_name_output = layer_name
577 | 
578 |         if batch_normalize:
579 |             layer_name_bn = layer_name + '_bn'
580 |             bn_param_suffixes = ['scale', 'bias', 'mean', 'var']
581 |             for suffix in bn_param_suffixes:
582 |                 bn_param_name = conv_params.generate_param_name('bn', suffix)
583 |                 inputs.append(bn_param_name)
584 |             batchnorm_node = helper.make_node(
585 |                 'BatchNormalization',
586 |                 inputs=inputs,
587 |                 outputs=[layer_name_bn],
588 |                 epsilon=self.epsilon_bn,
589 |                 momentum=self.momentum_bn,
590 |                 name=layer_name_bn
591 |             )
592 |             self._nodes.append(batchnorm_node)
593 |             inputs = [layer_name_bn]
594 |             layer_name_output = layer_name_bn
595 | 
596 |         if layer_dict['activation'] == 'leaky':
597 |             layer_name_lrelu = layer_name + '_lrelu'
598 | 
599 |             lrelu_node = helper.make_node(
600 |                 'LeakyRelu',
601 |                 inputs=inputs,
602 |                 outputs=[layer_name_lrelu],
603 |                 name=layer_name_lrelu,
604 |                 alpha=self.alpha_lrelu
605 |             )
606 |             self._nodes.append(lrelu_node)
607 |             inputs = [layer_name_lrelu]
608 |             layer_name_output = layer_name_lrelu
609 |         elif layer_dict['activation'] == 'linear':
610 |             pass
611 |         else:
612 |             print('Activation not supported.')
613 | 
614 |         self.param_dict[layer_name] = conv_params
615 |         return layer_name_output, filters
616 | 
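    # Illustrative note (not part of the original sample): a DarkNet layer
    # such as '001_convolutional' with batch_normalize=1 and activation=leaky
    # is expanded by _make_conv_node into three chained ONNX nodes:
    #
    #     Conv ('001_convolutional')
    #       -> BatchNormalization ('001_convolutional_bn')
    #         -> LeakyRelu ('001_convolutional_lrelu')
    #
    # and the '_lrelu' name is returned, so the next layer consumes the
    # activated tensor. Because batch normalization is emitted as its own
    # node, the Conv node gets no bias input in that case.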
617 |     def _make_shortcut_node(self, layer_name, layer_dict):
618 |         """Create an ONNX Add node with the shortcut properties from
619 |         the DarkNet-based graph.
620 | 
621 |         Keyword arguments:
622 |         layer_name -- the layer's name (also the corresponding key in layer_configs)
623 |         layer_dict -- a layer parameter dictionary (one element of layer_configs)
624 |         """
625 |         shortcut_index = layer_dict['from']
626 |         activation = layer_dict['activation']
627 |         assert activation == 'linear'
628 | 
629 |         first_node_specs = self._get_previous_node_specs()
630 |         second_node_specs = self._get_previous_node_specs(
631 |             target_index=shortcut_index)
632 |         assert first_node_specs.channels == second_node_specs.channels
633 |         channels = first_node_specs.channels
634 |         inputs = [first_node_specs.name, second_node_specs.name]
635 |         shortcut_node = helper.make_node(
636 |             'Add',
637 |             inputs=inputs,
638 |             outputs=[layer_name],
639 |             name=layer_name,
640 |         )
641 |         self._nodes.append(shortcut_node)
642 |         return layer_name, channels
643 | 
644 |     def _make_route_node(self, layer_name, layer_dict):
645 |         """If the 'layers' parameter from the DarkNet configuration is only one index, continue
646 |         node creation at the indicated (negative) index. Otherwise, create an ONNX Concat node
647 |         with the route properties from the DarkNet-based graph.
648 | 
649 |         Keyword arguments:
650 |         layer_name -- the layer's name (also the corresponding key in layer_configs)
651 |         layer_dict -- a layer parameter dictionary (one element of layer_configs)
652 |         """
653 |         route_node_indexes = layer_dict['layers']
654 |         if len(route_node_indexes) == 1:
655 |             split_index = route_node_indexes[0]
656 |             assert split_index < 0
657 |             # Increment by one because we skipped the YOLO layer:
658 |             split_index += 1
659 |             self.major_node_specs = self.major_node_specs[:split_index]
660 |             layer_name = None
661 |             channels = None
662 |         else:
663 |             inputs = list()
664 |             channels = 0
665 |             for index in route_node_indexes:
666 |                 if index > 0:
667 |                     # Increment by one because we count the input as a node (DarkNet
668 |                     # does not):
669 |                     index += 1
670 |                 route_node_specs = self._get_previous_node_specs(
671 |                     target_index=index)
672 |                 inputs.append(route_node_specs.name)
673 |                 channels += route_node_specs.channels
674 |             assert inputs
675 |             assert channels > 0
676 | 
677 |             route_node = helper.make_node(
678 |                 'Concat',
679 |                 axis=1,
680 |                 inputs=inputs,
681 |                 outputs=[layer_name],
682 |                 name=layer_name,
683 |             )
684 |             self._nodes.append(route_node)
685 |         return layer_name, channels
686 | 
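    # Illustrative note (not part of the original sample): in the standard
    # yolov3-tiny.cfg, a route with a single negative index (e.g. 'layers = -4')
    # only rewinds graph construction to an earlier node and emits no ONNX node,
    # while a route with several indices (e.g. 'layers = -1, 8') becomes a
    # Concat over axis=1 (the channel axis), so the inputs' channel counts add
    # up in the returned channel value.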
687 |     def _make_upsample_node(self, layer_name, layer_dict):
688 |         """Create an ONNX Upsample node with the properties from
689 |         the DarkNet-based graph.
690 | 
691 |         Keyword arguments:
692 |         layer_name -- the layer's name (also the corresponding key in layer_configs)
693 |         layer_dict -- a layer parameter dictionary (one element of layer_configs)
694 |         """
695 |         upsample_factor = float(layer_dict['stride'])
696 |         # Create the scales array with node parameters:
697 |         scales = np.array([1.0, 1.0, upsample_factor, upsample_factor]).astype(np.float32)
698 |         previous_node_specs = self._get_previous_node_specs()
699 |         inputs = [previous_node_specs.name]
700 | 
701 |         channels = previous_node_specs.channels
702 |         assert channels > 0
703 |         upsample_params = UpsampleParams(layer_name, scales)
704 |         scales_name = upsample_params.generate_param_name()
705 |         # For ONNX opset >= 9, the Upsample node takes the scales array as an input:
706 |         inputs.append(scales_name)
707 | 
708 |         upsample_node = helper.make_node(
709 |             'Upsample',
710 |             mode='nearest',
711 |             inputs=inputs,
712 |             outputs=[layer_name],
713 |             name=layer_name,
714 |         )
715 |         self._nodes.append(upsample_node)
716 |         self.param_dict[layer_name] = upsample_params
717 |         return layer_name, channels
718 | 
719 |     def _make_maxpool_node(self, layer_name, layer_dict):  # Create an ONNX MaxPool node from a DarkNet 'maxpool' layer.
720 |         stride = layer_dict['stride']
721 |         kernel_size = layer_dict['size']
722 |         previous_node_specs = self._get_previous_node_specs()
723 |         inputs = [previous_node_specs.name]
724 |         channels = previous_node_specs.channels
725 |         kernel_shape = [kernel_size, kernel_size]
726 |         strides = [stride, stride]
727 |         assert channels > 0
728 |         maxpool_node = helper.make_node(
729 |             'MaxPool',
730 |             inputs=inputs,
731 |             outputs=[layer_name],
732 |             kernel_shape=kernel_shape,
733 |             strides=strides,
734 |             auto_pad='SAME_UPPER',
735 |             name=layer_name,
736 |         )
737 |         self._nodes.append(maxpool_node)
738 |         return layer_name, channels
739 | 
740 | 
741 | def generate_md5_checksum(local_path):
742 |     """Return the MD5 checksum of a local file.
743 | 
744 |     Keyword argument:
745 |     local_path -- path of the file whose checksum shall be generated
746 |     """
747 |     with open(local_path, 'rb') as local_file:  # binary mode, so hashlib gets bytes on any platform
748 |         data = local_file.read()
749 |         return hashlib.md5(data).hexdigest()
750 | 
751 | 
752 | def download_file(local_path, link, checksum_reference=None):
753 |     """Check if a local file is present and download it from the specified link otherwise.
754 |     If checksum_reference is specified, the file's MD5 checksum is compared against the
755 |     expected value.
756 | 
757 |     Keyword arguments:
758 |     local_path -- path under which the file shall be stored locally
759 |     link -- link where the file shall be downloaded from if it is not found locally
760 |     checksum_reference -- expected MD5 checksum of the file
761 |     """
762 |     if not os.path.exists(local_path):
763 |         print('Downloading from %s, this may take a while...' % link)
764 |         wget.download(link, local_path)
765 |         print()
766 |     if checksum_reference is not None:
767 |         checksum = generate_md5_checksum(local_path)
768 |         if checksum != checksum_reference:
769 |             raise ValueError(
770 |                 'The MD5 checksum of local file %s differs from %s, please manually remove \
771 |                 the file and try again.' %
772 |                 (local_path, checksum_reference))
773 |     return local_path
774 | 
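# Illustrative usage (not part of the original sample): main() below reads the
# weights from a fixed local path, but download_file() could fetch them, e.g.
# from the widely used upstream location (URL given as an example only):
#
#     weights_path = download_file(
#         'yolov3-tiny.weights',
#         'https://pjreddie.com/media/files/yolov3-tiny.weights')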
775 | 
776 | def main():
777 |     """Run the DarkNet-to-ONNX conversion for YOLOv3-tiny-416."""
778 |     img_size = 416
779 |     # Have to use python 2 due to hashlib compatibility
780 |     if sys.version_info[0] > 2:
781 |         raise Exception("This script is only compatible with python2, please re-run this script with python2. The rest of this sample can be run with either version of python.")
782 | 
783 |     # Path to the DarkNet config for YOLOv3-tiny (see "custom settings" in the README):
784 |     cfg_file_path = '/home/nvidia/yolov3-tiny2onnx2trt/yolov3-tiny.cfg'
785 | 
786 |     # These are the only layers DarkNetParser will extract parameters from. The two layers of
787 |     # type 'yolo' are not parsed in detail because they are included in the post-processing later:
788 |     supported_layers = ['net', 'convolutional', 'shortcut',
789 |                         'route', 'upsample', 'maxpool']
790 | 
791 |     # Create a DarkNetParser object and then use it to generate an OrderedDict with all
792 |     # layers' configs from the cfg file:
793 |     parser = DarkNetParser(supported_layers)
794 |     layer_configs = parser.parse_cfg_file(cfg_file_path)
795 |     # We do not need the parser anymore after we got layer_configs:
796 |     del parser
797 | 
798 |     # In the above layer_configs, there are two output tensors whose dimensions we
799 |     # need to know (in CHW format):
800 |     output_tensor_dims = OrderedDict()
801 |     kernel_size_1 = img_size // 32
802 |     kernel_size_2 = img_size // 16
803 |     output_tensor_dims['016_convolutional'] = [255, kernel_size_1, kernel_size_1]  # 255 = 3 * (80 + 5); must match 'filters=' in the cfg
804 |     output_tensor_dims['023_convolutional'] = [255, kernel_size_2, kernel_size_2]
805 | 
806 |     # Create a GraphBuilderONNX object with the known output tensor dimensions:
807 |     builder = GraphBuilderONNX(output_tensor_dims)
808 | 
809 |     # Populate the network with the pretrained weights; the weights file has to be
810 |     # present locally at this path (this script does not download it):
811 |     weights_file_path = '/home/nvidia/yolov3-tiny2onnx2trt/yolov3-tiny.weights'
812 | 
813 |     # Now generate an ONNX graph with weights from the previously parsed layer configurations
814 |     # and the weights file:
815 |     yolov3_model_def = builder.build_onnx_graph(
816 |         layer_configs=layer_configs,
817 |         weights_file_path=weights_file_path,
818 |         verbose=True)
819 |     # Once we have the model definition, we do not need the builder anymore:
820 |     del builder
821 | 
822 |     # Perform a sanity check on the ONNX model definition:
823 |     onnx.checker.check_model(yolov3_model_def)
824 | 
825 |     # Serialize the generated ONNX graph to this file:
826 |     output_file_path = 'yolov3-tiny.onnx'
827 |     onnx.save(yolov3_model_def, output_file_path)
828 | 
829 | if __name__ == '__main__':
830 |     main()
831 | 
--------------------------------------------------------------------------------
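A quick, optional sanity check of the generated yolov3-tiny.onnx before the TensorRT step -- an illustrative sketch using only the public onnx API; the expected shapes assume the stock 416x416, 80-class configuration above:

    import onnx

    model = onnx.load('yolov3-tiny.onnx')
    onnx.checker.check_model(model)
    # Print the graph's declared outputs; for this configuration they should be
    # 016_convolutional [1, 255, 13, 13] and 023_convolutional [1, 255, 26, 26].
    for output in model.graph.output:
        dims = [d.dim_value for d in output.type.tensor_type.shape.dim]
        print('%s %s' % (output.name, dims))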