├── images
│   ├── dog.jpg
│   ├── person.jpg
│   └── person2.jpg
├── base_module.py
├── README.md
├── alpha_yolo3_module_drawing.py
├── onnx_to_trt_1batch.py
├── onnx_to_trt_multibatch.py
├── bbox.py
├── common.py
├── trt_yolo3_module_1batch.py
├── trt_yolo3_module_multibatch.py
├── yolov3-608.cfg
├── util.py
└── weight_to_onnx.py
/images/dog.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Cw-zero/TensorRT_yolo3_module/HEAD/images/dog.jpg
--------------------------------------------------------------------------------
/images/person.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Cw-zero/TensorRT_yolo3_module/HEAD/images/person.jpg
--------------------------------------------------------------------------------
/images/person2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Cw-zero/TensorRT_yolo3_module/HEAD/images/person2.jpg
--------------------------------------------------------------------------------
/base_module.py:
--------------------------------------------------------------------------------
1 | from abc import ABC, abstractmethod
2 | 
3 | class BaseModule(ABC):
4 | 
5 |     @abstractmethod
6 |     def process_frame(self):
7 |         pass
8 | 
9 |     @abstractmethod
10 |     def process_frame_batch(self):
11 |         pass
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # TensorRT_yolo3_module
2 | 
3 | ------
4 | 
5 | ## 1. Install TensorRT on Ubuntu
6 | ## 2. Test TensorRT_yolo3_module
7 | - a. Download yolov3.weights from [this link](https://pjreddie.com/media/files/yolov3.weights) and rename it to **yolov3-608.weights**.
8 | - b. Run `python2 weight_to_onnx.py`. This script must be run with Python 2.7; it produces a file named **yolov3-608.onnx**.
9 | - c1. Run `python3 onnx_to_trt_1batch.py` if you only need to process one image at a time (for example, you have a single camera). This script requires Python 3.x and produces **yolov3-608.trt**, which is the file we ultimately need.
10 | - c2. Run `python3 onnx_to_trt_multibatch.py` if you need to process several images at a time (for example, you have multiple cameras). This script also requires Python 3.x and produces **yolov3-608.trt**, which is the file we ultimately need. The engine is built with **FP16** precision, so the speed-up is more pronounced.
11 | - d1. Run `python3 trt_yolo3_module_1batch.py` if you chose **c1**.
12 | - d2. Run `python3 trt_yolo3_module_multibatch.py` if you chose **c2**. It detects 4 images at a time.
13 | 
14 | ## 3. Import TensorRT_yolo3_module
15 | - The detector is packaged as a **class**, so you can use it directly with an `import` statement, as sketched below.
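A minimal usage sketch for section 3, based on the `__main__` block of `trt_yolo3_module_1batch.py` and the frame-dict convention used by `process_frame_batch`. It assumes the engine file **yolov3-608.trt** has already been built (step c1) and that the snippet runs from the repository root; it is an illustration, not an additional file of the repository.

```python
# Usage sketch (assumes yolov3-608.trt was already built via step c1
# and that this snippet sits next to trt_yolo3_module_1batch.py).
import cv2
from trt_yolo3_module_1batch import trt_yolo3_module

# The module is configured with the engine path and a CUDA flag.
detector = trt_yolo3_module({'trt': 'yolov3-608.trt', 'use_cuda': True})

# Each frame is wrapped in a dict with 'img', 'data' and 'info' fields;
# the detector writes its results back into the 'data' field.
frame = {'img': cv2.imread('./images/person.jpg'), 'data': {}, 'info': {}}
results = detector.process_frame_batch([frame])

for r in results:
    print(r['data']['number'])      # number of detections
    print(r['data']['box_list'])    # boxes as [x1, x2, y1, y2]
    print(r['data']['class_list'])  # class labels (currently 'person' only)
    print(r['data']['conf_list'])   # confidence scores
```

For the multi-batch engine the same pattern applies with `trt_yolo3_module_multibatch` and a list of 4 frame dicts, as in its `__main__` block.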
16 | 
--------------------------------------------------------------------------------
/alpha_yolo3_module_drawing.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | import cv2
4 | """
5 | Drawing helpers for the alpha_yolo3 module.
6 | 
7 | The module must provide the following function:
8 | drawing(frame,result_dict)
9 | 
10 | Note that the dict passed in should follow this form:
11 | dict = {}
12 | dict['info']={'frame_id':1,'camera_id':0} # the 'info' field holds basic information about the image
13 | dict['img']=@#@#@#@##@# # a numpy.array opened with OpenCV
14 | dict['data']={'number':1,'box_list':[[30,10,123,23]]} # the 'data' field holds the data produced by the algorithm
15 | 
16 | """
17 | 
18 | 
19 | def drawing(frame,result_dict):
20 |     """
21 |     Draw on the input image according to the information in result_dict.
22 | 
23 |     Parameters:
24 |         frame: image # an image opened with OpenCV, in numpy.array format
25 |         result_dict: dict containing the 'info' and 'data' fields used for drawing
26 | 
27 |     Returns:
28 |         None
29 |     """
30 |     draw_bbx(frame,result_dict)
31 | 
32 | def draw_bbx(frame,result_dict):
33 |     class_color_scheme = {'person':(0,255,255),'motorbike':(72,118,255),'car':(255,191,0),'bicycle':(0,128,255),'umbrella':(72,118,255),'truck':(152,251,152),'handbag':(255,165,0),'backpack':(160,32,240)}
34 |     if 'box_list' in result_dict['data']:
35 |         if 'class_list' in result_dict['data']:
36 |             box_list = result_dict['data']['box_list']
37 |             class_list = result_dict['data']['class_list']
38 |             i = 0
39 |             for j in range(len(box_list)):
40 |                 xmin = box_list[j][0]
41 |                 xmax = box_list[j][1]
42 |                 ymin = box_list[j][2]
43 |                 ymax = box_list[j][3]
44 |                 class_name = class_list[j]
45 |                 cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), class_color_scheme[class_name], 2)
46 |                 cv2.putText(frame, '{}_{}'.format(class_name,i), (xmin+10, ymin+10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, class_color_scheme[class_name])
47 |                 i+=1
--------------------------------------------------------------------------------
/onnx_to_trt_1batch.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | 
3 | import torch
4 | import numpy as np
5 | import tensorrt as trt
6 | import pycuda.driver as cuda
7 | import pycuda.autoinit
8 | 
9 | import sys, os
10 | sys.path.insert(1, os.path.join(sys.path[0], ".."))
11 | import common
12 | 
13 | TRT_LOGGER = trt.Logger()
14 | 
15 | def get_engine(onnx_file_path, engine_file_path=""):
16 |     """Attempts to load a serialized engine if available, otherwise builds a new TensorRT engine and saves it."""
17 |     def build_engine():
18 |         """Takes an ONNX file and creates a TensorRT engine to run inference with"""
19 |         with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
20 |             # builder.fp16_mode = True
21 |             # builder.strict_type_constraints = True
22 | 
23 |             builder.max_workspace_size = 1 << 30 # 1GB
24 |             builder.max_batch_size = 1
25 |             # Parse model file
26 |             if not os.path.exists(onnx_file_path):
27 |                 print('ONNX file {} not found, please run yolov3_to_onnx.py first to generate it.'.format(onnx_file_path))
28 |                 exit(0)
29 |             print('Loading ONNX file from path {}...'.format(onnx_file_path))
30 |             with open(onnx_file_path, 'rb') as model:
31 |                 print('Beginning ONNX file parsing')
32 |                 parser.parse(model.read())
33 |             print('Completed parsing of ONNX file')
34 |             print('Building an engine from file {}; this may take a while...'.format(onnx_file_path))
35 |             engine = builder.build_cuda_engine(network)
36 |             print("Completed creating Engine")
37 |             with open(engine_file_path, "wb") as f:
38 |                 f.write(engine.serialize())
39 |             # return engine
40 | 
41 |     if os.path.exists(engine_file_path):
42 |         print("Please 
delete yolov3-608.trt firstly, otherwise you can not get a new file") 43 | else: 44 | build_engine() 45 | 46 | def main(): 47 | 48 | """Create a TensorRT engine for ONNX-based YOLOv3-608 and run inference.""" 49 | 50 | # Try to load a previously generated YOLOv3-608 network graph in ONNX format: 51 | onnx_file_path = 'yolov3-608.onnx' 52 | engine_file_path = "yolov3-608.trt" 53 | get_engine(onnx_file_path, engine_file_path) 54 | 55 | if __name__ == '__main__': 56 | main() 57 | -------------------------------------------------------------------------------- /onnx_to_trt_multibatch.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import torch 4 | import numpy as np 5 | import tensorrt as trt 6 | import pycuda.driver as cuda 7 | import pycuda.autoinit 8 | 9 | import sys, os 10 | sys.path.insert(1, os.path.join(sys.path[0], "..")) 11 | import common 12 | 13 | TRT_LOGGER = trt.Logger() 14 | 15 | def get_engine(onnx_file_path, engine_file_path=""): 16 | """Attempts to load a serialized engine if available, otherwise builds a new TensorRT engine and saves it.""" 17 | def build_engine(): 18 | """Takes an ONNX file and creates a TensorRT engine to run inference with""" 19 | with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.OnnxParser(network, TRT_LOGGER) as parser: 20 | builder.fp16_mode = True 21 | builder.strict_type_constraints = True 22 | 23 | builder.max_workspace_size = 1 << 30 # 1GB 24 | builder.max_batch_size = 4 25 | # Parse model file 26 | if not os.path.exists(onnx_file_path): 27 | print('ONNX file {} not found, please run yolov3_to_onnx.py first to generate it.'.format(onnx_file_path)) 28 | exit(0) 29 | print('Loading ONNX file from path {}...'.format(onnx_file_path)) 30 | with open(onnx_file_path, 'rb') as model: 31 | print('Beginning ONNX file parsing') 32 | parser.parse(model.read()) 33 | print('Completed parsing of ONNX file') 34 | print('Building an engine from file {}; this may take a while...'.format(onnx_file_path)) 35 | engine = builder.build_cuda_engine(network) 36 | print("Completed creating Engine") 37 | with open(engine_file_path, "wb") as f: 38 | f.write(engine.serialize()) 39 | # return engine 40 | 41 | if os.path.exists(engine_file_path): 42 | print("Please delete yolov3-608.trt firstly, otherwise you can not get a new file") 43 | else: 44 | build_engine() 45 | 46 | def main(): 47 | 48 | """Create a TensorRT engine for ONNX-based YOLOv3-608 and run inference.""" 49 | 50 | # Try to load a previously generated YOLOv3-608 network graph in ONNX format: 51 | onnx_file_path = 'yolov3-608.onnx' 52 | engine_file_path = "yolov3-608.trt" 53 | get_engine(onnx_file_path, engine_file_path) 54 | 55 | if __name__ == '__main__': 56 | main() 57 | -------------------------------------------------------------------------------- /bbox.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import torch 4 | import random 5 | 6 | import numpy as np 7 | import cv2 8 | 9 | def confidence_filter(result, confidence): 10 | conf_mask = (result[:,:,4] > confidence).float().unsqueeze(2) 11 | result = result*conf_mask 12 | 13 | return result 14 | 15 | def confidence_filter_cls(result, confidence): 16 | max_scores = torch.max(result[:,:,5:25], 2)[0] 17 | res = torch.cat((result, max_scores),2) 18 | print(res.shape) 19 | 20 | 21 | cond_1 = (res[:,:,4] > confidence).float() 22 | cond_2 = (res[:,:,25] > 0.995).float() 
23 | 24 | conf = cond_1 + cond_2 25 | conf = torch.clamp(conf, 0.0, 1.0) 26 | conf = conf.unsqueeze(2) 27 | result = result*conf 28 | return result 29 | 30 | 31 | 32 | def get_abs_coord(box): 33 | box[2], box[3] = abs(box[2]), abs(box[3]) 34 | x1 = (box[0] - box[2]/2) - 1 35 | y1 = (box[1] - box[3]/2) - 1 36 | x2 = (box[0] + box[2]/2) - 1 37 | y2 = (box[1] + box[3]/2) - 1 38 | return x1, y1, x2, y2 39 | 40 | 41 | 42 | def sanity_fix(box): 43 | if (box[0] > box[2]): 44 | box[0], box[2] = box[2], box[0] 45 | 46 | if (box[1] > box[3]): 47 | box[1], box[3] = box[3], box[1] 48 | 49 | return box 50 | 51 | def bbox_iou(box1, box2): 52 | """ 53 | Returns the IoU of two bounding boxes 54 | 55 | 56 | """ 57 | #Get the coordinates of bounding boxes 58 | b1_x1, b1_y1, b1_x2, b1_y2 = box1[:,0], box1[:,1], box1[:,2], box1[:,3] 59 | b2_x1, b2_y1, b2_x2, b2_y2 = box2[:,0], box2[:,1], box2[:,2], box2[:,3] 60 | 61 | #get the corrdinates of the intersection rectangle 62 | inter_rect_x1 = torch.max(b1_x1, b2_x1) 63 | inter_rect_y1 = torch.max(b1_y1, b2_y1) 64 | inter_rect_x2 = torch.min(b1_x2, b2_x2) 65 | inter_rect_y2 = torch.min(b1_y2, b2_y2) 66 | 67 | #Intersection area 68 | 69 | inter_area = torch.max(inter_rect_x2 - inter_rect_x1 + 1,torch.zeros(inter_rect_x2.shape).cuda())*torch.max(inter_rect_y2 - inter_rect_y1 + 1, torch.zeros(inter_rect_x2.shape).cuda()) 70 | # inter_area = torch.max(inter_rect_x2 - inter_rect_x1 + 1,torch.zeros(inter_rect_x2.shape))*torch.max(inter_rect_y2 - inter_rect_y1 + 1, torch.zeros(inter_rect_x2.shape)) 71 | 72 | #Union Area 73 | b1_area = (b1_x2 - b1_x1 + 1)*(b1_y2 - b1_y1 + 1) 74 | b2_area = (b2_x2 - b2_x1 + 1)*(b2_y2 - b2_y1 + 1) 75 | 76 | iou = inter_area / (b1_area + b2_area - inter_area) 77 | 78 | return iou 79 | 80 | 81 | def pred_corner_coord(prediction): 82 | #Get indices of non-zero confidence bboxes 83 | ind_nz = torch.nonzero(prediction[:,:,4]).transpose(0,1).contiguous() 84 | 85 | box = prediction[ind_nz[0], ind_nz[1]] 86 | 87 | 88 | box_a = box.new(box.shape) 89 | box_a[:,0] = (box[:,0] - box[:,2]/2) 90 | box_a[:,1] = (box[:,1] - box[:,3]/2) 91 | box_a[:,2] = (box[:,0] + box[:,2]/2) 92 | box_a[:,3] = (box[:,1] + box[:,3]/2) 93 | box[:,:4] = box_a[:,:4] 94 | 95 | prediction[ind_nz[0], ind_nz[1]] = box 96 | 97 | return prediction 98 | 99 | 100 | 101 | 102 | def write(x, batches, results, colors, classes): 103 | c1 = tuple(x[1:3].int()) 104 | c2 = tuple(x[3:5].int()) 105 | img = results[int(x[0])] 106 | cls = int(x[-1]) 107 | label = "{0}".format(classes[cls]) 108 | color = random.choice(colors) 109 | cv2.rectangle(img, c1, c2,color, 1) 110 | t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1 , 1)[0] 111 | c2 = c1[0] + t_size[0] + 3, c1[1] + t_size[1] + 4 112 | cv2.rectangle(img, c1, c2,color, -1) 113 | cv2.putText(img, label, (c1[0], c1[1] + t_size[1] + 4), cv2.FONT_HERSHEY_PLAIN, 1, [225,255,255], 1); 114 | return img 115 | -------------------------------------------------------------------------------- /common.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 1993-2018 NVIDIA Corporation. All rights reserved. 3 | # 4 | # NOTICE TO LICENSEE: 5 | # 6 | # This source code and/or documentation ("Licensed Deliverables") are 7 | # subject to NVIDIA intellectual property rights under U.S. and 8 | # international Copyright laws. 
9 | # 10 | # These Licensed Deliverables contained herein is PROPRIETARY and 11 | # CONFIDENTIAL to NVIDIA and is being provided under the terms and 12 | # conditions of a form of NVIDIA software license agreement by and 13 | # between NVIDIA and Licensee ("License Agreement") or electronically 14 | # accepted by Licensee. Notwithstanding any terms or conditions to 15 | # the contrary in the License Agreement, reproduction or disclosure 16 | # of the Licensed Deliverables to any third party without the express 17 | # written consent of NVIDIA is prohibited. 18 | # 19 | # NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE 20 | # LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE 21 | # SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS 22 | # PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND. 23 | # NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED 24 | # DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, 25 | # NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. 26 | # NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE 27 | # LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY 28 | # SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY 29 | # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, 30 | # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS 31 | # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 32 | # OF THESE LICENSED DELIVERABLES. 33 | # 34 | # U.S. Government End Users. These Licensed Deliverables are a 35 | # "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT 36 | # 1995), consisting of "commercial computer software" and "commercial 37 | # computer software documentation" as such terms are used in 48 38 | # C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government 39 | # only as a commercial end item. Consistent with 48 C.F.R.12.212 and 40 | # 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all 41 | # U.S. Government End Users acquire the Licensed Deliverables with 42 | # only those rights set forth herein. 43 | # 44 | # Any use of the Licensed Deliverables in individual and commercial 45 | # software must include, in the user documentation and internal 46 | # comments to the code, the above Disclaimer and U.S. Government End 47 | # Users Notice. 48 | # 49 | 50 | import os 51 | import argparse 52 | import numpy as np 53 | import pycuda.driver as cuda 54 | import tensorrt as trt 55 | 56 | try: 57 | # Sometimes python2 does not understand FileNotFoundError 58 | FileNotFoundError 59 | except NameError: 60 | FileNotFoundError = IOError 61 | 62 | def GiB(val): 63 | return val * 1 << 30 64 | 65 | def find_sample_data(description="Runs a TensorRT Python sample", subfolder="", find_files=[]): 66 | ''' 67 | Parses sample arguments. 68 | Args: 69 | description (str): Description of the sample. 70 | subfolder (str): The subfolder containing data relevant to this sample 71 | find_files (str): A list of filenames to find. Each filename will be replaced with an absolute path. 72 | Returns: 73 | str: Path of data directory. 74 | Raises: 75 | FileNotFoundError 76 | ''' 77 | kDEFAULT_DATA_ROOT = os.path.abspath("/usr/src/tensorrt/data") 78 | 79 | # Standard command-line arguments for all samples. 
80 | parser = argparse.ArgumentParser(description=description) 81 | parser.add_argument("-d", "--datadir", help="Location of the TensorRT sample data directory.") 82 | args, unknown_args = parser.parse_known_args() 83 | 84 | # If data directory is not specified, use the default. 85 | data_root = args.datadir if args.datadir else kDEFAULT_DATA_ROOT 86 | # If the subfolder exists, append it to the path, otherwise use the provided path as-is. 87 | subfolder_path = os.path.join(data_root, subfolder) 88 | if not os.path.exists(subfolder_path): 89 | print("WARNING: " + subfolder_path + " does not exist. Using " + data_root + " instead.") 90 | data_path = subfolder_path if os.path.exists(subfolder_path) else data_root 91 | 92 | # Make sure data directory exists. 93 | if not (os.path.exists(data_path)): 94 | raise FileNotFoundError(data_path + " does not exist. Please provide the correct data path with the -d option.") 95 | 96 | # Find all requested files. 97 | for index, f in enumerate(find_files): 98 | find_files[index] = os.path.abspath(os.path.join(data_path, f)) 99 | if not os.path.exists(find_files[index]): 100 | raise FileNotFoundError(find_files[index] + " does not exist. Please provide the correct data path with the -d option.") 101 | if find_files: 102 | return data_path, find_files 103 | else: 104 | return data_path 105 | 106 | # Simple helper data class that's a little nicer to use than a 2-tuple. 107 | class HostDeviceMem(object): 108 | def __init__(self, host_mem, device_mem): 109 | self.host = host_mem 110 | self.device = device_mem 111 | 112 | def __str__(self): 113 | return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device) 114 | 115 | def __repr__(self): 116 | return self.__str__() 117 | 118 | # Allocates all buffers required for an engine, i.e. host/device inputs/outputs. 119 | def allocate_buffers(engine): 120 | inputs = [] 121 | outputs = [] 122 | bindings = [] 123 | stream = cuda.Stream() 124 | for binding in engine: 125 | size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size 126 | # dtype = trt.nptype(engine.get_binding_dtype(binding)) 127 | dtype = np.float32 128 | # Allocate host and device buffers 129 | host_mem = cuda.pagelocked_empty(size, dtype) 130 | device_mem = cuda.mem_alloc(host_mem.nbytes) 131 | # Append the device buffer to device bindings. 132 | bindings.append(int(device_mem)) 133 | # Append to the appropriate list. 134 | if engine.binding_is_input(binding): 135 | inputs.append(HostDeviceMem(host_mem, device_mem)) 136 | else: 137 | outputs.append(HostDeviceMem(host_mem, device_mem)) 138 | return inputs, outputs, bindings, stream 139 | 140 | # This function is generalized for multiple inputs/outputs. 141 | # inputs and outputs are expected to be lists of HostDeviceMem objects. 142 | def do_inference(context, bindings, inputs, outputs, stream, batch_size=1): 143 | # Transfer input data to the GPU. 144 | [cuda.memcpy_htod_async(inp.device, inp.host, stream) for inp in inputs] 145 | # Run inference. 146 | context.execute_async(batch_size=batch_size, bindings=bindings, stream_handle=stream.handle) 147 | # Transfer predictions back from the GPU. 148 | [cuda.memcpy_dtoh_async(out.host, out.device, stream) for out in outputs] 149 | # Synchronize the stream 150 | stream.synchronize() 151 | # Return only the host outputs. 
152 | return [out.host for out in outputs] 153 | -------------------------------------------------------------------------------- /trt_yolo3_module_1batch.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import tensorrt as trt 4 | import pycuda.driver as cuda 5 | import pycuda.autoinit 6 | import time 7 | from base_module import BaseModule 8 | from util import * 9 | from alpha_yolo3_module_drawing import drawing 10 | 11 | # from data_processing import PreprocessYOLO 12 | 13 | import sys, os 14 | sys.path.insert(1, os.path.join(sys.path[0], "..")) 15 | import common 16 | 17 | TRT_LOGGER = trt.Logger() 18 | 19 | def get_engine(engine_file_path): 20 | if os.path.exists(engine_file_path): 21 | print("Reading engine from file {}".format(engine_file_path)) 22 | with open(engine_file_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime: 23 | return runtime.deserialize_cuda_engine(f.read()) 24 | else: 25 | print("TRT file not found") 26 | 27 | 28 | def prep_image(orig_im, inp_dim): 29 | dim = orig_im.shape[1], orig_im.shape[0] 30 | img = (letterbox_image(orig_im, (inp_dim, inp_dim))) 31 | img_ = img[:, :, ::-1].transpose((2, 0, 1)).copy() #(3 608 608) 32 | img_ = torch.from_numpy(img_).float().div(255.0).unsqueeze(0) 33 | img_ = img_.numpy() 34 | return img_, orig_im, dim 35 | 36 | def letterbox_image(img, inp_dim): 37 | '''resize image with unchanged aspect ratio using padding''' 38 | img_w, img_h = img.shape[1], img.shape[0] 39 | w, h = inp_dim 40 | new_w = int(img_w * min(w / img_w, h / img_h)) 41 | new_h = int(img_h * min(w / img_w, h / img_h)) 42 | resized_image = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_CUBIC) 43 | canvas = np.full((inp_dim[1], inp_dim[0], 3), 128) 44 | canvas[(h - new_h) // 2:(h - new_h) // 2 + new_h, (w - new_w) // 2:(w - new_w) // 2 + new_w, :] = resized_image 45 | return canvas 46 | 47 | class trt_yolo3_module(BaseModule): 48 | def __init__(self, init_dict): 49 | a = torch.cuda.FloatTensor() #pytorch必须首先占用部分CUDA 50 | builder = trt.Builder(TRT_LOGGER) 51 | builder.fp16_mode = True 52 | builder.strict_type_constraints = True 53 | self.trt_file = init_dict['trt'] 54 | self.use_cuda = init_dict['use_cuda'] 55 | self.inp_dim = 608 56 | self.num_classes = 80 57 | self.output_shapes = [(1, 255, 19, 19), (1, 255, 38, 38), (1, 255, 76, 76)] #yolo3-608 58 | self.yolo_anchors = [[(116, 90), (156, 198), (373, 326)], 59 | [(30, 61), (62, 45), (59, 119)], 60 | [(10, 13), (16, 30), (33, 23)]] 61 | 62 | self.engine = get_engine(self.trt_file) 63 | self.inputs, self.outputs, self.bindings, self.stream = common.allocate_buffers(self.engine) 64 | self.context = self.engine.create_execution_context() 65 | 66 | def preparing(self,orig_img_list): 67 | img = [] 68 | orig_img = [] 69 | im_name = [] 70 | im_dim_list = [] 71 | batch = 1 72 | for im in orig_img_list: 73 | im_name_k = '' 74 | img_k, orig_img_k, im_dim_list_k = prep_image(im, self.inp_dim) 75 | img.append(img_k) 76 | orig_img.append(orig_img_k) 77 | im_name.append(im_name_k) 78 | im_dim_list.append(im_dim_list_k) 79 | 80 | with torch.no_grad(): 81 | im_dim_list = torch.FloatTensor(im_dim_list).repeat(1,2) 82 | im_dim_list_ = im_dim_list 83 | 84 | procession_tuple = (img, orig_img, im_name, im_dim_list) 85 | return procession_tuple 86 | 87 | def detection(self,procession_tuple): 88 | (img, orig_img, im_name, im_dim_list) = procession_tuple 89 | # with get_engine(self.trt_file) as engine, engine.create_execution_context() as context: 90 | if 
1: 91 | # inputs, outputs, bindings, stream = common.allocate_buffers(self.engine) 92 | inference_start = time.time() 93 | self.inputs[0].host = img[0] #waiting fix bug 94 | trt_outputs = common.do_inference(self.context, bindings=self.bindings, inputs=self.inputs, outputs=self.outputs, stream=self.stream) 95 | inference_end = time.time() 96 | # print('inference time : %f' % (inference_end-inference_start)) 97 | write = 0 98 | for output, shape, anchors in zip(trt_outputs, self.output_shapes, self.yolo_anchors): 99 | output = output.reshape(shape) 100 | trt_output = torch.from_numpy(output).cuda().data 101 | # trt_output = trt_output.data 102 | # cuda_time1 = time.time() 103 | trt_output = predict_transform(trt_output, self.inp_dim, anchors, self.num_classes, self.use_cuda) 104 | # cuda_time2 = time.time() 105 | # print('CUDA time : %f' % (cuda_time2 - cuda_time1)) 106 | if type(trt_output) == int: 107 | continue 108 | 109 | if not write: 110 | detections = trt_output 111 | write = 1 112 | 113 | else: 114 | detections = torch.cat((detections, trt_output), 1) 115 | 116 | o_time1 = time.time() 117 | print('TensorRT inference time : %f' % (o_time1-inference_start)) 118 | dets = dynamic_write_results(detections, 0.5, self.num_classes, nms=True, nms_conf=0.45) 119 | o_time2 = time.time() 120 | print('After process time : %f' %(o_time2-o_time1)) 121 | class_list_all = [] 122 | box_list_all = [] 123 | conf_list_all = [] 124 | if not isinstance(dets,int): 125 | dets = dets.cpu() 126 | im_dim_list = torch.index_select(im_dim_list,0, dets[:, 0].long()) 127 | scaling_factor = torch.min(self.inp_dim / im_dim_list, 1)[0].view(-1, 1) 128 | dets[:, [1, 3]] -= (self.inp_dim - scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2 129 | dets[:, [2, 4]] -= (self.inp_dim - scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2 130 | dets[:, 1:5] /= scaling_factor 131 | for j in range(dets.shape[0]): 132 | dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0, im_dim_list[j, 0]) 133 | dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0, im_dim_list[j, 1]) 134 | boxes = dets[:, 1:5] 135 | scores = dets[:, 5:6] 136 | for k in range(len(orig_img)): 137 | boxes_k = boxes[dets[:,0]==k] 138 | scores_k = scores[dets[:,0]==k] 139 | class_list = [] 140 | box_list = [] 141 | for b in boxes_k: 142 | x1=int(b[0]) 143 | x2=int(b[2]) 144 | y1=int(b[1]) 145 | y2=int(b[3]) 146 | box_list.append([x1,x2,y1,y2]) 147 | class_list.append('person') 148 | 149 | score_list = scores_k.numpy().tolist() 150 | s_list = [] 151 | for s in score_list: 152 | s_list.append(s[0]) 153 | box_list_all.append(box_list) 154 | conf_list_all.append(s_list) 155 | class_list_all.append(class_list) 156 | 157 | return (class_list_all,box_list_all,conf_list_all) 158 | 159 | 160 | 161 | def dict_checkup(self,dict): 162 | if 'img' not in dict: 163 | dict['img']= '' 164 | print('no img in dict') 165 | if 'data' not in dict: 166 | dict['data']={} 167 | print('no data in dict') 168 | if 'info' not in dict: 169 | dict['info']={} 170 | print('no info in dict') 171 | 172 | def process_frame(self, frame_dic): 173 | pass 174 | 175 | def process_frame_batch(self, frame_dic_list): 176 | for dic in frame_dic_list: 177 | self.dict_checkup(dic) 178 | 179 | img_list = [] 180 | for dic in frame_dic_list: 181 | img_list.append(dic['img']) 182 | 183 | procession_tuple = self.preparing(img_list) 184 | # (img, orig_img, im_name, im_dim_list) = procession_tuple 185 | (class_list_all,box_list_all,conf_list_all) = self.detection(procession_tuple) 186 | if len(class_list_all) == 0: 187 | 
for frame_dic in frame_dic_list: 188 | frame_dic['data']['number'] = 0 189 | frame_dic['data']['box_list'] = [] 190 | frame_dic['data']['class_list'] = [] 191 | frame_dic['data']['conf_list'] = [] 192 | else: 193 | for i,frame_dic in enumerate(frame_dic_list): 194 | frame_dic['data']['number'] = len(class_list_all[i]) 195 | frame_dic['data']['box_list'] = box_list_all[i] 196 | frame_dic['data']['class_list'] = class_list_all[i] 197 | frame_dic['data']['conf_list'] = conf_list_all[i] 198 | 199 | return frame_dic_list 200 | 201 | 202 | 203 | 204 | if __name__ == '__main__': 205 | init_dict = {'trt':"yolov3-608.trt", 'use_cuda':True} 206 | alpha_yolo3_unit = trt_yolo3_module(init_dict) 207 | 208 | input_dic_list = [] 209 | img_path = './images/person.jpg' 210 | dic = {'img':cv2.imread(img_path),'data':{},'info':{}} 211 | input_dic_list.append(dic) 212 | 213 | while True: 214 | output_dic_list = alpha_yolo3_unit.process_frame_batch(input_dic_list) 215 | for dic in output_dic_list: 216 | img_array = dic['img'] 217 | drawing(img_array,dic) 218 | cv2.imshow('show',img_array) 219 | cv2.waitKey(5000) 220 | -------------------------------------------------------------------------------- /trt_yolo3_module_multibatch.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import tensorrt as trt 4 | import pycuda.driver as cuda 5 | import pycuda.autoinit 6 | import time 7 | from base_module import BaseModule 8 | from util import * 9 | from alpha_yolo3_module_drawing import drawing 10 | 11 | # from data_processing import PreprocessYOLO 12 | 13 | import sys, os 14 | sys.path.insert(1, os.path.join(sys.path[0], "..")) 15 | import common 16 | 17 | TRT_LOGGER = trt.Logger() 18 | 19 | def get_engine(engine_file_path): 20 | if os.path.exists(engine_file_path): 21 | print("Reading engine from file {}".format(engine_file_path)) 22 | with open(engine_file_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime: 23 | return runtime.deserialize_cuda_engine(f.read()) 24 | else: 25 | print("TRT file not found") 26 | 27 | 28 | def prep_image(orig_im, inp_dim): 29 | dim = orig_im.shape[1], orig_im.shape[0] 30 | img = (letterbox_image(orig_im, (inp_dim, inp_dim))) 31 | img_ = img[:, :, ::-1].transpose((2, 0, 1)).copy() #(3 608 608) 32 | img_ = torch.from_numpy(img_).float().div(255.0).unsqueeze(0) 33 | img_ = img_.numpy() 34 | return img_, orig_im, dim 35 | 36 | def letterbox_image(img, inp_dim): 37 | '''resize image with unchanged aspect ratio using padding''' 38 | img_w, img_h = img.shape[1], img.shape[0] 39 | w, h = inp_dim 40 | new_w = int(img_w * min(w / img_w, h / img_h)) 41 | new_h = int(img_h * min(w / img_w, h / img_h)) 42 | resized_image = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_CUBIC) 43 | canvas = np.full((inp_dim[1], inp_dim[0], 3), 128) 44 | canvas[(h - new_h) // 2:(h - new_h) // 2 + new_h, (w - new_w) // 2:(w - new_w) // 2 + new_w, :] = resized_image 45 | return canvas 46 | 47 | class trt_yolo3_module(BaseModule): 48 | def __init__(self, init_dict): 49 | a = torch.cuda.FloatTensor() #pytorch必须首先占用部分CUDA 50 | builder = trt.Builder(TRT_LOGGER) 51 | builder.max_batch_size = 4 52 | builder.fp16_mode = True 53 | builder.strict_type_constraints = True 54 | self.trt_file = init_dict['trt'] 55 | self.use_cuda = init_dict['use_cuda'] 56 | self.inp_dim = 608 57 | self.num_classes = 80 58 | self.output_shapes = [(4, 255, 19, 19), (4, 255, 38, 38), (4, 255, 76, 76)] #yolo3-608 59 | self.yolo_anchors = [[(116, 90), (156, 198), (373, 
326)], 60 | [(30, 61), (62, 45), (59, 119)], 61 | [(10, 13), (16, 30), (33, 23)]] 62 | 63 | self.engine = get_engine(self.trt_file) 64 | self.inputs, self.outputs, self.bindings, self.stream = common.allocate_buffers(self.engine) 65 | self.context = self.engine.create_execution_context() 66 | 67 | def preparing(self,orig_img_list): 68 | img = [] 69 | orig_img = [] 70 | im_name = [] 71 | im_dim_list = [] 72 | for im in orig_img_list: 73 | im_name_k = '' 74 | img_k, orig_img_k, im_dim_list_k = prep_image(im, self.inp_dim) 75 | img.append(img_k) 76 | orig_img.append(orig_img_k) 77 | im_name.append(im_name_k) 78 | im_dim_list.append(im_dim_list_k) 79 | 80 | with torch.no_grad(): 81 | # img = torch.cat(img) 82 | im_dim_list = torch.FloatTensor(im_dim_list).repeat(1,2) 83 | im_dim_list_ = im_dim_list 84 | 85 | procession_tuple = (img, orig_img, im_name, im_dim_list) 86 | return procession_tuple 87 | 88 | def detection(self,procession_tuple): 89 | (img, orig_img, im_name, im_dim_list) = procession_tuple 90 | # aaa = np.array(img) 91 | # with get_engine(self.trt_file) as engine, engine.create_execution_context() as context: 92 | if 1: 93 | # inputs, outputs, bindings, stream = common.allocate_buffers(self.engine) 94 | inference_start = time.time() 95 | self.inputs[0].host = np.array(img) #img[0] 96 | trt_outputs = common.do_inference(self.context, bindings=self.bindings, inputs=self.inputs, outputs=self.outputs, stream=self.stream, batch_size=4) 97 | inference_end = time.time() 98 | # print('inference time : %f' % (inference_end-inference_start)) 99 | write = 0 100 | for output, shape, anchors in zip(trt_outputs, self.output_shapes, self.yolo_anchors): 101 | output = output.reshape(shape) 102 | trt_output = torch.from_numpy(output).cuda().data 103 | # trt_output = trt_output.data 104 | # cuda_time1 = time.time() 105 | trt_output = predict_transform(trt_output, self.inp_dim, anchors, self.num_classes, self.use_cuda) 106 | # cuda_time2 = time.time() 107 | # print('CUDA time : %f' % (cuda_time2 - cuda_time1)) 108 | if type(trt_output) == int: 109 | continue 110 | 111 | if not write: 112 | detections = trt_output 113 | write = 1 114 | 115 | else: 116 | detections = torch.cat((detections, trt_output), 1) 117 | 118 | o_time1 = time.time() 119 | print('TensorRT inference time : %f' % (o_time1-inference_start)) 120 | dets = dynamic_write_results(detections, 0.5, self.num_classes, nms=True, nms_conf=0.45) 121 | o_time2 = time.time() 122 | print('After process time : %f' %(o_time2-o_time1)) 123 | class_list_all = [] 124 | box_list_all = [] 125 | conf_list_all = [] 126 | if not isinstance(dets,int): 127 | dets = dets.cpu() 128 | im_dim_list = torch.index_select(im_dim_list,0, dets[:, 0].long()) 129 | scaling_factor = torch.min(self.inp_dim / im_dim_list, 1)[0].view(-1, 1) 130 | dets[:, [1, 3]] -= (self.inp_dim - scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2 131 | dets[:, [2, 4]] -= (self.inp_dim - scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2 132 | dets[:, 1:5] /= scaling_factor 133 | for j in range(dets.shape[0]): 134 | dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0, im_dim_list[j, 0]) 135 | dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0, im_dim_list[j, 1]) 136 | boxes = dets[:, 1:5] 137 | scores = dets[:, 5:6] 138 | for k in range(len(orig_img)): 139 | boxes_k = boxes[dets[:,0]==k] 140 | scores_k = scores[dets[:,0]==k] 141 | class_list = [] 142 | box_list = [] 143 | for b in boxes_k: 144 | x1=int(b[0]) 145 | x2=int(b[2]) 146 | y1=int(b[1]) 147 | y2=int(b[3]) 148 | 
box_list.append([x1,x2,y1,y2]) 149 | class_list.append('person') 150 | 151 | score_list = scores_k.numpy().tolist() 152 | s_list = [] 153 | for s in score_list: 154 | s_list.append(s[0]) 155 | box_list_all.append(box_list) 156 | conf_list_all.append(s_list) 157 | class_list_all.append(class_list) 158 | 159 | return (class_list_all,box_list_all,conf_list_all) 160 | 161 | 162 | 163 | def dict_checkup(self,dict): 164 | if 'img' not in dict: 165 | dict['img']= '' 166 | print('no img in dict') 167 | if 'data' not in dict: 168 | dict['data']={} 169 | print('no data in dict') 170 | if 'info' not in dict: 171 | dict['info']={} 172 | print('no info in dict') 173 | 174 | def process_frame(self, frame_dic): 175 | pass 176 | 177 | def process_frame_batch(self, frame_dic_list): 178 | for dic in frame_dic_list: 179 | self.dict_checkup(dic) 180 | 181 | img_list = [] 182 | for dic in frame_dic_list: 183 | img_list.append(dic['img']) 184 | 185 | procession_tuple = self.preparing(img_list) 186 | # (img, orig_img, im_name, im_dim_list) = procession_tuple 187 | (class_list_all,box_list_all,conf_list_all) = self.detection(procession_tuple) 188 | if len(class_list_all) == 0: 189 | for frame_dic in frame_dic_list: 190 | frame_dic['data']['number'] = 0 191 | frame_dic['data']['box_list'] = [] 192 | frame_dic['data']['class_list'] = [] 193 | frame_dic['data']['conf_list'] = [] 194 | else: 195 | for i,frame_dic in enumerate(frame_dic_list): 196 | frame_dic['data']['number'] = len(class_list_all[i]) 197 | frame_dic['data']['box_list'] = box_list_all[i] 198 | frame_dic['data']['class_list'] = class_list_all[i] 199 | frame_dic['data']['conf_list'] = conf_list_all[i] 200 | 201 | return frame_dic_list 202 | 203 | 204 | 205 | 206 | if __name__ == '__main__': 207 | init_dict = {'trt':"yolov3-608.trt", 'use_cuda':True} 208 | alpha_yolo3_unit = trt_yolo3_module(init_dict) 209 | 210 | input_dic_list = [] 211 | 212 | img_path = './images/person.jpg' 213 | dic = {'img':cv2.imread(img_path),'data':{},'info':{}} 214 | input_dic_list.append(dic) 215 | 216 | img_path = './images/person2.jpg' 217 | dic = {'img':cv2.imread(img_path),'data':{},'info':{}} 218 | input_dic_list.append(dic) 219 | 220 | img_path = './images/person.jpg' 221 | dic = {'img':cv2.imread(img_path),'data':{},'info':{}} 222 | input_dic_list.append(dic) 223 | 224 | img_path = './images/person2.jpg' 225 | dic = {'img':cv2.imread(img_path),'data':{},'info':{}} 226 | input_dic_list.append(dic) 227 | 228 | while True: 229 | output_dic_list = alpha_yolo3_unit.process_frame_batch(input_dic_list) 230 | # for dic in output_dic_list: 231 | # img_array = dic['img'] 232 | # drawing(img_array,dic) 233 | # cv2.imshow('show',img_array) 234 | # cv2.waitKey(5000) 235 | -------------------------------------------------------------------------------- /yolov3-608.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | # batch=1 4 | # subdivisions=1 5 | # Training 6 | batch=64 7 | subdivisions=16 8 | width=608 9 | height=608 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | # Downsample 34 | 35 | [convolutional] 36 | batch_normalize=1 37 | filters=64 38 | size=3 39 | stride=2 40 | pad=1 41 | 
activation=leaky 42 | 43 | [convolutional] 44 | batch_normalize=1 45 | filters=32 46 | size=1 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=64 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [shortcut] 60 | from=-3 61 | activation=linear 62 | 63 | # Downsample 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=2 70 | pad=1 71 | activation=leaky 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=64 76 | size=1 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [convolutional] 82 | batch_normalize=1 83 | filters=128 84 | size=3 85 | stride=1 86 | pad=1 87 | activation=leaky 88 | 89 | [shortcut] 90 | from=-3 91 | activation=linear 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=64 96 | size=1 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [convolutional] 102 | batch_normalize=1 103 | filters=128 104 | size=3 105 | stride=1 106 | pad=1 107 | activation=leaky 108 | 109 | [shortcut] 110 | from=-3 111 | activation=linear 112 | 113 | # Downsample 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=256 118 | size=3 119 | stride=2 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | batch_normalize=1 125 | filters=128 126 | size=1 127 | stride=1 128 | pad=1 129 | activation=leaky 130 | 131 | [convolutional] 132 | batch_normalize=1 133 | filters=256 134 | size=3 135 | stride=1 136 | pad=1 137 | activation=leaky 138 | 139 | [shortcut] 140 | from=-3 141 | activation=linear 142 | 143 | [convolutional] 144 | batch_normalize=1 145 | filters=128 146 | size=1 147 | stride=1 148 | pad=1 149 | activation=leaky 150 | 151 | [convolutional] 152 | batch_normalize=1 153 | filters=256 154 | size=3 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [shortcut] 160 | from=-3 161 | activation=linear 162 | 163 | [convolutional] 164 | batch_normalize=1 165 | filters=128 166 | size=1 167 | stride=1 168 | pad=1 169 | activation=leaky 170 | 171 | [convolutional] 172 | batch_normalize=1 173 | filters=256 174 | size=3 175 | stride=1 176 | pad=1 177 | activation=leaky 178 | 179 | [shortcut] 180 | from=-3 181 | activation=linear 182 | 183 | [convolutional] 184 | batch_normalize=1 185 | filters=128 186 | size=1 187 | stride=1 188 | pad=1 189 | activation=leaky 190 | 191 | [convolutional] 192 | batch_normalize=1 193 | filters=256 194 | size=3 195 | stride=1 196 | pad=1 197 | activation=leaky 198 | 199 | [shortcut] 200 | from=-3 201 | activation=linear 202 | 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=1 208 | stride=1 209 | pad=1 210 | activation=leaky 211 | 212 | [convolutional] 213 | batch_normalize=1 214 | filters=256 215 | size=3 216 | stride=1 217 | pad=1 218 | activation=leaky 219 | 220 | [shortcut] 221 | from=-3 222 | activation=linear 223 | 224 | [convolutional] 225 | batch_normalize=1 226 | filters=128 227 | size=1 228 | stride=1 229 | pad=1 230 | activation=leaky 231 | 232 | [convolutional] 233 | batch_normalize=1 234 | filters=256 235 | size=3 236 | stride=1 237 | pad=1 238 | activation=leaky 239 | 240 | [shortcut] 241 | from=-3 242 | activation=linear 243 | 244 | [convolutional] 245 | batch_normalize=1 246 | filters=128 247 | size=1 248 | stride=1 249 | pad=1 250 | activation=leaky 251 | 252 | [convolutional] 253 | batch_normalize=1 254 | filters=256 255 | size=3 256 | stride=1 257 | pad=1 258 | activation=leaky 259 | 260 | [shortcut] 261 | from=-3 262 | activation=linear 263 | 
264 | [convolutional] 265 | batch_normalize=1 266 | filters=128 267 | size=1 268 | stride=1 269 | pad=1 270 | activation=leaky 271 | 272 | [convolutional] 273 | batch_normalize=1 274 | filters=256 275 | size=3 276 | stride=1 277 | pad=1 278 | activation=leaky 279 | 280 | [shortcut] 281 | from=-3 282 | activation=linear 283 | 284 | # Downsample 285 | 286 | [convolutional] 287 | batch_normalize=1 288 | filters=512 289 | size=3 290 | stride=2 291 | pad=1 292 | activation=leaky 293 | 294 | [convolutional] 295 | batch_normalize=1 296 | filters=256 297 | size=1 298 | stride=1 299 | pad=1 300 | activation=leaky 301 | 302 | [convolutional] 303 | batch_normalize=1 304 | filters=512 305 | size=3 306 | stride=1 307 | pad=1 308 | activation=leaky 309 | 310 | [shortcut] 311 | from=-3 312 | activation=linear 313 | 314 | 315 | [convolutional] 316 | batch_normalize=1 317 | filters=256 318 | size=1 319 | stride=1 320 | pad=1 321 | activation=leaky 322 | 323 | [convolutional] 324 | batch_normalize=1 325 | filters=512 326 | size=3 327 | stride=1 328 | pad=1 329 | activation=leaky 330 | 331 | [shortcut] 332 | from=-3 333 | activation=linear 334 | 335 | 336 | [convolutional] 337 | batch_normalize=1 338 | filters=256 339 | size=1 340 | stride=1 341 | pad=1 342 | activation=leaky 343 | 344 | [convolutional] 345 | batch_normalize=1 346 | filters=512 347 | size=3 348 | stride=1 349 | pad=1 350 | activation=leaky 351 | 352 | [shortcut] 353 | from=-3 354 | activation=linear 355 | 356 | 357 | [convolutional] 358 | batch_normalize=1 359 | filters=256 360 | size=1 361 | stride=1 362 | pad=1 363 | activation=leaky 364 | 365 | [convolutional] 366 | batch_normalize=1 367 | filters=512 368 | size=3 369 | stride=1 370 | pad=1 371 | activation=leaky 372 | 373 | [shortcut] 374 | from=-3 375 | activation=linear 376 | 377 | [convolutional] 378 | batch_normalize=1 379 | filters=256 380 | size=1 381 | stride=1 382 | pad=1 383 | activation=leaky 384 | 385 | [convolutional] 386 | batch_normalize=1 387 | filters=512 388 | size=3 389 | stride=1 390 | pad=1 391 | activation=leaky 392 | 393 | [shortcut] 394 | from=-3 395 | activation=linear 396 | 397 | 398 | [convolutional] 399 | batch_normalize=1 400 | filters=256 401 | size=1 402 | stride=1 403 | pad=1 404 | activation=leaky 405 | 406 | [convolutional] 407 | batch_normalize=1 408 | filters=512 409 | size=3 410 | stride=1 411 | pad=1 412 | activation=leaky 413 | 414 | [shortcut] 415 | from=-3 416 | activation=linear 417 | 418 | 419 | [convolutional] 420 | batch_normalize=1 421 | filters=256 422 | size=1 423 | stride=1 424 | pad=1 425 | activation=leaky 426 | 427 | [convolutional] 428 | batch_normalize=1 429 | filters=512 430 | size=3 431 | stride=1 432 | pad=1 433 | activation=leaky 434 | 435 | [shortcut] 436 | from=-3 437 | activation=linear 438 | 439 | [convolutional] 440 | batch_normalize=1 441 | filters=256 442 | size=1 443 | stride=1 444 | pad=1 445 | activation=leaky 446 | 447 | [convolutional] 448 | batch_normalize=1 449 | filters=512 450 | size=3 451 | stride=1 452 | pad=1 453 | activation=leaky 454 | 455 | [shortcut] 456 | from=-3 457 | activation=linear 458 | 459 | # Downsample 460 | 461 | [convolutional] 462 | batch_normalize=1 463 | filters=1024 464 | size=3 465 | stride=2 466 | pad=1 467 | activation=leaky 468 | 469 | [convolutional] 470 | batch_normalize=1 471 | filters=512 472 | size=1 473 | stride=1 474 | pad=1 475 | activation=leaky 476 | 477 | [convolutional] 478 | batch_normalize=1 479 | filters=1024 480 | size=3 481 | stride=1 482 | pad=1 483 | activation=leaky 484 
| 485 | [shortcut] 486 | from=-3 487 | activation=linear 488 | 489 | [convolutional] 490 | batch_normalize=1 491 | filters=512 492 | size=1 493 | stride=1 494 | pad=1 495 | activation=leaky 496 | 497 | [convolutional] 498 | batch_normalize=1 499 | filters=1024 500 | size=3 501 | stride=1 502 | pad=1 503 | activation=leaky 504 | 505 | [shortcut] 506 | from=-3 507 | activation=linear 508 | 509 | [convolutional] 510 | batch_normalize=1 511 | filters=512 512 | size=1 513 | stride=1 514 | pad=1 515 | activation=leaky 516 | 517 | [convolutional] 518 | batch_normalize=1 519 | filters=1024 520 | size=3 521 | stride=1 522 | pad=1 523 | activation=leaky 524 | 525 | [shortcut] 526 | from=-3 527 | activation=linear 528 | 529 | [convolutional] 530 | batch_normalize=1 531 | filters=512 532 | size=1 533 | stride=1 534 | pad=1 535 | activation=leaky 536 | 537 | [convolutional] 538 | batch_normalize=1 539 | filters=1024 540 | size=3 541 | stride=1 542 | pad=1 543 | activation=leaky 544 | 545 | [shortcut] 546 | from=-3 547 | activation=linear 548 | 549 | ###################### 550 | 551 | [convolutional] 552 | batch_normalize=1 553 | filters=512 554 | size=1 555 | stride=1 556 | pad=1 557 | activation=leaky 558 | 559 | [convolutional] 560 | batch_normalize=1 561 | size=3 562 | stride=1 563 | pad=1 564 | filters=1024 565 | activation=leaky 566 | 567 | [convolutional] 568 | batch_normalize=1 569 | filters=512 570 | size=1 571 | stride=1 572 | pad=1 573 | activation=leaky 574 | 575 | [convolutional] 576 | batch_normalize=1 577 | size=3 578 | stride=1 579 | pad=1 580 | filters=1024 581 | activation=leaky 582 | 583 | [convolutional] 584 | batch_normalize=1 585 | filters=512 586 | size=1 587 | stride=1 588 | pad=1 589 | activation=leaky 590 | 591 | [convolutional] 592 | batch_normalize=1 593 | size=3 594 | stride=1 595 | pad=1 596 | filters=1024 597 | activation=leaky 598 | 599 | [convolutional] 600 | size=1 601 | stride=1 602 | pad=1 603 | filters=255 604 | activation=linear 605 | 606 | 607 | [yolo] 608 | mask = 6,7,8 609 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 610 | classes=80 611 | num=9 612 | jitter=.3 613 | ignore_thresh = .7 614 | truth_thresh = 1 615 | random=1 616 | 617 | 618 | [route] 619 | layers = -4 620 | 621 | [convolutional] 622 | batch_normalize=1 623 | filters=256 624 | size=1 625 | stride=1 626 | pad=1 627 | activation=leaky 628 | 629 | [upsample] 630 | stride=2 631 | 632 | [route] 633 | layers = -1, 61 634 | 635 | 636 | 637 | [convolutional] 638 | batch_normalize=1 639 | filters=256 640 | size=1 641 | stride=1 642 | pad=1 643 | activation=leaky 644 | 645 | [convolutional] 646 | batch_normalize=1 647 | size=3 648 | stride=1 649 | pad=1 650 | filters=512 651 | activation=leaky 652 | 653 | [convolutional] 654 | batch_normalize=1 655 | filters=256 656 | size=1 657 | stride=1 658 | pad=1 659 | activation=leaky 660 | 661 | [convolutional] 662 | batch_normalize=1 663 | size=3 664 | stride=1 665 | pad=1 666 | filters=512 667 | activation=leaky 668 | 669 | [convolutional] 670 | batch_normalize=1 671 | filters=256 672 | size=1 673 | stride=1 674 | pad=1 675 | activation=leaky 676 | 677 | [convolutional] 678 | batch_normalize=1 679 | size=3 680 | stride=1 681 | pad=1 682 | filters=512 683 | activation=leaky 684 | 685 | [convolutional] 686 | size=1 687 | stride=1 688 | pad=1 689 | filters=255 690 | activation=linear 691 | 692 | 693 | [yolo] 694 | mask = 3,4,5 695 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 696 | classes=80 697 | 
num=9 698 | jitter=.3 699 | ignore_thresh = .7 700 | truth_thresh = 1 701 | random=1 702 | 703 | 704 | 705 | [route] 706 | layers = -4 707 | 708 | [convolutional] 709 | batch_normalize=1 710 | filters=128 711 | size=1 712 | stride=1 713 | pad=1 714 | activation=leaky 715 | 716 | [upsample] 717 | stride=2 718 | 719 | [route] 720 | layers = -1, 36 721 | 722 | 723 | 724 | [convolutional] 725 | batch_normalize=1 726 | filters=128 727 | size=1 728 | stride=1 729 | pad=1 730 | activation=leaky 731 | 732 | [convolutional] 733 | batch_normalize=1 734 | size=3 735 | stride=1 736 | pad=1 737 | filters=256 738 | activation=leaky 739 | 740 | [convolutional] 741 | batch_normalize=1 742 | filters=128 743 | size=1 744 | stride=1 745 | pad=1 746 | activation=leaky 747 | 748 | [convolutional] 749 | batch_normalize=1 750 | size=3 751 | stride=1 752 | pad=1 753 | filters=256 754 | activation=leaky 755 | 756 | [convolutional] 757 | batch_normalize=1 758 | filters=128 759 | size=1 760 | stride=1 761 | pad=1 762 | activation=leaky 763 | 764 | [convolutional] 765 | batch_normalize=1 766 | size=3 767 | stride=1 768 | pad=1 769 | filters=256 770 | activation=leaky 771 | 772 | [convolutional] 773 | size=1 774 | stride=1 775 | pad=1 776 | filters=255 777 | activation=linear 778 | 779 | 780 | [yolo] 781 | mask = 0,1,2 782 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 783 | classes=80 784 | num=9 785 | jitter=.3 786 | ignore_thresh = .7 787 | truth_thresh = 1 788 | random=1 789 | 790 | -------------------------------------------------------------------------------- /util.py: -------------------------------------------------------------------------------- 1 | 2 | from __future__ import division 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | from torch.autograd import Variable 8 | import numpy as np 9 | import sys 10 | if '/opt/ros/kinetic/lib/python2.7/dist-packages' in sys.path: 11 | sys.path.remove('/opt/ros/kinetic/lib/python2.7/dist-packages') 12 | import cv2 13 | #import matplotlib.pyplot as plt 14 | try: 15 | from bbox import bbox_iou 16 | except ImportError: 17 | from yolo.bbox import bbox_iou 18 | 19 | 20 | def count_parameters(model): 21 | return sum(p.numel() for p in model.parameters()) 22 | 23 | def count_learnable_parameters(model): 24 | return sum(p.numel() for p in model.parameters() if p.requires_grad) 25 | 26 | def convert2cpu(matrix): 27 | if matrix.is_cuda: 28 | return torch.FloatTensor(matrix.size()).copy_(matrix) 29 | else: 30 | return matrix 31 | 32 | def predict_transform(prediction, inp_dim, anchors, num_classes, CUDA = True): 33 | batch_size = prediction.size(0) 34 | stride = inp_dim // prediction.size(2) 35 | grid_size = inp_dim // stride 36 | bbox_attrs = 5 + num_classes 37 | num_anchors = len(anchors) 38 | 39 | anchors = [(a[0]/stride, a[1]/stride) for a in anchors] 40 | 41 | 42 | 43 | prediction = prediction.view(batch_size, bbox_attrs*num_anchors, grid_size*grid_size) 44 | prediction = prediction.transpose(1,2).contiguous() 45 | prediction = prediction.view(batch_size, grid_size*grid_size*num_anchors, bbox_attrs) 46 | 47 | 48 | #Sigmoid the centre_X, centre_Y. 
and object confidencce 49 | prediction[:,:,0] = torch.sigmoid(prediction[:,:,0]) 50 | prediction[:,:,1] = torch.sigmoid(prediction[:,:,1]) 51 | prediction[:,:,4] = torch.sigmoid(prediction[:,:,4]) 52 | 53 | 54 | 55 | #Add the center offsets 56 | grid_len = np.arange(grid_size) 57 | a,b = np.meshgrid(grid_len, grid_len) 58 | 59 | x_offset = torch.FloatTensor(a).view(-1,1) 60 | y_offset = torch.FloatTensor(b).view(-1,1) 61 | 62 | if CUDA: 63 | x_offset = x_offset.cuda() 64 | y_offset = y_offset.cuda() 65 | 66 | x_y_offset = torch.cat((x_offset, y_offset), 1).repeat(1,num_anchors).view(-1,2).unsqueeze(0) 67 | 68 | prediction[:,:,:2] += x_y_offset 69 | 70 | #log space transform height and the width 71 | anchors = torch.FloatTensor(anchors) 72 | 73 | if CUDA: 74 | anchors = anchors.cuda() 75 | 76 | anchors = anchors.repeat(grid_size*grid_size, 1).unsqueeze(0) 77 | prediction[:,:,2:4] = torch.exp(prediction[:,:,2:4])*anchors 78 | 79 | #Softmax the class scores 80 | prediction[:,:,5: 5 + num_classes] = torch.sigmoid((prediction[:,:, 5 : 5 + num_classes])) 81 | 82 | prediction[:,:,:4] *= stride 83 | 84 | 85 | return prediction 86 | 87 | def load_classes(namesfile): 88 | fp = open(namesfile, "r") 89 | names = fp.read().split("\n")[:-1] 90 | return names 91 | 92 | def get_im_dim(im): 93 | im = cv2.imread(im) 94 | w,h = im.shape[1], im.shape[0] 95 | return w,h 96 | 97 | def unique(tensor): 98 | tensor_np = tensor.cpu().numpy() 99 | unique_np = np.unique(tensor_np) 100 | unique_tensor = torch.from_numpy(unique_np) 101 | 102 | tensor_res = tensor.new(unique_tensor.shape) 103 | tensor_res.copy_(unique_tensor) 104 | return tensor_res 105 | 106 | def dynamic_write_results(prediction, confidence, num_classes, nms=True, nms_conf=0.4): 107 | prediction_bak = prediction.clone() 108 | dets = write_results(prediction.clone(), confidence, num_classes, nms, nms_conf) 109 | if isinstance(dets, int): 110 | return dets 111 | 112 | if dets.shape[0] > 100: 113 | nms_conf -= 0.05 114 | dets = write_results(prediction_bak.clone(), confidence, num_classes, nms, nms_conf) 115 | 116 | return dets 117 | 118 | 119 | def write_results(prediction, confidence, num_classes, nms=True, nms_conf=0.4): 120 | conf_mask = (prediction[:, :, 4] > confidence).float().float().unsqueeze(2) 121 | prediction = prediction * conf_mask 122 | 123 | try: 124 | ind_nz = torch.nonzero(prediction[:,:,4]).transpose(0,1).contiguous() 125 | except: 126 | return 0 127 | 128 | box_a = prediction.new(prediction.shape) 129 | box_a[:,:,0] = (prediction[:,:,0] - prediction[:,:,2]/2) 130 | box_a[:,:,1] = (prediction[:,:,1] - prediction[:,:,3]/2) 131 | box_a[:,:,2] = (prediction[:,:,0] + prediction[:,:,2]/2) 132 | box_a[:,:,3] = (prediction[:,:,1] + prediction[:,:,3]/2) 133 | prediction[:,:,:4] = box_a[:,:,:4] 134 | 135 | batch_size = prediction.size(0) 136 | 137 | output = prediction.new(1, prediction.size(2) + 1) 138 | write = False 139 | num = 0 140 | for ind in range(batch_size): 141 | #select the image from the batch 142 | image_pred = prediction[ind] 143 | 144 | #Get the class having maximum score, and the index of that class 145 | #Get rid of num_classes softmax scores 146 | #Add the class index and the class score of class having maximum score 147 | max_conf, max_conf_score = torch.max(image_pred[:,5:5+ num_classes], 1) 148 | max_conf = max_conf.float().unsqueeze(1) 149 | max_conf_score = max_conf_score.float().unsqueeze(1) 150 | seq = (image_pred[:,:5], max_conf, max_conf_score) 151 | image_pred = torch.cat(seq, 1) 152 | 153 | #Get rid of the zero 
entries 154 | non_zero_ind = (torch.nonzero(image_pred[:,4])) 155 | 156 | image_pred_ = image_pred[non_zero_ind.squeeze(),:].view(-1,7) 157 | 158 | #Get the various classes detected in the image 159 | try: 160 | img_classes = unique(image_pred_[:,-1]) 161 | except: 162 | continue 163 | 164 | #WE will do NMS classwise 165 | #print(img_classes) 166 | for cls in img_classes: 167 | if cls != 0: #0 is the person 168 | continue 169 | #get the detections with one particular class 170 | cls_mask = image_pred_*(image_pred_[:,-1] == cls).float().unsqueeze(1) 171 | class_mask_ind = torch.nonzero(cls_mask[:,-2]).squeeze() 172 | 173 | image_pred_class = image_pred_[class_mask_ind].view(-1,7) 174 | 175 | #sort the detections such that the entry with the maximum objectness 176 | #confidence is at the top 177 | conf_sort_index = torch.sort(image_pred_class[:,4], descending = True )[1] 178 | image_pred_class = image_pred_class[conf_sort_index] 179 | idx = image_pred_class.size(0) 180 | 181 | #if nms has to be done 182 | if nms: 183 | #For each detection 184 | for i in range(idx): 185 | #Get the IOUs of all boxes that come after the one we are looking at 186 | #in the loop 187 | try: 188 | ious = bbox_iou(image_pred_class[i].unsqueeze(0), image_pred_class[i+1:]) 189 | except ValueError: 190 | break 191 | 192 | except IndexError: 193 | break 194 | 195 | #Zero out all the detections that have IoU > treshhold 196 | iou_mask = (ious < nms_conf).float().unsqueeze(1) 197 | image_pred_class[i+1:] *= iou_mask 198 | 199 | #Remove the non-zero entries 200 | non_zero_ind = torch.nonzero(image_pred_class[:,4]).squeeze() 201 | image_pred_class = image_pred_class[non_zero_ind].view(-1,7) 202 | 203 | #if nms has to be done 204 | # if nms: 205 | # # Perform non-maximum suppression 206 | # max_detections = [] 207 | # while image_pred_class.size(0): 208 | # # Get detection with highest confidence and save as max detection 209 | # max_detections.append(image_pred_class[0].unsqueeze(0)) 210 | # # Stop if we're at the last detection 211 | # if len(image_pred_class) == 1: 212 | # break 213 | # # Get the IOUs for all boxes with lower confidence 214 | # ious = bbox_iou(max_detections[-1], image_pred_class[1:]) 215 | # # Remove detections with IoU >= NMS threshold 216 | # image_pred_class = image_pred_class[1:][ious < nms_conf] 217 | 218 | # image_pred_class = torch.cat(max_detections).data 219 | 220 | 221 | #Concatenate the batch_id of the image to the detection 222 | #this helps us identify which image does the detection correspond to 223 | #We use a linear straucture to hold ALL the detections from the batch 224 | #the batch_dim is flattened 225 | #batch is identified by extra batch column 226 | 227 | batch_ind = image_pred_class.new(image_pred_class.size(0), 1).fill_(ind) 228 | seq = batch_ind, image_pred_class 229 | if not write: 230 | output = torch.cat(seq,1) 231 | write = True 232 | else: 233 | out = torch.cat(seq,1) 234 | output = torch.cat((output,out)) 235 | num += 1 236 | 237 | if not num: 238 | return 0 239 | 240 | return output 241 | 242 | #!/usr/bin/env python3 243 | # -*- coding: utf-8 -*- 244 | """ 245 | Created on Sat Mar 24 00:12:16 2018 246 | 247 | @author: ayooshmac 248 | """ 249 | 250 | def predict_transform_half(prediction, inp_dim, anchors, num_classes, CUDA = True): 251 | batch_size = prediction.size(0) 252 | stride = inp_dim // prediction.size(2) 253 | 254 | bbox_attrs = 5 + num_classes 255 | num_anchors = len(anchors) 256 | grid_size = inp_dim // stride 257 | 258 | 259 | prediction = 
prediction.view(batch_size, bbox_attrs*num_anchors, grid_size*grid_size) 260 | prediction = prediction.transpose(1,2).contiguous() 261 | prediction = prediction.view(batch_size, grid_size*grid_size*num_anchors, bbox_attrs) 262 | 263 | 264 | #Sigmoid the centre_X, centre_Y. and object confidencce 265 | prediction[:,:,0] = torch.sigmoid(prediction[:,:,0]) 266 | prediction[:,:,1] = torch.sigmoid(prediction[:,:,1]) 267 | prediction[:,:,4] = torch.sigmoid(prediction[:,:,4]) 268 | 269 | 270 | #Add the center offsets 271 | grid_len = np.arange(grid_size) 272 | a,b = np.meshgrid(grid_len, grid_len) 273 | 274 | x_offset = torch.FloatTensor(a).view(-1,1) 275 | y_offset = torch.FloatTensor(b).view(-1,1) 276 | 277 | if CUDA: 278 | x_offset = x_offset.cuda().half() 279 | y_offset = y_offset.cuda().half() 280 | 281 | x_y_offset = torch.cat((x_offset, y_offset), 1).repeat(1,num_anchors).view(-1,2).unsqueeze(0) 282 | 283 | prediction[:,:,:2] += x_y_offset 284 | 285 | #log space transform height and the width 286 | anchors = torch.HalfTensor(anchors) 287 | 288 | if CUDA: 289 | anchors = anchors.cuda() 290 | 291 | anchors = anchors.repeat(grid_size*grid_size, 1).unsqueeze(0) 292 | prediction[:,:,2:4] = torch.exp(prediction[:,:,2:4])*anchors 293 | 294 | #Softmax the class scores 295 | prediction[:,:,5: 5 + num_classes] = nn.Softmax(-1)(Variable(prediction[:,:, 5 : 5 + num_classes])).data 296 | 297 | prediction[:,:,:4] *= stride 298 | 299 | 300 | return prediction 301 | 302 | 303 | def write_results_half(prediction, confidence, num_classes, nms = True, nms_conf = 0.4): 304 | conf_mask = (prediction[:,:,4] > confidence).half().unsqueeze(2) 305 | prediction = prediction*conf_mask 306 | 307 | try: 308 | ind_nz = torch.nonzero(prediction[:,:,4]).transpose(0,1).contiguous() 309 | except: 310 | return 0 311 | 312 | 313 | 314 | box_a = prediction.new(prediction.shape) 315 | box_a[:,:,0] = (prediction[:,:,0] - prediction[:,:,2]/2) 316 | box_a[:,:,1] = (prediction[:,:,1] - prediction[:,:,3]/2) 317 | box_a[:,:,2] = (prediction[:,:,0] + prediction[:,:,2]/2) 318 | box_a[:,:,3] = (prediction[:,:,1] + prediction[:,:,3]/2) 319 | prediction[:,:,:4] = box_a[:,:,:4] 320 | 321 | 322 | 323 | batch_size = prediction.size(0) 324 | 325 | output = prediction.new(1, prediction.size(2) + 1) 326 | write = False 327 | 328 | for ind in range(batch_size): 329 | #select the image from the batch 330 | image_pred = prediction[ind] 331 | 332 | 333 | #Get the class having maximum score, and the index of that class 334 | #Get rid of num_classes softmax scores 335 | #Add the class index and the class score of class having maximum score 336 | max_conf, max_conf_score = torch.max(image_pred[:,5:5+ num_classes], 1) 337 | max_conf = max_conf.half().unsqueeze(1) 338 | max_conf_score = max_conf_score.half().unsqueeze(1) 339 | seq = (image_pred[:,:5], max_conf, max_conf_score) 340 | image_pred = torch.cat(seq, 1) 341 | 342 | 343 | #Get rid of the zero entries 344 | non_zero_ind = (torch.nonzero(image_pred[:,4])) 345 | try: 346 | image_pred_ = image_pred[non_zero_ind.squeeze(),:] 347 | except: 348 | continue 349 | 350 | #Get the various classes detected in the image 351 | img_classes = unique(image_pred_[:,-1].long()).half() 352 | 353 | 354 | 355 | 356 | #WE will do NMS classwise 357 | for cls in img_classes: 358 | #get the detections with one particular class 359 | cls_mask = image_pred_*(image_pred_[:,-1] == cls).half().unsqueeze(1) 360 | class_mask_ind = torch.nonzero(cls_mask[:,-2]).squeeze() 361 | 362 | 363 | image_pred_class = 
image_pred_[class_mask_ind] 364 | 365 | 366 | #sort the detections such that the entry with the maximum objectness 367 | #confidence is at the top 368 | conf_sort_index = torch.sort(image_pred_class[:,4], descending = True )[1] 369 | image_pred_class = image_pred_class[conf_sort_index] 370 | idx = image_pred_class.size(0) 371 | 372 | #if nms has to be done 373 | if nms: 374 | #For each detection 375 | for i in range(idx): 376 | #Get the IOUs of all boxes that come after the one we are looking at 377 | #in the loop 378 | try: 379 | ious = bbox_iou(image_pred_class[i].unsqueeze(0), image_pred_class[i+1:]) 380 | except ValueError: 381 | break 382 | 383 | except IndexError: 384 | break 385 | 386 | #Zero out all the detections that have IoU > treshhold 387 | iou_mask = (ious < nms_conf).half().unsqueeze(1) 388 | image_pred_class[i+1:] *= iou_mask 389 | 390 | #Remove the non-zero entries 391 | non_zero_ind = torch.nonzero(image_pred_class[:,4]).squeeze() 392 | image_pred_class = image_pred_class[non_zero_ind] 393 | 394 | 395 | 396 | #Concatenate the batch_id of the image to the detection 397 | #this helps us identify which image does the detection correspond to 398 | #We use a linear straucture to hold ALL the detections from the batch 399 | #the batch_dim is flattened 400 | #batch is identified by extra batch column 401 | batch_ind = image_pred_class.new(image_pred_class.size(0), 1).fill_(ind) 402 | seq = batch_ind, image_pred_class 403 | 404 | if not write: 405 | output = torch.cat(seq,1) 406 | write = True 407 | else: 408 | out = torch.cat(seq,1) 409 | output = torch.cat((output,out)) 410 | 411 | return output 412 | -------------------------------------------------------------------------------- /weight_to_onnx.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | 3 | from __future__ import print_function 4 | from collections import OrderedDict 5 | import hashlib 6 | import os.path 7 | 8 | import onnx 9 | from onnx import helper 10 | from onnx import TensorProto 11 | import numpy as np 12 | 13 | import sys 14 | 15 | class DarkNetParser(object): 16 | """Definition of a parser for DarkNet-based YOLOv3-608 (only tested for this topology).""" 17 | 18 | def __init__(self, supported_layers): 19 | """Initializes a DarkNetParser object. 20 | 21 | Keyword argument: 22 | supported_layers -- a string list of supported layers in DarkNet naming convention, 23 | parameters are only added to the class dictionary if a parsed layer is included. 24 | """ 25 | 26 | # A list of YOLOv3 layers containing dictionaries with all layer 27 | # parameters: 28 | self.layer_configs = OrderedDict() 29 | self.supported_layers = supported_layers 30 | self.layer_counter = 0 31 | 32 | def parse_cfg_file(self, cfg_file_path): 33 | """Takes the yolov3.cfg file and parses it layer by layer, 34 | appending each layer's parameters as a dictionary to layer_configs. 35 | 36 | Keyword argument: 37 | cfg_file_path -- path to the yolov3.cfg file as string 38 | """ 39 | with open(cfg_file_path, 'rb') as cfg_file: 40 | remainder = cfg_file.read() 41 | while remainder is not None: 42 | layer_dict, layer_name, remainder = self._next_layer(remainder) 43 | if layer_dict is not None: 44 | self.layer_configs[layer_name] = layer_dict 45 | return self.layer_configs 46 | 47 | def _next_layer(self, remainder): 48 | """Takes in a string and segments it by looking for DarkNet delimiters. 49 | Returns the layer parameters and the remaining string after the last delimiter. 
50 | Example for the first Conv layer in yolo.cfg ... 51 | 52 | [convolutional] 53 | batch_normalize=1 54 | filters=32 55 | size=3 56 | stride=1 57 | pad=1 58 | activation=leaky 59 | 60 | ... becomes the following layer_dict return value: 61 | {'activation': 'leaky', 'stride': 1, 'pad': 1, 'filters': 32, 62 | 'batch_normalize': 1, 'type': 'convolutional', 'size': 3}. 63 | 64 | '001_convolutional' is returned as layer_name, and all lines that follow in yolo.cfg 65 | are returned as the next remainder. 66 | 67 | Keyword argument: 68 | remainder -- a string with all raw text after the previously parsed layer 69 | """ 70 | remainder = remainder.split('[', 1) 71 | if len(remainder) == 2: 72 | remainder = remainder[1] 73 | else: 74 | return None, None, None 75 | remainder = remainder.split(']', 1) 76 | if len(remainder) == 2: 77 | layer_type, remainder = remainder 78 | else: 79 | return None, None, None 80 | if remainder.replace(' ', '')[0] == '#': 81 | remainder = remainder.split('\n', 1)[1] 82 | 83 | layer_param_block, remainder = remainder.split('\n\n', 1) 84 | layer_param_lines = layer_param_block.split('\n')[1:] 85 | layer_name = str(self.layer_counter).zfill(3) + '_' + layer_type 86 | layer_dict = dict(type=layer_type) 87 | if layer_type in self.supported_layers: 88 | for param_line in layer_param_lines: 89 | if param_line[0] == '#': 90 | continue 91 | param_type, param_value = self._parse_params(param_line) 92 | layer_dict[param_type] = param_value 93 | self.layer_counter += 1 94 | return layer_dict, layer_name, remainder 95 | 96 | def _parse_params(self, param_line): 97 | """Identifies the parameters contained in one of the cfg file and returns 98 | them in the required format for each parameter type, e.g. as a list, an int or a float. 99 | 100 | Keyword argument: 101 | param_line -- one parsed line within a layer block 102 | """ 103 | param_line = param_line.replace(' ', '') 104 | param_type, param_value_raw = param_line.split('=') 105 | param_value = None 106 | if param_type == 'layers': 107 | layer_indexes = list() 108 | for index in param_value_raw.split(','): 109 | layer_indexes.append(int(index)) 110 | param_value = layer_indexes 111 | elif isinstance(param_value_raw, str) and not param_value_raw.isalpha(): 112 | condition_param_value_positive = param_value_raw.isdigit() 113 | condition_param_value_negative = param_value_raw[0] == '-' and \ 114 | param_value_raw[1:].isdigit() 115 | if condition_param_value_positive or condition_param_value_negative: 116 | param_value = int(param_value_raw) 117 | else: 118 | param_value = float(param_value_raw) 119 | else: 120 | param_value = str(param_value_raw) 121 | return param_type, param_value 122 | 123 | 124 | class MajorNodeSpecs(object): 125 | """Helper class used to store the names of ONNX output names, 126 | corresponding to the output of a DarkNet layer and its output channels. 127 | Some DarkNet layers are not created and there is no corresponding ONNX node, 128 | but we still need to track them in order to set up skip connections. 129 | """ 130 | 131 | def __init__(self, name, channels): 132 | """ Initialize a MajorNodeSpecs object. 
133 | 134 | Keyword arguments: 135 | name -- name of the ONNX node 136 | channels -- number of output channels of this node 137 | """ 138 | self.name = name 139 | self.channels = channels 140 | self.created_onnx_node = False 141 | if name is not None and isinstance(channels, int) and channels > 0: 142 | self.created_onnx_node = True 143 | 144 | 145 | class ConvParams(object): 146 | """Helper class to store the hyper parameters of a Conv layer, 147 | including its prefix name in the ONNX graph and the expected dimensions 148 | of weights for convolution, bias, and batch normalization. 149 | 150 | Additionally acts as a wrapper for generating safe names for all 151 | weights, checking on feasible combinations. 152 | """ 153 | 154 | def __init__(self, node_name, batch_normalize, conv_weight_dims): 155 | """Constructor based on the base node name (e.g. 101_convolutional), the batch 156 | normalization setting, and the convolutional weights shape. 157 | 158 | Keyword arguments: 159 | node_name -- base name of this YOLO convolutional layer 160 | batch_normalize -- bool value if batch normalization is used 161 | conv_weight_dims -- the dimensions of this layer's convolutional weights 162 | """ 163 | self.node_name = node_name 164 | self.batch_normalize = batch_normalize 165 | assert len(conv_weight_dims) == 4 166 | self.conv_weight_dims = conv_weight_dims 167 | 168 | def generate_param_name(self, param_category, suffix): 169 | """Generates a name based on two string inputs, 170 | and checks if the combination is valid.""" 171 | assert suffix 172 | assert param_category in ['bn', 'conv'] 173 | assert(suffix in ['scale', 'mean', 'var', 'weights', 'bias']) 174 | if param_category == 'bn': 175 | assert self.batch_normalize 176 | assert suffix in ['scale', 'bias', 'mean', 'var'] 177 | elif param_category == 'conv': 178 | assert suffix in ['weights', 'bias'] 179 | if suffix == 'bias': 180 | assert not self.batch_normalize 181 | param_name = self.node_name + '_' + param_category + '_' + suffix 182 | return param_name 183 | 184 | 185 | class WeightLoader(object): 186 | """Helper class used for loading the serialized weights of a binary file stream 187 | and returning the initializers and the input tensors required for populating 188 | the ONNX graph with weights. 189 | """ 190 | 191 | def __init__(self, weights_file_path): 192 | """Initialized with a path to the YOLOv3 .weights file. 193 | 194 | Keyword argument: 195 | weights_file_path -- path to the weights file. 196 | """ 197 | self.weights_file = self._open_weights_file(weights_file_path) 198 | 199 | def load_conv_weights(self, conv_params): 200 | """Returns the initializers with weights from the weights file and 201 | the input tensors of a convolutional layer for all corresponding ONNX nodes. 
202 | 203 | Keyword argument: 204 | conv_params -- a ConvParams object 205 | """ 206 | initializer = list() 207 | inputs = list() 208 | if conv_params.batch_normalize: 209 | bias_init, bias_input = self._create_param_tensors( 210 | conv_params, 'bn', 'bias') 211 | bn_scale_init, bn_scale_input = self._create_param_tensors( 212 | conv_params, 'bn', 'scale') 213 | bn_mean_init, bn_mean_input = self._create_param_tensors( 214 | conv_params, 'bn', 'mean') 215 | bn_var_init, bn_var_input = self._create_param_tensors( 216 | conv_params, 'bn', 'var') 217 | initializer.extend( 218 | [bn_scale_init, bias_init, bn_mean_init, bn_var_init]) 219 | inputs.extend([bn_scale_input, bias_input, 220 | bn_mean_input, bn_var_input]) 221 | else: 222 | bias_init, bias_input = self._create_param_tensors( 223 | conv_params, 'conv', 'bias') 224 | initializer.append(bias_init) 225 | inputs.append(bias_input) 226 | conv_init, conv_input = self._create_param_tensors( 227 | conv_params, 'conv', 'weights') 228 | initializer.append(conv_init) 229 | inputs.append(conv_input) 230 | return initializer, inputs 231 | 232 | def _open_weights_file(self, weights_file_path): 233 | """Opens a YOLOv3 DarkNet file stream and skips the header. 234 | 235 | Keyword argument: 236 | weights_file_path -- path to the weights file. 237 | """ 238 | weights_file = open(weights_file_path, 'rb') 239 | length_header = 5 240 | np.ndarray( 241 | shape=(length_header, ), dtype='int32', buffer=weights_file.read( 242 | length_header * 4)) 243 | return weights_file 244 | 245 | def _create_param_tensors(self, conv_params, param_category, suffix): 246 | """Creates the initializers with weights from the weights file together with 247 | the input tensors. 248 | 249 | Keyword arguments: 250 | conv_params -- a ConvParams object 251 | param_category -- the category of parameters to be created ('bn' or 'conv') 252 | suffix -- a string determining the sub-type of above param_category (e.g., 253 | 'weights' or 'bias') 254 | """ 255 | param_name, param_data, param_data_shape = self._load_one_param_type( 256 | conv_params, param_category, suffix) 257 | 258 | initializer_tensor = helper.make_tensor( 259 | param_name, TensorProto.FLOAT, param_data_shape, param_data) 260 | input_tensor = helper.make_tensor_value_info( 261 | param_name, TensorProto.FLOAT, param_data_shape) 262 | return initializer_tensor, input_tensor 263 | 264 | def _load_one_param_type(self, conv_params, param_category, suffix): 265 | """Deserializes the weights from a file stream in the DarkNet order. 
266 | 267 | Keyword arguments: 268 | conv_params -- a ConvParams object 269 | param_category -- the category of parameters to be created ('bn' or 'conv') 270 | suffix -- a string determining the sub-type of above param_category (e.g., 271 | 'weights' or 'bias') 272 | """ 273 | param_name = conv_params.generate_param_name(param_category, suffix) 274 | channels_out, channels_in, filter_h, filter_w = conv_params.conv_weight_dims 275 | if param_category == 'bn': 276 | param_shape = [channels_out] 277 | elif param_category == 'conv': 278 | if suffix == 'weights': 279 | param_shape = [channels_out, channels_in, filter_h, filter_w] 280 | elif suffix == 'bias': 281 | param_shape = [channels_out] 282 | param_size = np.product(np.array(param_shape)) 283 | param_data = np.ndarray( 284 | shape=param_shape, 285 | dtype='float32', 286 | buffer=self.weights_file.read(param_size * 4)) 287 | param_data = param_data.flatten().astype(float) 288 | return param_name, param_data, param_shape 289 | 290 | 291 | class GraphBuilderONNX(object): 292 | """Class for creating an ONNX graph from a previously generated list of layer dictionaries.""" 293 | 294 | def __init__(self, output_tensors): 295 | """Initialize with all DarkNet default parameters used creating YOLOv3, 296 | and specify the output tensors as an OrderedDict for their output dimensions 297 | with their names as keys. 298 | 299 | Keyword argument: 300 | output_tensors -- the output tensors as an OrderedDict containing the keys' 301 | output dimensions 302 | """ 303 | self.output_tensors = output_tensors 304 | self._nodes = list() 305 | self.graph_def = None 306 | self.input_tensor = None 307 | self.epsilon_bn = 1e-5 308 | self.momentum_bn = 0.99 309 | self.alpha_lrelu = 0.1 310 | self.param_dict = OrderedDict() 311 | self.major_node_specs = list() 312 | self.batch_size = 1 313 | 314 | def build_onnx_graph( 315 | self, 316 | layer_configs, 317 | weights_file_path, 318 | verbose=True): 319 | """Iterate over all layer configs (parsed from the DarkNet representation 320 | of YOLOv3-608), create an ONNX graph, populate it with weights from the weights 321 | file and return the graph definition. 
322 | 323 | Keyword arguments: 324 | layer_configs -- an OrderedDict object with all parsed layers' configurations 325 | weights_file_path -- location of the weights file 326 | verbose -- toggles if the graph is printed after creation (default: True) 327 | """ 328 | for layer_name in layer_configs.keys(): 329 | layer_dict = layer_configs[layer_name] 330 | major_node_specs = self._make_onnx_node(layer_name, layer_dict) 331 | if major_node_specs.name is not None: 332 | self.major_node_specs.append(major_node_specs) 333 | outputs = list() 334 | for tensor_name in self.output_tensors.keys(): 335 | output_dims = [self.batch_size, ] + \ 336 | self.output_tensors[tensor_name] 337 | output_tensor = helper.make_tensor_value_info( 338 | tensor_name, TensorProto.FLOAT, output_dims) 339 | outputs.append(output_tensor) 340 | inputs = [self.input_tensor] 341 | weight_loader = WeightLoader(weights_file_path) 342 | initializer = list() 343 | for layer_name in self.param_dict.keys(): 344 | _, layer_type = layer_name.split('_', 1) 345 | conv_params = self.param_dict[layer_name] 346 | assert layer_type == 'convolutional' 347 | initializer_layer, inputs_layer = weight_loader.load_conv_weights( 348 | conv_params) 349 | initializer.extend(initializer_layer) 350 | inputs.extend(inputs_layer) 351 | del weight_loader 352 | self.graph_def = helper.make_graph( 353 | nodes=self._nodes, 354 | name='YOLOv3-608', 355 | inputs=inputs, 356 | outputs=outputs, 357 | initializer=initializer 358 | ) 359 | if verbose: 360 | print(helper.printable_graph(self.graph_def)) 361 | model_def = helper.make_model(self.graph_def, 362 | producer_name='NVIDIA TensorRT sample') 363 | return model_def 364 | 365 | def _make_onnx_node(self, layer_name, layer_dict): 366 | """Take in a layer parameter dictionary, choose the correct function for 367 | creating an ONNX node and store the information important to graph creation 368 | as a MajorNodeSpec object. 369 | 370 | Keyword arguments: 371 | layer_name -- the layer's name (also the corresponding key in layer_configs) 372 | layer_dict -- a layer parameter dictionary (one element of layer_configs) 373 | """ 374 | layer_type = layer_dict['type'] 375 | if self.input_tensor is None: 376 | if layer_type == 'net': 377 | major_node_output_name, major_node_output_channels = self._make_input_tensor( 378 | layer_name, layer_dict) 379 | major_node_specs = MajorNodeSpecs(major_node_output_name, 380 | major_node_output_channels) 381 | else: 382 | raise ValueError('The first node has to be of type "net".') 383 | else: 384 | node_creators = dict() 385 | node_creators['convolutional'] = self._make_conv_node 386 | node_creators['shortcut'] = self._make_shortcut_node 387 | node_creators['route'] = self._make_route_node 388 | node_creators['upsample'] = self._make_upsample_node 389 | 390 | if layer_type in node_creators.keys(): 391 | major_node_output_name, major_node_output_channels = \ 392 | node_creators[layer_type](layer_name, layer_dict) 393 | major_node_specs = MajorNodeSpecs(major_node_output_name, 394 | major_node_output_channels) 395 | else: 396 | print( 397 | 'Layer of type %s not supported, skipping ONNX node generation.' % 398 | layer_type) 399 | major_node_specs = MajorNodeSpecs(layer_name, 400 | None) 401 | return major_node_specs 402 | 403 | def _make_input_tensor(self, layer_name, layer_dict): 404 | """Create an ONNX input tensor from a 'net' layer and store the batch size. 
405 | 406 | Keyword arguments: 407 | layer_name -- the layer's name (also the corresponding key in layer_configs) 408 | layer_dict -- a layer parameter dictionary (one element of layer_configs) 409 | """ 410 | batch_size = layer_dict['batch'] 411 | channels = layer_dict['channels'] 412 | height = layer_dict['height'] 413 | width = layer_dict['width'] 414 | self.batch_size = batch_size 415 | input_tensor = helper.make_tensor_value_info( 416 | str(layer_name), TensorProto.FLOAT, [ 417 | batch_size, channels, height, width]) 418 | self.input_tensor = input_tensor 419 | return layer_name, channels 420 | 421 | def _get_previous_node_specs(self, target_index=-1): 422 | """Get a previously generated ONNX node (skip those that were not generated). 423 | Target index can be passed for jumping to a specific index. 424 | 425 | Keyword arguments: 426 | target_index -- optional for jumping to a specific index (default: -1 for jumping 427 | to previous element) 428 | """ 429 | previous_node = None 430 | for node in self.major_node_specs[target_index::-1]: 431 | if node.created_onnx_node: 432 | previous_node = node 433 | break 434 | assert previous_node is not None 435 | return previous_node 436 | 437 | def _make_conv_node(self, layer_name, layer_dict): 438 | """Create an ONNX Conv node with optional batch normalization and 439 | activation nodes. 440 | 441 | Keyword arguments: 442 | layer_name -- the layer's name (also the corresponding key in layer_configs) 443 | layer_dict -- a layer parameter dictionary (one element of layer_configs) 444 | """ 445 | previous_node_specs = self._get_previous_node_specs() 446 | inputs = [previous_node_specs.name] 447 | previous_channels = previous_node_specs.channels 448 | kernel_size = layer_dict['size'] 449 | stride = layer_dict['stride'] 450 | filters = layer_dict['filters'] 451 | batch_normalize = False 452 | if 'batch_normalize' in layer_dict.keys( 453 | ) and layer_dict['batch_normalize'] == 1: 454 | batch_normalize = True 455 | 456 | kernel_shape = [kernel_size, kernel_size] 457 | weights_shape = [filters, previous_channels] + kernel_shape 458 | conv_params = ConvParams(layer_name, batch_normalize, weights_shape) 459 | 460 | strides = [stride, stride] 461 | dilations = [1, 1] 462 | weights_name = conv_params.generate_param_name('conv', 'weights') 463 | inputs.append(weights_name) 464 | if not batch_normalize: 465 | bias_name = conv_params.generate_param_name('conv', 'bias') 466 | inputs.append(bias_name) 467 | 468 | conv_node = helper.make_node( 469 | 'Conv', 470 | inputs=inputs, 471 | outputs=[layer_name], 472 | kernel_shape=kernel_shape, 473 | strides=strides, 474 | auto_pad='SAME_LOWER', 475 | dilations=dilations, 476 | name=layer_name 477 | ) 478 | self._nodes.append(conv_node) 479 | inputs = [layer_name] 480 | layer_name_output = layer_name 481 | 482 | if batch_normalize: 483 | layer_name_bn = layer_name + '_bn' 484 | bn_param_suffixes = ['scale', 'bias', 'mean', 'var'] 485 | for suffix in bn_param_suffixes: 486 | bn_param_name = conv_params.generate_param_name('bn', suffix) 487 | inputs.append(bn_param_name) 488 | batchnorm_node = helper.make_node( 489 | 'BatchNormalization', 490 | inputs=inputs, 491 | outputs=[layer_name_bn], 492 | epsilon=self.epsilon_bn, 493 | momentum=self.momentum_bn, 494 | name=layer_name_bn 495 | ) 496 | self._nodes.append(batchnorm_node) 497 | inputs = [layer_name_bn] 498 | layer_name_output = layer_name_bn 499 | 500 | if layer_dict['activation'] == 'leaky': 501 | layer_name_lrelu = layer_name + '_lrelu' 502 | 503 | lrelu_node = 
helper.make_node( 504 | 'LeakyRelu', 505 | inputs=inputs, 506 | outputs=[layer_name_lrelu], 507 | name=layer_name_lrelu, 508 | alpha=self.alpha_lrelu 509 | ) 510 | self._nodes.append(lrelu_node) 511 | inputs = [layer_name_lrelu] 512 | layer_name_output = layer_name_lrelu 513 | elif layer_dict['activation'] == 'linear': 514 | pass 515 | else: 516 | print('Activation not supported.') 517 | 518 | self.param_dict[layer_name] = conv_params 519 | return layer_name_output, filters 520 | 521 | def _make_shortcut_node(self, layer_name, layer_dict): 522 | """Create an ONNX Add node with the shortcut properties from 523 | the DarkNet-based graph. 524 | 525 | Keyword arguments: 526 | layer_name -- the layer's name (also the corresponding key in layer_configs) 527 | layer_dict -- a layer parameter dictionary (one element of layer_configs) 528 | """ 529 | shortcut_index = layer_dict['from'] 530 | activation = layer_dict['activation'] 531 | assert activation == 'linear' 532 | 533 | first_node_specs = self._get_previous_node_specs() 534 | second_node_specs = self._get_previous_node_specs( 535 | target_index=shortcut_index) 536 | assert first_node_specs.channels == second_node_specs.channels 537 | channels = first_node_specs.channels 538 | inputs = [first_node_specs.name, second_node_specs.name] 539 | shortcut_node = helper.make_node( 540 | 'Add', 541 | inputs=inputs, 542 | outputs=[layer_name], 543 | name=layer_name, 544 | ) 545 | self._nodes.append(shortcut_node) 546 | return layer_name, channels 547 | 548 | def _make_route_node(self, layer_name, layer_dict): 549 | """If the 'layers' parameter from the DarkNet configuration is only one index, continue 550 | node creation at the indicated (negative) index. Otherwise, create an ONNX Concat node 551 | with the route properties from the DarkNet-based graph. 552 | 553 | Keyword arguments: 554 | layer_name -- the layer's name (also the corresponding key in layer_configs) 555 | layer_dict -- a layer parameter dictionary (one element of layer_configs) 556 | """ 557 | route_node_indexes = layer_dict['layers'] 558 | if len(route_node_indexes) == 1: 559 | split_index = route_node_indexes[0] 560 | assert split_index < 0 561 | # Increment by one because we skipped the YOLO layer: 562 | split_index += 1 563 | self.major_node_specs = self.major_node_specs[:split_index] 564 | layer_name = None 565 | channels = None 566 | else: 567 | inputs = list() 568 | channels = 0 569 | for index in route_node_indexes: 570 | if index > 0: 571 | # Increment by one because we count the input as a node (DarkNet 572 | # does not) 573 | index += 1 574 | route_node_specs = self._get_previous_node_specs( 575 | target_index=index) 576 | inputs.append(route_node_specs.name) 577 | channels += route_node_specs.channels 578 | assert inputs 579 | assert channels > 0 580 | 581 | route_node = helper.make_node( 582 | 'Concat', 583 | axis=1, 584 | inputs=inputs, 585 | outputs=[layer_name], 586 | name=layer_name, 587 | ) 588 | self._nodes.append(route_node) 589 | return layer_name, channels 590 | 591 | def _make_upsample_node(self, layer_name, layer_dict): 592 | """Create an ONNX Upsample node with the properties from 593 | the DarkNet-based graph. 
594 | 595 | Keyword arguments: 596 | layer_name -- the layer's name (also the corresponding key in layer_configs) 597 | layer_dict -- a layer parameter dictionary (one element of layer_configs) 598 | """ 599 | upsample_factor = float(layer_dict['stride']) 600 | previous_node_specs = self._get_previous_node_specs() 601 | inputs = [previous_node_specs.name] 602 | channels = previous_node_specs.channels 603 | assert channels > 0 604 | upsample_node = helper.make_node( 605 | 'Upsample', 606 | mode='nearest', 607 | # For ONNX versions <0.7.0, Upsample nodes accept different parameters than 'scales': 608 | scales=[1.0, 1.0, upsample_factor, upsample_factor], 609 | inputs=inputs, 610 | outputs=[layer_name], 611 | name=layer_name, 612 | ) 613 | self._nodes.append(upsample_node) 614 | return layer_name, channels 615 | 616 | def main(): 617 | """Run the DarkNet-to-ONNX conversion for YOLOv3-608.""" 618 | # Have to use python 2 due to hashlib compatibility 619 | if sys.version_info[0] > 2: 620 | raise Exception("This script is only compatible with python2, please re-run it \ 621 | with python2. The rest of this sample can be run with either version of python") 622 | 623 | cfg_file_path = "yolov3-608.cfg" 624 | 625 | # These are the only layers DarkNetParser will extract parameters from. The three layers of 626 | # type 'yolo' are not parsed in detail because they are included in the post-processing later: 627 | supported_layers = ['net', 'convolutional', 'shortcut', 'route', 'upsample'] 628 | 629 | # Create a DarkNetParser object, and then use it to generate an OrderedDict with all 630 | # layers' configs from the cfg file: 631 | parser = DarkNetParser(supported_layers) 632 | layer_configs = parser.parse_cfg_file(cfg_file_path) 633 | # We do not need the parser anymore once we have layer_configs: 634 | del parser 635 | 636 | # In the above layer_configs, there are three outputs whose shapes we need to know 637 | # (in CHW format): 638 | output_tensor_dims = OrderedDict() 639 | #yolo-v3(608*608) 640 | output_tensor_dims['082_convolutional'] = [255, 19, 19] 641 | output_tensor_dims['094_convolutional'] = [255, 38, 38] 642 | output_tensor_dims['106_convolutional'] = [255, 76, 76] 643 | #yolo-v3(416*416) 644 | # output_tensor_dims['082_convolutional'] = [255, 13, 13] 645 | # output_tensor_dims['094_convolutional'] = [255, 26, 26] 646 | # output_tensor_dims['106_convolutional'] = [255, 52, 52] 647 | 648 | # Create a GraphBuilderONNX object with the known output tensor dimensions: 649 | builder = GraphBuilderONNX(output_tensor_dims) 650 | 651 | weights_file_path = "yolov3-608.weights" 652 | 653 | # Now generate an ONNX graph with weights from the previously parsed layer configurations 654 | # and the weights file: 655 | yolov3_model_def = builder.build_onnx_graph( 656 | layer_configs=layer_configs, 657 | weights_file_path=weights_file_path, 658 | verbose=True) 659 | # Once we have the model definition, we do not need the builder anymore: 660 | del builder 661 | 662 | # Perform a sanity check on the ONNX model definition: 663 | onnx.checker.check_model(yolov3_model_def) 664 | 665 | # Serialize the generated ONNX graph to this file: 666 | output_file_path = 'yolov3-608.onnx' 667 | onnx.save(yolov3_model_def, output_file_path) 668 | 669 | if __name__ == '__main__': 670 | main() --------------------------------------------------------------------------------
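
The `util.py` code above (`predict_transform*`, `write_results*`, `dynamic_write_results`) is the YOLOv3 post-processing: decode the raw feature maps into boxes, threshold on objectness, then run per-class NMS (`dynamic_write_results` simply retries with a slightly lower NMS threshold when more than 100 boxes survive). Below is a minimal usage sketch, not part of the repo: it assumes `util.py` and its `bbox_iou` dependency (`bbox.py`) import cleanly, uses a random tensor in place of a real decoded prediction, and note that `write_results` as written keeps only class index 0 (person).

```python
# Minimal sketch (not from the repo): drive util.py's write_results on a dummy
# tensor shaped like a decoded YOLOv3-608 prediction.
import torch

from util import write_results  # assumes bbox.py (bbox_iou) is also importable

num_classes = 80
num_boxes = (19 * 19 + 38 * 38 + 76 * 76) * 3   # 22743 boxes for a 608x608 input
# Columns: [center_x, center_y, w, h, objectness, 80 class scores]
prediction = torch.rand(1, num_boxes, 5 + num_classes)

dets = write_results(prediction, confidence=0.5, num_classes=num_classes,
                     nms=True, nms_conf=0.4)
if isinstance(dets, int):
    print("no detections above the confidence threshold")
else:
    # Rows: [batch_idx, x_min, y_min, x_max, y_max, objectness, class_score, class_idx]
    print(dets.shape)
```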
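
The shapes hard-coded in `output_tensor_dims` follow directly from the YOLOv3 head: each of the three output convolutions predicts 3 anchors × (4 box offsets + 1 objectness + 80 class scores) = 255 channels, on grids of input_size / 32, 16 and 8. A quick arithmetic check for the 608×608 configuration and the commented-out 416×416 one:

```python
# Check the registered output shapes against the YOLOv3 head layout.
num_anchors_per_scale = 3
num_classes = 80
channels = num_anchors_per_scale * (5 + num_classes)        # 255
for inp_dim in (608, 416):
    grids = [inp_dim // stride for stride in (32, 16, 8)]
    print(inp_dim, channels, grids)
# 608 -> 255 [19, 38, 76]
# 416 -> 255 [13, 26, 52]
```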
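
`ConvParams.generate_param_name` defines the naming convention that ties each DarkNet weight blob to an ONNX initializer (`<layer>_<category>_<suffix>`), with assertions rejecting combinations that cannot exist, such as a separate conv bias on a batch-normalized layer. A tiny illustration; the layer name and weight shape below are made up:

```python
# Illustrative only: the layer name and weight shape are made up.
from weight_to_onnx import ConvParams

params = ConvParams('001_convolutional', batch_normalize=True,
                    conv_weight_dims=(32, 3, 3, 3))
print(params.generate_param_name('conv', 'weights'))  # 001_convolutional_conv_weights
print(params.generate_param_name('bn', 'scale'))      # 001_convolutional_bn_scale
# generate_param_name('conv', 'bias') would raise an AssertionError here,
# because a batch-normalized DarkNet conv layer carries no separate bias.
```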
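
`main()` already runs `onnx.checker.check_model` before serializing, but it can be useful to re-open the saved **yolov3-608.onnx** and confirm that the three outputs registered in `output_tensor_dims` made it into the graph. A small sketch using the standard `onnx` Python API; the leading output dimension is the batch size read from the cfg's `[net]` section, so it is not necessarily 1:

```python
# Sketch: reload the saved ONNX model and list the graph outputs.
import onnx

model = onnx.load("yolov3-608.onnx")
onnx.checker.check_model(model)   # same sanity check main() performs

for out in model.graph.output:
    dims = [d.dim_value for d in out.type.tensor_type.shape.dim]
    print(out.name, dims)
# Expect 082_convolutional, 094_convolutional and 106_convolutional with 255 channels
# and 19x19, 38x38, 76x76 grids respectively.
```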