├── README.md
├── darknet2pb
│   ├── convert_weights.py
│   ├── convert_weights_pb.py
│   ├── README.md
│   ├── CODE_OF_CONDUCT.md
│   ├── demo.py
│   ├── yolo_v3_tiny.py
│   ├── yolo_v3.py
│   ├── utils.py
│   └── LICENSE
├── yolo-cluster-anchors.py
├── merge_bn.py
└── trt_yolov3_tiny.cpp

/README.md:
--------------------------------------------------------------------------------
1 | # cv_tools
2 | 
3 | - yolo-cluster-anchors.py: K-means anchor clustering for the YOLO family of object detectors. The algorithm and usage are described at https://blog.csdn.net/just_sort/article/details/103386047 .
4 | - darknet2pb: this folder extends the darknet2pb tool https://github.com/mystic123/tensorflow-yolo-v3 with support for depthwise separable convolutions, so it can convert YOLO models that use MobileNet as the backbone. Only yolov3-tiny.py was changed on top of mystic123's code; to adapt other convolution layers, add and modify them in the same way.
5 | - merge_bn.py: folds the BatchNorm layers of a Caffe model into the preceding convolutions to speed up inference.
6 | - trt_yolov3_tiny.cpp: YOLOv3-Tiny inference with TensorRT 6.0 and VS2015.
--------------------------------------------------------------------------------
/darknet2pb/convert_weights.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | import tensorflow as tf
4 | 
5 | import yolo_v3
6 | import yolo_v3_tiny
7 | 
8 | from utils import load_coco_names, load_weights
9 | 
10 | FLAGS = tf.app.flags.FLAGS
11 | 
12 | tf.app.flags.DEFINE_string(
13 |     'class_names', 'coco.names', 'File with class names')
14 | tf.app.flags.DEFINE_string(
15 |     'weights_file', 'yolov3.weights', 'Binary file with detector weights')
16 | tf.app.flags.DEFINE_string(
17 |     'data_format', 'NCHW', 'Data format: NCHW (gpu only) / NHWC')
18 | tf.app.flags.DEFINE_bool(
19 |     'tiny', False, 'Use tiny version of YOLOv3')
20 | tf.app.flags.DEFINE_bool(
21 |     'spp', False, 'Use SPP version of YOLOv3')
22 | tf.app.flags.DEFINE_string(
23 |     'ckpt_file', './saved_model/model.ckpt', 'Checkpoint file')
24 | 
25 | 
26 | def main(argv=None):
27 |     if FLAGS.tiny:
28 |         model = yolo_v3_tiny.yolo_v3_tiny
29 |     elif FLAGS.spp:
30 |         model = yolo_v3.yolo_v3_spp
31 |     else:
32 |         model = yolo_v3.yolo_v3
33 | 
34 |     classes = load_coco_names(FLAGS.class_names)
35 | 
36 |     # placeholder for detector inputs
37 |     # any size > 320 will work here
38 |     inputs = tf.placeholder(tf.float32, [None, 416, 416, 3])
39 | 
40 |     with tf.variable_scope('detector'):
41 |         detections = model(inputs, len(classes),
42 |                            data_format=FLAGS.data_format)
43 |         load_ops = load_weights(tf.global_variables(
44 |             scope='detector'), FLAGS.weights_file)
45 | 
46 |     saver = tf.train.Saver(tf.global_variables(scope='detector'))
47 | 
48 |     with tf.Session() as sess:
49 |         sess.run(load_ops)
50 | 
51 |         save_path = saver.save(sess, save_path=FLAGS.ckpt_file)
52 |         print('Model saved in path: {}'.format(save_path))
53 | 
54 | 
55 | if __name__ == '__main__':
56 |     tf.app.run()
57 | 
--------------------------------------------------------------------------------
/darknet2pb/convert_weights_pb.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | import numpy as np
4 | import tensorflow as tf
5 | import yolo_v3
6 | import yolo_v3_tiny
7 | from PIL import Image, ImageDraw
8 | 
9 | from utils import load_weights, load_coco_names, detections_boxes, freeze_graph
10 | 
11 | FLAGS = tf.app.flags.FLAGS
12 | 
13 | tf.app.flags.DEFINE_string(
14 |     'class_names', 'coco.names', 'File with class names')
15 | tf.app.flags.DEFINE_string(
16 |     'weights_file', 'yolov3.weights', 'Binary file with detector weights')
17 | tf.app.flags.DEFINE_string(
18 |     'data_format', 'NCHW', 'Data format: NCHW (gpu only) / NHWC')
19 | tf.app.flags.DEFINE_string(
20 |     'output_graph',
'frozen_darknet_yolov3_model.pb', 'Frozen tensorflow protobuf model output path') 21 | 22 | tf.app.flags.DEFINE_bool( 23 | 'tiny', False, 'Use tiny version of YOLOv3') 24 | tf.app.flags.DEFINE_bool( 25 | 'spp', False, 'Use SPP version of YOLOv3') 26 | tf.app.flags.DEFINE_integer( 27 | 'size', 416, 'Image size') 28 | 29 | 30 | 31 | def main(argv=None): 32 | if FLAGS.tiny: 33 | model = yolo_v3_tiny.yolo_v3_tiny 34 | elif FLAGS.spp: 35 | model = yolo_v3.yolo_v3_spp 36 | else: 37 | model = yolo_v3.yolo_v3 38 | 39 | classes = load_coco_names(FLAGS.class_names) 40 | 41 | # placeholder for detector inputs 42 | inputs = tf.placeholder(tf.float32, [None, FLAGS.size, FLAGS.size, 3], "inputs") 43 | 44 | with tf.variable_scope('detector'): 45 | detections = model(inputs, len(classes), data_format=FLAGS.data_format) 46 | load_ops = load_weights(tf.global_variables(scope='detector'), FLAGS.weights_file) 47 | 48 | # Sets the output nodes in the current session 49 | boxes = detections_boxes(detections) 50 | 51 | with tf.Session() as sess: 52 | sess.run(load_ops) 53 | freeze_graph(sess, FLAGS.output_graph) 54 | 55 | if __name__ == '__main__': 56 | tf.app.run() 57 | -------------------------------------------------------------------------------- /darknet2pb/README.md: -------------------------------------------------------------------------------- 1 | # tensorflow-yolo-v3 2 | 3 | Implementation of YOLO v3 object detector in Tensorflow (TF-Slim). Full tutorial can be found [here](https://medium.com/@pawekapica_31302/implementing-yolo-v3-in-tensorflow-tf-slim-c3c55ff59dbe). 4 | 5 | Tested on Python 3.5, Tensorflow 1.11.0 on Ubuntu 16.04. 6 | 7 | ## Todo list: 8 | - [x] YOLO v3 architecture 9 | - [x] Basic working demo 10 | - [x] Weights converter (util for exporting loaded COCO weights as TF checkpoint) 11 | - [ ] Training pipeline 12 | - [ ] More backends 13 | 14 | ## How to run the demo: 15 | To run demo type this in the command line: 16 | 17 | 1. Download COCO class names file: `wget https://raw.githubusercontent.com/pjreddie/darknet/master/data/coco.names` 18 | 2. Download and convert model weights: 19 | 1. Download binary file with desired weights: 20 | 1. Full weights: `wget https://pjreddie.com/media/files/yolov3.weights` 21 | 1. Tiny weights: `wget https://pjreddie.com/media/files/yolov3-tiny.weights` 22 | 1. SPP weights: `wget https://pjreddie.com/media/files/yolov3-spp.weights` 23 | 2. Run `python ./convert_weights.py` and `python ./convert_weights_pb.py` 24 | 3. Run `python ./demo.py --input_img --output_img --frozen_model ` 25 | 26 | 27 | ####Optional Flags 28 | 1. convert_weights: 29 | 1. `--class_names` 30 | 1. Path to the class names file 31 | 2. `--weights_file` 32 | 1. Path to the desired weights file 33 | 3. `--data_format` 34 | 1. `NCHW` (gpu only) or `NHWC` 35 | 4. `--tiny` 36 | 1. Use yolov3-tiny 37 | 5. `--spp` 38 | 1. Use yolov3-spp 39 | 6. `--ckpt_file` 40 | 1. Output checkpoint file 41 | 2. convert_weights_pb.py: 42 | 1. `--class_names` 43 | 1. Path to the class names file 44 | 2. `--weights_file` 45 | 1. Path to the desired weights file 46 | 3. `--data_format` 47 | 1. `NCHW` (gpu only) or `NHWC` 48 | 4. `--tiny` 49 | 1. Use yolov3-tiny 50 | 5. `--spp` 51 | 1. Use yolov3-spp 52 | 6. `--output_graph` 53 | 1. Location to write the output .pb graph to 54 | 3. demo.py 55 | 1. `--class_names` 56 | 1. Path to the class names file 57 | 2. `--weights_file` 58 | 1. Path to the desired weights file 59 | 3. `--data_format` 60 | 1. `NCHW` (gpu only) or `NHWC` 61 | 4. `--ckpt_file` 62 | 1. 
Path to the checkpoint file 63 | 5. `--frozen_model` 64 | 1. Path to the frozen model 65 | 6. `--conf_threshold` 66 | 1. Desired confidence threshold 67 | 7. `--iou_threshold` 68 | 1. Desired iou threshold 69 | 8. `--gpu_memory_fraction` 70 | 1. Fraction of gpu memory to work with 71 | -------------------------------------------------------------------------------- /darknet2pb/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. 6 | 7 | ## Our Standards 8 | 9 | Examples of behavior that contributes to creating a positive environment include: 10 | 11 | * Using welcoming and inclusive language 12 | * Being respectful of differing viewpoints and experiences 13 | * Gracefully accepting constructive criticism 14 | * Focusing on what is best for the community 15 | * Showing empathy towards other community members 16 | 17 | Examples of unacceptable behavior by participants include: 18 | 19 | * The use of sexualized language or imagery and unwelcome sexual attention or advances 20 | * Trolling, insulting/derogatory comments, and personal or political attacks 21 | * Public or private harassment 22 | * Publishing others' private information, such as a physical or electronic address, without explicit permission 23 | * Other conduct which could reasonably be considered inappropriate in a professional setting 24 | 25 | ## Our Responsibilities 26 | 27 | Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. 28 | 29 | Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. 30 | 31 | ## Scope 32 | 33 | This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. 34 | 35 | ## Enforcement 36 | 37 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at pawel.kapica@gmail.com. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. 
38 | 39 | Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. 40 | 41 | ## Attribution 42 | 43 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version] 44 | 45 | [homepage]: http://contributor-covenant.org 46 | [version]: http://contributor-covenant.org/version/1/4/ 47 | -------------------------------------------------------------------------------- /darknet2pb/demo.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | import tensorflow as tf 5 | from PIL import Image 6 | import time 7 | 8 | import yolo_v3 9 | import yolo_v3_tiny 10 | 11 | from utils import load_coco_names, draw_boxes, get_boxes_and_inputs, get_boxes_and_inputs_pb, non_max_suppression, \ 12 | load_graph, letter_box_image 13 | 14 | FLAGS = tf.app.flags.FLAGS 15 | 16 | tf.app.flags.DEFINE_string( 17 | 'input_img', '', 'Input image') 18 | tf.app.flags.DEFINE_string( 19 | 'output_img', '', 'Output image') 20 | tf.app.flags.DEFINE_string( 21 | 'class_names', 'coco.names', 'File with class names') 22 | tf.app.flags.DEFINE_string( 23 | 'weights_file', 'yolov3.weights', 'Binary file with detector weights') 24 | tf.app.flags.DEFINE_string( 25 | 'data_format', 'NCHW', 'Data format: NCHW (gpu only) / NHWC') 26 | tf.app.flags.DEFINE_string( 27 | 'ckpt_file', './saved_model/model.ckpt', 'Checkpoint file') 28 | tf.app.flags.DEFINE_string( 29 | 'frozen_model', '', 'Frozen tensorflow protobuf model') 30 | tf.app.flags.DEFINE_bool( 31 | 'tiny', False, 'Use tiny version of YOLOv3') 32 | tf.app.flags.DEFINE_bool( 33 | 'spp', False, 'Use SPP version of YOLOv3') 34 | 35 | tf.app.flags.DEFINE_integer( 36 | 'size', 416, 'Image size') 37 | 38 | tf.app.flags.DEFINE_float( 39 | 'conf_threshold', 0.5, 'Confidence threshold') 40 | tf.app.flags.DEFINE_float( 41 | 'iou_threshold', 0.4, 'IoU threshold') 42 | 43 | tf.app.flags.DEFINE_float( 44 | 'gpu_memory_fraction', 1.0, 'Gpu memory fraction to use') 45 | 46 | def main(argv=None): 47 | 48 | gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction) 49 | 50 | config = tf.ConfigProto( 51 | gpu_options=gpu_options, 52 | log_device_placement=False, 53 | ) 54 | 55 | img = Image.open(FLAGS.input_img) 56 | img_resized = letter_box_image(img, FLAGS.size, FLAGS.size, 128) 57 | img_resized = img_resized.astype(np.float32) 58 | classes = load_coco_names(FLAGS.class_names) 59 | 60 | if FLAGS.frozen_model: 61 | 62 | t0 = time.time() 63 | frozenGraph = load_graph(FLAGS.frozen_model) 64 | print("Loaded graph in {:.2f}s".format(time.time()-t0)) 65 | 66 | boxes, inputs = get_boxes_and_inputs_pb(frozenGraph) 67 | 68 | with tf.Session(graph=frozenGraph, config=config) as sess: 69 | t0 = time.time() 70 | detected_boxes = sess.run( 71 | boxes, feed_dict={inputs: [img_resized]}) 72 | 73 | else: 74 | if FLAGS.tiny: 75 | model = yolo_v3_tiny.yolo_v3_tiny 76 | elif FLAGS.spp: 77 | model = yolo_v3.yolo_v3_spp 78 | else: 79 | model = yolo_v3.yolo_v3 80 | 81 | boxes, inputs = get_boxes_and_inputs(model, len(classes), FLAGS.size, FLAGS.data_format) 82 | 83 | saver = tf.train.Saver(var_list=tf.global_variables(scope='detector')) 84 | 85 | with tf.Session(config=config) as sess: 86 | t0 = time.time() 87 | saver.restore(sess, FLAGS.ckpt_file) 88 | print('Model restored in 
{:.2f}s'.format(time.time()-t0))
89 | 
90 |             t0 = time.time()
91 |             detected_boxes = sess.run(
92 |                 boxes, feed_dict={inputs: [img_resized]})
93 | 
94 |     filtered_boxes = non_max_suppression(detected_boxes,
95 |                                          confidence_threshold=FLAGS.conf_threshold,
96 |                                          iou_threshold=FLAGS.iou_threshold)
97 |     print("Predictions found in {:.2f}s".format(time.time() - t0))
98 | 
99 |     draw_boxes(filtered_boxes, img, classes, (FLAGS.size, FLAGS.size), True)
100 | 
101 |     img.save(FLAGS.output_img)
102 | 
103 | 
104 | if __name__ == '__main__':
105 |     tf.app.run()
106 | 
--------------------------------------------------------------------------------
/yolo-cluster-anchors.py:
--------------------------------------------------------------------------------
1 | #coding=utf-8
2 | import xml.etree.ElementTree as ET
3 | import numpy as np
  | import glob
4 | 
5 | 
6 | def iou(box, clusters):
7 |     """
8 |     Compute the IoU between one ground-truth box and the k prior (anchor) boxes.
9 |     box: tuple or array holding the ground-truth width and height.
10 |     clusters: numpy array of shape (k, 2), where k is the number of anchor clusters.
11 |     Returns: the IoU between the ground truth and every anchor.
12 |     """
13 |     x = np.minimum(clusters[:, 0], box[0])
14 |     y = np.minimum(clusters[:, 1], box[1])
15 |     if np.count_nonzero(x == 0) > 0 or np.count_nonzero(y == 0) > 0:
16 |         raise ValueError("Box has no area")
17 |     intersection = x * y
18 |     box_area = box[0] * box[1]
19 |     cluster_area = clusters[:, 0] * clusters[:, 1]
20 |     iou_ = intersection / (box_area + cluster_area - intersection)
21 |     return iou_
22 | 
23 | 
24 | def avg_iou(boxes, clusters):
25 |     """
26 |     Compute the mean of the best IoU between each ground truth and the k anchors.
27 |     """
28 |     return np.mean([np.max(iou(boxes[i], clusters)) for i in range(boxes.shape[0])])
29 | 
30 | def kmeans(boxes, k, dist=np.median):
31 |     """
32 |     K-means clustering driven by IoU.
33 |     boxes: ground-truth boxes of shape (r, 2), where r is the number of ground truths.
34 |     k: number of anchors.
35 |     dist: distance (aggregation) function.
36 |     Returns: the k anchor boxes as an array of shape (k, 2).
37 |     """
38 |     # this is the r mentioned above
39 |     rows = boxes.shape[0]
40 |     # distance array: distance between each ground truth and the k anchors
41 |     distances = np.empty((rows, k))
42 |     # index of the anchor that was "closest" to each ground truth in the previous iteration
43 |     last_clusters = np.zeros((rows,))
44 |     # set the random seed
45 |     np.random.seed()
46 | 
47 |     # initialize the k cluster centers by picking k of the r ground truths at random
48 |     clusters = boxes[np.random.choice(rows, k, replace=False)]
49 |     # start clustering
50 |     while True:
51 |         # distance between each ground truth and the k anchors, computed as 1 - IOU(box, anchor)
52 |         for row in range(rows):
53 |             distances[row] = 1 - iou(boxes[row], clusters)
54 |         # for each ground truth, pick the anchor with the smallest distance and keep its index
55 |         nearest_clusters = np.argmin(distances, axis=1)
56 |         # if every ground truth keeps the same nearest anchor as last time, clustering has converged
57 |         if (last_clusters == nearest_clusters).all():
58 |             break
59 |         # update each cluster center to dist (the median by default) of the boxes assigned to it
60 |         for cluster in range(k):
61 |             clusters[cluster] = dist(boxes[nearest_clusters == cluster], axis=0)
62 |         # remember the nearest anchor index of each ground truth for the next iteration
63 |         last_clusters = nearest_clusters
64 | 
65 |     return clusters
66 | 
67 | # Load your own dataset; all it needs is the XML files produced by labelImg
68 | def load_dataset(path):
69 |     dataset = []
70 |     for xml_file in glob.glob("{}/*xml".format(path)):
71 |         tree = ET.parse(xml_file)
72 |         # image height
73 |         height = int(tree.findtext("./size/height"))
74 |         # image width
75 |         width = int(tree.findtext("./size/width"))
76 | 
77 |         for obj in tree.iter("object"):
78 |             # coordinates normalized by the image size
79 |             xmin = int(obj.findtext("bndbox/xmin")) / width
80 |             ymin = int(obj.findtext("bndbox/ymin")) / height
81 |             xmax = int(obj.findtext("bndbox/xmax")) / width
82 |             ymax = int(obj.findtext("bndbox/ymax")) / height
83 |             xmin = np.float64(xmin)
84 |             ymin = np.float64(ymin)
85 |             xmax = np.float64(xmax)
86 |             ymax = np.float64(ymax)
87 |             if xmax == xmin or ymax == ymin:
88 |                 print(xml_file)
  |                 continue  # skip zero-area boxes, otherwise iou() raises "Box has no area"
89 |             # append the anchor width and height to the dataset; running kmeans on it yields the anchors
90 |             dataset.append([xmax - xmin, ymax - ymin])
91 |     return np.array(dataset)
92 | 
93 | if __name__ == '__main__':
94 | 
95 |     ANNOTATIONS_PATH = "F:\Annotations" # folder containing the XML files
96 |     CLUSTERS = 9 # number of clusters, i.e. number of anchors
97 |     INPUTDIM = 416 # network input size
98 | 
99 |     data = load_dataset(ANNOTATIONS_PATH)
100 |     out = kmeans(data, k=CLUSTERS)
101 |     print('Boxes:')
102 |     print(np.array(out)*INPUTDIM)
103 |     print("Accuracy: {:.2f}%".format(avg_iou(data, out) * 100))
104 |     final_anchors = np.around(out[:, 0] / out[:, 1], decimals=2).tolist()
105 |     print("Before Sort Ratios:\n {}".format(final_anchors))
106 |     print("After Sort Ratios:\n {}".format(sorted(final_anchors)))
107 | 
--------------------------------------------------------------------------------
/darknet2pb/yolo_v3_tiny.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | import numpy as np
4 | import tensorflow as tf
5 | from yolo_v3 import _conv2d_fixed_padding, _fixed_padding, _get_size, \
6 |     _detection_layer, _upsample
7 | 
8 | slim = tf.contrib.slim
9 | 
10 | _BATCH_NORM_DECAY = 0.9
11 | _BATCH_NORM_EPSILON = 1e-05
12 | _LEAKY_RELU = 0.1
13 | 
14 | _ANCHORS = [(10, 14), (23, 27), (37, 58),
15 |             (81, 82), (135, 169), (344, 319)]
16 | 
17 | 
18 | def yolo_v3_tiny(inputs, num_classes, is_training=False, data_format='NCHW', reuse=False):
19 |     """
20 |     Creates YOLO v3 tiny model.
21 |     :param inputs: a 4-D tensor of size [batch_size, height, width, channels].
22 |         Dimension batch_size may be undefined. The channel order is RGB.
23 |     :param num_classes: number of predicted classes.
24 |     :param is_training: whether is training or not.
25 |     :param data_format: data format NCHW or NHWC.
26 |     :param reuse: whether or not the network and its variables should be reused.
27 |     :return:
28 |     """
29 |     # it will be needed later on
30 |     img_size = inputs.get_shape().as_list()[1:3]
31 | 
32 |     # transpose the inputs to NCHW
33 |     if data_format == 'NCHW':
34 |         inputs = tf.transpose(inputs, [0, 3, 1, 2])
35 | 
36 |     # normalize values to range [0..1]
37 |     inputs = inputs / 255
38 | 
39 |     # set batch norm params
40 |     batch_norm_params = {
41 |         'decay': _BATCH_NORM_DECAY,
42 |         'epsilon': _BATCH_NORM_EPSILON,
43 |         'scale': True,
44 |         'is_training': is_training,
45 |         'fused': None,  # Use fused batch norm if possible.
46 | } 47 | 48 | with tf.variable_scope('yolo-v3-tiny'): 49 | for i in range(6): 50 | inputs = slim.conv2d(inputs, 16 * pow(2, i), 3, 1, padding='SAME', biases_initializer=None, 51 | activation_fn=lambda x: tf.nn.leaky_relu(x, alpha=_LEAKY_RELU), 52 | normalizer_fn=slim.batch_norm, normalizer_params=batch_norm_params) 53 | 54 | if i == 4: 55 | route_1 = inputs 56 | 57 | if i == 5: 58 | inputs = slim.max_pool2d( 59 | inputs, [2, 2], stride=1, padding="SAME", scope='pool2') 60 | else: 61 | inputs = slim.max_pool2d( 62 | inputs, [2, 2], scope='pool2') 63 | 64 | # inputs = _conv2d_fixed_padding(inputs, 1024, 3) 65 | inputs = slim.separable_conv2d(inputs, num_outputs=None, kernel_size=3, depth_multiplier=1, stride=1, biases_initializer=None, 66 | activation_fn=lambda x: tf.nn.leaky_relu(x, alpha=_LEAKY_RELU), 67 | normalizer_fn=slim.batch_norm, normalizer_params=batch_norm_params, 68 | padding='SAME') 69 | 70 | inputs = slim.conv2d(inputs, 1024, 1, 1, biases_initializer=None, 71 | activation_fn=lambda x: tf.nn.leaky_relu(x, alpha=_LEAKY_RELU), 72 | normalizer_fn=slim.batch_norm, normalizer_params=batch_norm_params, padding='VALID') 73 | 74 | inputs = slim.conv2d(inputs, 256, 1, 1, padding='SAME', biases_initializer=None, 75 | activation_fn=lambda x: tf.nn.leaky_relu(x, alpha=_LEAKY_RELU), 76 | normalizer_fn=slim.batch_norm, normalizer_params=batch_norm_params) 77 | route_2 = inputs 78 | 79 | inputs = slim.conv2d(inputs, 512, 3, 1, padding='SAME', biases_initializer=None, 80 | activation_fn=lambda x: tf.nn.leaky_relu(x, alpha=_LEAKY_RELU), 81 | normalizer_fn=slim.batch_norm, normalizer_params=batch_norm_params) 82 | # inputs = _conv2d_fixed_padding(inputs, 255, 1) 83 | 84 | detect_1 = _detection_layer( 85 | inputs, num_classes, _ANCHORS[3:6], img_size, data_format) 86 | detect_1 = tf.identity(detect_1, name='detect_1') 87 | 88 | inputs = slim.conv2d(route_2, 128, 1, 1, padding='SAME', biases_initializer=None, 89 | activation_fn=lambda x: tf.nn.leaky_relu(x, alpha=_LEAKY_RELU), 90 | normalizer_fn=slim.batch_norm, normalizer_params=batch_norm_params) 91 | upsample_size = route_1.get_shape().as_list() 92 | inputs = _upsample(inputs, upsample_size, data_format) 93 | 94 | inputs = tf.concat([inputs, route_1], 95 | axis=1 if data_format == 'NCHW' else 3) 96 | 97 | inputs = slim.conv2d(inputs, 256, 3, 1, padding='SAME', biases_initializer=None, 98 | activation_fn=lambda x: tf.nn.leaky_relu(x, alpha=_LEAKY_RELU), 99 | normalizer_fn=slim.batch_norm, normalizer_params=batch_norm_params) 100 | # inputs = _conv2d_fixed_padding(inputs, 255, 1) 101 | 102 | detect_2 = _detection_layer( 103 | inputs, num_classes, _ANCHORS[0:3], img_size, data_format) 104 | detect_2 = tf.identity(detect_2, name='detect_2') 105 | 106 | detections = tf.concat([detect_1, detect_2], axis=1) 107 | detections = tf.identity(detections, name='detections') 108 | return detections -------------------------------------------------------------------------------- /merge_bn.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: UTF-8 -*- 3 | 4 | import numpy as np 5 | import sys 6 | import os 7 | import os.path as osp 8 | import google.protobuf as pb 9 | import google.protobuf.text_format 10 | from argparse import ArgumentParser 11 | import caffe 12 | 13 | caffe.set_mode_cpu() 14 | 15 | def load_and_fill_biases(src_model, src_weights, dst_model, dst_weights): 16 | with open(src_model) as f: 17 | model = caffe.proto.caffe_pb2.NetParameter() 18 | 
pb.text_format.Merge(f.read(), model) 19 | 20 | for i, layer in enumerate(model.layer): 21 | if layer.type == 'Convolution': # or layer.type == 'Scale': 22 | # Add bias layer if needed 23 | if layer.convolution_param.bias_term == False: 24 | layer.convolution_param.bias_term = True 25 | layer.convolution_param.bias_filler.type = 'constant' 26 | layer.convolution_param.bias_filler.value = 0.0 27 | 28 | with open(dst_model, 'w') as f: 29 | f.write(pb.text_format.MessageToString(model)) 30 | 31 | caffe.set_mode_cpu() 32 | net_src = caffe.Net(src_model, src_weights, caffe.TEST) 33 | net_dst = caffe.Net(dst_model, caffe.TEST) 34 | for key in net_src.params.keys(): 35 | for i in range(len(net_src.params[key])): 36 | net_dst.params[key][i].data[:] = net_src.params[key][i].data[:] 37 | 38 | if dst_weights is not None: 39 | # Store params 40 | pass 41 | 42 | return net_dst 43 | 44 | 45 | def merge_conv_and_bn(net, i_conv, i_bn, i_scale): 46 | # This is based on Kyeheyon's work 47 | assert(i_conv != None) 48 | assert(i_bn != None) 49 | 50 | def copy_double(data): 51 | return np.array(data, copy=True, dtype=np.double) 52 | 53 | key_conv = net._layer_names[i_conv] 54 | key_bn = net._layer_names[i_bn] 55 | key_scale = net._layer_names[i_scale] if i_scale else None 56 | 57 | # Copy 58 | bn_mean = copy_double(net.params[key_bn][0].data) 59 | bn_variance = copy_double(net.params[key_bn][1].data) 60 | num_bn_samples = copy_double(net.params[key_bn][2].data) 61 | 62 | # and Invalidate the BN layer 63 | net.params[key_bn][0].data[:] = 0 64 | net.params[key_bn][1].data[:] = 1 65 | net.params[key_bn][2].data[:] = 1 66 | 67 | if num_bn_samples[0] == 0: 68 | num_bn_samples[0] = 1 69 | 70 | if net.params.has_key(key_scale): 71 | print 'Combine {:s} + {:s} + {:s}'.format(key_conv, key_bn, key_scale) 72 | scale_weight = copy_double(net.params[key_scale][0].data) 73 | scale_bias = copy_double(net.params[key_scale][1].data) 74 | net.params[key_scale][0].data[:] = 1 75 | net.params[key_scale][1].data[:] = 0 76 | 77 | else: 78 | print 'Combine {:s} + {:s}'.format(key_conv, key_bn) 79 | scale_weight = 1 80 | scale_bias = 0 81 | 82 | weight = copy_double(net.params[key_conv][0].data) 83 | bias = copy_double(net.params[key_conv][1].data) 84 | 85 | alpha = scale_weight / np.sqrt(bn_variance / num_bn_samples[0] + 1e-5) 86 | net.params[key_conv][1].data[:] = bias * alpha + (scale_bias - (bn_mean / num_bn_samples[0]) * alpha) 87 | for i in range(len(alpha)): 88 | net.params[key_conv][0].data[i] = weight[i] * alpha[i] 89 | 90 | 91 | def merge_batchnorms_in_net(net): 92 | # for each BN 93 | for i, layer in enumerate(net.layers): 94 | if layer.type != 'BatchNorm': 95 | continue 96 | 97 | l_name = net._layer_names[i] 98 | 99 | l_bottom = net.bottom_names[l_name] 100 | assert(len(l_bottom) == 1) 101 | l_bottom = l_bottom[0] 102 | l_top = net.top_names[l_name] 103 | assert(len(l_top) == 1) 104 | l_top = l_top[0] 105 | 106 | can_be_absorbed = True 107 | 108 | # Search all (bottom) layers 109 | for j in xrange(i - 1, -1, -1): 110 | tops_of_j = net.top_names[net._layer_names[j]] 111 | if l_bottom in tops_of_j: 112 | if net.layers[j].type not in ['Convolution', 'InnerProduct']: 113 | can_be_absorbed = False 114 | else: 115 | # There must be only one layer 116 | conv_ind = j 117 | break 118 | 119 | if not can_be_absorbed: 120 | continue 121 | 122 | # find the following Scale 123 | scale_ind = None 124 | for j in xrange(i + 1, len(net.layers)): 125 | bottoms_of_j = net.bottom_names[net._layer_names[j]] 126 | if l_top in bottoms_of_j: 127 
| if scale_ind: 128 | # Followed by two or more layers 129 | scale_ind = None 130 | break 131 | 132 | if net.layers[j].type in ['Scale']: 133 | scale_ind = j 134 | 135 | top_of_j = net.top_names[net._layer_names[j]][0] 136 | if top_of_j == bottoms_of_j[0]: 137 | # On-the-fly => Can be merged 138 | break 139 | 140 | else: 141 | # Followed by a layer which is not 'Scale' 142 | scale_ind = None 143 | break 144 | 145 | 146 | merge_conv_and_bn(net, conv_ind, i, scale_ind) 147 | 148 | return net 149 | 150 | 151 | def process_model(net, src_model, dst_model, func_loop, func_finally): 152 | with open(src_model) as f: 153 | model = caffe.proto.caffe_pb2.NetParameter() 154 | pb.text_format.Merge(f.read(), model) 155 | 156 | for i, layer in enumerate(model.layer): 157 | map(lambda x: x(layer, net, model, i), func_loop) 158 | 159 | map(lambda x: x(net, model), func_finally) 160 | 161 | with open(dst_model, 'w') as f: 162 | f.write(pb.text_format.MessageToString(model)) 163 | 164 | 165 | # Functions to remove (redundant) BN and Scale layers 166 | to_delete_empty = [] 167 | def pick_empty_layers(layer, net, model, i): 168 | if layer.type not in ['BatchNorm', 'Scale']: 169 | return 170 | 171 | bottom = layer.bottom[0] 172 | top = layer.top[0] 173 | 174 | if (bottom != top): 175 | # Not supperted yet 176 | return 177 | 178 | if layer.type == 'BatchNorm': 179 | zero_mean = np.all(net.params[layer.name][0].data == 0) 180 | one_var = np.all(net.params[layer.name][1].data == 1) 181 | 182 | if zero_mean and one_var: 183 | print 'Delete layer: {}'.format(layer.name) 184 | to_delete_empty.append(layer) 185 | 186 | if layer.type == 'Scale': 187 | no_scaling = np.all(net.params[layer.name][0].data == 1) 188 | zero_bias = np.all(net.params[layer.name][1].data == 0) 189 | 190 | if no_scaling and zero_bias: 191 | print 'Delete layer: {}'.format(layer.name) 192 | to_delete_empty.append(layer) 193 | 194 | 195 | def remove_empty_layers(net, model): 196 | map(model.layer.remove, to_delete_empty) 197 | 198 | 199 | # A function to add 'engine: CAFFE' param into 1x1 convolutions 200 | def set_engine_caffe(layer, net, model, i): 201 | if layer.type == 'Convolution': 202 | if layer.convolution_param.kernel_size == 1\ 203 | or (layer.convolution_param.kernel_h == layer.convolution_param.kernel_w == 1): 204 | layer.convolution_param.engine = dict(layer.convolution_param.Engine.items())['CAFFE'] 205 | 206 | 207 | def main(): 208 | # Set default output file names 209 | if args.output_model is None: 210 | file_name = osp.splitext(args.model)[0] 211 | args.output_model = file_name + '_inference.prototxt' 212 | if args.output_weights is None: 213 | file_name = osp.splitext(args.weights)[0] 214 | args.output_weights = file_name + '_inference.caffemodel' 215 | 216 | net = load_and_fill_biases(args.model, args.weights, args.model + '.temp.pt', None) 217 | net = merge_batchnorms_in_net(net) 218 | 219 | process_model(net, args.model + '.temp.pt', args.output_model, 220 | [pick_empty_layers, set_engine_caffe], 221 | [remove_empty_layers]) 222 | 223 | # Store params 224 | net.save(args.output_weights) 225 | 226 | 227 | if __name__ == '__main__': 228 | parser = ArgumentParser( 229 | description="Generate Batch Normalized model for inference") 230 | parser.add_argument('--model', default="MobileNetSSD_deploy.prototxt", help="The net definition prototxt") 231 | parser.add_argument('--weights', default="MobileNetSSD_deploy.caffemodel", help="The weights caffemodel") 232 | parser.add_argument('--output_model') 233 | 
parser.add_argument('--output_weights') 234 | args = parser.parse_args() 235 | main() -------------------------------------------------------------------------------- /darknet2pb/yolo_v3.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | import tensorflow as tf 5 | 6 | slim = tf.contrib.slim 7 | 8 | _BATCH_NORM_DECAY = 0.9 9 | _BATCH_NORM_EPSILON = 1e-05 10 | _LEAKY_RELU = 0.1 11 | 12 | _ANCHORS = [(10, 13), (16, 30), (33, 23), 13 | (30, 61), (62, 45), (59, 119), 14 | (116, 90), (156, 198), (373, 326)] 15 | 16 | 17 | def darknet53(inputs): 18 | """ 19 | Builds Darknet-53 model. 20 | """ 21 | inputs = _conv2d_fixed_padding(inputs, 32, 3) 22 | inputs = _conv2d_fixed_padding(inputs, 64, 3, strides=2) 23 | inputs = _darknet53_block(inputs, 32) 24 | inputs = _conv2d_fixed_padding(inputs, 128, 3, strides=2) 25 | 26 | for i in range(2): 27 | inputs = _darknet53_block(inputs, 64) 28 | 29 | inputs = _conv2d_fixed_padding(inputs, 256, 3, strides=2) 30 | 31 | for i in range(8): 32 | inputs = _darknet53_block(inputs, 128) 33 | 34 | route_1 = inputs 35 | inputs = _conv2d_fixed_padding(inputs, 512, 3, strides=2) 36 | 37 | for i in range(8): 38 | inputs = _darknet53_block(inputs, 256) 39 | 40 | route_2 = inputs 41 | inputs = _conv2d_fixed_padding(inputs, 1024, 3, strides=2) 42 | 43 | for i in range(4): 44 | inputs = _darknet53_block(inputs, 512) 45 | 46 | return route_1, route_2, inputs 47 | 48 | 49 | def _conv2d_fixed_padding(inputs, filters, kernel_size, strides=1): 50 | if strides > 1: 51 | inputs = _fixed_padding(inputs, kernel_size) 52 | inputs = slim.conv2d(inputs, filters, kernel_size, stride=strides, 53 | padding=('SAME' if strides == 1 else 'VALID')) 54 | return inputs 55 | 56 | 57 | def _darknet53_block(inputs, filters): 58 | shortcut = inputs 59 | inputs = _conv2d_fixed_padding(inputs, filters, 1) 60 | inputs = _conv2d_fixed_padding(inputs, filters * 2, 3) 61 | 62 | inputs = inputs + shortcut 63 | return inputs 64 | 65 | 66 | def _spp_block(inputs, data_format='NCHW'): 67 | return tf.concat([slim.max_pool2d(inputs, 13, 1, 'SAME'), 68 | slim.max_pool2d(inputs, 9, 1, 'SAME'), 69 | slim.max_pool2d(inputs, 5, 1, 'SAME'), 70 | inputs], 71 | axis=1 if data_format == 'NCHW' else 3) 72 | 73 | 74 | @tf.contrib.framework.add_arg_scope 75 | def _fixed_padding(inputs, kernel_size, *args, mode='CONSTANT', **kwargs): 76 | """ 77 | Pads the input along the spatial dimensions independently of input size. 78 | 79 | Args: 80 | inputs: A tensor of size [batch, channels, height_in, width_in] or 81 | [batch, height_in, width_in, channels] depending on data_format. 82 | kernel_size: The kernel to be used in the conv2d or max_pool2d operation. 83 | Should be a positive integer. 84 | data_format: The input format ('NHWC' or 'NCHW'). 85 | mode: The mode for tf.pad. 86 | 87 | Returns: 88 | A tensor with the same format as the input with the data either intact 89 | (if kernel_size == 1) or padded (if kernel_size > 1). 
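    For example, kernel_size == 3 gives pad_total = 2 below, i.e. one extra pixel on each spatial side (pad_beg = pad_end = 1), while kernel_size == 1 adds no padding.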
90 | """ 91 | pad_total = kernel_size - 1 92 | pad_beg = pad_total // 2 93 | pad_end = pad_total - pad_beg 94 | 95 | if kwargs['data_format'] == 'NCHW': 96 | padded_inputs = tf.pad(inputs, [[0, 0], [0, 0], 97 | [pad_beg, pad_end], 98 | [pad_beg, pad_end]], 99 | mode=mode) 100 | else: 101 | padded_inputs = tf.pad(inputs, [[0, 0], [pad_beg, pad_end], 102 | [pad_beg, pad_end], [0, 0]], mode=mode) 103 | return padded_inputs 104 | 105 | 106 | def _yolo_block(inputs, filters, data_format='NCHW', with_spp=False): 107 | inputs = _conv2d_fixed_padding(inputs, filters, 1) 108 | inputs = _conv2d_fixed_padding(inputs, filters * 2, 3) 109 | inputs = _conv2d_fixed_padding(inputs, filters, 1) 110 | 111 | if with_spp: 112 | inputs = _spp_block(inputs, data_format) 113 | inputs = _conv2d_fixed_padding(inputs, filters, 1) 114 | 115 | inputs = _conv2d_fixed_padding(inputs, filters * 2, 3) 116 | inputs = _conv2d_fixed_padding(inputs, filters, 1) 117 | route = inputs 118 | inputs = _conv2d_fixed_padding(inputs, filters * 2, 3) 119 | return route, inputs 120 | 121 | 122 | def _get_size(shape, data_format): 123 | if len(shape) == 4: 124 | shape = shape[1:] 125 | return shape[1:3] if data_format == 'NCHW' else shape[0:2] 126 | 127 | 128 | def _detection_layer(inputs, num_classes, anchors, img_size, data_format): 129 | num_anchors = len(anchors) 130 | predictions = slim.conv2d(inputs, num_anchors * (5 + num_classes), 1, 131 | stride=1, normalizer_fn=None, 132 | activation_fn=None, 133 | biases_initializer=tf.zeros_initializer()) 134 | 135 | shape = predictions.get_shape().as_list() 136 | grid_size = _get_size(shape, data_format) 137 | dim = grid_size[0] * grid_size[1] 138 | bbox_attrs = 5 + num_classes 139 | 140 | if data_format == 'NCHW': 141 | predictions = tf.reshape( 142 | predictions, [-1, num_anchors * bbox_attrs, dim]) 143 | predictions = tf.transpose(predictions, [0, 2, 1]) 144 | 145 | predictions = tf.reshape(predictions, [-1, num_anchors * dim, bbox_attrs]) 146 | 147 | stride = (img_size[0] // grid_size[0], img_size[1] // grid_size[1]) 148 | 149 | anchors = [(a[0] / stride[0], a[1] / stride[1]) for a in anchors] 150 | 151 | box_centers, box_sizes, confidence, classes = tf.split( 152 | predictions, [2, 2, 1, num_classes], axis=-1) 153 | 154 | box_centers = tf.nn.sigmoid(box_centers) 155 | confidence = tf.nn.sigmoid(confidence) 156 | 157 | grid_x = tf.range(grid_size[0], dtype=tf.float32) 158 | grid_y = tf.range(grid_size[1], dtype=tf.float32) 159 | a, b = tf.meshgrid(grid_x, grid_y) 160 | 161 | x_offset = tf.reshape(a, (-1, 1)) 162 | y_offset = tf.reshape(b, (-1, 1)) 163 | 164 | x_y_offset = tf.concat([x_offset, y_offset], axis=-1) 165 | x_y_offset = tf.reshape(tf.tile(x_y_offset, [1, num_anchors]), [1, -1, 2]) 166 | 167 | box_centers = box_centers + x_y_offset 168 | box_centers = box_centers * stride 169 | 170 | anchors = tf.tile(anchors, [dim, 1]) 171 | box_sizes = tf.exp(box_sizes) * anchors 172 | box_sizes = box_sizes * stride 173 | 174 | detections = tf.concat([box_centers, box_sizes, confidence], axis=-1) 175 | 176 | classes = tf.nn.sigmoid(classes) 177 | predictions = tf.concat([detections, classes], axis=-1) 178 | return predictions 179 | 180 | 181 | def _upsample(inputs, out_shape, data_format='NCHW'): 182 | # tf.image.resize_nearest_neighbor accepts input in format NHWC 183 | if data_format == 'NCHW': 184 | inputs = tf.transpose(inputs, [0, 2, 3, 1]) 185 | 186 | if data_format == 'NCHW': 187 | new_height = out_shape[3] 188 | new_width = out_shape[2] 189 | else: 190 | new_height = out_shape[2] 
191 | new_width = out_shape[1] 192 | 193 | inputs = tf.image.resize_nearest_neighbor(inputs, (new_height, new_width)) 194 | 195 | # back to NCHW if needed 196 | if data_format == 'NCHW': 197 | inputs = tf.transpose(inputs, [0, 3, 1, 2]) 198 | 199 | inputs = tf.identity(inputs, name='upsampled') 200 | return inputs 201 | 202 | 203 | def yolo_v3(inputs, num_classes, is_training=False, data_format='NCHW', reuse=False, with_spp=False): 204 | """ 205 | Creates YOLO v3 model. 206 | 207 | :param inputs: a 4-D tensor of size [batch_size, height, width, channels]. 208 | Dimension batch_size may be undefined. The channel order is RGB. 209 | :param num_classes: number of predicted classes. 210 | :param is_training: whether is training or not. 211 | :param data_format: data format NCHW or NHWC. 212 | :param reuse: whether or not the network and its variables should be reused. 213 | :param with_spp: whether or not is using spp layer. 214 | :return: 215 | """ 216 | # it will be needed later on 217 | img_size = inputs.get_shape().as_list()[1:3] 218 | 219 | # transpose the inputs to NCHW 220 | if data_format == 'NCHW': 221 | inputs = tf.transpose(inputs, [0, 3, 1, 2]) 222 | 223 | # normalize values to range [0..1] 224 | inputs = inputs / 255 225 | 226 | # set batch norm params 227 | batch_norm_params = { 228 | 'decay': _BATCH_NORM_DECAY, 229 | 'epsilon': _BATCH_NORM_EPSILON, 230 | 'scale': True, 231 | 'is_training': is_training, 232 | 'fused': None, # Use fused batch norm if possible. 233 | } 234 | 235 | # Set activation_fn and parameters for conv2d, batch_norm. 236 | with slim.arg_scope([slim.conv2d, slim.batch_norm, _fixed_padding], data_format=data_format, reuse=reuse): 237 | with slim.arg_scope([slim.conv2d], normalizer_fn=slim.batch_norm, 238 | normalizer_params=batch_norm_params, 239 | biases_initializer=None, 240 | activation_fn=lambda x: tf.nn.leaky_relu(x, alpha=_LEAKY_RELU)): 241 | with tf.variable_scope('darknet-53'): 242 | route_1, route_2, inputs = darknet53(inputs) 243 | 244 | with tf.variable_scope('yolo-v3'): 245 | route, inputs = _yolo_block(inputs, 512, data_format, with_spp) 246 | 247 | detect_1 = _detection_layer( 248 | inputs, num_classes, _ANCHORS[6:9], img_size, data_format) 249 | detect_1 = tf.identity(detect_1, name='detect_1') 250 | 251 | inputs = _conv2d_fixed_padding(route, 256, 1) 252 | upsample_size = route_2.get_shape().as_list() 253 | inputs = _upsample(inputs, upsample_size, data_format) 254 | inputs = tf.concat([inputs, route_2], 255 | axis=1 if data_format == 'NCHW' else 3) 256 | 257 | route, inputs = _yolo_block(inputs, 256) 258 | 259 | detect_2 = _detection_layer( 260 | inputs, num_classes, _ANCHORS[3:6], img_size, data_format) 261 | detect_2 = tf.identity(detect_2, name='detect_2') 262 | 263 | inputs = _conv2d_fixed_padding(route, 128, 1) 264 | upsample_size = route_1.get_shape().as_list() 265 | inputs = _upsample(inputs, upsample_size, data_format) 266 | inputs = tf.concat([inputs, route_1], 267 | axis=1 if data_format == 'NCHW' else 3) 268 | 269 | _, inputs = _yolo_block(inputs, 128) 270 | 271 | detect_3 = _detection_layer( 272 | inputs, num_classes, _ANCHORS[0:3], img_size, data_format) 273 | detect_3 = tf.identity(detect_3, name='detect_3') 274 | 275 | detections = tf.concat([detect_1, detect_2, detect_3], axis=1) 276 | detections = tf.identity(detections, name='detections') 277 | return detections 278 | 279 | 280 | def yolo_v3_spp(inputs, num_classes, is_training=False, data_format='NCHW', reuse=False): 281 | """ 282 | Creates YOLO v3 with SPP model. 
283 | 284 | :param inputs: a 4-D tensor of size [batch_size, height, width, channels]. 285 | Dimension batch_size may be undefined. The channel order is RGB. 286 | :param num_classes: number of predicted classes. 287 | :param is_training: whether is training or not. 288 | :param data_format: data format NCHW or NHWC. 289 | :param reuse: whether or not the network and its variables should be reused. 290 | :return: 291 | """ 292 | return yolo_v3(inputs, num_classes, is_training=is_training, data_format=data_format, reuse=reuse, with_spp=True) 293 | -------------------------------------------------------------------------------- /darknet2pb/utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | import tensorflow as tf 5 | from PIL import ImageDraw, Image 6 | 7 | 8 | def get_boxes_and_inputs_pb(frozen_graph): 9 | 10 | with frozen_graph.as_default(): 11 | boxes = tf.get_default_graph().get_tensor_by_name("output_boxes:0") 12 | inputs = tf.get_default_graph().get_tensor_by_name("inputs:0") 13 | 14 | return boxes, inputs 15 | 16 | 17 | def get_boxes_and_inputs(model, num_classes, size, data_format): 18 | 19 | inputs = tf.placeholder(tf.float32, [1, size, size, 3]) 20 | 21 | with tf.variable_scope('detector'): 22 | detections = model(inputs, num_classes, 23 | data_format=data_format) 24 | 25 | boxes = detections_boxes(detections) 26 | 27 | return boxes, inputs 28 | 29 | 30 | def load_graph(frozen_graph_filename): 31 | 32 | with tf.gfile.GFile(frozen_graph_filename, "rb") as f: 33 | graph_def = tf.GraphDef() 34 | graph_def.ParseFromString(f.read()) 35 | 36 | with tf.Graph().as_default() as graph: 37 | tf.import_graph_def(graph_def, name="") 38 | 39 | return graph 40 | 41 | 42 | def freeze_graph(sess, output_graph): 43 | 44 | output_node_names = [ 45 | "output_boxes", 46 | "inputs", 47 | ] 48 | output_node_names = ",".join(output_node_names) 49 | 50 | output_graph_def = tf.graph_util.convert_variables_to_constants( 51 | sess, 52 | tf.get_default_graph().as_graph_def(), 53 | output_node_names.split(",") 54 | ) 55 | 56 | with tf.gfile.GFile(output_graph, "wb") as f: 57 | f.write(output_graph_def.SerializeToString()) 58 | 59 | print("{} ops written to {}.".format(len(output_graph_def.node), output_graph)) 60 | 61 | 62 | def load_weights(var_list, weights_file): 63 | """ 64 | Loads and converts pre-trained weights. 65 | :param var_list: list of network variables. 66 | :param weights_file: name of the binary file. 
67 | :return: list of assign ops 68 | """ 69 | with open(weights_file, "rb") as fp: 70 | _ = np.fromfile(fp, dtype=np.int32, count=5) 71 | 72 | weights = np.fromfile(fp, dtype=np.float32) 73 | 74 | ptr = 0 75 | i = 0 76 | assign_ops = [] 77 | while i < len(var_list) - 1: 78 | var1 = var_list[i] 79 | var2 = var_list[i + 1] 80 | # do something only if we process conv layer 81 | if 'Conv' in var1.name.split('/')[-2]: 82 | # check type of next layer 83 | if 'BatchNorm' in var2.name.split('/')[-2]: 84 | # load batch norm params 85 | gamma, beta, mean, var = var_list[i + 1:i + 5] 86 | batch_norm_vars = [beta, gamma, mean, var] 87 | for var in batch_norm_vars: 88 | shape = var.shape.as_list() 89 | num_params = np.prod(shape) 90 | var_weights = weights[ptr:ptr + num_params].reshape(shape) 91 | ptr += num_params 92 | assign_ops.append( 93 | tf.assign(var, var_weights, validate_shape=True)) 94 | 95 | # we move the pointer by 4, because we loaded 4 variables 96 | i += 4 97 | elif 'Conv' in var2.name.split('/')[-2]: 98 | # load biases 99 | bias = var2 100 | bias_shape = bias.shape.as_list() 101 | bias_params = np.prod(bias_shape) 102 | bias_weights = weights[ptr:ptr + 103 | bias_params].reshape(bias_shape) 104 | ptr += bias_params 105 | assign_ops.append( 106 | tf.assign(bias, bias_weights, validate_shape=True)) 107 | 108 | # we loaded 1 variable 109 | i += 1 110 | # we can load weights of conv layer 111 | shape = var1.shape.as_list() 112 | num_params = np.prod(shape) 113 | 114 | var_weights = weights[ptr:ptr + num_params].reshape( 115 | (shape[3], shape[2], shape[0], shape[1])) 116 | # remember to transpose to column-major 117 | var_weights = np.transpose(var_weights, (2, 3, 1, 0)) 118 | ptr += num_params 119 | assign_ops.append( 120 | tf.assign(var1, var_weights, validate_shape=True)) 121 | i += 1 122 | 123 | return assign_ops 124 | 125 | 126 | def detections_boxes(detections): 127 | """ 128 | Converts center x, center y, width and height values to coordinates of top left and bottom right points. 
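    That is, x0 = center_x - width / 2, y0 = center_y - height / 2, x1 = center_x + width / 2, y1 = center_y + height / 2; the confidence and class scores are passed through unchanged.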
129 | 130 | :param detections: outputs of YOLO v3 detector of shape (?, 10647, (num_classes + 5)) 131 | :return: converted detections of same shape as input 132 | """ 133 | center_x, center_y, width, height, attrs = tf.split( 134 | detections, [1, 1, 1, 1, -1], axis=-1) 135 | w2 = width / 2 136 | h2 = height / 2 137 | x0 = center_x - w2 138 | y0 = center_y - h2 139 | x1 = center_x + w2 140 | y1 = center_y + h2 141 | 142 | boxes = tf.concat([x0, y0, x1, y1], axis=-1) 143 | detections = tf.concat([boxes, attrs], axis=-1, name="output_boxes") 144 | return detections 145 | 146 | 147 | def _iou(box1, box2): 148 | """ 149 | Computes Intersection over Union value for 2 bounding boxes 150 | 151 | :param box1: array of 4 values (top left and bottom right coords): [x0, y0, x1, x2] 152 | :param box2: same as box1 153 | :return: IoU 154 | """ 155 | b1_x0, b1_y0, b1_x1, b1_y1 = box1 156 | b2_x0, b2_y0, b2_x1, b2_y1 = box2 157 | 158 | int_x0 = max(b1_x0, b2_x0) 159 | int_y0 = max(b1_y0, b2_y0) 160 | int_x1 = min(b1_x1, b2_x1) 161 | int_y1 = min(b1_y1, b2_y1) 162 | 163 | int_area = max(int_x1 - int_x0, 0) * max(int_y1 - int_y0, 0) 164 | 165 | b1_area = (b1_x1 - b1_x0) * (b1_y1 - b1_y0) 166 | b2_area = (b2_x1 - b2_x0) * (b2_y1 - b2_y0) 167 | 168 | # we add small epsilon of 1e-05 to avoid division by 0 169 | iou = int_area / (b1_area + b2_area - int_area + 1e-05) 170 | return iou 171 | 172 | 173 | def non_max_suppression(predictions_with_boxes, confidence_threshold, iou_threshold=0.4): 174 | """ 175 | Applies Non-max suppression to prediction boxes. 176 | 177 | :param predictions_with_boxes: 3D numpy array, first 4 values in 3rd dimension are bbox attrs, 5th is confidence 178 | :param confidence_threshold: the threshold for deciding if prediction is valid 179 | :param iou_threshold: the threshold for deciding if two boxes overlap 180 | :return: dict: class -> [(box, score)] 181 | """ 182 | conf_mask = np.expand_dims( 183 | (predictions_with_boxes[:, :, 4] > confidence_threshold), -1) 184 | predictions = predictions_with_boxes * conf_mask 185 | 186 | result = {} 187 | for i, image_pred in enumerate(predictions): 188 | shape = image_pred.shape 189 | non_zero_idxs = np.nonzero(image_pred) 190 | image_pred = image_pred[non_zero_idxs] 191 | image_pred = image_pred.reshape(-1, shape[-1]) 192 | 193 | bbox_attrs = image_pred[:, :5] 194 | classes = image_pred[:, 5:] 195 | classes = np.argmax(classes, axis=-1) 196 | 197 | unique_classes = list(set(classes.reshape(-1))) 198 | 199 | for cls in unique_classes: 200 | cls_mask = classes == cls 201 | cls_boxes = bbox_attrs[np.nonzero(cls_mask)] 202 | cls_boxes = cls_boxes[cls_boxes[:, -1].argsort()[::-1]] 203 | cls_scores = cls_boxes[:, -1] 204 | cls_boxes = cls_boxes[:, :-1] 205 | 206 | while len(cls_boxes) > 0: 207 | box = cls_boxes[0] 208 | score = cls_scores[0] 209 | if cls not in result: 210 | result[cls] = [] 211 | result[cls].append((box, score)) 212 | cls_boxes = cls_boxes[1:] 213 | cls_scores = cls_scores[1:] 214 | ious = np.array([_iou(box, x) for x in cls_boxes]) 215 | iou_mask = ious < iou_threshold 216 | cls_boxes = cls_boxes[np.nonzero(iou_mask)] 217 | cls_scores = cls_scores[np.nonzero(iou_mask)] 218 | 219 | return result 220 | 221 | 222 | def load_coco_names(file_name): 223 | names = {} 224 | with open(file_name) as f: 225 | for id, name in enumerate(f): 226 | names[id] = name 227 | return names 228 | 229 | 230 | def draw_boxes(boxes, img, cls_names, detection_size, is_letter_box_image): 231 | draw = ImageDraw.Draw(img) 232 | 233 | for cls, bboxs in 
boxes.items(): 234 | color = tuple(np.random.randint(0, 256, 3)) 235 | for box, score in bboxs: 236 | box = convert_to_original_size(box, np.array(detection_size), 237 | np.array(img.size), 238 | is_letter_box_image) 239 | draw.rectangle(box, outline=color) 240 | draw.text(box[:2], '{} {:.2f}%'.format( 241 | cls_names[cls], score * 100), fill=color) 242 | 243 | 244 | def convert_to_original_size(box, size, original_size, is_letter_box_image): 245 | if is_letter_box_image: 246 | box = box.reshape(2, 2) 247 | box[0, :] = letter_box_pos_to_original_pos(box[0, :], size, original_size) 248 | box[1, :] = letter_box_pos_to_original_pos(box[1, :], size, original_size) 249 | else: 250 | ratio = original_size / size 251 | box = box.reshape(2, 2) * ratio 252 | return list(box.reshape(-1)) 253 | 254 | 255 | def letter_box_image(image: Image.Image, output_height: int, output_width: int, fill_value)-> np.ndarray: 256 | """ 257 | Fit image with final image with output_width and output_height. 258 | :param image: PILLOW Image object. 259 | :param output_height: width of the final image. 260 | :param output_width: height of the final image. 261 | :param fill_value: fill value for empty area. Can be uint8 or np.ndarray 262 | :return: numpy image fit within letterbox. dtype=uint8, shape=(output_height, output_width) 263 | """ 264 | 265 | height_ratio = float(output_height)/image.size[1] 266 | width_ratio = float(output_width)/image.size[0] 267 | fit_ratio = min(width_ratio, height_ratio) 268 | fit_height = int(image.size[1] * fit_ratio) 269 | fit_width = int(image.size[0] * fit_ratio) 270 | fit_image = np.asarray(image.resize((fit_width, fit_height), resample=Image.BILINEAR)) 271 | 272 | if isinstance(fill_value, int): 273 | fill_value = np.full(fit_image.shape[2], fill_value, fit_image.dtype) 274 | 275 | to_return = np.tile(fill_value, (output_height, output_width, 1)) 276 | pad_top = int(0.5 * (output_height - fit_height)) 277 | pad_left = int(0.5 * (output_width - fit_width)) 278 | to_return[pad_top:pad_top+fit_height, pad_left:pad_left+fit_width] = fit_image 279 | return to_return 280 | 281 | 282 | def letter_box_pos_to_original_pos(letter_pos, current_size, ori_image_size)-> np.ndarray: 283 | """ 284 | Parameters should have same shape and dimension space. (Width, Height) or (Height, Width) 285 | :param letter_pos: The current position within letterbox image including fill value area. 286 | :param current_size: The size of whole image including fill value area. 287 | :param ori_image_size: The size of image before being letter boxed. 288 | :return: 289 | """ 290 | letter_pos = np.asarray(letter_pos, dtype=np.float) 291 | current_size = np.asarray(current_size, dtype=np.float) 292 | ori_image_size = np.asarray(ori_image_size, dtype=np.float) 293 | final_ratio = min(current_size[0]/ori_image_size[0], current_size[1]/ori_image_size[1]) 294 | pad = 0.5 * (current_size - final_ratio * ori_image_size) 295 | pad = pad.astype(np.int32) 296 | to_return_pos = (letter_pos - pad) / final_ratio 297 | return to_return_pos 298 | -------------------------------------------------------------------------------- /darknet2pb/LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 
8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 
179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 |
--------------------------------------------------------------------------------
/trt_yolov3_tiny.cpp:
--------------------------------------------------------------------------------
1 | #include <iostream>              // NOTE: the angle-bracket header names were missing from the dump; these are inferred from what the code uses
2 | #include <fstream>
3 | #include <sstream>
4 | #include <string>
5 | #include <vector>
6 | #include <algorithm>
7 | #include <numeric>
8 | #include <chrono>
9 | #include <memory>
10 | #include <cuda_runtime_api.h>
11 | #include <opencv2/opencv.hpp>
12 | 
13 | #include "NvInfer.h"
14 | #include "NvOnnxParser.h"
15 | #include "NvOnnxParserRuntime.h"
16 | #include "argsParser.h"
17 | #include "logger.h"
18 | #include "common.h"
19 | 
20 | using namespace std;
21 | using namespace nvinfer1;
22 | using namespace nvonnxparser;
23 | using namespace cv;
24 | 
25 | 
26 | // origin params
27 | samplesCommon::Args gArgs;
28 | 
29 | // Res params
30 | string onnxFile = "F:/TensorRT-6.0.1.5/data/v3tiny/yolov3-tiny.onnx";
31 | string engineFile = "F:/TensorRT-6.0.1.5/data/v3tiny/yolov3-tiny.trt";
32 | 
33 | vector<string> labels = { "abnormal" };
34 | 
35 | vector<vector<int>> output_shape = { { 1, 18, 13, 13 },{ 1, 18, 26, 26 } };
36 | vector<vector<int>> g_masks = { { 3, 4, 5 },{ 0, 1, 2 } };
37 | vector<vector<int>> g_anchors = { { 10, 14 },{ 23, 27 },{ 37, 58 },{ 81, 82 },{ 135, 169 },{ 344, 319 } };
38 | float obj_threshold = 0.10;
39 | float nms_threshold = 0.45;
40 | 
41 | int CATEGORY = 1;
42 | int BATCH_SIZE = 1;
43 | int INPUT_CHANNEL = 3;
44 | int DETECT_WIDTH = 416;
45 | int DETECT_HEIGHT = 416;
46 | 
47 | // Res struct & function
48 | typedef struct DetectionRes {
49 |     float x, y, w, h, prob;
50 | } DetectionRes;
51 | 
52 | float sigmoid(float in) {
53 |     return 1.f / (1.f + exp(-in));
54 | }
55 | float exponential(float in) {
56 |     return exp(in);
57 | }
58 | 
59 | float* merge(float* out1, float* out2, int bsize_out1, int bsize_out2)  // concatenate the two output buffers into one newly allocated buffer
60 | {
61 |     float* out_total = new float[bsize_out1 + bsize_out2];
62 | 
63 |     for (int j = 0; j < bsize_out1; ++j)
64 |     {
65 |         int index = j;
66 |         out_total[index] = out1[j];
67 |     }
68 | 
69 |     for (int j = 0; j < bsize_out2; ++j)
70 |     {
71 |         int index = j + bsize_out1;
72 |         out_total[index] = out2[j];
73 |     }
74 |     return out_total;
75 | }
76 | 
77 | vector<string> split(const string& str, char delim)  // split a string on a delimiter
78 | {
79 |     stringstream ss(str);
80 |     string token;
81 |     vector<string> container;
82 |     while (getline(ss, token, delim))
83 |     {
84 |         container.push_back(token);
85 |     }
86 | 
87 |     return container;
88 | }
89 | 
90 | 
91 | 
92 | void DoNms(vector<DetectionRes>& detections, float nmsThresh) {  // greedy IoU-based non-maximum suppression
93 |     auto iouCompute = [](float * lbox, float* rbox) {
94 |         float interBox[] = {
95 |             max(lbox[0], rbox[0]), //left
96 |             min(lbox[0] + lbox[2], rbox[0] + rbox[2]), //right
97 |             max(lbox[1], rbox[1]), //top
98 |             min(lbox[1] + lbox[3], rbox[1] + rbox[3]), //bottom
99 |         };
100 | 
101 |         if (interBox[2] >= interBox[3] || interBox[0] >= interBox[1])
102 |             return 0.0f;
103 | 
104 |         float interBoxS = (interBox[1] - interBox[0] + 1) * (interBox[3] - interBox[2] + 1);
105 |         return interBoxS / (lbox[2] * lbox[3] + rbox[2] * rbox[3] - interBoxS);
106 |     };
107 | 
108 |     sort(detections.begin(), detections.end(), [=](const DetectionRes & left, const DetectionRes & right) {
109 |         return left.prob > right.prob;
110 |     });
111 | 
112 |     vector<DetectionRes> result;
113 |     for (unsigned int m = 0; m < detections.size(); ++m) {
114 |         result.push_back(detections[m]);
115 |         for (unsigned int n = m + 1; n < detections.size(); ++n) {
116 |             if (iouCompute((float *)(&detections[m]), (float *)(&detections[n])) > nmsThresh) {
117 |                 detections.erase(detections.begin() + n);
118 |                 --n;
119 |             }
120 |         }
121 |     }
122 |     detections = move(result);
123 | }
124 | 
125 | vector<DetectionRes> postProcess(cv::Mat& image, float * output) {  // decode raw network outputs into boxes, rescale to the image, then run NMS
126 |     vector<DetectionRes> detections;
127 |     int total_size = 0;
128 |     for (int i = 0; i < output_shape.size(); i++) {
129 |         auto shape = output_shape[i];
130 |         int size = 1;
131 |         for (int j = 0; j < shape.size(); j++) {
132 |             size *= shape[j];
133 |         }
134 |         total_size += size;
135 |     }
136 | 
137 |     int offset = 0;
138 |     float * transposed_output = new float[total_size];
139 |     float * transposed_output_t = transposed_output;
140 |     for (int i = 0; i < output_shape.size(); i++) {
141 |         auto shape = output_shape[i]; // nchw
142 |         int chw = shape[1] * shape[2] * shape[3];
143 |         int hw = shape[2] * shape[3];
144 |         for (int n = 0; n < shape[0]; n++) {
145 |             int offset_n = offset + n * chw;
146 |             for (int h = 0; h < shape[2]; h++) {
147 |                 for (int w = 0; w < shape[3]; w++) {
148 |                     int h_w = h * shape[3] + w;
149 |                     for (int c = 0; c < shape[1]; c++) {
150 |                         int offset_c = offset_n + hw * c + h_w;
151 |                         *transposed_output_t++ = output[offset_c];
152 |                     }
153 |                 }
154 |             }
155 |         }
156 |         offset += shape[0] * chw;
157 |     }
158 |     vector<vector<int>> shapes;
159 |     for (int i = 0; i < output_shape.size(); i++) {
160 |         auto shape = output_shape[i];
161 |         vector<int> tmp = { shape[2], shape[3], 3, 6 };
162 |         shapes.push_back(tmp);
163 |     }
164 | 
165 |     offset = 0;
166 |     for (int i = 0; i < output_shape.size(); i++) {
167 |         auto masks = g_masks[i];
168 |         vector<vector<int>> anchors;
169 |         for (auto mask : masks)
170 |             anchors.push_back(g_anchors[mask]);
171 |         auto shape = shapes[i];
172 |         for (int h = 0; h < shape[0]; h++) {
173 |             int offset_h = offset + h * shape[1] * shape[2] * shape[3];
174 |             for (int w = 0; w < shape[1]; w++) {
175 |                 int offset_w = offset_h + w * shape[2] * shape[3];
176 |                 for (int c = 0; c < shape[2]; c++) {
177 |                     int offset_c = offset_w + c * shape[3];
178 |                     float * ptr = transposed_output + offset_c;
179 |                     ptr[4] = sigmoid(ptr[4]);
180 |                     ptr[5] = sigmoid(ptr[5]);
181 |                     float score = ptr[4] * ptr[5];
182 |                     if (score < obj_threshold)
183 |                         continue;
184 |                     ptr[0] = sigmoid(ptr[0]);
185 |                     ptr[1] = sigmoid(ptr[1]);
186 |                     ptr[2] = exponential(ptr[2]) * anchors[c][0];
187 |                     ptr[3] = exponential(ptr[3]) * anchors[c][1];
188 | 
189 |                     ptr[0] += w;
190 |                     ptr[1] += h;
191 |                     ptr[0] /= shape[0];
192 |                     ptr[1] /= shape[1];
193 |                     ptr[2] /= DETECT_WIDTH;
194 |                     ptr[3] /= DETECT_WIDTH;
195 |                     ptr[0] -= ptr[2] / 2;
196 |                     ptr[1] -= ptr[3] / 2;
197 | 
198 |                     DetectionRes det;
199 |                     det.x = ptr[0];
200 |                     det.y = ptr[1];
201 |                     det.w = ptr[2];
202 |                     det.h = ptr[3];
203 |                     det.prob = score;
204 |                     detections.push_back(det);
205 |                 }
206 |             }
207 |         }
208 |         offset += shape[0] * shape[1] * shape[2] * shape[3];
209 |     }
210 |     delete[] transposed_output;
211 | 
212 |     int h = DETECT_WIDTH; //net h
213 |     int w = DETECT_WIDTH; //net w
214 | 
215 |     //scale bbox to img
216 |     int width = image.cols;
217 |     int height = image.rows;
218 |     float scale = min(float(w) / width, float(h) / height);
219 |     float scaleSize[] = { width * scale, height * scale };
220 | 
221 |     //correct box
222 |     for (auto& bbox : detections) {
223 |         bbox.x = (bbox.x * w - (w - scaleSize[0]) / 2.f) / scale;
224 |         bbox.y = (bbox.y * h - (h - scaleSize[1]) / 2.f) / scale;
225 |         bbox.w *= w;
226 |         bbox.h *= h;
227 |         bbox.w /= scale;
228 |         bbox.h /= scale;
229 |     }
230 | 
231 |     //nms
232 |     float nmsThresh = nms_threshold;
233 |     if (nmsThresh > 0)
234 |         DoNms(detections, nmsThresh);
235 | 
236 |     return detections;
237 | }
238 | 
239 | 
240 | // prepare img
241 | vector<float> prepareImage(cv::Mat& img) {  // letterbox-resize to the network input, convert HWC BGR uint8 -> CHW RGB float
242 |     int c = 3;
243 |     int h = DETECT_WIDTH; //net h
244 |     int w = DETECT_WIDTH; //net w
245 | 
246 |     float scale = min(float(w) / img.cols, float(h) / img.rows);
247 |     auto scaleSize = cv::Size(img.cols * scale, img.rows * scale);
248 | 
249 |     cv::Mat rgb;
250 |     cv::cvtColor(img, rgb, CV_BGR2RGB);
251 |     cv::Mat resized;
252 |     cv::resize(rgb, resized, scaleSize, 0, 0, INTER_CUBIC);
253 | 
254 |     cv::Mat cropped(h, w, CV_8UC3, 127);
255 |     Rect rect((w - scaleSize.width) / 2, (h - scaleSize.height) / 2, scaleSize.width, scaleSize.height);
256 |     resized.copyTo(cropped(rect));
257 | 
258 |     cv::Mat img_float;
259 |     cropped.convertTo(img_float, CV_32FC3, 1.f / 255.0);
260 | 
261 | 
262 |     //HWC TO CHW
263 |     vector<cv::Mat> input_channels(c);
264 |     cv::split(img_float, input_channels);
265 | 
266 |     vector<float> result(h * w * c);
267 |     auto data = result.data();
268 |     int channelLength = h * w;
269 |     for (int i = 0; i < c; ++i) {
270 |         memcpy(data, input_channels[i].data, channelLength * sizeof(float));
271 |         data += channelLength;
272 |     }
273 |     return result;
274 | }
275 | 
276 | 
277 | // load engine file
278 | bool readTrtFile(const std::string& engineFile, //name of the engine file
279 |     IHostMemory*& trtModelStream)  //output buffer for the TensorRT model
280 | {
281 |     using namespace std;
282 |     fstream file;
283 |     cout << "loading filename from:" << engineFile << endl;
284 |     nvinfer1::IRuntime* trtRuntime;
285 |     nvonnxparser::IPluginFactory* onnxPlugin = createPluginFactory(gLogger.getTRTLogger());
286 |     file.open(engineFile, ios::binary | ios::in);
287 |     file.seekg(0, ios::end);
288 |     int length = file.tellg();
289 |     //cout << "length:" << length << endl;
290 |     file.seekg(0, ios::beg);
291 |     std::unique_ptr<char[]> data(new char[length]);
292 |     file.read(data.get(), length);
293 |     file.close();
294 |     cout << "load engine done" << endl;
295 |     std::cout << "deserializing" << endl;
296 |     trtRuntime = createInferRuntime(gLogger.getTRTLogger());
297 |     ICudaEngine* engine = trtRuntime->deserializeCudaEngine(data.get(), length, onnxPlugin);
298 |     cout << "deserialize done" << endl;
299 |     trtModelStream = engine->serialize();
300 | 
301 |     return true;
302 | }
303 | 
304 | 
305 | // Convert the ONNX model into a TensorRT engine
306 | bool onnxToTRTModel(const std::string& modelFile,  // name of the ONNX model file
307 |     const std::string& filename,                   // name of the TensorRT engine file to write
308 |     IHostMemory*& trtModelStream)                  // output buffer for the TensorRT model
309 | {
310 |     // Create the builder
311 |     IBuilder* builder = createInferBuilder(gLogger.getTRTLogger());
312 |     assert(builder != nullptr);
313 |     nvinfer1::INetworkDefinition* network = builder->createNetwork();
314 | 
315 |     // Parse the ONNX model
316 |     auto parser = nvonnxparser::createParser(*network, gLogger.getTRTLogger());
317 | 
318 | 
319 |     // Optional - uncomment the lines below to print detailed information for every layer of the network
320 |     //config->setPrintLayerInfo(true);
321 |     //parser->reportParsingInfo();
322 | 
323 |     // Check whether the ONNX model was parsed successfully
324 |     if (!parser->parseFromFile(modelFile.c_str(), static_cast<int>(gLogger.getReportableSeverity())))
325 |     {
326 |         gLogError << "Failure while parsing ONNX file" << std::endl;
327 |         return false;
328 |     }
329 | 
330 |     // Build the inference engine
331 |     builder->setMaxBatchSize(BATCH_SIZE);
332 |     builder->setMaxWorkspaceSize(1 << 30);
333 |     builder->setFp16Mode(true);
334 |     builder->setInt8Mode(gArgs.runInInt8);
335 | 
336 |     if (gArgs.runInInt8)
337 |     {
338 |         samplesCommon::setAllTensorScales(network, 127.0f, 127.0f);
339 |     }
340 | 
341 |     cout << "start building engine" << endl;
342 |     ICudaEngine* engine = builder->buildCudaEngine(*network);
343 |     cout << "build engine done" << endl;
344 |     assert(engine);
345 | 
346 |     // Destroy the parser
347 |     parser->destroy();
348 | 
349 |     // Serialize the engine
350 |     trtModelStream = engine->serialize();
351 | 
352 |     // Save the engine to disk
353 |     nvinfer1::IHostMemory* data = engine->serialize();
354 |     std::ofstream file;
355 |     file.open(filename, std::ios::binary | std::ios::out);
356 |     cout << "writing engine file..." << endl;
357 |     file.write((const char*)data->data(), data->size());
358 |     cout << "save engine file done" << endl;
359 |     file.close();
360 | 
361 |     // Destroy everything that is no longer needed
362 |     engine->destroy();
363 |     network->destroy();
364 |     builder->destroy();
365 | 
366 |     return true;
367 | }
368 | 
369 | inline int64_t volume(const nvinfer1::Dims& d)
370 | {
371 |     return std::accumulate(d.d, d.d + d.nbDims, 1, std::multiplies<int64_t>());
372 | }
373 | 
374 | inline unsigned int getElementSize(nvinfer1::DataType t)
375 | {
376 |     switch (t)
377 |     {
378 |     case nvinfer1::DataType::kINT32: return 4;
379 |     case nvinfer1::DataType::kFLOAT: return 4;
380 |     case nvinfer1::DataType::kHALF: return 2;
381 |     case nvinfer1::DataType::kINT8: return 1;
382 |     }
383 |     throw std::runtime_error("Invalid DataType.");
384 |     return 0;
385 | }
386 | 
387 | // Run forward inference
388 | void doInferenceFrieza(IHostMemory* trtModelStream)
389 | {
390 |     //get engine
391 |     assert(trtModelStream != nullptr);
392 |     IRuntime* runtime = createInferRuntime(gLogger);
393 |     nvonnxparser::IPluginFactory* onnxPlugin = createPluginFactory(gLogger.getTRTLogger());
394 |     assert(runtime != nullptr);
395 |     if (gArgs.useDLACore >= 0)
396 |     {
397 |         runtime->setDLACore(gArgs.useDLACore);
398 |     }
399 |     ICudaEngine* engine = runtime->deserializeCudaEngine(trtModelStream->data(), trtModelStream->size(), onnxPlugin);
400 | 
401 |     // Create the inference engine and execution context
402 |     assert(engine != nullptr);
403 |     trtModelStream->destroy();
404 |     IExecutionContext* context = engine->createExecutionContext();
405 |     assert(context != nullptr);
406 | 
407 |     // Read the input data into the buffer management objects
408 |     assert(engine->getNbBindings() == 3);
409 |     void* buffers[3];
410 |     std::vector<int64_t> bufferSize;
411 |     int nbBindings = engine->getNbBindings();
412 |     bufferSize.resize(nbBindings);
413 | 
414 |     for (int i = 0; i < nbBindings; ++i)
415 |     {
416 |         nvinfer1::Dims dims = engine->getBindingDimensions(i);
417 |         nvinfer1::DataType dtype = engine->getBindingDataType(i);
418 |         int64_t totalSize = volume(dims) * 1 * getElementSize(dtype);
419 |         bufferSize[i] = totalSize;
420 |         CHECK(cudaMalloc(&buffers[i], totalSize));
421 |     }
422 | 
423 |     // Create a CUDA stream to execute this inference
424 |     cudaStream_t stream;
425 |     CHECK(cudaStreamCreate(&stream));
426 | 
427 |     //define inputImgs inputData outputDetections ...
428 |     //vector<float> inputData;
429 |     //inputData.reserve(DETECT_HEIGHT*DETECT_WIDTH*INPUT_CHANNEL*BATCH_SIZE);
430 |     vector<cv::Mat> inputImgs;
431 |     vector<float> outputs;
432 |     int outSize1 = bufferSize[1] / sizeof(float);
433 |     int outSize2 = bufferSize[2] / sizeof(float);
434 |     float* out1 = new float[outSize1];
435 |     float* out2 = new float[outSize2];
436 | 
437 |     int index = 1,
438 |         batchCount = 0;
439 | 
440 |     cv::Mat img = cv::imread("F:/TensorRT-6.0.1.5/data/v3tiny/1.jpg");
441 |     inputImgs.push_back(img);
442 |     auto t_start_pre = std::chrono::high_resolution_clock::now();
443 |     vector<float> curInput = prepareImage(img);
444 |     auto t_end_pre = std::chrono::high_resolution_clock::now();
445 |     float total_pre = std::chrono::duration<float, std::milli>(t_end_pre - t_start_pre).count();
446 |     std::cout << "prepare image take: " << total_pre << " ms." << endl;
447 | 
448 |     /*
449 |     inputData.insert(inputData.end(), curInput.begin(), curInput.end());
450 |     batchCount++;
451 |     if (batchCount < BATCH_SIZE && i + 1 < fileNames.size())
452 |         continue;
453 |     */
454 | 
455 |     // DMA the input to the GPU, execute the batch asynchronously, and DMA it back:
456 |     // Asynchronously copy the data from the host input buffer to the device input buffer
457 |     CHECK(cudaMemcpyAsync(buffers[0], curInput.data(), bufferSize[0], cudaMemcpyHostToDevice, stream));
458 | 
459 |     // Run inference
460 |     auto t_start = std::chrono::high_resolution_clock::now();
461 |     context->execute(BATCH_SIZE, buffers);
462 |     auto t_end = std::chrono::high_resolution_clock::now();
463 |     float total = std::chrono::duration<float, std::milli>(t_end - t_start).count();
464 |     std::cout << "Inference take: " << total << " ms." << endl;
465 | 
466 |     CHECK(cudaMemcpyAsync(out1, buffers[1], bufferSize[1], cudaMemcpyDeviceToHost, stream));
467 |     CHECK(cudaMemcpyAsync(out2, buffers[2], bufferSize[2], cudaMemcpyDeviceToHost, stream));
468 |     cudaStreamSynchronize(stream);
469 | 
470 |     float* out = nullptr;
471 |     out = merge(out1, out2, outSize1, outSize2);  // merge() allocates the combined output buffer
472 | 
473 |     // postprocess
474 |     auto t_start_post = std::chrono::high_resolution_clock::now();
475 |     auto boxes = postProcess(img, out);
476 |     auto t_end_post = std::chrono::high_resolution_clock::now();
477 |     float total_post = std::chrono::duration<float, std::milli>(t_end_post - t_start_post).count();
478 |     std::cout << "Postprocess take: " << total_post << " ms." << endl;
479 | 
480 |     //print boxes
481 |     for (int i = 0; i < boxes.size(); ++i)
482 |     {
483 |         cout << boxes[i].prob << ", " << boxes[i].x << ", " << boxes[i].y << ", " << boxes[i].w << ", " << boxes[i].h << endl;
484 |         int x = boxes[i].x,
485 |             y = boxes[i].y,
486 |             w = boxes[i].w,
487 |             h = boxes[i].h;
488 |         cv::Rect rect = { x, y, w, h };
489 |         cv::rectangle(img, rect, cv::Scalar(255, 255, 0), 2);
490 |     }
491 | 
492 |     cout << "\n" << endl;
493 | 
494 | 
495 |     // release the stream and the buffers
496 |     cudaStreamDestroy(stream);
497 |     CHECK(cudaFree(buffers[0]));
498 |     CHECK(cudaFree(buffers[1]));
499 |     CHECK(cudaFree(buffers[2]));
500 | 
501 |     // destroy the engine
502 |     context->destroy();
503 |     engine->destroy();
504 |     runtime->destroy();
505 | 
506 |     cv::imshow("result", img);
507 |     waitKey(0);
508 | 
509 | }
510 | 
511 | int main()
512 | {
513 |     // read imgs list
514 | 
515 |     // create a TensorRT model from the onnx model and serialize it to a stream
516 |     IHostMemory* trtModelStream{ nullptr };
517 | 
518 |     // create and load engine
519 |     fstream existEngine;
520 |     existEngine.open(engineFile, ios::in);
521 |     if (existEngine)
522 |     {
523 |         readTrtFile(engineFile, trtModelStream);
524 |         assert(trtModelStream != nullptr);
525 |     }
526 |     else
527 |     {
528 |         onnxToTRTModel(onnxFile, engineFile, trtModelStream);
529 |         assert(trtModelStream != nullptr);
530 |     }
531 | 
532 |     //onnxToTRTModel(onnxFile, engineFile, trtModelStream);
533 | 
534 |     //do inference
535 |     doInferenceFrieza(trtModelStream);
536 | 
537 |     return 0;
538 | }
--------------------------------------------------------------------------------