├── README.md
├── darknet2pb
│   ├── convert_weights.py
│   ├── convert_weights_pb.py
│   ├── README.md
│   ├── CODE_OF_CONDUCT.md
│   ├── demo.py
│   ├── yolo_v3_tiny.py
│   ├── yolo_v3.py
│   ├── utils.py
│   └── LICENSE
├── yolo-cluster-anchors.py
├── merge_bn.py
└── trt_yolov3_tiny.cpp

/README.md:
--------------------------------------------------------------------------------
1 | # cv_tools
2 | 
3 | - yolo-cluster-anchors.py: K-means anchor clustering for the YOLO family of object detectors. The algorithm and usage are described at https://blog.csdn.net/just_sort/article/details/103386047 .
4 | - darknet2pb: this folder extends the darknet2pb tool https://github.com/mystic123/tensorflow-yolo-v3 with support for depthwise separable convolutions, so it can convert YOLO models that use MobileNet as the backbone. Only yolov3-tiny.py was changed on top of mystic123's code; to adapt other convolution layers, add and modify them in the same way.
5 | - merge_bn.py: folds the BatchNorm layers of a Caffe model into the preceding convolutions to speed up inference.
6 | - trt_yolov3_tiny.cpp: YOLOv3-Tiny inference with TensorRT 6.0 and VS2015.
--------------------------------------------------------------------------------
/darknet2pb/convert_weights.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | import tensorflow as tf
4 | 
5 | import yolo_v3
6 | import yolo_v3_tiny
7 | 
8 | from utils import load_coco_names, load_weights
9 | 
10 | FLAGS = tf.app.flags.FLAGS
11 | 
12 | tf.app.flags.DEFINE_string(
13 |     'class_names', 'coco.names', 'File with class names')
14 | tf.app.flags.DEFINE_string(
15 |     'weights_file', 'yolov3.weights', 'Binary file with detector weights')
16 | tf.app.flags.DEFINE_string(
17 |     'data_format', 'NCHW', 'Data format: NCHW (gpu only) / NHWC')
18 | tf.app.flags.DEFINE_bool(
19 |     'tiny', False, 'Use tiny version of YOLOv3')
20 | tf.app.flags.DEFINE_bool(
21 |     'spp', False, 'Use SPP version of YOLOv3')
22 | tf.app.flags.DEFINE_string(
23 |     'ckpt_file', './saved_model/model.ckpt', 'Checkpoint file')
24 | 
25 | 
26 | def main(argv=None):
27 |     if FLAGS.tiny:
28 |         model = yolo_v3_tiny.yolo_v3_tiny
29 |     elif FLAGS.spp:
30 |         model = yolo_v3.yolo_v3_spp
31 |     else:
32 |         model = yolo_v3.yolo_v3
33 | 
34 |     classes = load_coco_names(FLAGS.class_names)
35 | 
36 |     # placeholder for detector inputs
37 |     # any size > 320 will work here
38 |     inputs = tf.placeholder(tf.float32, [None, 416, 416, 3])
39 | 
40 |     with tf.variable_scope('detector'):
41 |         detections = model(inputs, len(classes),
42 |                            data_format=FLAGS.data_format)
43 |         load_ops = load_weights(tf.global_variables(
44 |             scope='detector'), FLAGS.weights_file)
45 | 
46 |     saver = tf.train.Saver(tf.global_variables(scope='detector'))
47 | 
48 |     with tf.Session() as sess:
49 |         sess.run(load_ops)
50 | 
51 |         save_path = saver.save(sess, save_path=FLAGS.ckpt_file)
52 |         print('Model saved in path: {}'.format(save_path))
53 | 
54 | 
55 | if __name__ == '__main__':
56 |     tf.app.run()
57 | 
--------------------------------------------------------------------------------
/darknet2pb/convert_weights_pb.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | import numpy as np
4 | import tensorflow as tf
5 | import yolo_v3
6 | import yolo_v3_tiny
7 | from PIL import Image, ImageDraw
8 | 
9 | from utils import load_weights, load_coco_names, detections_boxes, freeze_graph
10 | 
11 | FLAGS = tf.app.flags.FLAGS
12 | 
13 | tf.app.flags.DEFINE_string(
14 |     'class_names', 'coco.names', 'File with class names')
15 | tf.app.flags.DEFINE_string(
16 |     'weights_file', 'yolov3.weights', 'Binary file with detector weights')
17 | tf.app.flags.DEFINE_string(
18 |     'data_format', 'NCHW', 'Data format: NCHW (gpu only) / NHWC')
19 | tf.app.flags.DEFINE_string(
20 |     'output_graph',
'frozen_darknet_yolov3_model.pb', 'Frozen tensorflow protobuf model output path') 21 | 22 | tf.app.flags.DEFINE_bool( 23 | 'tiny', False, 'Use tiny version of YOLOv3') 24 | tf.app.flags.DEFINE_bool( 25 | 'spp', False, 'Use SPP version of YOLOv3') 26 | tf.app.flags.DEFINE_integer( 27 | 'size', 416, 'Image size') 28 | 29 | 30 | 31 | def main(argv=None): 32 | if FLAGS.tiny: 33 | model = yolo_v3_tiny.yolo_v3_tiny 34 | elif FLAGS.spp: 35 | model = yolo_v3.yolo_v3_spp 36 | else: 37 | model = yolo_v3.yolo_v3 38 | 39 | classes = load_coco_names(FLAGS.class_names) 40 | 41 | # placeholder for detector inputs 42 | inputs = tf.placeholder(tf.float32, [None, FLAGS.size, FLAGS.size, 3], "inputs") 43 | 44 | with tf.variable_scope('detector'): 45 | detections = model(inputs, len(classes), data_format=FLAGS.data_format) 46 | load_ops = load_weights(tf.global_variables(scope='detector'), FLAGS.weights_file) 47 | 48 | # Sets the output nodes in the current session 49 | boxes = detections_boxes(detections) 50 | 51 | with tf.Session() as sess: 52 | sess.run(load_ops) 53 | freeze_graph(sess, FLAGS.output_graph) 54 | 55 | if __name__ == '__main__': 56 | tf.app.run() 57 | -------------------------------------------------------------------------------- /darknet2pb/README.md: -------------------------------------------------------------------------------- 1 | # tensorflow-yolo-v3 2 | 3 | Implementation of YOLO v3 object detector in Tensorflow (TF-Slim). Full tutorial can be found [here](https://medium.com/@pawekapica_31302/implementing-yolo-v3-in-tensorflow-tf-slim-c3c55ff59dbe). 4 | 5 | Tested on Python 3.5, Tensorflow 1.11.0 on Ubuntu 16.04. 6 | 7 | ## Todo list: 8 | - [x] YOLO v3 architecture 9 | - [x] Basic working demo 10 | - [x] Weights converter (util for exporting loaded COCO weights as TF checkpoint) 11 | - [ ] Training pipeline 12 | - [ ] More backends 13 | 14 | ## How to run the demo: 15 | To run demo type this in the command line: 16 | 17 | 1. Download COCO class names file: `wget https://raw.githubusercontent.com/pjreddie/darknet/master/data/coco.names` 18 | 2. Download and convert model weights: 19 | 1. Download binary file with desired weights: 20 | 1. Full weights: `wget https://pjreddie.com/media/files/yolov3.weights` 21 | 1. Tiny weights: `wget https://pjreddie.com/media/files/yolov3-tiny.weights` 22 | 1. SPP weights: `wget https://pjreddie.com/media/files/yolov3-spp.weights` 23 | 2. Run `python ./convert_weights.py` and `python ./convert_weights_pb.py` 24 | 3. Run `python ./demo.py --input_img --output_img --frozen_model ` 25 | 26 | 27 | ####Optional Flags 28 | 1. convert_weights: 29 | 1. `--class_names` 30 | 1. Path to the class names file 31 | 2. `--weights_file` 32 | 1. Path to the desired weights file 33 | 3. `--data_format` 34 | 1. `NCHW` (gpu only) or `NHWC` 35 | 4. `--tiny` 36 | 1. Use yolov3-tiny 37 | 5. `--spp` 38 | 1. Use yolov3-spp 39 | 6. `--ckpt_file` 40 | 1. Output checkpoint file 41 | 2. convert_weights_pb.py: 42 | 1. `--class_names` 43 | 1. Path to the class names file 44 | 2. `--weights_file` 45 | 1. Path to the desired weights file 46 | 3. `--data_format` 47 | 1. `NCHW` (gpu only) or `NHWC` 48 | 4. `--tiny` 49 | 1. Use yolov3-tiny 50 | 5. `--spp` 51 | 1. Use yolov3-spp 52 | 6. `--output_graph` 53 | 1. Location to write the output .pb graph to 54 | 3. demo.py 55 | 1. `--class_names` 56 | 1. Path to the class names file 57 | 2. `--weights_file` 58 | 1. Path to the desired weights file 59 | 3. `--data_format` 60 | 1. `NCHW` (gpu only) or `NHWC` 61 | 4. `--ckpt_file` 62 | 1. 
Path to the checkpoint file 63 | 5. `--frozen_model` 64 | 1. Path to the frozen model 65 | 6. `--conf_threshold` 66 | 1. Desired confidence threshold 67 | 7. `--iou_threshold` 68 | 1. Desired iou threshold 69 | 8. `--gpu_memory_fraction` 70 | 1. Fraction of gpu memory to work with 71 | -------------------------------------------------------------------------------- /darknet2pb/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. 6 | 7 | ## Our Standards 8 | 9 | Examples of behavior that contributes to creating a positive environment include: 10 | 11 | * Using welcoming and inclusive language 12 | * Being respectful of differing viewpoints and experiences 13 | * Gracefully accepting constructive criticism 14 | * Focusing on what is best for the community 15 | * Showing empathy towards other community members 16 | 17 | Examples of unacceptable behavior by participants include: 18 | 19 | * The use of sexualized language or imagery and unwelcome sexual attention or advances 20 | * Trolling, insulting/derogatory comments, and personal or political attacks 21 | * Public or private harassment 22 | * Publishing others' private information, such as a physical or electronic address, without explicit permission 23 | * Other conduct which could reasonably be considered inappropriate in a professional setting 24 | 25 | ## Our Responsibilities 26 | 27 | Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. 28 | 29 | Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. 30 | 31 | ## Scope 32 | 33 | This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. 34 | 35 | ## Enforcement 36 | 37 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at pawel.kapica@gmail.com. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. 
38 | 39 | Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. 40 | 41 | ## Attribution 42 | 43 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version] 44 | 45 | [homepage]: http://contributor-covenant.org 46 | [version]: http://contributor-covenant.org/version/1/4/ 47 | -------------------------------------------------------------------------------- /darknet2pb/demo.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | import tensorflow as tf 5 | from PIL import Image 6 | import time 7 | 8 | import yolo_v3 9 | import yolo_v3_tiny 10 | 11 | from utils import load_coco_names, draw_boxes, get_boxes_and_inputs, get_boxes_and_inputs_pb, non_max_suppression, \ 12 | load_graph, letter_box_image 13 | 14 | FLAGS = tf.app.flags.FLAGS 15 | 16 | tf.app.flags.DEFINE_string( 17 | 'input_img', '', 'Input image') 18 | tf.app.flags.DEFINE_string( 19 | 'output_img', '', 'Output image') 20 | tf.app.flags.DEFINE_string( 21 | 'class_names', 'coco.names', 'File with class names') 22 | tf.app.flags.DEFINE_string( 23 | 'weights_file', 'yolov3.weights', 'Binary file with detector weights') 24 | tf.app.flags.DEFINE_string( 25 | 'data_format', 'NCHW', 'Data format: NCHW (gpu only) / NHWC') 26 | tf.app.flags.DEFINE_string( 27 | 'ckpt_file', './saved_model/model.ckpt', 'Checkpoint file') 28 | tf.app.flags.DEFINE_string( 29 | 'frozen_model', '', 'Frozen tensorflow protobuf model') 30 | tf.app.flags.DEFINE_bool( 31 | 'tiny', False, 'Use tiny version of YOLOv3') 32 | tf.app.flags.DEFINE_bool( 33 | 'spp', False, 'Use SPP version of YOLOv3') 34 | 35 | tf.app.flags.DEFINE_integer( 36 | 'size', 416, 'Image size') 37 | 38 | tf.app.flags.DEFINE_float( 39 | 'conf_threshold', 0.5, 'Confidence threshold') 40 | tf.app.flags.DEFINE_float( 41 | 'iou_threshold', 0.4, 'IoU threshold') 42 | 43 | tf.app.flags.DEFINE_float( 44 | 'gpu_memory_fraction', 1.0, 'Gpu memory fraction to use') 45 | 46 | def main(argv=None): 47 | 48 | gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction) 49 | 50 | config = tf.ConfigProto( 51 | gpu_options=gpu_options, 52 | log_device_placement=False, 53 | ) 54 | 55 | img = Image.open(FLAGS.input_img) 56 | img_resized = letter_box_image(img, FLAGS.size, FLAGS.size, 128) 57 | img_resized = img_resized.astype(np.float32) 58 | classes = load_coco_names(FLAGS.class_names) 59 | 60 | if FLAGS.frozen_model: 61 | 62 | t0 = time.time() 63 | frozenGraph = load_graph(FLAGS.frozen_model) 64 | print("Loaded graph in {:.2f}s".format(time.time()-t0)) 65 | 66 | boxes, inputs = get_boxes_and_inputs_pb(frozenGraph) 67 | 68 | with tf.Session(graph=frozenGraph, config=config) as sess: 69 | t0 = time.time() 70 | detected_boxes = sess.run( 71 | boxes, feed_dict={inputs: [img_resized]}) 72 | 73 | else: 74 | if FLAGS.tiny: 75 | model = yolo_v3_tiny.yolo_v3_tiny 76 | elif FLAGS.spp: 77 | model = yolo_v3.yolo_v3_spp 78 | else: 79 | model = yolo_v3.yolo_v3 80 | 81 | boxes, inputs = get_boxes_and_inputs(model, len(classes), FLAGS.size, FLAGS.data_format) 82 | 83 | saver = tf.train.Saver(var_list=tf.global_variables(scope='detector')) 84 | 85 | with tf.Session(config=config) as sess: 86 | t0 = time.time() 87 | saver.restore(sess, FLAGS.ckpt_file) 88 | print('Model restored in 
{:.2f}s'.format(time.time()-t0))
89 | 
90 |             t0 = time.time()
91 |             detected_boxes = sess.run(
92 |                 boxes, feed_dict={inputs: [img_resized]})
93 | 
94 |     filtered_boxes = non_max_suppression(detected_boxes,
95 |                                          confidence_threshold=FLAGS.conf_threshold,
96 |                                          iou_threshold=FLAGS.iou_threshold)
97 |     print("Predictions found in {:.2f}s".format(time.time() - t0))
98 | 
99 |     draw_boxes(filtered_boxes, img, classes, (FLAGS.size, FLAGS.size), True)
100 | 
101 |     img.save(FLAGS.output_img)
102 | 
103 | 
104 | if __name__ == '__main__':
105 |     tf.app.run()
106 | 
--------------------------------------------------------------------------------
/yolo-cluster-anchors.py:
--------------------------------------------------------------------------------
1 | #coding=utf-8
2 | import xml.etree.ElementTree as ET
3 | import numpy as np
  | import glob
4 | 
5 | 
6 | def iou(box, clusters):
7 |     """
8 |     Compute the IoU between one ground-truth box and the k prior (anchor) boxes.
9 |     box: tuple or array holding the ground-truth width and height.
10 |     clusters: numpy array of shape (k, 2), where k is the number of anchor clusters.
11 |     Returns: the IoU between the ground truth and every anchor.
12 |     """
13 |     x = np.minimum(clusters[:, 0], box[0])
14 |     y = np.minimum(clusters[:, 1], box[1])
15 |     if np.count_nonzero(x == 0) > 0 or np.count_nonzero(y == 0) > 0:
16 |         raise ValueError("Box has no area")
17 |     intersection = x * y
18 |     box_area = box[0] * box[1]
19 |     cluster_area = clusters[:, 0] * clusters[:, 1]
20 |     iou_ = intersection / (box_area + cluster_area - intersection)
21 |     return iou_
22 | 
23 | 
24 | def avg_iou(boxes, clusters):
25 |     """
26 |     Compute the mean of the best IoU between each ground truth and the k anchors.
27 |     """
28 |     return np.mean([np.max(iou(boxes[i], clusters)) for i in range(boxes.shape[0])])
29 | 
30 | def kmeans(boxes, k, dist=np.median):
31 |     """
32 |     K-means clustering driven by IoU.
33 |     boxes: ground-truth boxes of shape (r, 2), where r is the number of ground truths.
34 |     k: number of anchors.
35 |     dist: distance (aggregation) function.
36 |     Returns: the k anchor boxes as an array of shape (k, 2).
37 |     """
38 |     # this is the r mentioned above
39 |     rows = boxes.shape[0]
40 |     # distance array: distance between each ground truth and the k anchors
41 |     distances = np.empty((rows, k))
42 |     # index of the anchor that was "closest" to each ground truth in the previous iteration
43 |     last_clusters = np.zeros((rows,))
44 |     # set the random seed
45 |     np.random.seed()
46 | 
47 |     # initialize the k cluster centers by picking k of the r ground truths at random
48 |     clusters = boxes[np.random.choice(rows, k, replace=False)]
49 |     # start clustering
50 |     while True:
51 |         # distance between each ground truth and the k anchors, computed as 1 - IOU(box, anchor)
52 |         for row in range(rows):
53 |             distances[row] = 1 - iou(boxes[row], clusters)
54 |         # for each ground truth, pick the anchor with the smallest distance and keep its index
55 |         nearest_clusters = np.argmin(distances, axis=1)
56 |         # if every ground truth keeps the same nearest anchor as last time, clustering has converged
57 |         if (last_clusters == nearest_clusters).all():
58 |             break
59 |         # update each cluster center to dist (the median by default) of the boxes assigned to it
60 |         for cluster in range(k):
61 |             clusters[cluster] = dist(boxes[nearest_clusters == cluster], axis=0)
62 |         # remember the nearest anchor index of each ground truth for the next iteration
63 |         last_clusters = nearest_clusters
64 | 
65 |     return clusters
66 | 
67 | # Load your own dataset; all it needs is the XML files produced by labelImg
68 | def load_dataset(path):
69 |     dataset = []
70 |     for xml_file in glob.glob("{}/*xml".format(path)):
71 |         tree = ET.parse(xml_file)
72 |         # image height
73 |         height = int(tree.findtext("./size/height"))
74 |         # image width
75 |         width = int(tree.findtext("./size/width"))
76 | 
77 |         for obj in tree.iter("object"):
78 |             # coordinates normalized by the image size
79 |             xmin = int(obj.findtext("bndbox/xmin")) / width
80 |             ymin = int(obj.findtext("bndbox/ymin")) / height
81 |             xmax = int(obj.findtext("bndbox/xmax")) / width
82 |             ymax = int(obj.findtext("bndbox/ymax")) / height
83 |             xmin = np.float64(xmin)
84 |             ymin = np.float64(ymin)
85 |             xmax = np.float64(xmax)
86 |             ymax = np.float64(ymax)
87 |             if xmax == xmin or ymax == ymin:
88 |                 print(xml_file)
  |                 continue  # skip zero-area boxes, otherwise iou() raises "Box has no area"
89 |             # append the anchor width and height to the dataset; running kmeans on it yields the anchors
90 |             dataset.append([xmax - xmin, ymax - ymin])
91 |     return np.array(dataset)
92 | 
93 | if __name__ == '__main__':
94 | 
95 |     ANNOTATIONS_PATH = "F:\Annotations" # folder containing the XML files
96 |     CLUSTERS = 9 # number of clusters, i.e. number of anchors
97 |     INPUTDIM = 416 # network input size
98 | 
99 |     data = load_dataset(ANNOTATIONS_PATH)
100 |     out = kmeans(data, k=CLUSTERS)
101 |     print('Boxes:')
102 |     print(np.array(out)*INPUTDIM)
103 |     print("Accuracy: {:.2f}%".format(avg_iou(data, out) * 100))
104 |     final_anchors = np.around(out[:, 0] / out[:, 1], decimals=2).tolist()
105 |     print("Before Sort Ratios:\n {}".format(final_anchors))
106 |     print("After Sort Ratios:\n {}".format(sorted(final_anchors)))
107 | 
--------------------------------------------------------------------------------
/darknet2pb/yolo_v3_tiny.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | import numpy as np
4 | import tensorflow as tf
5 | from yolo_v3 import _conv2d_fixed_padding, _fixed_padding, _get_size, \
6 |     _detection_layer, _upsample
7 | 
8 | slim = tf.contrib.slim
9 | 
10 | _BATCH_NORM_DECAY = 0.9
11 | _BATCH_NORM_EPSILON = 1e-05
12 | _LEAKY_RELU = 0.1
13 | 
14 | _ANCHORS = [(10, 14), (23, 27), (37, 58),
15 |             (81, 82), (135, 169), (344, 319)]
16 | 
17 | 
18 | def yolo_v3_tiny(inputs, num_classes, is_training=False, data_format='NCHW', reuse=False):
19 |     """
20 |     Creates YOLO v3 tiny model.
21 |     :param inputs: a 4-D tensor of size [batch_size, height, width, channels].
22 |         Dimension batch_size may be undefined. The channel order is RGB.
23 |     :param num_classes: number of predicted classes.
24 |     :param is_training: whether is training or not.
25 |     :param data_format: data format NCHW or NHWC.
26 |     :param reuse: whether or not the network and its variables should be reused.
27 |     :return:
28 |     """
29 |     # it will be needed later on
30 |     img_size = inputs.get_shape().as_list()[1:3]
31 | 
32 |     # transpose the inputs to NCHW
33 |     if data_format == 'NCHW':
34 |         inputs = tf.transpose(inputs, [0, 3, 1, 2])
35 | 
36 |     # normalize values to range [0..1]
37 |     inputs = inputs / 255
38 | 
39 |     # set batch norm params
40 |     batch_norm_params = {
41 |         'decay': _BATCH_NORM_DECAY,
42 |         'epsilon': _BATCH_NORM_EPSILON,
43 |         'scale': True,
44 |         'is_training': is_training,
45 |         'fused': None,  # Use fused batch norm if possible.
46 | } 47 | 48 | with tf.variable_scope('yolo-v3-tiny'): 49 | for i in range(6): 50 | inputs = slim.conv2d(inputs, 16 * pow(2, i), 3, 1, padding='SAME', biases_initializer=None, 51 | activation_fn=lambda x: tf.nn.leaky_relu(x, alpha=_LEAKY_RELU), 52 | normalizer_fn=slim.batch_norm, normalizer_params=batch_norm_params) 53 | 54 | if i == 4: 55 | route_1 = inputs 56 | 57 | if i == 5: 58 | inputs = slim.max_pool2d( 59 | inputs, [2, 2], stride=1, padding="SAME", scope='pool2') 60 | else: 61 | inputs = slim.max_pool2d( 62 | inputs, [2, 2], scope='pool2') 63 | 64 | # inputs = _conv2d_fixed_padding(inputs, 1024, 3) 65 | inputs = slim.separable_conv2d(inputs, num_outputs=None, kernel_size=3, depth_multiplier=1, stride=1, biases_initializer=None, 66 | activation_fn=lambda x: tf.nn.leaky_relu(x, alpha=_LEAKY_RELU), 67 | normalizer_fn=slim.batch_norm, normalizer_params=batch_norm_params, 68 | padding='SAME') 69 | 70 | inputs = slim.conv2d(inputs, 1024, 1, 1, biases_initializer=None, 71 | activation_fn=lambda x: tf.nn.leaky_relu(x, alpha=_LEAKY_RELU), 72 | normalizer_fn=slim.batch_norm, normalizer_params=batch_norm_params, padding='VALID') 73 | 74 | inputs = slim.conv2d(inputs, 256, 1, 1, padding='SAME', biases_initializer=None, 75 | activation_fn=lambda x: tf.nn.leaky_relu(x, alpha=_LEAKY_RELU), 76 | normalizer_fn=slim.batch_norm, normalizer_params=batch_norm_params) 77 | route_2 = inputs 78 | 79 | inputs = slim.conv2d(inputs, 512, 3, 1, padding='SAME', biases_initializer=None, 80 | activation_fn=lambda x: tf.nn.leaky_relu(x, alpha=_LEAKY_RELU), 81 | normalizer_fn=slim.batch_norm, normalizer_params=batch_norm_params) 82 | # inputs = _conv2d_fixed_padding(inputs, 255, 1) 83 | 84 | detect_1 = _detection_layer( 85 | inputs, num_classes, _ANCHORS[3:6], img_size, data_format) 86 | detect_1 = tf.identity(detect_1, name='detect_1') 87 | 88 | inputs = slim.conv2d(route_2, 128, 1, 1, padding='SAME', biases_initializer=None, 89 | activation_fn=lambda x: tf.nn.leaky_relu(x, alpha=_LEAKY_RELU), 90 | normalizer_fn=slim.batch_norm, normalizer_params=batch_norm_params) 91 | upsample_size = route_1.get_shape().as_list() 92 | inputs = _upsample(inputs, upsample_size, data_format) 93 | 94 | inputs = tf.concat([inputs, route_1], 95 | axis=1 if data_format == 'NCHW' else 3) 96 | 97 | inputs = slim.conv2d(inputs, 256, 3, 1, padding='SAME', biases_initializer=None, 98 | activation_fn=lambda x: tf.nn.leaky_relu(x, alpha=_LEAKY_RELU), 99 | normalizer_fn=slim.batch_norm, normalizer_params=batch_norm_params) 100 | # inputs = _conv2d_fixed_padding(inputs, 255, 1) 101 | 102 | detect_2 = _detection_layer( 103 | inputs, num_classes, _ANCHORS[0:3], img_size, data_format) 104 | detect_2 = tf.identity(detect_2, name='detect_2') 105 | 106 | detections = tf.concat([detect_1, detect_2], axis=1) 107 | detections = tf.identity(detections, name='detections') 108 | return detections -------------------------------------------------------------------------------- /merge_bn.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: UTF-8 -*- 3 | 4 | import numpy as np 5 | import sys 6 | import os 7 | import os.path as osp 8 | import google.protobuf as pb 9 | import google.protobuf.text_format 10 | from argparse import ArgumentParser 11 | import caffe 12 | 13 | caffe.set_mode_cpu() 14 | 15 | def load_and_fill_biases(src_model, src_weights, dst_model, dst_weights): 16 | with open(src_model) as f: 17 | model = caffe.proto.caffe_pb2.NetParameter() 18 | 
pb.text_format.Merge(f.read(), model) 19 | 20 | for i, layer in enumerate(model.layer): 21 | if layer.type == 'Convolution': # or layer.type == 'Scale': 22 | # Add bias layer if needed 23 | if layer.convolution_param.bias_term == False: 24 | layer.convolution_param.bias_term = True 25 | layer.convolution_param.bias_filler.type = 'constant' 26 | layer.convolution_param.bias_filler.value = 0.0 27 | 28 | with open(dst_model, 'w') as f: 29 | f.write(pb.text_format.MessageToString(model)) 30 | 31 | caffe.set_mode_cpu() 32 | net_src = caffe.Net(src_model, src_weights, caffe.TEST) 33 | net_dst = caffe.Net(dst_model, caffe.TEST) 34 | for key in net_src.params.keys(): 35 | for i in range(len(net_src.params[key])): 36 | net_dst.params[key][i].data[:] = net_src.params[key][i].data[:] 37 | 38 | if dst_weights is not None: 39 | # Store params 40 | pass 41 | 42 | return net_dst 43 | 44 | 45 | def merge_conv_and_bn(net, i_conv, i_bn, i_scale): 46 | # This is based on Kyeheyon's work 47 | assert(i_conv != None) 48 | assert(i_bn != None) 49 | 50 | def copy_double(data): 51 | return np.array(data, copy=True, dtype=np.double) 52 | 53 | key_conv = net._layer_names[i_conv] 54 | key_bn = net._layer_names[i_bn] 55 | key_scale = net._layer_names[i_scale] if i_scale else None 56 | 57 | # Copy 58 | bn_mean = copy_double(net.params[key_bn][0].data) 59 | bn_variance = copy_double(net.params[key_bn][1].data) 60 | num_bn_samples = copy_double(net.params[key_bn][2].data) 61 | 62 | # and Invalidate the BN layer 63 | net.params[key_bn][0].data[:] = 0 64 | net.params[key_bn][1].data[:] = 1 65 | net.params[key_bn][2].data[:] = 1 66 | 67 | if num_bn_samples[0] == 0: 68 | num_bn_samples[0] = 1 69 | 70 | if net.params.has_key(key_scale): 71 | print 'Combine {:s} + {:s} + {:s}'.format(key_conv, key_bn, key_scale) 72 | scale_weight = copy_double(net.params[key_scale][0].data) 73 | scale_bias = copy_double(net.params[key_scale][1].data) 74 | net.params[key_scale][0].data[:] = 1 75 | net.params[key_scale][1].data[:] = 0 76 | 77 | else: 78 | print 'Combine {:s} + {:s}'.format(key_conv, key_bn) 79 | scale_weight = 1 80 | scale_bias = 0 81 | 82 | weight = copy_double(net.params[key_conv][0].data) 83 | bias = copy_double(net.params[key_conv][1].data) 84 | 85 | alpha = scale_weight / np.sqrt(bn_variance / num_bn_samples[0] + 1e-5) 86 | net.params[key_conv][1].data[:] = bias * alpha + (scale_bias - (bn_mean / num_bn_samples[0]) * alpha) 87 | for i in range(len(alpha)): 88 | net.params[key_conv][0].data[i] = weight[i] * alpha[i] 89 | 90 | 91 | def merge_batchnorms_in_net(net): 92 | # for each BN 93 | for i, layer in enumerate(net.layers): 94 | if layer.type != 'BatchNorm': 95 | continue 96 | 97 | l_name = net._layer_names[i] 98 | 99 | l_bottom = net.bottom_names[l_name] 100 | assert(len(l_bottom) == 1) 101 | l_bottom = l_bottom[0] 102 | l_top = net.top_names[l_name] 103 | assert(len(l_top) == 1) 104 | l_top = l_top[0] 105 | 106 | can_be_absorbed = True 107 | 108 | # Search all (bottom) layers 109 | for j in xrange(i - 1, -1, -1): 110 | tops_of_j = net.top_names[net._layer_names[j]] 111 | if l_bottom in tops_of_j: 112 | if net.layers[j].type not in ['Convolution', 'InnerProduct']: 113 | can_be_absorbed = False 114 | else: 115 | # There must be only one layer 116 | conv_ind = j 117 | break 118 | 119 | if not can_be_absorbed: 120 | continue 121 | 122 | # find the following Scale 123 | scale_ind = None 124 | for j in xrange(i + 1, len(net.layers)): 125 | bottoms_of_j = net.bottom_names[net._layer_names[j]] 126 | if l_top in bottoms_of_j: 127 
| if scale_ind: 128 | # Followed by two or more layers 129 | scale_ind = None 130 | break 131 | 132 | if net.layers[j].type in ['Scale']: 133 | scale_ind = j 134 | 135 | top_of_j = net.top_names[net._layer_names[j]][0] 136 | if top_of_j == bottoms_of_j[0]: 137 | # On-the-fly => Can be merged 138 | break 139 | 140 | else: 141 | # Followed by a layer which is not 'Scale' 142 | scale_ind = None 143 | break 144 | 145 | 146 | merge_conv_and_bn(net, conv_ind, i, scale_ind) 147 | 148 | return net 149 | 150 | 151 | def process_model(net, src_model, dst_model, func_loop, func_finally): 152 | with open(src_model) as f: 153 | model = caffe.proto.caffe_pb2.NetParameter() 154 | pb.text_format.Merge(f.read(), model) 155 | 156 | for i, layer in enumerate(model.layer): 157 | map(lambda x: x(layer, net, model, i), func_loop) 158 | 159 | map(lambda x: x(net, model), func_finally) 160 | 161 | with open(dst_model, 'w') as f: 162 | f.write(pb.text_format.MessageToString(model)) 163 | 164 | 165 | # Functions to remove (redundant) BN and Scale layers 166 | to_delete_empty = [] 167 | def pick_empty_layers(layer, net, model, i): 168 | if layer.type not in ['BatchNorm', 'Scale']: 169 | return 170 | 171 | bottom = layer.bottom[0] 172 | top = layer.top[0] 173 | 174 | if (bottom != top): 175 | # Not supperted yet 176 | return 177 | 178 | if layer.type == 'BatchNorm': 179 | zero_mean = np.all(net.params[layer.name][0].data == 0) 180 | one_var = np.all(net.params[layer.name][1].data == 1) 181 | 182 | if zero_mean and one_var: 183 | print 'Delete layer: {}'.format(layer.name) 184 | to_delete_empty.append(layer) 185 | 186 | if layer.type == 'Scale': 187 | no_scaling = np.all(net.params[layer.name][0].data == 1) 188 | zero_bias = np.all(net.params[layer.name][1].data == 0) 189 | 190 | if no_scaling and zero_bias: 191 | print 'Delete layer: {}'.format(layer.name) 192 | to_delete_empty.append(layer) 193 | 194 | 195 | def remove_empty_layers(net, model): 196 | map(model.layer.remove, to_delete_empty) 197 | 198 | 199 | # A function to add 'engine: CAFFE' param into 1x1 convolutions 200 | def set_engine_caffe(layer, net, model, i): 201 | if layer.type == 'Convolution': 202 | if layer.convolution_param.kernel_size == 1\ 203 | or (layer.convolution_param.kernel_h == layer.convolution_param.kernel_w == 1): 204 | layer.convolution_param.engine = dict(layer.convolution_param.Engine.items())['CAFFE'] 205 | 206 | 207 | def main(): 208 | # Set default output file names 209 | if args.output_model is None: 210 | file_name = osp.splitext(args.model)[0] 211 | args.output_model = file_name + '_inference.prototxt' 212 | if args.output_weights is None: 213 | file_name = osp.splitext(args.weights)[0] 214 | args.output_weights = file_name + '_inference.caffemodel' 215 | 216 | net = load_and_fill_biases(args.model, args.weights, args.model + '.temp.pt', None) 217 | net = merge_batchnorms_in_net(net) 218 | 219 | process_model(net, args.model + '.temp.pt', args.output_model, 220 | [pick_empty_layers, set_engine_caffe], 221 | [remove_empty_layers]) 222 | 223 | # Store params 224 | net.save(args.output_weights) 225 | 226 | 227 | if __name__ == '__main__': 228 | parser = ArgumentParser( 229 | description="Generate Batch Normalized model for inference") 230 | parser.add_argument('--model', default="MobileNetSSD_deploy.prototxt", help="The net definition prototxt") 231 | parser.add_argument('--weights', default="MobileNetSSD_deploy.caffemodel", help="The weights caffemodel") 232 | parser.add_argument('--output_model') 233 | 
parser.add_argument('--output_weights') 234 | args = parser.parse_args() 235 | main() -------------------------------------------------------------------------------- /darknet2pb/yolo_v3.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | import tensorflow as tf 5 | 6 | slim = tf.contrib.slim 7 | 8 | _BATCH_NORM_DECAY = 0.9 9 | _BATCH_NORM_EPSILON = 1e-05 10 | _LEAKY_RELU = 0.1 11 | 12 | _ANCHORS = [(10, 13), (16, 30), (33, 23), 13 | (30, 61), (62, 45), (59, 119), 14 | (116, 90), (156, 198), (373, 326)] 15 | 16 | 17 | def darknet53(inputs): 18 | """ 19 | Builds Darknet-53 model. 20 | """ 21 | inputs = _conv2d_fixed_padding(inputs, 32, 3) 22 | inputs = _conv2d_fixed_padding(inputs, 64, 3, strides=2) 23 | inputs = _darknet53_block(inputs, 32) 24 | inputs = _conv2d_fixed_padding(inputs, 128, 3, strides=2) 25 | 26 | for i in range(2): 27 | inputs = _darknet53_block(inputs, 64) 28 | 29 | inputs = _conv2d_fixed_padding(inputs, 256, 3, strides=2) 30 | 31 | for i in range(8): 32 | inputs = _darknet53_block(inputs, 128) 33 | 34 | route_1 = inputs 35 | inputs = _conv2d_fixed_padding(inputs, 512, 3, strides=2) 36 | 37 | for i in range(8): 38 | inputs = _darknet53_block(inputs, 256) 39 | 40 | route_2 = inputs 41 | inputs = _conv2d_fixed_padding(inputs, 1024, 3, strides=2) 42 | 43 | for i in range(4): 44 | inputs = _darknet53_block(inputs, 512) 45 | 46 | return route_1, route_2, inputs 47 | 48 | 49 | def _conv2d_fixed_padding(inputs, filters, kernel_size, strides=1): 50 | if strides > 1: 51 | inputs = _fixed_padding(inputs, kernel_size) 52 | inputs = slim.conv2d(inputs, filters, kernel_size, stride=strides, 53 | padding=('SAME' if strides == 1 else 'VALID')) 54 | return inputs 55 | 56 | 57 | def _darknet53_block(inputs, filters): 58 | shortcut = inputs 59 | inputs = _conv2d_fixed_padding(inputs, filters, 1) 60 | inputs = _conv2d_fixed_padding(inputs, filters * 2, 3) 61 | 62 | inputs = inputs + shortcut 63 | return inputs 64 | 65 | 66 | def _spp_block(inputs, data_format='NCHW'): 67 | return tf.concat([slim.max_pool2d(inputs, 13, 1, 'SAME'), 68 | slim.max_pool2d(inputs, 9, 1, 'SAME'), 69 | slim.max_pool2d(inputs, 5, 1, 'SAME'), 70 | inputs], 71 | axis=1 if data_format == 'NCHW' else 3) 72 | 73 | 74 | @tf.contrib.framework.add_arg_scope 75 | def _fixed_padding(inputs, kernel_size, *args, mode='CONSTANT', **kwargs): 76 | """ 77 | Pads the input along the spatial dimensions independently of input size. 78 | 79 | Args: 80 | inputs: A tensor of size [batch, channels, height_in, width_in] or 81 | [batch, height_in, width_in, channels] depending on data_format. 82 | kernel_size: The kernel to be used in the conv2d or max_pool2d operation. 83 | Should be a positive integer. 84 | data_format: The input format ('NHWC' or 'NCHW'). 85 | mode: The mode for tf.pad. 86 | 87 | Returns: 88 | A tensor with the same format as the input with the data either intact 89 | (if kernel_size == 1) or padded (if kernel_size > 1). 
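    For example, kernel_size == 3 gives pad_total = 2 below, i.e. one extra pixel on each spatial side (pad_beg = pad_end = 1), while kernel_size == 1 adds no padding.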
90 | """ 91 | pad_total = kernel_size - 1 92 | pad_beg = pad_total // 2 93 | pad_end = pad_total - pad_beg 94 | 95 | if kwargs['data_format'] == 'NCHW': 96 | padded_inputs = tf.pad(inputs, [[0, 0], [0, 0], 97 | [pad_beg, pad_end], 98 | [pad_beg, pad_end]], 99 | mode=mode) 100 | else: 101 | padded_inputs = tf.pad(inputs, [[0, 0], [pad_beg, pad_end], 102 | [pad_beg, pad_end], [0, 0]], mode=mode) 103 | return padded_inputs 104 | 105 | 106 | def _yolo_block(inputs, filters, data_format='NCHW', with_spp=False): 107 | inputs = _conv2d_fixed_padding(inputs, filters, 1) 108 | inputs = _conv2d_fixed_padding(inputs, filters * 2, 3) 109 | inputs = _conv2d_fixed_padding(inputs, filters, 1) 110 | 111 | if with_spp: 112 | inputs = _spp_block(inputs, data_format) 113 | inputs = _conv2d_fixed_padding(inputs, filters, 1) 114 | 115 | inputs = _conv2d_fixed_padding(inputs, filters * 2, 3) 116 | inputs = _conv2d_fixed_padding(inputs, filters, 1) 117 | route = inputs 118 | inputs = _conv2d_fixed_padding(inputs, filters * 2, 3) 119 | return route, inputs 120 | 121 | 122 | def _get_size(shape, data_format): 123 | if len(shape) == 4: 124 | shape = shape[1:] 125 | return shape[1:3] if data_format == 'NCHW' else shape[0:2] 126 | 127 | 128 | def _detection_layer(inputs, num_classes, anchors, img_size, data_format): 129 | num_anchors = len(anchors) 130 | predictions = slim.conv2d(inputs, num_anchors * (5 + num_classes), 1, 131 | stride=1, normalizer_fn=None, 132 | activation_fn=None, 133 | biases_initializer=tf.zeros_initializer()) 134 | 135 | shape = predictions.get_shape().as_list() 136 | grid_size = _get_size(shape, data_format) 137 | dim = grid_size[0] * grid_size[1] 138 | bbox_attrs = 5 + num_classes 139 | 140 | if data_format == 'NCHW': 141 | predictions = tf.reshape( 142 | predictions, [-1, num_anchors * bbox_attrs, dim]) 143 | predictions = tf.transpose(predictions, [0, 2, 1]) 144 | 145 | predictions = tf.reshape(predictions, [-1, num_anchors * dim, bbox_attrs]) 146 | 147 | stride = (img_size[0] // grid_size[0], img_size[1] // grid_size[1]) 148 | 149 | anchors = [(a[0] / stride[0], a[1] / stride[1]) for a in anchors] 150 | 151 | box_centers, box_sizes, confidence, classes = tf.split( 152 | predictions, [2, 2, 1, num_classes], axis=-1) 153 | 154 | box_centers = tf.nn.sigmoid(box_centers) 155 | confidence = tf.nn.sigmoid(confidence) 156 | 157 | grid_x = tf.range(grid_size[0], dtype=tf.float32) 158 | grid_y = tf.range(grid_size[1], dtype=tf.float32) 159 | a, b = tf.meshgrid(grid_x, grid_y) 160 | 161 | x_offset = tf.reshape(a, (-1, 1)) 162 | y_offset = tf.reshape(b, (-1, 1)) 163 | 164 | x_y_offset = tf.concat([x_offset, y_offset], axis=-1) 165 | x_y_offset = tf.reshape(tf.tile(x_y_offset, [1, num_anchors]), [1, -1, 2]) 166 | 167 | box_centers = box_centers + x_y_offset 168 | box_centers = box_centers * stride 169 | 170 | anchors = tf.tile(anchors, [dim, 1]) 171 | box_sizes = tf.exp(box_sizes) * anchors 172 | box_sizes = box_sizes * stride 173 | 174 | detections = tf.concat([box_centers, box_sizes, confidence], axis=-1) 175 | 176 | classes = tf.nn.sigmoid(classes) 177 | predictions = tf.concat([detections, classes], axis=-1) 178 | return predictions 179 | 180 | 181 | def _upsample(inputs, out_shape, data_format='NCHW'): 182 | # tf.image.resize_nearest_neighbor accepts input in format NHWC 183 | if data_format == 'NCHW': 184 | inputs = tf.transpose(inputs, [0, 2, 3, 1]) 185 | 186 | if data_format == 'NCHW': 187 | new_height = out_shape[3] 188 | new_width = out_shape[2] 189 | else: 190 | new_height = out_shape[2] 
191 | new_width = out_shape[1] 192 | 193 | inputs = tf.image.resize_nearest_neighbor(inputs, (new_height, new_width)) 194 | 195 | # back to NCHW if needed 196 | if data_format == 'NCHW': 197 | inputs = tf.transpose(inputs, [0, 3, 1, 2]) 198 | 199 | inputs = tf.identity(inputs, name='upsampled') 200 | return inputs 201 | 202 | 203 | def yolo_v3(inputs, num_classes, is_training=False, data_format='NCHW', reuse=False, with_spp=False): 204 | """ 205 | Creates YOLO v3 model. 206 | 207 | :param inputs: a 4-D tensor of size [batch_size, height, width, channels]. 208 | Dimension batch_size may be undefined. The channel order is RGB. 209 | :param num_classes: number of predicted classes. 210 | :param is_training: whether is training or not. 211 | :param data_format: data format NCHW or NHWC. 212 | :param reuse: whether or not the network and its variables should be reused. 213 | :param with_spp: whether or not is using spp layer. 214 | :return: 215 | """ 216 | # it will be needed later on 217 | img_size = inputs.get_shape().as_list()[1:3] 218 | 219 | # transpose the inputs to NCHW 220 | if data_format == 'NCHW': 221 | inputs = tf.transpose(inputs, [0, 3, 1, 2]) 222 | 223 | # normalize values to range [0..1] 224 | inputs = inputs / 255 225 | 226 | # set batch norm params 227 | batch_norm_params = { 228 | 'decay': _BATCH_NORM_DECAY, 229 | 'epsilon': _BATCH_NORM_EPSILON, 230 | 'scale': True, 231 | 'is_training': is_training, 232 | 'fused': None, # Use fused batch norm if possible. 233 | } 234 | 235 | # Set activation_fn and parameters for conv2d, batch_norm. 236 | with slim.arg_scope([slim.conv2d, slim.batch_norm, _fixed_padding], data_format=data_format, reuse=reuse): 237 | with slim.arg_scope([slim.conv2d], normalizer_fn=slim.batch_norm, 238 | normalizer_params=batch_norm_params, 239 | biases_initializer=None, 240 | activation_fn=lambda x: tf.nn.leaky_relu(x, alpha=_LEAKY_RELU)): 241 | with tf.variable_scope('darknet-53'): 242 | route_1, route_2, inputs = darknet53(inputs) 243 | 244 | with tf.variable_scope('yolo-v3'): 245 | route, inputs = _yolo_block(inputs, 512, data_format, with_spp) 246 | 247 | detect_1 = _detection_layer( 248 | inputs, num_classes, _ANCHORS[6:9], img_size, data_format) 249 | detect_1 = tf.identity(detect_1, name='detect_1') 250 | 251 | inputs = _conv2d_fixed_padding(route, 256, 1) 252 | upsample_size = route_2.get_shape().as_list() 253 | inputs = _upsample(inputs, upsample_size, data_format) 254 | inputs = tf.concat([inputs, route_2], 255 | axis=1 if data_format == 'NCHW' else 3) 256 | 257 | route, inputs = _yolo_block(inputs, 256) 258 | 259 | detect_2 = _detection_layer( 260 | inputs, num_classes, _ANCHORS[3:6], img_size, data_format) 261 | detect_2 = tf.identity(detect_2, name='detect_2') 262 | 263 | inputs = _conv2d_fixed_padding(route, 128, 1) 264 | upsample_size = route_1.get_shape().as_list() 265 | inputs = _upsample(inputs, upsample_size, data_format) 266 | inputs = tf.concat([inputs, route_1], 267 | axis=1 if data_format == 'NCHW' else 3) 268 | 269 | _, inputs = _yolo_block(inputs, 128) 270 | 271 | detect_3 = _detection_layer( 272 | inputs, num_classes, _ANCHORS[0:3], img_size, data_format) 273 | detect_3 = tf.identity(detect_3, name='detect_3') 274 | 275 | detections = tf.concat([detect_1, detect_2, detect_3], axis=1) 276 | detections = tf.identity(detections, name='detections') 277 | return detections 278 | 279 | 280 | def yolo_v3_spp(inputs, num_classes, is_training=False, data_format='NCHW', reuse=False): 281 | """ 282 | Creates YOLO v3 with SPP model. 
283 | 284 | :param inputs: a 4-D tensor of size [batch_size, height, width, channels]. 285 | Dimension batch_size may be undefined. The channel order is RGB. 286 | :param num_classes: number of predicted classes. 287 | :param is_training: whether is training or not. 288 | :param data_format: data format NCHW or NHWC. 289 | :param reuse: whether or not the network and its variables should be reused. 290 | :return: 291 | """ 292 | return yolo_v3(inputs, num_classes, is_training=is_training, data_format=data_format, reuse=reuse, with_spp=True) 293 | -------------------------------------------------------------------------------- /darknet2pb/utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | import tensorflow as tf 5 | from PIL import ImageDraw, Image 6 | 7 | 8 | def get_boxes_and_inputs_pb(frozen_graph): 9 | 10 | with frozen_graph.as_default(): 11 | boxes = tf.get_default_graph().get_tensor_by_name("output_boxes:0") 12 | inputs = tf.get_default_graph().get_tensor_by_name("inputs:0") 13 | 14 | return boxes, inputs 15 | 16 | 17 | def get_boxes_and_inputs(model, num_classes, size, data_format): 18 | 19 | inputs = tf.placeholder(tf.float32, [1, size, size, 3]) 20 | 21 | with tf.variable_scope('detector'): 22 | detections = model(inputs, num_classes, 23 | data_format=data_format) 24 | 25 | boxes = detections_boxes(detections) 26 | 27 | return boxes, inputs 28 | 29 | 30 | def load_graph(frozen_graph_filename): 31 | 32 | with tf.gfile.GFile(frozen_graph_filename, "rb") as f: 33 | graph_def = tf.GraphDef() 34 | graph_def.ParseFromString(f.read()) 35 | 36 | with tf.Graph().as_default() as graph: 37 | tf.import_graph_def(graph_def, name="") 38 | 39 | return graph 40 | 41 | 42 | def freeze_graph(sess, output_graph): 43 | 44 | output_node_names = [ 45 | "output_boxes", 46 | "inputs", 47 | ] 48 | output_node_names = ",".join(output_node_names) 49 | 50 | output_graph_def = tf.graph_util.convert_variables_to_constants( 51 | sess, 52 | tf.get_default_graph().as_graph_def(), 53 | output_node_names.split(",") 54 | ) 55 | 56 | with tf.gfile.GFile(output_graph, "wb") as f: 57 | f.write(output_graph_def.SerializeToString()) 58 | 59 | print("{} ops written to {}.".format(len(output_graph_def.node), output_graph)) 60 | 61 | 62 | def load_weights(var_list, weights_file): 63 | """ 64 | Loads and converts pre-trained weights. 65 | :param var_list: list of network variables. 66 | :param weights_file: name of the binary file. 
67 | :return: list of assign ops 68 | """ 69 | with open(weights_file, "rb") as fp: 70 | _ = np.fromfile(fp, dtype=np.int32, count=5) 71 | 72 | weights = np.fromfile(fp, dtype=np.float32) 73 | 74 | ptr = 0 75 | i = 0 76 | assign_ops = [] 77 | while i < len(var_list) - 1: 78 | var1 = var_list[i] 79 | var2 = var_list[i + 1] 80 | # do something only if we process conv layer 81 | if 'Conv' in var1.name.split('/')[-2]: 82 | # check type of next layer 83 | if 'BatchNorm' in var2.name.split('/')[-2]: 84 | # load batch norm params 85 | gamma, beta, mean, var = var_list[i + 1:i + 5] 86 | batch_norm_vars = [beta, gamma, mean, var] 87 | for var in batch_norm_vars: 88 | shape = var.shape.as_list() 89 | num_params = np.prod(shape) 90 | var_weights = weights[ptr:ptr + num_params].reshape(shape) 91 | ptr += num_params 92 | assign_ops.append( 93 | tf.assign(var, var_weights, validate_shape=True)) 94 | 95 | # we move the pointer by 4, because we loaded 4 variables 96 | i += 4 97 | elif 'Conv' in var2.name.split('/')[-2]: 98 | # load biases 99 | bias = var2 100 | bias_shape = bias.shape.as_list() 101 | bias_params = np.prod(bias_shape) 102 | bias_weights = weights[ptr:ptr + 103 | bias_params].reshape(bias_shape) 104 | ptr += bias_params 105 | assign_ops.append( 106 | tf.assign(bias, bias_weights, validate_shape=True)) 107 | 108 | # we loaded 1 variable 109 | i += 1 110 | # we can load weights of conv layer 111 | shape = var1.shape.as_list() 112 | num_params = np.prod(shape) 113 | 114 | var_weights = weights[ptr:ptr + num_params].reshape( 115 | (shape[3], shape[2], shape[0], shape[1])) 116 | # remember to transpose to column-major 117 | var_weights = np.transpose(var_weights, (2, 3, 1, 0)) 118 | ptr += num_params 119 | assign_ops.append( 120 | tf.assign(var1, var_weights, validate_shape=True)) 121 | i += 1 122 | 123 | return assign_ops 124 | 125 | 126 | def detections_boxes(detections): 127 | """ 128 | Converts center x, center y, width and height values to coordinates of top left and bottom right points. 
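    That is, x0 = center_x - width / 2, y0 = center_y - height / 2, x1 = center_x + width / 2, y1 = center_y + height / 2; the confidence and class scores are passed through unchanged.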
129 | 130 | :param detections: outputs of YOLO v3 detector of shape (?, 10647, (num_classes + 5)) 131 | :return: converted detections of same shape as input 132 | """ 133 | center_x, center_y, width, height, attrs = tf.split( 134 | detections, [1, 1, 1, 1, -1], axis=-1) 135 | w2 = width / 2 136 | h2 = height / 2 137 | x0 = center_x - w2 138 | y0 = center_y - h2 139 | x1 = center_x + w2 140 | y1 = center_y + h2 141 | 142 | boxes = tf.concat([x0, y0, x1, y1], axis=-1) 143 | detections = tf.concat([boxes, attrs], axis=-1, name="output_boxes") 144 | return detections 145 | 146 | 147 | def _iou(box1, box2): 148 | """ 149 | Computes Intersection over Union value for 2 bounding boxes 150 | 151 | :param box1: array of 4 values (top left and bottom right coords): [x0, y0, x1, x2] 152 | :param box2: same as box1 153 | :return: IoU 154 | """ 155 | b1_x0, b1_y0, b1_x1, b1_y1 = box1 156 | b2_x0, b2_y0, b2_x1, b2_y1 = box2 157 | 158 | int_x0 = max(b1_x0, b2_x0) 159 | int_y0 = max(b1_y0, b2_y0) 160 | int_x1 = min(b1_x1, b2_x1) 161 | int_y1 = min(b1_y1, b2_y1) 162 | 163 | int_area = max(int_x1 - int_x0, 0) * max(int_y1 - int_y0, 0) 164 | 165 | b1_area = (b1_x1 - b1_x0) * (b1_y1 - b1_y0) 166 | b2_area = (b2_x1 - b2_x0) * (b2_y1 - b2_y0) 167 | 168 | # we add small epsilon of 1e-05 to avoid division by 0 169 | iou = int_area / (b1_area + b2_area - int_area + 1e-05) 170 | return iou 171 | 172 | 173 | def non_max_suppression(predictions_with_boxes, confidence_threshold, iou_threshold=0.4): 174 | """ 175 | Applies Non-max suppression to prediction boxes. 176 | 177 | :param predictions_with_boxes: 3D numpy array, first 4 values in 3rd dimension are bbox attrs, 5th is confidence 178 | :param confidence_threshold: the threshold for deciding if prediction is valid 179 | :param iou_threshold: the threshold for deciding if two boxes overlap 180 | :return: dict: class -> [(box, score)] 181 | """ 182 | conf_mask = np.expand_dims( 183 | (predictions_with_boxes[:, :, 4] > confidence_threshold), -1) 184 | predictions = predictions_with_boxes * conf_mask 185 | 186 | result = {} 187 | for i, image_pred in enumerate(predictions): 188 | shape = image_pred.shape 189 | non_zero_idxs = np.nonzero(image_pred) 190 | image_pred = image_pred[non_zero_idxs] 191 | image_pred = image_pred.reshape(-1, shape[-1]) 192 | 193 | bbox_attrs = image_pred[:, :5] 194 | classes = image_pred[:, 5:] 195 | classes = np.argmax(classes, axis=-1) 196 | 197 | unique_classes = list(set(classes.reshape(-1))) 198 | 199 | for cls in unique_classes: 200 | cls_mask = classes == cls 201 | cls_boxes = bbox_attrs[np.nonzero(cls_mask)] 202 | cls_boxes = cls_boxes[cls_boxes[:, -1].argsort()[::-1]] 203 | cls_scores = cls_boxes[:, -1] 204 | cls_boxes = cls_boxes[:, :-1] 205 | 206 | while len(cls_boxes) > 0: 207 | box = cls_boxes[0] 208 | score = cls_scores[0] 209 | if cls not in result: 210 | result[cls] = [] 211 | result[cls].append((box, score)) 212 | cls_boxes = cls_boxes[1:] 213 | cls_scores = cls_scores[1:] 214 | ious = np.array([_iou(box, x) for x in cls_boxes]) 215 | iou_mask = ious < iou_threshold 216 | cls_boxes = cls_boxes[np.nonzero(iou_mask)] 217 | cls_scores = cls_scores[np.nonzero(iou_mask)] 218 | 219 | return result 220 | 221 | 222 | def load_coco_names(file_name): 223 | names = {} 224 | with open(file_name) as f: 225 | for id, name in enumerate(f): 226 | names[id] = name 227 | return names 228 | 229 | 230 | def draw_boxes(boxes, img, cls_names, detection_size, is_letter_box_image): 231 | draw = ImageDraw.Draw(img) 232 | 233 | for cls, bboxs in 
boxes.items(): 234 | color = tuple(np.random.randint(0, 256, 3)) 235 | for box, score in bboxs: 236 | box = convert_to_original_size(box, np.array(detection_size), 237 | np.array(img.size), 238 | is_letter_box_image) 239 | draw.rectangle(box, outline=color) 240 | draw.text(box[:2], '{} {:.2f}%'.format( 241 | cls_names[cls], score * 100), fill=color) 242 | 243 | 244 | def convert_to_original_size(box, size, original_size, is_letter_box_image): 245 | if is_letter_box_image: 246 | box = box.reshape(2, 2) 247 | box[0, :] = letter_box_pos_to_original_pos(box[0, :], size, original_size) 248 | box[1, :] = letter_box_pos_to_original_pos(box[1, :], size, original_size) 249 | else: 250 | ratio = original_size / size 251 | box = box.reshape(2, 2) * ratio 252 | return list(box.reshape(-1)) 253 | 254 | 255 | def letter_box_image(image: Image.Image, output_height: int, output_width: int, fill_value)-> np.ndarray: 256 | """ 257 | Fit image with final image with output_width and output_height. 258 | :param image: PILLOW Image object. 259 | :param output_height: width of the final image. 260 | :param output_width: height of the final image. 261 | :param fill_value: fill value for empty area. Can be uint8 or np.ndarray 262 | :return: numpy image fit within letterbox. dtype=uint8, shape=(output_height, output_width) 263 | """ 264 | 265 | height_ratio = float(output_height)/image.size[1] 266 | width_ratio = float(output_width)/image.size[0] 267 | fit_ratio = min(width_ratio, height_ratio) 268 | fit_height = int(image.size[1] * fit_ratio) 269 | fit_width = int(image.size[0] * fit_ratio) 270 | fit_image = np.asarray(image.resize((fit_width, fit_height), resample=Image.BILINEAR)) 271 | 272 | if isinstance(fill_value, int): 273 | fill_value = np.full(fit_image.shape[2], fill_value, fit_image.dtype) 274 | 275 | to_return = np.tile(fill_value, (output_height, output_width, 1)) 276 | pad_top = int(0.5 * (output_height - fit_height)) 277 | pad_left = int(0.5 * (output_width - fit_width)) 278 | to_return[pad_top:pad_top+fit_height, pad_left:pad_left+fit_width] = fit_image 279 | return to_return 280 | 281 | 282 | def letter_box_pos_to_original_pos(letter_pos, current_size, ori_image_size)-> np.ndarray: 283 | """ 284 | Parameters should have same shape and dimension space. (Width, Height) or (Height, Width) 285 | :param letter_pos: The current position within letterbox image including fill value area. 286 | :param current_size: The size of whole image including fill value area. 287 | :param ori_image_size: The size of image before being letter boxed. 288 | :return: 289 | """ 290 | letter_pos = np.asarray(letter_pos, dtype=np.float) 291 | current_size = np.asarray(current_size, dtype=np.float) 292 | ori_image_size = np.asarray(ori_image_size, dtype=np.float) 293 | final_ratio = min(current_size[0]/ori_image_size[0], current_size[1]/ori_image_size[1]) 294 | pad = 0.5 * (current_size - final_ratio * ori_image_size) 295 | pad = pad.astype(np.int32) 296 | to_return_pos = (letter_pos - pad) / final_ratio 297 | return to_return_pos 298 | -------------------------------------------------------------------------------- /darknet2pb/LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 
8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 
179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 |
--------------------------------------------------------------------------------
/trt_yolov3_tiny.cpp:
--------------------------------------------------------------------------------
1 | #include <iostream>              // NOTE: the angle-bracket header names were missing from the dump; these are inferred from what the code uses
2 | #include <fstream>
3 | #include <sstream>
4 | #include <string>
5 | #include <vector>
6 | #include <algorithm>
7 | #include <numeric>
8 | #include <chrono>
9 | #include <memory>
10 | #include <cuda_runtime_api.h>
11 | #include <opencv2/opencv.hpp>
12 | 
13 | #include "NvInfer.h"
14 | #include "NvOnnxParser.h"
15 | #include "NvOnnxParserRuntime.h"
16 | #include "argsParser.h"
17 | #include "logger.h"
18 | #include "common.h"
19 | 
20 | using namespace std;
21 | using namespace nvinfer1;
22 | using namespace nvonnxparser;
23 | using namespace cv;
24 | 
25 | 
26 | // origin params
27 | samplesCommon::Args gArgs;
28 | 
29 | // Res params
30 | string onnxFile = "F:/TensorRT-6.0.1.5/data/v3tiny/yolov3-tiny.onnx";
31 | string engineFile = "F:/TensorRT-6.0.1.5/data/v3tiny/yolov3-tiny.trt";
32 | 
33 | vector<string> labels = { "abnormal" };
34 | 
35 | vector<vector<int>> output_shape = { { 1, 18, 13, 13 },{ 1, 18, 26, 26 } };
36 | vector<vector<int>> g_masks = { { 3, 4, 5 },{ 0, 1, 2 } };
37 | vector<vector<int>> g_anchors = { { 10, 14 },{ 23, 27 },{ 37, 58 },{ 81, 82 },{ 135, 169 },{ 344, 319 } };
38 | float obj_threshold = 0.10;
39 | float nms_threshold = 0.45;
40 | 
41 | int CATEGORY = 1;
42 | int BATCH_SIZE = 1;
43 | int INPUT_CHANNEL = 3;
44 | int DETECT_WIDTH = 416;
45 | int DETECT_HEIGHT = 416;
46 | 
47 | // Res struct & function
48 | typedef struct DetectionRes {
49 |     float x, y, w, h, prob;
50 | } DetectionRes;
51 | 
52 | float sigmoid(float in) {
53 |     return 1.f / (1.f + exp(-in));
54 | }
55 | float exponential(float in) {
56 |     return exp(in);
57 | }
58 | 
59 | float* merge(float* out1, float* out2, int bsize_out1, int bsize_out2)  // concatenate the two output buffers into one newly allocated buffer
60 | {
61 |     float* out_total = new float[bsize_out1 + bsize_out2];
62 | 
63 |     for (int j = 0; j < bsize_out1; ++j)
64 |     {
65 |         int index = j;
66 |         out_total[index] = out1[j];
67 |     }
68 | 
69 |     for (int j = 0; j < bsize_out2; ++j)
70 |     {
71 |         int index = j + bsize_out1;
72 |         out_total[index] = out2[j];
73 |     }
74 |     return out_total;
75 | }
76 | 
77 | vector<string> split(const string& str, char delim)  // split a string on a delimiter
78 | {
79 |     stringstream ss(str);
80 |     string token;
81 |     vector<string> container;
82 |     while (getline(ss, token, delim))
83 |     {
84 |         container.push_back(token);
85 |     }
86 | 
87 |     return container;
88 | }
89 | 
90 | 
91 | 
92 | void DoNms(vector<DetectionRes>& detections, float nmsThresh) {  // greedy IoU-based non-maximum suppression
93 |     auto iouCompute = [](float * lbox, float* rbox) {
94 |         float interBox[] = {
95 |             max(lbox[0], rbox[0]), //left
96 |             min(lbox[0] + lbox[2], rbox[0] + rbox[2]), //right
97 |             max(lbox[1], rbox[1]), //top
98 |             min(lbox[1] + lbox[3], rbox[1] + rbox[3]), //bottom
99 |         };
100 | 
101 |         if (interBox[2] >= interBox[3] || interBox[0] >= interBox[1])
102 |             return 0.0f;
103 | 
104 |         float interBoxS = (interBox[1] - interBox[0] + 1) * (interBox[3] - interBox[2] + 1);
105 |         return interBoxS / (lbox[2] * lbox[3] + rbox[2] * rbox[3] - interBoxS);
106 |     };
107 | 
108 |     sort(detections.begin(), detections.end(), [=](const DetectionRes & left, const DetectionRes & right) {
109 |         return left.prob > right.prob;
110 |     });
111 | 
112 |     vector<DetectionRes> result;
113 |     for (unsigned int m = 0; m < detections.size(); ++m) {
114 |         result.push_back(detections[m]);
115 |         for (unsigned int n = m + 1; n < detections.size(); ++n) {
116 |             if (iouCompute((float *)(&detections[m]), (float *)(&detections[n])) > nmsThresh) {
117 |                 detections.erase(detections.begin() + n);
118 |                 --n;
119 |             }
120 |         }
121 |     }
122 |     detections = move(result);
123 | }
124 | 
125 | vector<DetectionRes> postProcess(cv::Mat& image, float * output) {  // decode raw network outputs into boxes, rescale to the image, then run NMS
126 |     vector<DetectionRes> detections;
127 |     int total_size = 0;
128 |     for (int i = 0; i < output_shape.size(); i++) {
129 |         auto shape = output_shape[i];
130 |         int size = 1;
131 |         for (int j = 0; j < shape.size(); j++) {
132 |             size *= shape[j];
133 |         }
134 |         total_size += size;
135 |     }
136 | 
137 |     int offset = 0;
138 |     float * transposed_output = new float[total_size];
139 |     float * transposed_output_t = transposed_output;
140 |     for (int i = 0; i < output_shape.size(); i++) {
141 |         auto shape = output_shape[i]; // nchw
142 |         int chw = shape[1] * shape[2] * shape[3];
143 |         int hw = shape[2] * shape[3];
144 |         for (int n = 0; n < shape[0]; n++) {
145 |             int offset_n = offset + n * chw;
146 |             for (int h = 0; h < shape[2]; h++) {
147 |                 for (int w = 0; w < shape[3]; w++) {
148 |                     int h_w = h * shape[3] + w;
149 |                     for (int c = 0; c < shape[1]; c++) {
150 |                         int offset_c = offset_n + hw * c + h_w;
151 |                         *transposed_output_t++ = output[offset_c];
152 |                     }
153 |                 }
154 |             }
155 |         }
156 |         offset += shape[0] * chw;
157 |     }
158 |     vector<vector<int>> shapes;
159 |     for (int i = 0; i < output_shape.size(); i++) {
160 |         auto shape = output_shape[i];
161 |         vector<int> tmp = { shape[2], shape[3], 3, 6 };
162 |         shapes.push_back(tmp);
163 |     }
164 | 
165 |     offset = 0;
166 |     for (int i = 0; i < output_shape.size(); i++) {
167 |         auto masks = g_masks[i];
168 |         vector<vector<int>> anchors;
169 |         for (auto mask : masks)
170 |             anchors.push_back(g_anchors[mask]);
171 |         auto shape = shapes[i];
172 |         for (int h = 0; h < shape[0]; h++) {
173 |             int offset_h = offset + h * shape[1] * shape[2] * shape[3];
174 |             for (int w = 0; w < shape[1]; w++) {
175 |                 int offset_w = offset_h + w * shape[2] * shape[3];
176 |                 for (int c = 0; c < shape[2]; c++) {
177 |                     int offset_c = offset_w + c * shape[3];
178 |                     float * ptr = transposed_output + offset_c;
179 |                     ptr[4] = sigmoid(ptr[4]);
180 |                     ptr[5] = sigmoid(ptr[5]);
181 |                     float score = ptr[4] * ptr[5];
182 |                     if (score < obj_threshold)
183 |                         continue;
184 |                     ptr[0] = sigmoid(ptr[0]);
185 |                     ptr[1] = sigmoid(ptr[1]);
186 |                     ptr[2] = exponential(ptr[2]) * anchors[c][0];
187 |                     ptr[3] = exponential(ptr[3]) * anchors[c][1];
188 | 
189 |                     ptr[0] += w;
190 |                     ptr[1] += h;
191 |                     ptr[0] /= shape[0];
192 |                     ptr[1] /= shape[1];
193 |                     ptr[2] /= DETECT_WIDTH;
194 |                     ptr[3] /= DETECT_WIDTH;
195 |                     ptr[0] -= ptr[2] / 2;
196 |                     ptr[1] -= ptr[3] / 2;
197 | 
198 |                     DetectionRes det;
199 |                     det.x = ptr[0];
200 |                     det.y = ptr[1];
201 |                     det.w = ptr[2];
202 |                     det.h = ptr[3];
203 |                     det.prob = score;
204 |                     detections.push_back(det);
205 |                 }
206 |             }
207 |         }
208 |         offset += shape[0] * shape[1] * shape[2] * shape[3];
209 |     }
210 |     delete[] transposed_output;
211 | 
212 |     int h = DETECT_WIDTH; //net h
213 |     int w = DETECT_WIDTH; //net w
214 | 
215 |     //scale bbox to img
216 |     int width = image.cols;
217 |     int height = image.rows;
218 |     float scale = min(float(w) / width, float(h) / height);
219 |     float scaleSize[] = { width * scale, height * scale };
220 | 
221 |     //correct box
222 |     for (auto& bbox : detections) {
223 |         bbox.x = (bbox.x * w - (w - scaleSize[0]) / 2.f) / scale;
224 |         bbox.y = (bbox.y * h - (h - scaleSize[1]) / 2.f) / scale;
225 |         bbox.w *= w;
226 |         bbox.h *= h;
227 |         bbox.w /= scale;
228 |         bbox.h /= scale;
229 |     }
230 | 
231 |     //nms
232 |     float nmsThresh = nms_threshold;
233 |     if (nmsThresh > 0)
234 |         DoNms(detections, nmsThresh);
235 | 
236 |     return detections;
237 | }
238 | 
239 | 
240 | // prepare img
241 | vector<float> prepareImage(cv::Mat& img) {  // letterbox-resize to the network input, convert HWC BGR uint8 -> CHW RGB float
242 |     int c = 3;
243 |     int h = DETECT_WIDTH; //net h
244 |     int w = DETECT_WIDTH; //net w
245 | 
246 |     float scale = min(float(w) / img.cols, float(h) / img.rows);
247 |     auto scaleSize = cv::Size(img.cols * scale, img.rows * scale);
248 | 
249 |     cv::Mat rgb;
250 |     cv::cvtColor(img, rgb, CV_BGR2RGB);
251 |     cv::Mat resized;
252 |     cv::resize(rgb, resized, scaleSize, 0, 0, INTER_CUBIC);
253 | 
254 |     cv::Mat cropped(h, w, CV_8UC3, 127);
255 |     Rect rect((w - scaleSize.width) / 2, (h - scaleSize.height) / 2, scaleSize.width, scaleSize.height);
256 |     resized.copyTo(cropped(rect));
257 | 
258 |     cv::Mat img_float;
259 |     cropped.convertTo(img_float, CV_32FC3, 1.f / 255.0);
260 | 
261 | 
262 |     //HWC TO CHW
263 |     vector<cv::Mat> input_channels(c);
264 |     cv::split(img_float, input_channels);
265 | 
266 |     vector<float> result(h * w * c);
267 |     auto data = result.data();
268 |     int channelLength = h * w;
269 |     for (int i = 0; i < c; ++i) {
270 |         memcpy(data, input_channels[i].data, channelLength * sizeof(float));
271 |         data += channelLength;
272 |     }
273 |     return result;
274 | }
275 | 
276 | 
277 | // load engine file
278 | bool readTrtFile(const std::string& engineFile, //name of the engine file
279 |     IHostMemory*& trtModelStream)  //output buffer for the TensorRT model
280 | {
281 |     using namespace std;
282 |     fstream file;
283 |     cout << "loading filename from:" << engineFile << endl;
284 |     nvinfer1::IRuntime* trtRuntime;
285 |     nvonnxparser::IPluginFactory* onnxPlugin = createPluginFactory(gLogger.getTRTLogger());
286 |     file.open(engineFile, ios::binary | ios::in);
287 |     file.seekg(0, ios::end);
288 |     int length = file.tellg();
289 |     //cout << "length:" << length << endl;
290 |     file.seekg(0, ios::beg);
291 |     std::unique_ptr<char[]> data(new char[length]);
292 |     file.read(data.get(), length);
293 |     file.close();
294 |     cout << "load engine done" << endl;
295 |     std::cout << "deserializing" << endl;
296 |     trtRuntime = createInferRuntime(gLogger.getTRTLogger());
297 |     ICudaEngine* engine = trtRuntime->deserializeCudaEngine(data.get(), length, onnxPlugin);
298 |     cout << "deserialize done" << endl;
299 |     trtModelStream = engine->serialize();
300 | 
301 |     return true;
302 | }
303 | 
304 | 
305 | // Convert the ONNX model into a TensorRT engine
306 | bool onnxToTRTModel(const std::string& modelFile,  // name of the ONNX model file
307 |     const std::string& filename,                   // name of the TensorRT engine file to write
308 |     IHostMemory*& trtModelStream)                  // output buffer for the TensorRT model
309 | {
310 |     // Create the builder
311 |     IBuilder* builder = createInferBuilder(gLogger.getTRTLogger());
312 |     assert(builder != nullptr);
313 |     nvinfer1::INetworkDefinition* network = builder->createNetwork();
314 | 
315 |     // Parse the ONNX model
316 |     auto parser = nvonnxparser::createParser(*network, gLogger.getTRTLogger());
317 | 
318 | 
319 |     // Optional - uncomment the lines below to print detailed information for every layer of the network
320 |     //config->setPrintLayerInfo(true);
321 |     //parser->reportParsingInfo();
322 | 
323 |     // Check whether the ONNX model was parsed successfully
324 |     if (!parser->parseFromFile(modelFile.c_str(), static_cast<int>(gLogger.getReportableSeverity())))
325 |     {
326 |         gLogError << "Failure while parsing ONNX file" << std::endl;
327 |         return false;
328 |     }
329 | 
330 |     // Build the inference engine
331 |     builder->setMaxBatchSize(BATCH_SIZE);
332 |     builder->setMaxWorkspaceSize(1 << 30);
333 |     builder->setFp16Mode(true);
334 |     builder->setInt8Mode(gArgs.runInInt8);
335 | 
336 |     if (gArgs.runInInt8)
337 |     {
338 |         samplesCommon::setAllTensorScales(network, 127.0f, 127.0f);
339 |     }
340 | 
341 |     cout << "start building engine" << endl;
342 |     ICudaEngine* engine = builder->buildCudaEngine(*network);
343 |     cout << "build engine done" << endl;
344 |     assert(engine);
345 | 
346 |     // Destroy the parser
347 |     parser->destroy();
348 | 
349 |     // Serialize the engine
350 |     trtModelStream = engine->serialize();
351 | 
352 |     // Save the engine to disk
353 |     nvinfer1::IHostMemory* data = engine->serialize();
354 |     std::ofstream file;
355 |     file.open(filename, std::ios::binary | std::ios::out);
356 |     cout << "writing engine file..." << endl;
357 |     file.write((const char*)data->data(), data->size());
358 |     cout << "save engine file done" << endl;
359 |     file.close();
360 | 
361 |     // Destroy everything that is no longer needed
362 |     engine->destroy();
363 |     network->destroy();
364 |     builder->destroy();
365 | 
366 |     return true;
367 | }
368 | 
369 | inline int64_t volume(const nvinfer1::Dims& d)
370 | {
371 |     return std::accumulate(d.d, d.d + d.nbDims, 1, std::multiplies<int64_t>());
372 | }
373 | 
374 | inline unsigned int getElementSize(nvinfer1::DataType t)
375 | {
376 |     switch (t)
377 |     {
378 |     case nvinfer1::DataType::kINT32: return 4;
379 |     case nvinfer1::DataType::kFLOAT: return 4;
380 |     case nvinfer1::DataType::kHALF: return 2;
381 |     case nvinfer1::DataType::kINT8: return 1;
382 |     }
383 |     throw std::runtime_error("Invalid DataType.");
384 |     return 0;
385 | }
386 | 
387 | // Run forward inference
388 | void doInferenceFrieza(IHostMemory* trtModelStream)
389 | {
390 |     //get engine
391 |     assert(trtModelStream != nullptr);
392 |     IRuntime* runtime = createInferRuntime(gLogger);
393 |     nvonnxparser::IPluginFactory* onnxPlugin = createPluginFactory(gLogger.getTRTLogger());
394 |     assert(runtime != nullptr);
395 |     if (gArgs.useDLACore >= 0)
396 |     {
397 |         runtime->setDLACore(gArgs.useDLACore);
398 |     }
399 |     ICudaEngine* engine = runtime->deserializeCudaEngine(trtModelStream->data(), trtModelStream->size(), onnxPlugin);
400 | 
401 |     // Create the inference engine and execution context
402 |     assert(engine != nullptr);
403 |     trtModelStream->destroy();
404 |     IExecutionContext* context = engine->createExecutionContext();
405 |     assert(context != nullptr);
406 | 
407 |     // Read the input data into the buffer management objects
408 |     assert(engine->getNbBindings() == 3);
409 |     void* buffers[3];
410 |     std::vector<int64_t> bufferSize;
411 |     int nbBindings = engine->getNbBindings();
412 |     bufferSize.resize(nbBindings);
413 | 
414 |     for (int i = 0; i < nbBindings; ++i)
415 |     {
416 |         nvinfer1::Dims dims = engine->getBindingDimensions(i);
417 |         nvinfer1::DataType dtype = engine->getBindingDataType(i);
418 |         int64_t totalSize = volume(dims) * 1 * getElementSize(dtype);
419 |         bufferSize[i] = totalSize;
420 |         CHECK(cudaMalloc(&buffers[i], totalSize));
421 |     }
422 | 
423 |     // Create a CUDA stream to execute this inference
424 |     cudaStream_t stream;
425 |     CHECK(cudaStreamCreate(&stream));
426 | 
427 |     //define inputImgs inputData outputDetections ...
428 |     //vector<float> inputData;
429 |     //inputData.reserve(DETECT_HEIGHT*DETECT_WIDTH*INPUT_CHANNEL*BATCH_SIZE);
430 |     vector<cv::Mat> inputImgs;
431 |     vector<float> outputs;
432 |     int outSize1 = bufferSize[1] / sizeof(float);
433 |     int outSize2 = bufferSize[2] / sizeof(float);
434 |     float* out1 = new float[outSize1];
435 |     float* out2 = new float[outSize2];
436 | 
437 |     int index = 1,
438 |         batchCount = 0;
439 | 
440 |     cv::Mat img = cv::imread("F:/TensorRT-6.0.1.5/data/v3tiny/1.jpg");
441 |     inputImgs.push_back(img);
442 |     auto t_start_pre = std::chrono::high_resolution_clock::now();
443 |     vector<float> curInput = prepareImage(img);
444 |     auto t_end_pre = std::chrono::high_resolution_clock::now();
445 |     float total_pre = std::chrono::duration<float, std::milli>(t_end_pre - t_start_pre).count();
446 |     std::cout << "prepare image take: " << total_pre << " ms." << endl;
447 | 
448 |     /*
449 |     inputData.insert(inputData.end(), curInput.begin(), curInput.end());
450 |     batchCount++;
451 |     if (batchCount < BATCH_SIZE && i + 1 < fileNames.size())
452 |         continue;
453 |     */
454 | 
455 |     // DMA the input to the GPU, execute the batch asynchronously, and DMA it back:
456 |     // Asynchronously copy the data from the host input buffer to the device input buffer
457 |     CHECK(cudaMemcpyAsync(buffers[0], curInput.data(), bufferSize[0], cudaMemcpyHostToDevice, stream));
458 | 
459 |     // Run inference
460 |     auto t_start = std::chrono::high_resolution_clock::now();
461 |     context->execute(BATCH_SIZE, buffers);
462 |     auto t_end = std::chrono::high_resolution_clock::now();
463 |     float total = std::chrono::duration<float, std::milli>(t_end - t_start).count();
464 |     std::cout << "Inference take: " << total << " ms." << endl;
465 | 
466 |     CHECK(cudaMemcpyAsync(out1, buffers[1], bufferSize[1], cudaMemcpyDeviceToHost, stream));
467 |     CHECK(cudaMemcpyAsync(out2, buffers[2], bufferSize[2], cudaMemcpyDeviceToHost, stream));
468 |     cudaStreamSynchronize(stream);
469 | 
470 |     float* out = nullptr;
471 |     out = merge(out1, out2, outSize1, outSize2);  // merge() allocates the combined output buffer
472 | 
473 |     // postprocess
474 |     auto t_start_post = std::chrono::high_resolution_clock::now();
475 |     auto boxes = postProcess(img, out);
476 |     auto t_end_post = std::chrono::high_resolution_clock::now();
477 |     float total_post = std::chrono::duration<float, std::milli>(t_end_post - t_start_post).count();
478 |     std::cout << "Postprocess take: " << total_post << " ms." << endl;
479 | 
480 |     //print boxes
481 |     for (int i = 0; i < boxes.size(); ++i)
482 |     {
483 |         cout << boxes[i].prob << ", " << boxes[i].x << ", " << boxes[i].y << ", " << boxes[i].w << ", " << boxes[i].h << endl;
484 |         int x = boxes[i].x,
485 |             y = boxes[i].y,
486 |             w = boxes[i].w,
487 |             h = boxes[i].h;
488 |         cv::Rect rect = { x, y, w, h };
489 |         cv::rectangle(img, rect, cv::Scalar(255, 255, 0), 2);
490 |     }
491 | 
492 |     cout << "\n" << endl;
493 | 
494 | 
495 |     // release the stream and the buffers
496 |     cudaStreamDestroy(stream);
497 |     CHECK(cudaFree(buffers[0]));
498 |     CHECK(cudaFree(buffers[1]));
499 |     CHECK(cudaFree(buffers[2]));
500 | 
501 |     // destroy the engine
502 |     context->destroy();
503 |     engine->destroy();
504 |     runtime->destroy();
505 | 
506 |     cv::imshow("result", img);
507 |     waitKey(0);
508 | 
509 | }
510 | 
511 | int main()
512 | {
513 |     // read imgs list
514 | 
515 |     // create a TensorRT model from the onnx model and serialize it to a stream
516 |     IHostMemory* trtModelStream{ nullptr };
517 | 
518 |     // create and load engine
519 |     fstream existEngine;
520 |     existEngine.open(engineFile, ios::in);
521 |     if (existEngine)
522 |     {
523 |         readTrtFile(engineFile, trtModelStream);
524 |         assert(trtModelStream != nullptr);
525 |     }
526 |     else
527 |     {
528 |         onnxToTRTModel(onnxFile, engineFile, trtModelStream);
529 |         assert(trtModelStream != nullptr);
530 |     }
531 | 
532 |     //onnxToTRTModel(onnxFile, engineFile, trtModelStream);
533 | 
534 |     //do inference
535 |     doInferenceFrieza(trtModelStream);
536 | 
537 |     return 0;
538 | }
--------------------------------------------------------------------------------