├── README.md ├── tf_rfcn_dynamic ├── checkpoint ├── cnn_tools │ ├── __init__.py │ ├── __init__.pyc │ ├── tools.py │ └── tools.pyc ├── psroi_pool_tools │ ├── __init__.py │ ├── __init__.pyc │ ├── psroi_pooling_op.py │ ├── psroi_pooling_op.pyc │ ├── psroi_pooling_op.py~ │ ├── psroi_pooling_op_grad.py │ └── psroi_pooling_op_grad.pyc ├── resnet_rfcn_v2.py └── rpn_tools │ ├── __init__.py │ ├── __init__.pyc │ ├── __init__.py~ │ ├── anchor_target_layer.py │ ├── anchor_target_layer.pyc │ ├── anchor_target_layer.py~ │ ├── fast_rcnn │ ├── __init__.py │ ├── __init__.pyc │ ├── bbox_transform.py │ ├── bbox_transform.pyc │ ├── config.py │ ├── config.pyc │ ├── nms │ │ ├── __init__.py │ │ ├── __init__.pyc │ │ ├── cpu_nms.c │ │ ├── cpu_nms.pyx │ │ ├── cpu_nms.so │ │ ├── gpu_mv.cpp │ │ ├── gpu_mv.hpp │ │ ├── gpu_mv.pyx │ │ ├── gpu_nms.cpp │ │ ├── gpu_nms.hpp │ │ ├── gpu_nms.pyx │ │ ├── gpu_nms.so │ │ ├── mnc_config.py │ │ ├── mnc_config.pyc │ │ ├── mv.so │ │ ├── mv_kernel.cu │ │ ├── nms_kernel.cu │ │ ├── nms_wrapper.py │ │ ├── nms_wrapper.pyc │ │ └── py_cpu_nms.py │ ├── nms_wrapper.py │ ├── nms_wrapper.pyc │ ├── nms_wrapper.py~ │ ├── test.py │ ├── test.pyc │ ├── train.py │ └── train.pyc │ ├── generate.py │ ├── generate_anchors.py │ ├── generate_anchors.pyc │ ├── mnc_data_layer.py │ ├── mnc_data_layer.pyc │ ├── mnc_data_layer.py~ │ ├── nms │ ├── __init__.py │ ├── __init__.pyc │ ├── cpu_nms.c │ ├── cpu_nms.pyx │ ├── cpu_nms.so │ ├── gpu_mv.cpp │ ├── gpu_mv.hpp │ ├── gpu_mv.pyx │ ├── gpu_nms.cpp │ ├── gpu_nms.hpp │ ├── gpu_nms.pyx │ ├── gpu_nms.so │ ├── mnc_config.py │ ├── mnc_config.pyc │ ├── mv.so │ ├── mv_kernel.cu │ ├── nms_kernel.cu │ ├── nms_wrapper.py │ ├── nms_wrapper.pyc │ └── py_cpu_nms.py │ ├── proposal_layer.py │ ├── proposal_layer.pyc │ ├── proposal_layer.py~ │ ├── proposal_target_layer.py │ ├── proposal_target_layer.pyc │ ├── proposal_target_layer.py~ │ └── utils │ ├── __init__.py │ ├── __init__.pyc │ ├── bbox.c │ ├── bbox.pyx │ ├── blob.py │ ├── blob.pyc │ ├── 
cython_bbox.so │ ├── mnc_config.py │ ├── mnc_config.pyc │ ├── timer.py │ ├── timer.pyc │ ├── unmap.py │ ├── unmap.pyc │ ├── vis_seg.py │ └── voc_eval.py └── tf_rfcn_fixed ├── cnn_tools ├── __init__.py ├── __init__.pyc ├── tools.py └── tools.pyc ├── psroi_pool_tools ├── __init__.py ├── __init__.pyc ├── psroi_pooling_op.py ├── psroi_pooling_op.pyc ├── psroi_pooling_op_grad.py └── psroi_pooling_op_grad.pyc ├── resnet_rfcn.py └── rpn_tools ├── __init__.py ├── __init__.pyc ├── __init__.py~ ├── anchor_target_layer.py~ ├── anchor_target_layer_modified.py ├── anchor_target_layer_modified.pyc ├── anchor_target_layer_modified3.py~ ├── fast_rcnn ├── __init__.py ├── __init__.pyc ├── bbox_transform.py ├── bbox_transform.pyc ├── config.py ├── config.pyc ├── nms │ ├── __init__.py │ ├── __init__.pyc │ ├── cpu_nms.c │ ├── cpu_nms.pyx │ ├── cpu_nms.so │ ├── gpu_mv.cpp │ ├── gpu_mv.hpp │ ├── gpu_mv.pyx │ ├── gpu_nms.cpp │ ├── gpu_nms.hpp │ ├── gpu_nms.pyx │ ├── gpu_nms.so │ ├── mnc_config.py │ ├── mnc_config.pyc │ ├── mv.so │ ├── mv_kernel.cu │ ├── nms_kernel.cu │ ├── nms_wrapper.py │ ├── nms_wrapper.pyc │ └── py_cpu_nms.py ├── nms_wrapper.py ├── nms_wrapper.pyc ├── nms_wrapper.py~ ├── test.py ├── test.pyc ├── train.py └── train.pyc ├── generate_anchors.py ├── generate_anchors.pyc ├── my_anchor_target_layer_modified.py ├── my_anchor_target_layer_modified.pyc ├── nms ├── __init__.py ├── __init__.pyc ├── cpu_nms.c ├── cpu_nms.pyx ├── cpu_nms.so ├── gpu_mv.cpp ├── gpu_mv.hpp ├── gpu_mv.pyx ├── gpu_nms.cpp ├── gpu_nms.hpp ├── gpu_nms.pyx ├── gpu_nms.so ├── mnc_config.py ├── mnc_config.pyc ├── mv.so ├── mv_kernel.cu ├── nms_kernel.cu ├── nms_wrapper.py ├── nms_wrapper.pyc └── py_cpu_nms.py ├── proposal_layer_modified.py ├── proposal_layer_modified.pyc ├── proposal_target_layer_modified.py ├── proposal_target_layer_modified.pyc ├── proposal_target_layer_modified.py~ ├── roi_pooling_op_grad.py~ └── utils ├── __init__.py ├── __init__.pyc ├── bbox.c ├── bbox.pyx ├── blob.py ├── blob.pyc ├── 
cython_bbox.so ├── mnc_config.py ├── mnc_config.pyc ├── timer.py ├── timer.pyc ├── unmap.py ├── unmap.pyc ├── utils ├── __init__.py ├── __init__.pyc ├── bbox.pyx ├── blob.py ├── blob.pyc ├── timer.py └── timer.pyc ├── vis_seg.py └── voc_eval.py /README.md: -------------------------------------------------------------------------------- 1 | # tf_rfcn 2 | 3 | This is an experimental tensorflow implementation of R-FCN by: Dai, Jifeng, et al. "R-FCN: Object Detection via Region-based Fully Convolutional Networks." arXiv preprint arXiv:1605.06409 (2016). 4 | 5 | Base trunk is a ResNet (can be 50-101-152 layers). Training is done end-to-end. 6 | 7 | Anchor, proposal, and proposal target layers are based on Ross Girshick's py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn), with some modifications by Orpine in (https://github.com/Orpine/py-R-FCN) for the proposal target layer. 8 | 9 | Training only for the moment, no testing phase yet. 10 | 11 | created by A. Labao under Pros Naval of CVMIG Lab, University of the Philippines 12 | 13 | # Specs 14 | tf_rfcn_fixed : accepts any image (as specified in source folder) and rescales to 600 x 1000, input is JPEG image
15 | tf_rfcn_dynamic : accepts any image size and tensors are adjusted to a size of 600 for the shorter side, input is roidb pkl file. A sample code for making the imdb is [here](https://github.com/alfonsolink/roidb_maker), adopted from the original MNC [code](https://github.com/daijifeng001/MNC) - code has to be modified to your local PASCAL VOC datasets folders. 16 | 17 | # Performance 18 | In terms of end cls accuracy, tf_rfcn_dynamic has an accuracy of 93% after ~70k iterations, with an anchor accuracy of 99% given the PASCAL VOC 2012 SDS dataset, and a 101-layer ResNet trunk. Results are obtained with ImageNet pretrained [weights](https://1drv.ms/f/s!AtPFjf_hfC81kUrPD2Kazg1Gtkz6), which can be called using saver_all_trunkrcnn.restore() -- which sets the base trunk and "rcnn" layers to ImageNet weights ("fc" layers are not included in ImageNet initialization) 19 | 20 | # Requirements 21 | GTX 1070
22 | OpenCV 3.1
23 | Cuda 7.5+
24 | Cudnn 5.0+
25 | tensorflow v0.10+
26 | and psroi_pooling_op.so installed (check my other git repository [here](https://github.com/alfonsolink/tensorflow_user_ops) for the psroi_pooling tensorflow wrapper) 27 | 28 | -------------------------------------------------------------------------------- /tf_rfcn_dynamic/checkpoint: -------------------------------------------------------------------------------- 1 | model_checkpoint_path: "rfcn_end_to_end.ckpt" 2 | all_model_checkpoint_paths: "rfcn_end_to_end.ckpt" 3 | -------------------------------------------------------------------------------- /tf_rfcn_dynamic/cnn_tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_dynamic/cnn_tools/__init__.py -------------------------------------------------------------------------------- /tf_rfcn_dynamic/cnn_tools/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_dynamic/cnn_tools/__init__.pyc -------------------------------------------------------------------------------- /tf_rfcn_dynamic/cnn_tools/tools.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_dynamic/cnn_tools/tools.pyc -------------------------------------------------------------------------------- /tf_rfcn_dynamic/psroi_pool_tools/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 
"""Load the compiled PS-ROI pooling custom op and expose its entry points."""
import os

import tensorflow as tf
import os.path as osp

# Location of the compiled op library.  The default is the original author's
# local bazel output directory; set the PSROI_POOLING_LIB environment variable
# to point at your own build instead of editing this file.
_DEFAULT_LIBRARY = '/home/cvmig_core/tensorflow/bazel-bin/tensorflow/core/user_ops/ps_roipool/psroi_pooling.so'
filename = os.environ.get('PSROI_POOLING_LIB', _DEFAULT_LIBRARY)

# Loading the library registers the ops with TensorFlow and returns a module
# object exposing their Python wrappers.
_psroi_pooling_module = tf.load_op_library(filename)
psroi_pool = _psroi_pooling_module.psroi_pool
psroi_pool_grad = _psroi_pooling_module.psroi_pool_grad
import tensorflow as tf
from tensorflow.python.framework import ops
import psroi_pooling_op


@tf.RegisterShape("PSROIPool")
def _psroi_pool_shape(op):
    """Shape function for the PSROIPool op.

    Args:
      op: The `PSROIPool` `Operation`.  Input 1 holds the ROIs; the
        `output_dim` and `group_size` attrs fix the pooled layout.

    Returns:
      Two identical shapes `[num_rois, output_dim, group_size, group_size]`,
      one for each output of the op.
    """
    num_rois = op.inputs[1].get_shape().as_list()[0]

    output_dim = op.get_attr('output_dim')
    group_size = op.get_attr('group_size')

    # The pooled grid is square: group_size bins along each spatial axis.
    output_shape = tf.TensorShape(
        [num_rois, output_dim, group_size, group_size])
    return [output_shape, output_shape]


@ops.RegisterGradient("PSROIPool")
def _psroi_pool_grad(op, grad, _):
    """Gradient function for the PSROIPool op.

    Args:
      op: The `PSROIPool` `Operation` being differentiated; its inputs and
        outputs are fed to the backward kernel.
      grad: Gradient with respect to the first output of the op.
      _: Gradient with respect to the second output (the mapping channel);
        ignored.

    Returns:
      A two-element list with one entry per op input: the gradient with
      respect to the input data, and `None` for the ROIs.
    """
    data = op.inputs[0]
    rois = op.inputs[1]
    # Second forward output, handed back to the gradient kernel.
    mapping_channel = op.outputs[1]
    spatial_scale = op.get_attr('spatial_scale')

    data_grad = psroi_pooling_op.psroi_pool_grad(
        data, rois, mapping_channel, grad, spatial_scale)

    # One entry per input: the data gets a gradient, the ROIs do not.
    return [data_grad, None]
def process_anno(x):
    """Parse an annotation string into class labels and ground-truth boxes.

    Args:
        x: Annotation text made of whitespace-separated integers, five per
            object: the label followed by four box values.

    Returns:
        A `(labels, gt)` tuple of int64 arrays: `labels` has one entry per
        object and `gt` has shape `(num_objects, 4)`.

    Raises:
        ValueError: if the number of parsed integers is not a multiple of 5.
    """
    # The tf.py_func caller declares both outputs as tf.int64, so pin the
    # dtype instead of relying on the platform-dependent builtin `int`.
    values = np.fromstring(x, dtype=np.int64, sep=" ")
    records = np.reshape(values, (-1, 5))
    labels = records[:, 0]
    gt = records[:, 1:]
    return labels, gt
gt_box = tf.reshape(tf.concat(1, gt_box), [-1,4]) 55 | 56 | 57 | ''' 58 | x, y1_ = im_batch, lb_batch 59 | l_b = tf.to_int64(y1_) 60 | l = tf.one_hot(indices=l_b,depth=num_labels,on_value=1.0,off_value=0.0,axis=-1) 61 | l = tf.cast(l,tf.float32) 62 | 63 | W_conv1 = weight_variable([3,3,3,64]) 64 | b_conv1 = bias_variable([64]) 65 | h_conv1 = tf.nn.relu(conv2d(x, W_conv1) + b_conv1) 66 | 67 | W_conv2 = weight_variable([3,3,64,64]) 68 | b_conv2 = bias_variable([64]) 69 | h_conv2 = tf.nn.relu(conv2d(h_conv1, W_conv2) + b_conv2) 70 | 71 | h_max1 = max_pool_2x2(h_conv2) 72 | 73 | W_conv3 = weight_variable([3,3,64,128]) 74 | b_conv3 = bias_variable([128]) 75 | h_conv3 = tf.nn.relu(conv2d(h_max1, W_conv3) + b_conv3) 76 | 77 | W_conv4 = weight_variable([3,3,128,128]) 78 | b_conv4 = bias_variable([128]) 79 | h_conv4 = tf.nn.relu(conv2d(h_conv3, W_conv4) + b_conv4) 80 | 81 | h_max2 = max_pool_2x2(h_conv4) 82 | 83 | W_conv5 = weight_variable([3,3,128,256]) 84 | b_conv5 = bias_variable([256]) 85 | h_conv5 = tf.nn.relu(conv2d(h_max2, W_conv5) + b_conv5) 86 | 87 | W_conv6 = weight_variable([3,3,256,256]) 88 | b_conv6 = bias_variable([256]) 89 | h_conv6 = tf.nn.relu(conv2d(h_conv5, W_conv6) + b_conv6) 90 | 91 | W_conv7 = weight_variable([3,3,256,256]) 92 | b_conv7 = bias_variable([256]) 93 | h_conv7 = tf.nn.relu(conv2d(h_conv6, W_conv7) + b_conv7) 94 | 95 | h_max3 = max_pool_2x2(h_conv7) 96 | 97 | W_conv7 = weight_variable([3,3,256,512]) 98 | b_conv7 = bias_variable([512]) 99 | h_conv7 = tf.nn.relu(conv2d(h_max3, W_conv7) + b_conv7) 100 | 101 | W_conv8 = weight_variable([3,3,512,512]) 102 | b_conv8 = bias_variable([512]) 103 | h_conv8 = tf.nn.relu(conv2d(h_conv7, W_conv8) + b_conv8) 104 | 105 | W_conv9 = weight_variable([3,3,512,512]) 106 | b_conv9 = bias_variable([512]) 107 | h_conv9 = tf.nn.relu(conv2d(h_conv8, W_conv9) + b_conv9) 108 | 109 | h_max4 = max_pool_2x2(h_conv9) 110 | 111 | W_conv10 = weight_variable([3,3,512,512]) 112 | b_conv10 = bias_variable([512]) 113 | h_conv10 
= tf.nn.relu(conv2d(h_max4, W_conv10) + b_conv10) 114 | 115 | W_conv11 = weight_variable([3,3,512,512]) 116 | b_conv11 = bias_variable([512]) 117 | h_conv11 = tf.nn.relu(conv2d(h_conv10, W_conv11) + b_conv11) 118 | 119 | W_conv12 = weight_variable([3,3,512,512]) 120 | b_conv12 = bias_variable([512]) 121 | h_conv12 = tf.nn.relu(conv2d(h_conv11, W_conv12) + b_conv12) 122 | 123 | #RPN 124 | 125 | W_rpn3 = weight_variable([3,3,512,512]) 126 | b_rpn3 = bias_variable([512]) 127 | h_rpn3 = tf.nn.relu(conv2d(h_conv12, W_rpn3) + b_rpn3) 128 | 129 | W_cls_score = weight_variable([1,1,512,18]) 130 | b_cls_score = bias_variable([18]) 131 | h_cls_score = tf.nn.relu(conv2d(h_rpn3, W_cls_score) + b_cls_score) 132 | 133 | W_bbox_pred = weight_variable([1,1,512,36]) 134 | b_bbox_pred = bias_variable([36]) 135 | h_bbox_pred = tf.nn.relu(conv2d(h_rpn3, W_cls_score) + b_cls_score) 136 | 137 | h_cls_score_reshape = tf.reshape(h_cls_score, [2,-1]) 138 | 139 | 140 | 141 | #print h_cls_score 142 | 143 | h_fc1 = tf.reshape(h_cls_score_reshape, [-1, 14*14*18]) 144 | W_fc1 = weight_variable([14*14*18,2]) 145 | b_fc1 = bias_variable([2]) 146 | y_conv = tf.matmul(h_fc1, W_fc1) + b_fc1 147 | 148 | cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(y_conv, y1_) 149 | loss = tf.reduce_mean(cross_entropy) 150 | train_step = tf.train.AdamOptimizer(1e-4).minimize(loss) 151 | correct_prediction = tf.equal(tf.argmax(y_conv,1), tf.argmax(l,1)) 152 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 153 | 154 | 155 | ''' 156 | init = tf.initialize_all_variables() 157 | with tf.Session() as sess: 158 | sess.run(init) 159 | coord = tf.train.Coordinator() 160 | threads = tf.train.start_queue_runners(sess=sess,coord=coord) 161 | 162 | for i in range(10000000): 163 | print labels.eval() 164 | #sess.run(train_step) 165 | #if i%10 == 0: 166 | #print "Iteration " + str(i) 167 | #print "Loss: " + str(loss.eval()) 168 | #print "Accuracy: " + str(accuracy.eval()) 169 | #print "" 170 | 
import numpy as np


def bbox_transform(ex_rois, gt_rois):
    """Compute the regression targets that map `ex_rois` onto `gt_rois`.

    Both arguments are arrays whose first four columns are x1, y1, x2, y2
    (inclusive pixel coordinates, hence the +1 in widths and heights).

    Returns an array with one row per box holding (dx, dy, dw, dh): centre
    offsets normalised by the source size, and log size ratios.
    """
    def _size_and_center(rois):
        w = rois[:, 2] - rois[:, 0] + 1.0
        h = rois[:, 3] - rois[:, 1] + 1.0
        return w, h, rois[:, 0] + 0.5 * w, rois[:, 1] + 0.5 * h

    src_w, src_h, src_cx, src_cy = _size_and_center(ex_rois)
    dst_w, dst_h, dst_cx, dst_cy = _size_and_center(gt_rois)

    rows = (
        (dst_cx - src_cx) / src_w,
        (dst_cy - src_cy) / src_h,
        np.log(dst_w / src_w),
        np.log(dst_h / src_h),
    )
    return np.vstack(rows).transpose()


def bbox_transform_inv(boxes, deltas):
    """Apply regression `deltas` to `boxes` and return the predicted boxes.

    `deltas` may carry several groups of four columns (dx, dy, dw, dh); each
    group is applied to the same source box.  The result has the shape and
    dtype of `deltas`.  With no boxes, an empty (0, deltas.shape[1]) array is
    returned.
    """
    if boxes.shape[0] == 0:
        return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype)

    boxes = boxes.astype(deltas.dtype, copy=False)

    box_w = boxes[:, 2] - boxes[:, 0] + 1.0
    box_h = boxes[:, 3] - boxes[:, 1] + 1.0
    box_cx = boxes[:, 0] + 0.5 * box_w
    box_cy = boxes[:, 1] + 0.5 * box_h

    # Column vectors so that they broadcast across every group of deltas.
    col_w = box_w[:, np.newaxis]
    col_h = box_h[:, np.newaxis]

    new_cx = deltas[:, 0::4] * col_w + box_cx[:, np.newaxis]
    new_cy = deltas[:, 1::4] * col_h + box_cy[:, np.newaxis]
    new_w = np.exp(deltas[:, 2::4]) * col_w
    new_h = np.exp(deltas[:, 3::4]) * col_h

    out = np.zeros(deltas.shape, dtype=deltas.dtype)
    out[:, 0::4] = new_cx - 0.5 * new_w  # x1
    out[:, 1::4] = new_cy - 0.5 * new_h  # y1
    out[:, 2::4] = new_cx + 0.5 * new_w  # x2
    out[:, 3::4] = new_cy + 0.5 * new_h  # y2
    return out


def clip_boxes(boxes, im_shape):
    """
    Clip boxes to image boundaries.

    `im_shape` is (height, width, ...).  `boxes` is modified in place and also
    returned; x coordinates end up in [0, width - 1] and y coordinates in
    [0, height - 1].
    """
    x_limit = im_shape[1] - 1
    y_limit = im_shape[0] - 1
    # Column offsets 0..3 within each group of four are x1, y1, x2, y2.
    for offset, limit in enumerate((x_limit, y_limit, x_limit, y_limit)):
        boxes[:, offset::4] = np.maximum(
            np.minimum(boxes[:, offset::4], limit), 0)
    return boxes
import numpy as np
cimport numpy as np

cdef inline np.float32_t max(np.float32_t a, np.float32_t b):
    return a if a >= b else b

cdef inline np.float32_t min(np.float32_t a, np.float32_t b):
    return a if a <= b else b

def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh):
    """Greedy non-maximum suppression on the CPU.

    Args:
        dets: float32 array with one detection per row; columns 0-3 are
            x1, y1, x2, y2 and column 4 is the score.
        thresh: a lower-scoring box whose IoU with an already kept box is
            >= thresh is suppressed.

    Returns:
        List of row indices into `dets` that survive, ordered by
        descending score.
    """
    cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0]
    cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1]
    cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2]
    cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3]
    cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4]

    # Coordinates are inclusive, hence the +1 in each extent.
    cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    # argsort() returns intp indices; typing the buffer as np.int_t (C long)
    # fails with a dtype mismatch where long is 32-bit on a 64-bit build.
    cdef np.ndarray[np.intp_t, ndim=1] order = scores.argsort()[::-1]

    cdef int ndets = dets.shape[0]
    # np.int was removed in NumPy 1.24; use intp to match the buffer type.
    cdef np.ndarray[np.intp_t, ndim=1] suppressed = \
            np.zeros((ndets), dtype=np.intp)

    # nominal indices
    cdef int _i, _j
    # sorted indices
    cdef int i, j
    # temp variables for box i's (the box currently under consideration)
    cdef np.float32_t ix1, iy1, ix2, iy2, iarea
    # variables for computing overlap with box j (lower scoring box)
    cdef np.float32_t xx1, yy1, xx2, yy2
    cdef np.float32_t w, h
    cdef np.float32_t inter, ovr

    keep = []
    for _i in range(ndets):
        i = order[_i]
        if suppressed[i] == 1:
            continue
        keep.append(i)
        ix1 = x1[i]
        iy1 = y1[i]
        ix2 = x2[i]
        iy2 = y2[i]
        iarea = areas[i]
        # Only boxes scoring lower than box i can be suppressed by it.
        for _j in range(_i + 1, ndets):
            j = order[_j]
            if suppressed[j] == 1:
                continue
            xx1 = max(ix1, x1[j])
            yy1 = max(iy1, y1[j])
            xx2 = min(ix2, x2[j])
            yy2 = min(iy2, y2[j])
            w = max(0.0, xx2 - xx1 + 1)
            h = max(0.0, yy2 - yy1 + 1)
            inter = w * h
            ovr = inter / (iarea + areas[j] - inter)
            if ovr >= thresh:
                suppressed[j] = 1

    return keep
image_width, np.int32_t box_dim, np.int32_t mask_size, np.int32_t result_num, np.float32_t* result_mask, np.int32_t* result_box, np.int32_t device_id); 9 | 10 | # boxes: n * 4 11 | # masks: n * 1 * 21 * 21 12 | # scores: n * 21 13 | def mv(np.ndarray[np.float32_t, ndim=2] all_boxes, 14 | np.ndarray[np.float32_t, ndim=4] all_masks, 15 | np.ndarray[np.int32_t, ndim=1] candidate_inds, 16 | np.ndarray[np.int32_t, ndim=1] candidate_start, 17 | np.ndarray[np.float32_t, ndim=1] candidate_weights, 18 | np.int32_t image_height, 19 | np.int32_t image_width, 20 | np.int32_t device_id = 0): 21 | cdef int all_box_num = all_boxes.shape[0] 22 | cdef int boxes_dim = all_boxes.shape[1] 23 | cdef int mask_size = all_masks.shape[3] 24 | cdef int candidate_num = candidate_inds.shape[0] 25 | cdef int result_num = candidate_start.shape[0] 26 | cdef np.ndarray[np.float32_t, ndim=4] \ 27 | result_mask = np.zeros((result_num, 1, all_masks.shape[2], all_masks.shape[3]), dtype=np.float32) 28 | cdef np.ndarray[np.int32_t, ndim=2] \ 29 | result_box = np.zeros((result_num, boxes_dim), dtype=np.int32) 30 | _mv(&all_boxes[0, 0], &all_masks[0, 0, 0, 0], all_box_num, &candidate_inds[0], &candidate_start[0], &candidate_weights[0], candidate_num, image_height, image_width, boxes_dim, mask_size, candidate_start.shape[0], &result_mask[0,0,0,0], &result_box[0,0], device_id) 31 | return result_mask, result_box 32 | -------------------------------------------------------------------------------- /tf_rfcn_dynamic/rpn_tools/fast_rcnn/nms/gpu_nms.hpp: -------------------------------------------------------------------------------- 1 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 2 | int boxes_dim, float nms_overlap_thresh, int device_id); 3 | -------------------------------------------------------------------------------- /tf_rfcn_dynamic/rpn_tools/fast_rcnn/nms/gpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # 
# --------------------------------------------------------
# Multitask Network Cascade
# Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn)
# Copyright (c) 2016, Haozhi Qi
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------

import numpy as np
cimport numpy as np

assert sizeof(int) == sizeof(np.int32_t)

cdef extern from "gpu_nms.hpp":
    void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int)

def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh,
            np.int32_t device_id=0):
    """Run non-maximum suppression on the GPU.

    Args:
        dets: float32 array with one detection per row; column 4 is the
            score used for ordering.
        thresh: overlap threshold forwarded to the CUDA `_nms` routine.
        device_id: CUDA device id forwarded to `_nms`.

    Returns:
        List of row indices into `dets` that survive suppression.
    """
    cdef int boxes_num = dets.shape[0]
    cdef int boxes_dim = dets.shape[1]
    cdef int num_out
    cdef np.ndarray[np.int32_t, ndim=1] \
        keep = np.zeros(boxes_num, dtype=np.int32)
    cdef np.ndarray[np.float32_t, ndim=1] \
        scores = dets[:, 4]
    # argsort() returns intp indices; typing the buffer as np.int_t (C long)
    # fails with a dtype mismatch where long is 32-bit on a 64-bit build.
    cdef np.ndarray[np.intp_t, ndim=1] \
        order = scores.argsort()[::-1]
    # Fancy indexing produces a fresh contiguous copy sorted by score, which
    # is what the kernel receives.
    cdef np.ndarray[np.float32_t, ndim=2] \
        sorted_dets = dets[order, :]
    # With no detections, &keep[0] and &sorted_dets[0, 0] would index past
    # the end of empty buffers, so return before touching them.
    if boxes_num == 0:
        return []
    _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id)
    keep = keep[:num_out]
    # `keep` indexes the score-sorted array; map back to rows of `dets`.
    return list(order[keep])
// File: tf_rfcn_dynamic/rpn_tools/fast_rcnn/nms/nms_kernel.cu
// --------------------------------------------------------
// Multitask Network Cascade
// Modified from MATLAB Faster R-CNN (https://github.com/shaoqingren/faster_rcnn)
// Copyright (c) 2016, Haozhi Qi
// Licensed under The MIT License [see LICENSE for details]
// --------------------------------------------------------

#include "gpu_nms.hpp"
#include <cstring>
#include <iostream>
#include <vector>

// Evaluates a CUDA runtime call and prints the error string on failure.
// Errors are reported only; execution continues.
#define CUDA_CHECK(condition) \
  /* Code block avoids redefinition of cudaError_t error */ \
  do { \
    cudaError_t error = condition; \
    if (error != cudaSuccess) { \
      std::cout << cudaGetErrorString(error) << std::endl; \
    } \
  } while (0)

// Integer division rounded up.
#define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))

// One thread per bit of an unsigned long long mask word.
int const threadsPerBlock = sizeof(unsigned long long) * 8;

// Intersection-over-union of two boxes given as (x1, y1, x2, y2, ...),
// using inclusive pixel coordinates (hence the "+ 1" terms).
__device__ inline float devIoU(float const * const a, float const * const b) {
  float left = max(a[0], b[0]), right = min(a[2], b[2]);
  float top = max(a[1], b[1]), bottom = min(a[3], b[3]);
  float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f);
  float interS = width * height;
  float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
  float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
  return interS / (Sa + Sb - interS);
}

// Each thread block compares one "row" tile of boxes against one "column"
// tile (both at most threadsPerBlock boxes).  The thread owning row box r
// writes one mask word into dev_mask[r * col_blocks + col_start]; bit i of
// that word is set when r overlaps box i of the column tile by more than
// nms_overlap_thresh.  Within the diagonal tile only later boxes are tested.
// NOTE: boxes are addressed with a fixed stride of 5 floats.
__global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
                           const float *dev_boxes, unsigned long long *dev_mask) {
  const int row_start = blockIdx.y;
  const int col_start = blockIdx.x;

  // if (row_start > col_start) return;

  const int row_size =
        min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
  const int col_size =
        min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);

  // Stage the column tile in shared memory so every row thread can read it.
  __shared__ float block_boxes[threadsPerBlock * 5];
  if (threadIdx.x < col_size) {
    block_boxes[threadIdx.x * 5 + 0] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
    block_boxes[threadIdx.x * 5 + 1] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
    block_boxes[threadIdx.x * 5 + 2] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
    block_boxes[threadIdx.x * 5 + 3] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
    block_boxes[threadIdx.x * 5 + 4] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
  }
  __syncthreads();

  if (threadIdx.x < row_size) {
    const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
    const float *cur_box = dev_boxes + cur_box_idx * 5;
    int i = 0;
    unsigned long long t = 0;
    int start = 0;
    if (row_start == col_start) {
      // Diagonal tile: skip the box itself and everything before it.
      start = threadIdx.x + 1;
    }
    for (i = start; i < col_size; i++) {
      if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
        t |= 1ULL << i;
      }
    }
    const int col_blocks = DIVUP(n_boxes, threadsPerBlock);
    dev_mask[cur_box_idx * col_blocks + col_start] = t;
  }
}

// Switches the current CUDA device to device_id if it is not already active.
void _set_device(int device_id) {
  int current_device;
  CUDA_CHECK(cudaGetDevice(&current_device));
  if (current_device == device_id) {
    return;
  }
  // The call to cudaSetDevice must come before any calls to Get, which
  // may perform initialization using the GPU.
  CUDA_CHECK(cudaSetDevice(device_id));
}

// Greedy NMS over boxes_host, processed in the order given.
//
//   keep_out   receives the indices of the kept boxes; up to boxes_num
//              entries may be written.
//   num_out    receives the number of kept boxes.
//   boxes_host boxes_num * boxes_dim floats.  NOTE(review): the kernel reads
//              boxes with a stride of 5 floats, so boxes_dim is presumably
//              expected to be 5 -- confirm against callers.
void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
          int boxes_dim, float nms_overlap_thresh, int device_id) {
  _set_device(device_id);

  float* boxes_dev = NULL;
  unsigned long long* mask_dev = NULL;

  const int col_blocks = DIVUP(boxes_num, threadsPerBlock);

  CUDA_CHECK(cudaMalloc(&boxes_dev,
                        boxes_num * boxes_dim * sizeof(float)));
  CUDA_CHECK(cudaMemcpy(boxes_dev,
                        boxes_host,
                        boxes_num * boxes_dim * sizeof(float),
                        cudaMemcpyHostToDevice));

  CUDA_CHECK(cudaMalloc(&mask_dev,
                        boxes_num * col_blocks * sizeof(unsigned long long)));

  dim3 blocks(DIVUP(boxes_num, threadsPerBlock),
              DIVUP(boxes_num, threadsPerBlock));
  dim3 threads(threadsPerBlock);
  nms_kernel<<<blocks, threads>>>(boxes_num,
                                  nms_overlap_thresh,
                                  boxes_dev,
                                  mask_dev);

  std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
  CUDA_CHECK(cudaMemcpy(&mask_host[0],
                        mask_dev,
                        sizeof(unsigned long long) * boxes_num * col_blocks,
                        cudaMemcpyDeviceToHost));

  // remv accumulates, one bit per box, the boxes suppressed so far.
  std::vector<unsigned long long> remv(col_blocks);
  memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);

  int num_to_keep = 0;
  for (int i = 0; i < boxes_num; i++) {
    int nblock = i / threadsPerBlock;
    int inblock = i % threadsPerBlock;

    if (!(remv[nblock] & (1ULL << inblock))) {
      keep_out[num_to_keep++] = i;
      // Box i is kept: everything it overlaps becomes suppressed.
      unsigned long long *p = &mask_host[0] + i * col_blocks;
      for (int j = nblock; j < col_blocks; j++) {
        remv[j] |= p[j];
      }
    }
  }
  *num_out = num_to_keep;

  CUDA_CHECK(cudaFree(boxes_dev));
  CUDA_CHECK(cudaFree(mask_dev));
}
# File: tf_rfcn_dynamic/rpn_tools/fast_rcnn/nms/nms_wrapper.py
# --------------------------------------------------------
# Multitask Network Cascade
# Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn)
# Copyright (c) 2016, Haozhi Qi
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------

from mnc_config import cfg
from gpu_nms import gpu_nms
from cpu_nms import cpu_nms


def nms(dets, thresh):
    """Dispatch to either CPU or GPU NMS implementations.

    Returns an empty list when ``dets`` has no rows; otherwise forwards to
    ``gpu_nms`` (when ``cfg.USE_GPU_NMS`` is set) or ``cpu_nms``.
    """
    if dets.shape[0] == 0:
        return []
    if cfg.USE_GPU_NMS:
        return gpu_nms(dets, thresh, device_id=cfg.GPU_ID)
    return cpu_nms(dets, thresh)


def apply_nms(all_boxes, thresh):
    """Apply non-maximum suppression to all predicted boxes output by the
    test_net method.

    ``all_boxes[cls][image]`` is either an empty list or an array of
    detections.  The result has the same nesting; entries with no surviving
    detection stay an empty list.
    """
    num_classes = len(all_boxes)
    num_images = len(all_boxes[0])
    nms_boxes = [[[] for _ in range(num_images)]
                 for _ in range(num_classes)]
    for cls_ind in range(num_classes):
        for im_ind in range(num_images):
            dets = all_boxes[cls_ind][im_ind]
            # len() works for both the [] placeholder and an ndarray;
            # comparing an ndarray with [] is not a reliable emptiness test.
            if len(dets) == 0:
                continue
            keep = nms(dets, thresh)
            if len(keep) == 0:
                continue
            nms_boxes[cls_ind][im_ind] = dets[keep, :].copy()
    return nms_boxes


def apply_nms_mask(all_boxes, all_masks, thresh):
    """Like ``apply_nms``, but also filters the masks of each entry.

    ``all_masks`` is indexed like ``all_boxes``; the rows kept for the boxes
    are the rows kept for the masks.  Returns ``(nms_boxes, nms_masks)``.
    """
    num_classes = len(all_boxes)
    num_images = len(all_boxes[0])
    nms_boxes = [[[] for _ in range(num_images)]
                 for _ in range(num_classes)]
    nms_masks = [[[] for _ in range(num_images)]
                 for _ in range(num_classes)]
    for cls_ind in range(num_classes):
        for im_ind in range(num_images):
            dets = all_boxes[cls_ind][im_ind]
            masks = all_masks[cls_ind][im_ind]
            if len(dets) == 0:
                continue
            keep = nms(dets, thresh)
            if len(keep) == 0:
                continue
            nms_boxes[cls_ind][im_ind] = dets[keep, :].copy()
            nms_masks[cls_ind][im_ind] = masks[keep, :].copy()
    return nms_boxes, nms_masks


def apply_nms_mask_single(box, mask, thresh):
    """Run NMS on one set of boxes and filter ``mask`` with the same rows.

    The inputs are returned unchanged when ``box`` is empty or when NMS
    keeps nothing.
    """
    if len(box) == 0:
        return box, mask
    keep = nms(box, thresh)
    if len(keep) == 0:
        return box, mask
    return box[keep, :].copy(), mask[keep, :].copy()
# File: tf_rfcn_dynamic/rpn_tools/fast_rcnn/nms/py_cpu_nms.py
# --------------------------------------------------------
# Multitask Network Cascade
# Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn)
# Copyright (c) 2016, Haozhi Qi
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------

import numpy as np


def py_cpu_nms(dets, thresh):
    """Pure Python NMS baseline.

    Args:
        dets: array-like of rows ``(x1, y1, x2, y2, score, ...)``.  Box
            coordinates are treated as inclusive (areas use ``+ 1``).
        thresh: a box is dropped when its IoU with an already kept,
            higher-scoring box is strictly greater than this value.

    Returns:
        list of kept row indices, ordered by decreasing score.  Empty input
        yields an empty list.
    """
    dets = np.asarray(dets)
    if dets.size == 0:
        return []

    x1 = dets[:, 0]
    y1 = dets[:, 1]
    x2 = dets[:, 2]
    y2 = dets[:, 3]
    scores = dets[:, 4]

    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        # The best remaining box is always kept ...
        i = order[0]
        keep.append(i)
        rest = order[1:]

        # ... and compared against every other remaining box.
        xx1 = np.maximum(x1[i], x1[rest])
        yy1 = np.maximum(y1[i], y1[rest])
        xx2 = np.minimum(x2[i], x2[rest])
        yy2 = np.minimum(y2[i], y2[rest])

        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        ovr = inter / (areas[i] + areas[rest] - inter)

        # Only boxes that do not overlap too much stay in the queue.
        order = rest[ovr <= thresh]

    return keep
# File: tf_rfcn_dynamic/rpn_tools/fast_rcnn/nms_wrapper.py
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------

from config import cfg
from nms.gpu_nms import gpu_nms
from nms.cpu_nms import cpu_nms


def nms(dets, thresh, force_cpu=False):
    """Dispatch to either CPU or GPU NMS implementations.

    An input without rows short-circuits to an empty list.  The GPU path is
    taken only when ``cfg.USE_GPU_NMS`` is set and ``force_cpu`` is false.
    """
    if dets.shape[0] == 0:
        return []

    use_gpu = cfg.USE_GPU_NMS and not force_cpu
    if not use_gpu:
        return cpu_nms(dets, thresh)
    return gpu_nms(dets, thresh, device_id=cfg.GPU_ID)
# File: tf_rfcn_dynamic/rpn_tools/fast_rcnn/train.py
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------

"""Train a Fast R-CNN network."""

import caffe
from fast_rcnn.config import cfg
import roi_data_layer.roidb as rdl_roidb
from utils.timer import Timer
import numpy as np
import os

from caffe.proto import caffe_pb2
import google.protobuf as pb2


class SolverWrapper(object):
    """A simple wrapper around Caffe's solver.

    This wrapper gives us control over the snapshotting process, which we
    use to unnormalize the learned bounding-box regression weights.
    """

    def __init__(self, solver_prototxt, roidb, output_dir,
                 pretrained_model=None):
        """Initialize the SolverWrapper.

        Builds the SGD solver from ``solver_prototxt``, optionally copies
        weights from ``pretrained_model``, parses the solver parameters and
        hands ``roidb`` to the first layer of the net.
        """
        self.output_dir = output_dir

        if (cfg.TRAIN.HAS_RPN and cfg.TRAIN.BBOX_REG and
                cfg.TRAIN.BBOX_NORMALIZE_TARGETS):
            # RPN can only use precomputed normalization because there are no
            # fixed statistics to compute a priori
            assert cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED

        if cfg.TRAIN.BBOX_REG:
            print('Computing bounding-box regression targets...')
            self.bbox_means, self.bbox_stds = \
                rdl_roidb.add_bbox_regression_targets(roidb)
            print('done')

        self.solver = caffe.SGDSolver(solver_prototxt)
        if pretrained_model is not None:
            print('Loading pretrained model '
                  'weights from {:s}'.format(pretrained_model))
            self.solver.net.copy_from(pretrained_model)

        self.solver_param = caffe_pb2.SolverParameter()
        with open(solver_prototxt, 'rt') as f:
            pb2.text_format.Merge(f.read(), self.solver_param)

        self.solver.net.layers[0].set_roidb(roidb)

    def snapshot(self):
        """Take a snapshot of the network after unnormalizing the learned
        bounding-box regression weights. This enables easy use at test-time.

        Returns the path of the written ``.caffemodel`` file.
        """
        net = self.solver.net

        scale_bbox_params = (cfg.TRAIN.BBOX_REG and
                             cfg.TRAIN.BBOX_NORMALIZE_TARGETS and
                             'bbox_pred' in net.params)

        # The file name does not depend on the weights, so it is built
        # before the net is temporarily modified.
        infix = ('_' + cfg.TRAIN.SNAPSHOT_INFIX
                 if cfg.TRAIN.SNAPSHOT_INFIX != '' else '')
        filename = (self.solver_param.snapshot_prefix + infix +
                    '_iter_{:d}'.format(self.solver.iter) + '.caffemodel')
        filename = os.path.join(self.output_dir, filename)

        if scale_bbox_params:
            # save original values
            orig_0 = net.params['bbox_pred'][0].data.copy()
            orig_1 = net.params['bbox_pred'][1].data.copy()

            # scale and shift with bbox reg unnormalization; then save snapshot
            net.params['bbox_pred'][0].data[...] = \
                (net.params['bbox_pred'][0].data *
                 self.bbox_stds[:, np.newaxis])
            net.params['bbox_pred'][1].data[...] = \
                (net.params['bbox_pred'][1].data *
                 self.bbox_stds + self.bbox_means)

        try:
            net.save(str(filename))
        finally:
            # Restore the training weights even when saving fails, so a
            # failed snapshot cannot leave the net in the rescaled state.
            if scale_bbox_params:
                net.params['bbox_pred'][0].data[...] = orig_0
                net.params['bbox_pred'][1].data[...] = orig_1

        print('Wrote snapshot to: {:s}'.format(filename))
        return filename

    def train_model(self, max_iters):
        """Network training loop.

        Steps the solver one iteration at a time until ``max_iters``,
        snapshotting every ``cfg.TRAIN.SNAPSHOT_ITERS`` iterations and once
        more at the end if the last iteration was not snapshotted.
        Returns the list of snapshot paths.
        """
        last_snapshot_iter = -1
        timer = Timer()
        model_paths = []
        while self.solver.iter < max_iters:
            # Make one SGD update
            timer.tic()
            self.solver.step(1)
            timer.toc()
            if self.solver.iter % (10 * self.solver_param.display) == 0:
                print('speed: {:.3f}s / iter'.format(timer.average_time))

            if self.solver.iter % cfg.TRAIN.SNAPSHOT_ITERS == 0:
                last_snapshot_iter = self.solver.iter
                model_paths.append(self.snapshot())

        if last_snapshot_iter != self.solver.iter:
            model_paths.append(self.snapshot())
        return model_paths


def get_training_roidb(imdb):
    """Returns a roidb (Region of Interest database) for use in training."""
    if cfg.TRAIN.USE_FLIPPED:
        print('Appending horizontally-flipped training examples...')
        imdb.append_flipped_images()
        print('done')

    print('Preparing training data...')
    rdl_roidb.prepare_roidb(imdb)
    print('done')

    return imdb.roidb


def filter_roidb(roidb):
    """Remove roidb entries that have no usable RoIs."""

    def is_valid(entry):
        # Valid images have:
        #   (1) At least one foreground RoI OR
        #   (2) At least one background RoI
        overlaps = entry['max_overlaps']
        # find boxes with sufficient overlap
        fg_inds = np.where(overlaps >= cfg.TRAIN.FG_THRESH)[0]
        # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
        bg_inds = np.where((overlaps < cfg.TRAIN.BG_THRESH_HI) &
                           (overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
        # image is only valid if such boxes exist
        return len(fg_inds) > 0 or len(bg_inds) > 0

    num = len(roidb)
    filtered_roidb = [entry for entry in roidb if is_valid(entry)]
    num_after = len(filtered_roidb)
    print('Filtered {} roidb entries: {} -> {}'.format(num - num_after,
                                                       num, num_after))
    return filtered_roidb


def train_net(solver_prototxt, roidb, output_dir,
              pretrained_model=None, max_iters=40000):
    """Train a Fast R-CNN network.

    Filters ``roidb``, runs the training loop and returns the snapshot
    paths produced by it.
    """
    roidb = filter_roidb(roidb)
    sw = SolverWrapper(solver_prototxt, roidb, output_dir,
                       pretrained_model=pretrained_model)

    print('Solving...')
    model_paths = sw.train_model(max_iters)
    print('done solving')
    return model_paths
# File: tf_rfcn_dynamic/rpn_tools/generate.py
# --------------------------------------------------------
# Faster R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------

from fast_rcnn.config import cfg
from utils.blob import im_list_to_blob
from utils.timer import Timer
import numpy as np
import cv2


def _vis_proposals(im, dets, thresh=0.5):
    """Draw detected bounding boxes.

    NOTE(review): this function uses ``plt`` but the module never imports
    matplotlib, so calling it raises NameError.  Its only call site is the
    disabled debug branch in ``imdb_proposals``.
    """
    inds = np.where(dets[:, -1] >= thresh)[0]
    if len(inds) == 0:
        return

    class_name = 'obj'
    im = im[:, :, (2, 1, 0)]
    fig, ax = plt.subplots(figsize=(12, 12))
    ax.imshow(im, aspect='equal')
    for i in inds:
        bbox = dets[i, :4]
        score = dets[i, -1]

        ax.add_patch(
            plt.Rectangle((bbox[0], bbox[1]),
                          bbox[2] - bbox[0],
                          bbox[3] - bbox[1], fill=False,
                          edgecolor='red', linewidth=3.5)
            )
        ax.text(bbox[0], bbox[1] - 2,
                '{:s} {:.3f}'.format(class_name, score),
                bbox=dict(facecolor='blue', alpha=0.5),
                fontsize=14, color='white')

    ax.set_title(('{} detections with '
                  'p({} | box) >= {:.1f}').format(class_name, class_name,
                                                  thresh),
                 fontsize=14)
    plt.axis('off')
    plt.tight_layout()
    plt.draw()


def _get_image_blob(im):
    """Converts an image into a network input.

    Arguments:
        im (ndarray): a color image in BGR order

    Returns:
        blob (ndarray): a data blob holding the single rescaled image
        im_info (ndarray): 1 x 3 array ``[height, width, scale]`` of the
            rescaled image, where ``scale`` is relative to ``im``
    """
    im_orig = im.astype(np.float32, copy=True)
    im_orig -= cfg.PIXEL_MEANS

    im_shape = im_orig.shape
    im_size_min = np.min(im_shape[0:2])
    im_size_max = np.max(im_shape[0:2])

    processed_ims = []

    assert len(cfg.TEST.SCALES) == 1
    target_size = cfg.TEST.SCALES[0]

    im_scale = float(target_size) / float(im_size_min)
    # Prevent the biggest axis from being more than MAX_SIZE
    if np.round(im_scale * im_size_max) > cfg.TEST.MAX_SIZE:
        im_scale = float(cfg.TEST.MAX_SIZE) / float(im_size_max)
    im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale,
                    interpolation=cv2.INTER_LINEAR)
    im_info = np.hstack((im.shape[:2], im_scale))[np.newaxis, :]
    processed_ims.append(im)

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)

    return blob, im_info


def im_proposals(net, im):
    """Generate RPN proposals on a single image.

    Returns ``(boxes, scores)``; the boxes are divided by the resize scale
    so they refer to the coordinates of the input image.
    """
    blobs = {}
    blobs['data'], blobs['im_info'] = _get_image_blob(im)
    net.blobs['data'].reshape(*(blobs['data'].shape))
    net.blobs['im_info'].reshape(*(blobs['im_info'].shape))
    blobs_out = net.forward(
            data=blobs['data'].astype(np.float32, copy=False),
            im_info=blobs['im_info'].astype(np.float32, copy=False))

    scale = blobs['im_info'][0, 2]
    # Column 0 of 'rois' is dropped; the remaining columns are the box.
    boxes = blobs_out['rois'][:, 1:].copy() / scale
    scores = blobs_out['scores'].copy()
    return boxes, scores


def imdb_proposals(net, imdb):
    """Generate RPN proposals on all images in an imdb.

    Returns a list with one array of proposal boxes per image.
    """

    _t = Timer()
    imdb_boxes = [[] for _ in range(imdb.num_images)]
    for i in range(imdb.num_images):
        im = cv2.imread(imdb.image_path_at(i))
        _t.tic()
        imdb_boxes[i], scores = im_proposals(net, im)
        _t.toc()
        print('im_proposals: {:d}/{:d} {:.3f}s'
              .format(i + 1, imdb.num_images, _t.average_time))
        if 0:
            # Disabled debug visualization (needs matplotlib as ``plt``).
            dets = np.hstack((imdb_boxes[i], scores))
            # from IPython import embed; embed()
            _vis_proposals(im, dets[:3, :], thresh=0.9)
            plt.show()

    return imdb_boxes
# File: tf_rfcn_dynamic/rpn_tools/generate_anchors.py
# --------------------------------------------------------
# Faster R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick and Sean Bell
# --------------------------------------------------------

import numpy as np

# Verify that we compute the same anchors as Shaoqing's matlab implementation:
#
#    >> load output/rpn_cachedir/faster_rcnn_VOC2007_ZF_stage1_rpn/anchors.mat
#    >> anchors
#
#    anchors =
#
#       -83   -39   100    56
#      -175   -87   192   104
#      -359  -183   376   200
#       -55   -55    72    72
#      -119  -119   136   136
#      -247  -247   264   264
#       -35   -79    52    96
#       -79  -167    96   184
#      -167  -343   184   360

#array([[ -83.,  -39.,  100.,   56.],
#       [-175.,  -87.,  192.,  104.],
#       [-359., -183.,  376.,  200.],
#       [ -55.,  -55.,   72.,   72.],
#       [-119., -119.,  136.,  136.],
#       [-247., -247.,  264.,  264.],
#       [ -35.,  -79.,   52.,   96.],
#       [ -79., -167.,   96.,  184.],
#       [-167., -343.,  184.,  360.]])


def generate_anchors(base_size=16, ratios=(0.5, 1, 2),
                     scales=2**np.arange(3, 6)):
    """
    Generate anchor (reference) windows by enumerating aspect ratios X
    scales wrt a reference (0, 0, base_size - 1, base_size - 1) window.

    ``ratios`` and ``scales`` may be any 1-D array-like.  The result has one
    row ``(x1, y1, x2, y2)`` per (ratio, scale) pair, grouped by ratio.
    With the default arguments the rows are the anchors listed above
    shifted by -1 (0-based instead of MATLAB's 1-based coordinates).
    """
    base_anchor = np.array([1, 1, base_size, base_size]) - 1
    ratio_anchors = _ratio_enum(base_anchor, ratios)
    anchors = np.vstack([_scale_enum(ratio_anchor, scales)
                         for ratio_anchor in ratio_anchors])
    return anchors


def _whctrs(anchor):
    """
    Return width, height, x center, and y center for an anchor (window).
    """
    w = anchor[2] - anchor[0] + 1
    h = anchor[3] - anchor[1] + 1
    x_ctr = anchor[0] + 0.5 * (w - 1)
    y_ctr = anchor[1] + 0.5 * (h - 1)
    return w, h, x_ctr, y_ctr


def _mkanchors(ws, hs, x_ctr, y_ctr):
    """
    Given a vector of widths (ws) and heights (hs) around a center
    (x_ctr, y_ctr), output a set of anchors (windows).
    """
    ws = ws[:, np.newaxis]
    hs = hs[:, np.newaxis]
    anchors = np.hstack((x_ctr - 0.5 * (ws - 1),
                         y_ctr - 0.5 * (hs - 1),
                         x_ctr + 0.5 * (ws - 1),
                         y_ctr + 0.5 * (hs - 1)))
    return anchors


def _ratio_enum(anchor, ratios):
    """
    Enumerate a set of anchors for each aspect ratio wrt an anchor.
    """
    # Converting up front keeps the arithmetic below independent of whether
    # the caller passed a list, a tuple or an array.
    ratios = np.asarray(ratios, dtype=np.float64)
    w, h, x_ctr, y_ctr = _whctrs(anchor)
    size = w * h
    size_ratios = size / ratios
    ws = np.round(np.sqrt(size_ratios))
    hs = np.round(ws * ratios)
    anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
    return anchors


def _scale_enum(anchor, scales):
    """
    Enumerate a set of anchors for each scale wrt an anchor.
    """
    scales = np.asarray(scales)
    w, h, x_ctr, y_ctr = _whctrs(anchor)
    ws = w * scales
    hs = h * scales
    anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
    return anchors


if __name__ == '__main__':
    import time
    t = time.time()
    a = generate_anchors()
    print(time.time() - t)
    print(a)
    from IPython import embed
    embed()
# File: tf_rfcn_dynamic/rpn_tools/mnc_data_layer.py
import cv2
import numpy as np
import yaml

#import caffe
from mnc_config import cfg
from utils.blob import prep_im_for_blob, im_list_to_blob

MNC_MODE = False


class MNCDataLayer():
    """
    Provide image, image w/h/scale, gt boxes/masks and mask info to upper layers
    """

    def setup(self, roidb):
        """Attach ``roidb`` and record the index of every output blob."""
        self._cur = 0
        self.set_roidb(roidb)
        self._num_classes = 21
        self._name_to_top_map = {}
        # data blob: holds a batch of N images, each with 3 channels
        self._name_to_top_map['data'] = 0
        # ``assert(cond, msg)`` asserts a non-empty tuple and can never
        # fail; the statement form below checks the flag itself.
        assert cfg.TRAIN.HAS_RPN, 'Use RPN for this project'
        self._name_to_top_map['im_info'] = 1
        self._name_to_top_map['gt_boxes'] = 2
        if MNC_MODE:
            # No ``top`` blobs exist in this class (the other reshapes were
            # already commented out), so only the name mapping is recorded.
            self._name_to_top_map['gt_masks'] = 3
            self._name_to_top_map['mask_info'] = 4

    def forward(self):
        """Return the blobs of the next minibatch as a dict."""
        blobs = self._get_next_minibatch()
        return blobs

    def backward(self, top, propagate_down, bottom):
        """This layer does not propagate gradients."""
        pass

    def set_roidb(self, roidb):
        """Set the roidb to be used by this layer during training."""
        self._roidb = roidb
        self._shuffle_roidb_inds()

    def set_maskdb(self, maskdb):
        """Set the maskdb used in MNC mode and reshuffle the roidb order."""
        self._maskdb = maskdb
        self._shuffle_roidb_inds()

    def _shuffle_roidb_inds(self):
        """Randomly permute the training roidb."""
        if cfg.TRAIN.ASPECT_GROUPING:
            widths = np.array([r['width'] for r in self._roidb])
            heights = np.array([r['height'] for r in self._roidb])
            horz = (widths >= heights)
            vert = np.logical_not(horz)
            horz_inds = np.where(horz)[0]
            vert_inds = np.where(vert)[0]
            inds = np.hstack((
                np.random.permutation(horz_inds),
                np.random.permutation(vert_inds)))
            # Indices are shuffled in pairs.  With an odd number of entries
            # the last index is held out and appended afterwards, so the
            # reshape cannot fail and no entry is lost.
            num_paired = len(inds) - len(inds) % 2
            leftover = inds[num_paired:]
            pairs = np.reshape(inds[:num_paired], (-1, 2))
            row_perm = np.random.permutation(np.arange(pairs.shape[0]))
            shuffled = np.reshape(pairs[row_perm, :], (-1,))
            self._perm = np.hstack((shuffled, leftover))
        else:
            self._perm = np.random.permutation(np.arange(len(self._roidb)))
        self._cur = 0

    def _get_image_blob(self, roidb, scale_inds):
        """Builds an input blob from the image of one roidb entry at the
        specified scale.

        ``roidb`` is a single entry (a dict with 'image' and 'flipped');
        ``scale_inds[0]`` indexes ``cfg.TRAIN.SCALES``.
        Returns ``(blob, im_scales)`` with ``im_scales`` a one-element list.
        """
        num_images = 1  # len(roidb)
        processed_ims = []
        im_scales = []
        for i in range(num_images):
            im = cv2.imread(roidb['image'])
            if im is None:
                # cv2.imread signals failure with None rather than raising.
                raise IOError(
                    'Failed to read image: {}'.format(roidb['image']))
            if roidb['flipped']:
                im = im[:, ::-1, :]
            target_size = cfg.TRAIN.SCALES[scale_inds[i]]
            im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                                            cfg.TRAIN.MAX_SIZE)
            im_scales.append(im_scale)
            processed_ims.append(im)
        # Create a blob to hold the input images
        blob = im_list_to_blob(processed_ims)
        return blob, im_scales

    def _get_next_minibatch(self):
        """
        Return the blobs to be used for the next minibatch.

        The dict holds 'data', 'gt_boxes' (scaled boxes with the class in
        the last column) and 'im_info'; in MNC mode also 'gt_masks' and
        'mask_info'.
        """
        assert cfg.TRAIN.IMS_PER_BATCH == 1, 'Only single batch forwarding is supported'

        # Reshuffle only once the permutation is exhausted.  Using ``>=``
        # here reshuffled one step early and never visited the last entry
        # of each permutation.
        if self._cur + cfg.TRAIN.IMS_PER_BATCH > len(self._roidb):
            self._shuffle_roidb_inds()
        db_inds = self._perm[self._cur]
        self._cur += 1
        roidb = self._roidb[db_inds]

        random_scale_inds = np.random.randint(0, high=len(cfg.TRAIN.SCALES), size=1)
        im_blob, im_scales = self._get_image_blob(roidb, random_scale_inds)

        # Rows whose class is 0 are left out of the ground truth.
        gt_label = np.where(roidb['gt_classes'] != 0)[0]
        gt_boxes = np.hstack((roidb['boxes'][gt_label, :] * im_scales[0],
                              roidb['gt_classes'][gt_label, np.newaxis])).astype(np.float32)
        blobs = {
            'data': im_blob,
            'gt_boxes': gt_boxes,
            'im_info': np.array([[im_blob.shape[2], im_blob.shape[3], im_scales[0]]], dtype=np.float32)
        }

        if MNC_MODE:
            maskdb = self._maskdb[db_inds]
            mask_list = maskdb['gt_masks']
            mask_max_x = maskdb['mask_max'][0]
            mask_max_y = maskdb['mask_max'][1]
            # Masks are copied into the top-left corner of a common
            # zero-filled canvas; mask_info keeps each true (height, width).
            gt_masks = np.zeros((len(mask_list), mask_max_y, mask_max_x))
            mask_info = np.zeros((len(mask_list), 2))
            for j, mask in enumerate(mask_list):
                mask_x = mask.shape[1]
                mask_y = mask.shape[0]
                gt_masks[j, 0:mask_y, 0:mask_x] = mask
                mask_info[j, 0] = mask_y
                mask_info[j, 1] = mask_x
            blobs['gt_masks'] = gt_masks
            blobs['mask_info'] = mask_info

        return blobs
blobs and copy them into this layer's top blob vector.""" 44 | blobs = self._get_next_minibatch() 45 | return blobs 46 | #for blob_name, blob in blobs.iteritems(): 47 | #top_ind = self._name_to_top_map[blob_name] 48 | # Reshape net's input blobs 49 | #top[top_ind].reshape(*blob.shape) 50 | # Copy data into net's input blobs 51 | #top[top_ind].data[...] = blob.astype(np.float32, copy=False) 52 | 53 | 54 | def backward(self, top, propagate_down, bottom): 55 | """This layer does not propagate gradients.""" 56 | pass 57 | 58 | def set_roidb(self, roidb): 59 | """Set the roidb to be used by this layer during training.""" 60 | self._roidb = roidb 61 | self._shuffle_roidb_inds() 62 | 63 | def set_maskdb(self, maskdb): 64 | self._maskdb = maskdb 65 | self._shuffle_roidb_inds() 66 | 67 | def _shuffle_roidb_inds(self): 68 | """Randomly permute the training roidb.""" 69 | if cfg.TRAIN.ASPECT_GROUPING: 70 | widths = np.array([r['width'] for r in self._roidb]) 71 | heights = np.array([r['height'] for r in self._roidb]) 72 | horz = (widths >= heights) 73 | vert = np.logical_not(horz) 74 | horz_inds = np.where(horz)[0] 75 | vert_inds = np.where(vert)[0] 76 | inds = np.hstack(( 77 | np.random.permutation(horz_inds), 78 | np.random.permutation(vert_inds))) 79 | inds = np.reshape(inds, (-1, 2)) 80 | row_perm = np.random.permutation(np.arange(inds.shape[0])) 81 | inds = np.reshape(inds[row_perm, :], (-1,)) 82 | self._perm = inds 83 | else: 84 | self._perm = np.random.permutation(np.arange(len(self._roidb))) 85 | self._cur = 0 86 | 87 | def _get_image_blob(self, roidb, scale_inds): 88 | """Builds an input blob from the images in the roidb at the specified 89 | scales. 
90 | """ 91 | num_images = 1 # len(roidb) 92 | processed_ims = [] 93 | im_scales = [] 94 | for i in xrange(num_images): 95 | im = cv2.imread(roidb['image']) 96 | if roidb['flipped']: 97 | im = im[:, ::-1, :] 98 | target_size = cfg.TRAIN.SCALES[scale_inds[i]] 99 | im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size, 100 | cfg.TRAIN.MAX_SIZE) 101 | im_scales.append(im_scale) 102 | processed_ims.append(im) 103 | # Create a blob to hold the input images 104 | blob = im_list_to_blob(processed_ims) 105 | return blob, im_scales 106 | 107 | def _get_next_minibatch(self): 108 | """ 109 | Return the blobs to be used for the next minibatch. 110 | """ 111 | assert cfg.TRAIN.IMS_PER_BATCH == 1, 'Only single batch forwarding is supported' 112 | 113 | if self._cur + cfg.TRAIN.IMS_PER_BATCH >= len(self._roidb): 114 | self._shuffle_roidb_inds() 115 | db_inds = self._perm[self._cur] 116 | self._cur += 1 117 | #print self._cur 118 | roidb = self._roidb[db_inds] 119 | 120 | random_scale_inds = np.random.randint(0, high=len(cfg.TRAIN.SCALES), size=1) 121 | im_blob, im_scales = self._get_image_blob(roidb, random_scale_inds) 122 | 123 | gt_label = np.where(roidb['gt_classes'] != 0)[0] 124 | gt_boxes = np.hstack((roidb['boxes'][gt_label, :] * im_scales[0], 125 | roidb['gt_classes'][gt_label, np.newaxis])).astype(np.float32) 126 | blobs = { 127 | 'data': im_blob, 128 | 'gt_boxes': gt_boxes, 129 | 'im_info': np.array([[im_blob.shape[2], im_blob.shape[3], im_scales[0]]], dtype=np.float32) 130 | } 131 | 132 | if MNC_MODE: 133 | maskdb = self._maskdb[db_inds] 134 | mask_list = maskdb['gt_masks'] 135 | mask_max_x = maskdb['mask_max'][0] 136 | mask_max_y = maskdb['mask_max'][1] 137 | gt_masks = np.zeros((len(mask_list), mask_max_y, mask_max_x)) 138 | mask_info = np.zeros((len(mask_list), 2)) 139 | for j in xrange(len(mask_list)): 140 | mask = mask_list[j] 141 | mask_x = mask.shape[1] 142 | mask_y = mask.shape[0] 143 | gt_masks[j, 0:mask_y, 0:mask_x] = mask 144 | mask_info[j, 0] = 
mask_y 145 | mask_info[j, 1] = mask_x 146 | blobs['gt_masks'] = gt_masks 147 | blobs['mask_info'] = mask_info 148 | 149 | return blobs -------------------------------------------------------------------------------- /tf_rfcn_dynamic/rpn_tools/nms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_dynamic/rpn_tools/nms/__init__.py -------------------------------------------------------------------------------- /tf_rfcn_dynamic/rpn_tools/nms/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_dynamic/rpn_tools/nms/__init__.pyc -------------------------------------------------------------------------------- /tf_rfcn_dynamic/rpn_tools/nms/cpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Multitask Network Cascade 3 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 4 | # Copyright (c) 2016, Haozhi Qi 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 12 | return a if a >= b else b 13 | 14 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b): 15 | return a if a <= b else b 16 | 17 | def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 18 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 19 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 20 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 21 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 22 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 
4] 23 | 24 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 25 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 26 | 27 | cdef int ndets = dets.shape[0] 28 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 29 | np.zeros((ndets), dtype=np.int) 30 | 31 | # nominal indices 32 | cdef int _i, _j 33 | # sorted indices 34 | cdef int i, j 35 | # temp variables for box i's (the box currently under consideration) 36 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 37 | # variables for computing overlap with box j (lower scoring box) 38 | cdef np.float32_t xx1, yy1, xx2, yy2 39 | cdef np.float32_t w, h 40 | cdef np.float32_t inter, ovr 41 | 42 | keep = [] 43 | for _i in range(ndets): 44 | i = order[_i] 45 | if suppressed[i] == 1: 46 | continue 47 | keep.append(i) 48 | ix1 = x1[i] 49 | iy1 = y1[i] 50 | ix2 = x2[i] 51 | iy2 = y2[i] 52 | iarea = areas[i] 53 | for _j in range(_i + 1, ndets): 54 | j = order[_j] 55 | if suppressed[j] == 1: 56 | continue 57 | xx1 = max(ix1, x1[j]) 58 | yy1 = max(iy1, y1[j]) 59 | xx2 = min(ix2, x2[j]) 60 | yy2 = min(iy2, y2[j]) 61 | w = max(0.0, xx2 - xx1 + 1) 62 | h = max(0.0, yy2 - yy1 + 1) 63 | inter = w * h 64 | ovr = inter / (iarea + areas[j] - inter) 65 | if ovr >= thresh: 66 | suppressed[j] = 1 67 | 68 | return keep 69 | -------------------------------------------------------------------------------- /tf_rfcn_dynamic/rpn_tools/nms/cpu_nms.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_dynamic/rpn_tools/nms/cpu_nms.so -------------------------------------------------------------------------------- /tf_rfcn_dynamic/rpn_tools/nms/gpu_mv.hpp: -------------------------------------------------------------------------------- 1 | void _mv(const float* all_boxes, const float* all_masks, const int all_boxes_num, 2 | const int* candidate_inds, const int* candidate_start, 
const float* candidate_weights, const int candidate_num, 3 | const int image_height, const int image_width, const int box_dim, const int mask_size, const int result_num, 4 | float* finalize_output_mask, int* finalize_output_box, const int device_id); 5 | -------------------------------------------------------------------------------- /tf_rfcn_dynamic/rpn_tools/nms/gpu_mv.pyx: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | cimport numpy as np 4 | 5 | assert sizeof(int) == sizeof(np.int32_t) 6 | 7 | cdef extern from "gpu_mv.hpp": 8 | void _mv(np.float32_t* all_boxes, np.float32_t* all_masks, np.int32_t all_boxes_num, np.int32_t* candidate_inds, np.int32_t* candidate_start, np.float32_t* candidate_weights, np.int32_t candidate_num, np.int32_t image_height, np.int32_t image_width, np.int32_t box_dim, np.int32_t mask_size, np.int32_t result_num, np.float32_t* result_mask, np.int32_t* result_box, np.int32_t device_id); 9 | 10 | # boxes: n * 4 11 | # masks: n * 1 * 21 * 21 12 | # scores: n * 21 13 | def mv(np.ndarray[np.float32_t, ndim=2] all_boxes, 14 | np.ndarray[np.float32_t, ndim=4] all_masks, 15 | np.ndarray[np.int32_t, ndim=1] candidate_inds, 16 | np.ndarray[np.int32_t, ndim=1] candidate_start, 17 | np.ndarray[np.float32_t, ndim=1] candidate_weights, 18 | np.int32_t image_height, 19 | np.int32_t image_width, 20 | np.int32_t device_id = 0): 21 | cdef int all_box_num = all_boxes.shape[0] 22 | cdef int boxes_dim = all_boxes.shape[1] 23 | cdef int mask_size = all_masks.shape[3] 24 | cdef int candidate_num = candidate_inds.shape[0] 25 | cdef int result_num = candidate_start.shape[0] 26 | cdef np.ndarray[np.float32_t, ndim=4] \ 27 | result_mask = np.zeros((result_num, 1, all_masks.shape[2], all_masks.shape[3]), dtype=np.float32) 28 | cdef np.ndarray[np.int32_t, ndim=2] \ 29 | result_box = np.zeros((result_num, boxes_dim), dtype=np.int32) 30 | _mv(&all_boxes[0, 0], &all_masks[0, 0, 0, 0], all_box_num, 
&candidate_inds[0], &candidate_start[0], &candidate_weights[0], candidate_num, image_height, image_width, boxes_dim, mask_size, candidate_start.shape[0], &result_mask[0,0,0,0], &result_box[0,0], device_id) 31 | return result_mask, result_box 32 | -------------------------------------------------------------------------------- /tf_rfcn_dynamic/rpn_tools/nms/gpu_nms.hpp: -------------------------------------------------------------------------------- 1 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 2 | int boxes_dim, float nms_overlap_thresh, int device_id); 3 | -------------------------------------------------------------------------------- /tf_rfcn_dynamic/rpn_tools/nms/gpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Multitask Network Cascade 3 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 4 | # Copyright (c) 2016, Haozhi Qi 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | assert sizeof(int) == sizeof(np.int32_t) 12 | 13 | cdef extern from "gpu_nms.hpp": 14 | void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) 15 | 16 | def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, 17 | np.int32_t device_id=0): 18 | cdef int boxes_num = dets.shape[0] 19 | cdef int boxes_dim = dets.shape[1] 20 | cdef int num_out 21 | cdef np.ndarray[np.int32_t, ndim=1] \ 22 | keep = np.zeros(boxes_num, dtype=np.int32) 23 | cdef np.ndarray[np.float32_t, ndim=1] \ 24 | scores = dets[:, 4] 25 | cdef np.ndarray[np.int_t, ndim=1] \ 26 | order = scores.argsort()[::-1] 27 | cdef np.ndarray[np.float32_t, ndim=2] \ 28 | sorted_dets = dets[order, :] 29 | _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id) 30 | keep = 
keep[:num_out] 31 | return list(order[keep]) 32 | -------------------------------------------------------------------------------- /tf_rfcn_dynamic/rpn_tools/nms/gpu_nms.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_dynamic/rpn_tools/nms/gpu_nms.so -------------------------------------------------------------------------------- /tf_rfcn_dynamic/rpn_tools/nms/mnc_config.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_dynamic/rpn_tools/nms/mnc_config.pyc -------------------------------------------------------------------------------- /tf_rfcn_dynamic/rpn_tools/nms/mv.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_dynamic/rpn_tools/nms/mv.so -------------------------------------------------------------------------------- /tf_rfcn_dynamic/rpn_tools/nms/nms_kernel.cu: -------------------------------------------------------------------------------- 1 | // -------------------------------------------------------- 2 | // Multitask Network Cascade 3 | // Modified from MATLAB Faster R-CNN (https://github.com/shaoqingren/faster_rcnn) 4 | // Copyright (c) 2016, Haozhi Qi 5 | // Licensed under The MIT License [see LICENSE for details] 6 | // -------------------------------------------------------- 7 | 8 | #include "gpu_nms.hpp" 9 | #include <vector> 10 | #include <iostream> 11 | 12 | #define CUDA_CHECK(condition) \ 13 | /* Code block avoids redefinition of cudaError_t error */ \ 14 | do { \ 15 | cudaError_t error = condition; \ 16 | if (error != cudaSuccess) { \ 17 | std::cout << cudaGetErrorString(error) << std::endl; \ 18 | } \ 19 | } while (0) 20 | 21 | #define DIVUP(m,n) ((m) /
(n) + ((m) % (n) > 0)) 22 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 23 | 24 | __device__ inline float devIoU(float const * const a, float const * const b) { 25 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 26 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]); 27 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 28 | float interS = width * height; 29 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 30 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 31 | return interS / (Sa + Sb - interS); 32 | } 33 | 34 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 35 | const float *dev_boxes, unsigned long long *dev_mask) { 36 | const int row_start = blockIdx.y; 37 | const int col_start = blockIdx.x; 38 | 39 | // if (row_start > col_start) return; 40 | 41 | const int row_size = 42 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 43 | const int col_size = 44 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 45 | 46 | __shared__ float block_boxes[threadsPerBlock * 5]; 47 | if (threadIdx.x < col_size) { 48 | block_boxes[threadIdx.x * 5 + 0] = 49 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 50 | block_boxes[threadIdx.x * 5 + 1] = 51 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 52 | block_boxes[threadIdx.x * 5 + 2] = 53 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 54 | block_boxes[threadIdx.x * 5 + 3] = 55 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 56 | block_boxes[threadIdx.x * 5 + 4] = 57 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 58 | } 59 | __syncthreads(); 60 | 61 | if (threadIdx.x < row_size) { 62 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 63 | const float *cur_box = dev_boxes + cur_box_idx * 5; 64 | int i = 0; 65 | unsigned long long t = 0; 66 | int start = 0; 67 | if (row_start == col_start) { 68 | start = 
threadIdx.x + 1; 69 | } 70 | for (i = start; i < col_size; i++) { 71 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 72 | t |= 1ULL << i; 73 | } 74 | } 75 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock); 76 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 77 | } 78 | } 79 | 80 | void _set_device(int device_id) { 81 | int current_device; 82 | CUDA_CHECK(cudaGetDevice(&current_device)); 83 | if (current_device == device_id) { 84 | return; 85 | } 86 | // The call to cudaSetDevice must come before any calls to Get, which 87 | // may perform initialization using the GPU. 88 | CUDA_CHECK(cudaSetDevice(device_id)); 89 | } 90 | 91 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 92 | int boxes_dim, float nms_overlap_thresh, int device_id) { 93 | _set_device(device_id); 94 | 95 | float* boxes_dev = NULL; 96 | unsigned long long* mask_dev = NULL; 97 | 98 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock); 99 | 100 | CUDA_CHECK(cudaMalloc(&boxes_dev, 101 | boxes_num * boxes_dim * sizeof(float))); 102 | CUDA_CHECK(cudaMemcpy(boxes_dev, 103 | boxes_host, 104 | boxes_num * boxes_dim * sizeof(float), 105 | cudaMemcpyHostToDevice)); 106 | 107 | CUDA_CHECK(cudaMalloc(&mask_dev, 108 | boxes_num * col_blocks * sizeof(unsigned long long))); 109 | 110 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock), 111 | DIVUP(boxes_num, threadsPerBlock)); 112 | dim3 threads(threadsPerBlock); 113 | nms_kernel<<<blocks, threads>>>(boxes_num, 114 | nms_overlap_thresh, 115 | boxes_dev, 116 | mask_dev); 117 | 118 | std::vector<unsigned long long> mask_host(boxes_num * col_blocks); 119 | CUDA_CHECK(cudaMemcpy(&mask_host[0], 120 | mask_dev, 121 | sizeof(unsigned long long) * boxes_num * col_blocks, 122 | cudaMemcpyDeviceToHost)); 123 | 124 | std::vector<unsigned long long> remv(col_blocks); 125 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 126 | 127 | int num_to_keep = 0; 128 | for (int i = 0; i < boxes_num; i++) { 129 | int nblock = i / threadsPerBlock; 130 | int inblock = i %
threadsPerBlock; 131 | 132 | if (!(remv[nblock] & (1ULL << inblock))) { 133 | keep_out[num_to_keep++] = i; 134 | unsigned long long *p = &mask_host[0] + i * col_blocks; 135 | for (int j = nblock; j < col_blocks; j++) { 136 | remv[j] |= p[j]; 137 | } 138 | } 139 | } 140 | *num_out = num_to_keep; 141 | 142 | CUDA_CHECK(cudaFree(boxes_dev)); 143 | CUDA_CHECK(cudaFree(mask_dev)); 144 | } 145 | -------------------------------------------------------------------------------- /tf_rfcn_dynamic/rpn_tools/nms/nms_wrapper.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Multitask Network Cascade 3 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 4 | # Copyright (c) 2016, Haozhi Qi 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # -------------------------------------------------------- 7 | 8 | from mnc_config import cfg 9 | from gpu_nms import gpu_nms 10 | from cpu_nms import cpu_nms 11 | 12 | 13 | def nms(dets, thresh): 14 | """Dispatch to either CPU or GPU NMS implementations.""" 15 | 16 | if dets.shape[0] == 0: 17 | return [] 18 | if cfg.USE_GPU_NMS: 19 | return gpu_nms(dets, thresh, device_id=cfg.GPU_ID) 20 | else: 21 | return cpu_nms(dets, thresh) 22 | 23 | 24 | def apply_nms(all_boxes, thresh): 25 | """Apply non-maximum suppression to all predicted boxes output by the 26 | test_net method. 
27 | """ 28 | num_classes = len(all_boxes) 29 | num_images = len(all_boxes[0]) 30 | nms_boxes = [[[] for _ in xrange(num_images)] 31 | for _ in xrange(num_classes)] 32 | for cls_ind in xrange(num_classes): 33 | for im_ind in xrange(num_images): 34 | dets = all_boxes[cls_ind][im_ind] 35 | if dets == []: 36 | continue 37 | keep = nms(dets, thresh) 38 | if len(keep) == 0: 39 | continue 40 | nms_boxes[cls_ind][im_ind] = dets[keep, :].copy() 41 | return nms_boxes 42 | 43 | 44 | def apply_nms_mask(all_boxes, all_masks, thresh): 45 | num_classes = len(all_boxes) 46 | num_images = len(all_boxes[0]) 47 | nms_boxes = [[[] for _ in xrange(num_images)] 48 | for _ in xrange(num_classes)] 49 | nms_masks = [[[] for _ in xrange(num_images)] 50 | for _ in xrange(num_classes)] 51 | for cls_ind in xrange(num_classes): 52 | for im_ind in xrange(num_images): 53 | dets = all_boxes[cls_ind][im_ind] 54 | masks = all_masks[cls_ind][im_ind] 55 | if dets == []: 56 | continue 57 | keep = nms(dets, thresh) 58 | if len(keep) == 0: 59 | continue 60 | nms_boxes[cls_ind][im_ind] = dets[keep, :].copy() 61 | nms_masks[cls_ind][im_ind] = masks[keep, :].copy() 62 | return nms_boxes, nms_masks 63 | 64 | 65 | def apply_nms_mask_single(box, mask, thresh): 66 | if box == []: 67 | return box, mask 68 | keep = nms(box, thresh) 69 | if len(keep) == 0: 70 | return box, mask 71 | return box[keep, :].copy(), mask[keep, :].copy() 72 | -------------------------------------------------------------------------------- /tf_rfcn_dynamic/rpn_tools/nms/nms_wrapper.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_dynamic/rpn_tools/nms/nms_wrapper.pyc -------------------------------------------------------------------------------- /tf_rfcn_dynamic/rpn_tools/nms/py_cpu_nms.py: -------------------------------------------------------------------------------- 1 | # 
-------------------------------------------------------- 2 | # Multitask Network Cascade 3 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 4 | # Copyright (c) 2016, Haozhi Qi 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | 10 | def py_cpu_nms(dets, thresh): 11 | """Pure Python NMS baseline.""" 12 | x1 = dets[:, 0] 13 | y1 = dets[:, 1] 14 | x2 = dets[:, 2] 15 | y2 = dets[:, 3] 16 | scores = dets[:, 4] 17 | 18 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 19 | order = scores.argsort()[::-1] 20 | 21 | keep = [] 22 | while order.size > 0: 23 | i = order[0] 24 | keep.append(i) 25 | xx1 = np.maximum(x1[i], x1[order[1:]]) 26 | yy1 = np.maximum(y1[i], y1[order[1:]]) 27 | xx2 = np.minimum(x2[i], x2[order[1:]]) 28 | yy2 = np.minimum(y2[i], y2[order[1:]]) 29 | 30 | w = np.maximum(0.0, xx2 - xx1 + 1) 31 | h = np.maximum(0.0, yy2 - yy1 + 1) 32 | inter = w * h 33 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 34 | 35 | inds = np.where(ovr <= thresh)[0] 36 | order = order[inds + 1] 37 | 38 | return keep 39 | -------------------------------------------------------------------------------- /tf_rfcn_dynamic/rpn_tools/proposal_layer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Sean Bell 6 | # -------------------------------------------------------- 7 | 8 | #import caffe 9 | import numpy as np 10 | import yaml 11 | from fast_rcnn.config import cfg 12 | from generate_anchors import generate_anchors 13 | from fast_rcnn.bbox_transform import bbox_transform_inv, clip_boxes 14 | from fast_rcnn.nms_wrapper import nms 15 | 16 | DEBUG = False 17 | 18 | 19 | """ 20 | Outputs object detection proposals by 
applying estimated bounding-box 21 | transformations to a set of regular boxes (called "anchors"). 22 | """ 23 | 24 | 25 | _feat_stride = 16 26 | _anchors = generate_anchors() 27 | _num_anchors = 9 28 | phase = 'TRAIN' 29 | 30 | 31 | def forward_proposal_op(bottom_0, bottom_1, bottom_2): 32 | # Algorithm: 33 | # 34 | # for each (H, W) location i 35 | # generate A anchor boxes centered on cell i 36 | # apply predicted bbox deltas at cell i to each of the A anchors 37 | # clip predicted boxes to image 38 | # remove predicted boxes with either height or width < threshold 39 | # sort all (proposal, score) pairs by score from highest to lowest 40 | # take top pre_nms_topN proposals before NMS 41 | # apply NMS with threshold 0.7 to remaining proposals 42 | # take after_nms_topN proposals after NMS 43 | # return the top proposals (-> RoIs top, scores top) 44 | 45 | assert bottom_0.shape[0] == 1, \ 46 | 'Only single item batches are supported' 47 | 48 | cfg_key = str(phase) # either 'TRAIN' or 'TEST' 49 | pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N 50 | post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N 51 | nms_thresh = cfg[cfg_key].RPN_NMS_THRESH 52 | min_size = cfg[cfg_key].RPN_MIN_SIZE 53 | 54 | # the first set of _num_anchors channels are bg probs 55 | # the second set are the fg probs, which we want 56 | scores = bottom_0[:, _num_anchors:, :, :] 57 | bbox_deltas = bottom_1 58 | im_info = bottom_2 59 | 60 | if DEBUG: 61 | print 'im_size: ({}, {})'.format(im_info[0], im_info[1]) 62 | print 'scale: {}'.format(im_info[2]) 63 | 64 | # 1. 
Generate proposals from bbox deltas and shifted anchors 65 | height, width = scores.shape[-2:] 66 | 67 | if DEBUG: 68 | print 'score map size: {}'.format(scores.shape) 69 | 70 | # Enumerate all shifts 71 | shift_x = np.arange(0, width) * _feat_stride 72 | shift_y = np.arange(0, height) * _feat_stride 73 | shift_x, shift_y = np.meshgrid(shift_x, shift_y) 74 | shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), 75 | shift_x.ravel(), shift_y.ravel())).transpose() 76 | 77 | # Enumerate all shifted anchors: 78 | # 79 | # add A anchors (1, A, 4) to 80 | # cell K shifts (K, 1, 4) to get 81 | # shift anchors (K, A, 4) 82 | # reshape to (K*A, 4) shifted anchors 83 | A = _num_anchors 84 | K = shifts.shape[0] 85 | anchors = _anchors.reshape((1, A, 4)) + \ 86 | shifts.reshape((1, K, 4)).transpose((1, 0, 2)) 87 | anchors = anchors.reshape((K * A, 4)) 88 | 89 | # Transpose and reshape predicted bbox transformations to get them 90 | # into the same order as the anchors: 91 | # 92 | # bbox deltas will be (1, 4 * A, H, W) format 93 | # transpose to (1, H, W, 4 * A) 94 | # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a) 95 | # in slowest to fastest order 96 | bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4)) 97 | 98 | # Same story for the scores: 99 | # 100 | # scores are (1, A, H, W) format 101 | # transpose to (1, H, W, A) 102 | # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a) 103 | scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1)) 104 | 105 | # Convert anchors into proposals via bbox transformations 106 | proposals = bbox_transform_inv(anchors, bbox_deltas) 107 | 108 | # 2. clip predicted boxes to image 109 | proposals = clip_boxes(proposals, im_info[:2]) 110 | 111 | # 3. 
remove predicted boxes with either height or width < threshold 112 | # (NOTE: convert min_size to input image scale stored in im_info[2]) 113 | keep = _filter_boxes(proposals, min_size * im_info[2]) 114 | proposals = proposals[keep, :] 115 | scores = scores[keep] 116 | 117 | # 4. sort all (proposal, score) pairs by score from highest to lowest 118 | # 5. take top pre_nms_topN (e.g. 6000) 119 | order = scores.ravel().argsort()[::-1] 120 | if pre_nms_topN > 0: 121 | order = order[:pre_nms_topN] 122 | proposals = proposals[order, :] 123 | scores = scores[order] 124 | 125 | # 6. apply nms (e.g. threshold = 0.7) 126 | # 7. take after_nms_topN (e.g. 300) 127 | # 8. return the top proposals (-> RoIs top) 128 | keep = nms(np.hstack((proposals, scores)), nms_thresh) 129 | if post_nms_topN > 0: 130 | keep = keep[:post_nms_topN] 131 | proposals = proposals[keep, :] 132 | scores = scores[keep] 133 | 134 | # Output rois blob 135 | # Our RPN implementation only supports a single input image, so all 136 | # batch inds are 0 137 | batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32) 138 | blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False))) 139 | 140 | return blob 141 | 142 | # [Optional] output scores blob 143 | 144 | 145 | def backward(top, propagate_down, bottom): 146 | """This layer does not propagate gradients.""" 147 | pass 148 | 149 | def reshape(bottom, top): 150 | """Reshaping happens during the call to forward.""" 151 | pass 152 | 153 | def _filter_boxes(boxes, min_size): 154 | """Remove all boxes with any side smaller than min_size.""" 155 | ws = boxes[:, 2] - boxes[:, 0] + 1 156 | hs = boxes[:, 3] - boxes[:, 1] + 1 157 | keep = np.where((ws >= min_size) & (hs >= min_size))[0] 158 | return keep 159 | -------------------------------------------------------------------------------- /tf_rfcn_dynamic/rpn_tools/proposal_layer.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_dynamic/rpn_tools/proposal_layer.pyc -------------------------------------------------------------------------------- /tf_rfcn_dynamic/rpn_tools/proposal_layer.py~: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Sean Bell 6 | # -------------------------------------------------------- 7 | 8 | import caffe 9 | import numpy as np 10 | import yaml 11 | from fast_rcnn.config import cfg 12 | from generate_anchors import generate_anchors 13 | from fast_rcnn.bbox_transform import bbox_transform_inv, clip_boxes 14 | from fast_rcnn.nms_wrapper import nms 15 | 16 | DEBUG = False 17 | 18 | 19 | """ 20 | Outputs object detection proposals by applying estimated bounding-box 21 | transformations to a set of regular boxes (called "anchors"). 
_feat_stride = 16
_anchors = generate_anchors()
_num_anchors = 9
phase = 'TRAIN'


def forward_proposal_op(bottom_0, bottom_1, bottom_2):
    """Convert RPN outputs into a ranked, NMS-filtered set of RoIs.

    Algorithm:

      for each (H, W) location i
        generate A anchor boxes centered on cell i
        apply predicted bbox deltas at cell i to each of the A anchors
      clip predicted boxes to image
      remove predicted boxes with either height or width < threshold
      sort all (proposal, score) pairs by score from highest to lowest
      take top pre_nms_topN proposals before NMS
      apply NMS to remaining proposals
      take after_nms_topN proposals after NMS
      return the top proposals

    Args:
        bottom_0: RPN class probabilities, indexed as a 4-D array with a
            leading batch axis of length 1.  The first ``_num_anchors``
            channels on axis 1 are background probs and are discarded; the
            remaining channels are used as foreground scores.
        bottom_1: predicted box deltas in (1, 4 * A, H, W) layout.
        bottom_2: image info.  Elements ``[:2]`` are passed to ``clip_boxes``
            as the image size and element ``[2]`` is used as the image scale
            (presumably (height, width, scale) -- confirm against caller).

    Returns:
        float32 ndarray of shape (R, 5); column 0 is the batch index
        (always 0) and columns 1-4 are the proposal box coordinates.

    Raises:
        AssertionError: if ``bottom_0`` holds more than one batch item.
    """
    assert bottom_0.shape[0] == 1, \
        'Only single item batches are supported'

    cfg_key = str(phase)  # either 'TRAIN' or 'TEST'
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
    min_size = cfg[cfg_key].RPN_MIN_SIZE

    # the first set of _num_anchors channels are bg probs
    # the second set are the fg probs, which we want
    scores = bottom_0[:, _num_anchors:, :, :]
    bbox_deltas = bottom_1
    im_info = bottom_2

    if DEBUG:
        # Parenthesised single-argument print is valid on Python 2 and 3.
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))

    # 1. Generate proposals from bbox deltas and shifted anchors
    height, width = scores.shape[-2:]

    if DEBUG:
        print('score map size: {}'.format(scores.shape))

    # Enumerate all shifts
    shift_x = np.arange(0, width) * _feat_stride
    shift_y = np.arange(0, height) * _feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()

    # Enumerate all shifted anchors:
    #
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors
    K = shifts.shape[0]
    anchors = _anchors.reshape((1, A, 4)) + \
        shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((K * A, 4))

    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    #
    # bbox deltas will be (1, 4 * A, H, W) format
    # transpose to (1, H, W, 4 * A)
    # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
    # in slowest to fastest order
    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

    # Same story for the scores:
    #
    # scores are (1, A, H, W) format
    # transpose to (1, H, W, A)
    # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
    scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

    # Convert anchors into proposals via bbox transformations
    proposals = bbox_transform_inv(anchors, bbox_deltas)

    # 2. clip predicted boxes to image
    proposals = clip_boxes(proposals, im_info[:2])

    # 3. remove predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to input image scale stored in im_info[2])
    keep = _filter_boxes(proposals, min_size * im_info[2])
    proposals = proposals[keep, :]
    scores = scores[keep]

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    keep = nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))

    # The filtered scores are computed above but not returned.
    return blob


def backward(top, propagate_down, bottom):
    """This layer does not propagate gradients."""
    pass


def reshape(bottom, top):
    """Reshaping happens during the call to forward."""
    pass


def _filter_boxes(boxes, min_size):
    """Return the row indices of boxes whose width and height are both
    at least ``min_size``.

    Width and height are measured inclusively (``x2 - x1 + 1``), so a box
    with identical corners has size 1.
    """
    ws = boxes[:, 2] - boxes[:, 0] + 1
    hs = boxes[:, 3] - boxes[:, 1] + 1
    keep = np.where((ws >= min_size) & (hs >= min_size))[0]
    return keep
DEBUG = False

# Assign object detection proposals to ground-truth targets. Produces proposal
# classification labels and bounding-box regression targets.

_num_classes = 21

# Running totals for the DEBUG statistics printed by
# forward_proposal_target_op.  These used to be read from ``self``, which
# does not exist inside a module-level function.
_debug_stats = {'count': 0, 'fg_num': 0, 'bg_num': 0}


def forward_proposal_target_op(bottom_0, bottom_1):
    """Sample RoIs and build their classification / regression targets.

    Args:
        bottom_0: proposal RoIs, rows of (0, x1, y1, x2, y2).
        bottom_1: ground-truth boxes, rows of (x1, y1, x2, y2, label).

    Returns:
        Tuple ``(rois, labels, bbox_targets, bbox_inside_weights,
        bbox_outside_weights)``.  ``bbox_outside_weights`` is 1.0 wherever
        ``bbox_inside_weights`` is positive and 0.0 elsewhere.

    Raises:
        AssertionError: if any RoI has a non-zero batch index.
    """
    # Proposal ROIs (0, x1, y1, x2, y2) coming from RPN or any other source
    all_rois = bottom_0
    # GT boxes (x1, y1, x2, y2, label)
    gt_boxes = bottom_1

    # Include ground-truth boxes in the set of candidate rois
    zeros = np.zeros((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype)
    all_rois = np.vstack(
        (all_rois, np.hstack((zeros, gt_boxes[:, :-1])))
    )

    # Sanity check: single batch only
    assert np.all(all_rois[:, 0] == 0), \
        'Only single item batches are supported'

    num_images = 1
    # Keep the counts as Python ints: they are later used as the sample
    # size for npr.choice and as a slice bound, neither of which accepts
    # a float.
    rois_per_image = int(cfg.TRAIN.BATCH_SIZE // num_images)
    fg_rois_per_image = int(np.round(cfg.TRAIN.FG_FRACTION * rois_per_image))

    # Sample rois with classification labels and bounding box regression
    # targets
    labels, rois, bbox_targets, bbox_inside_weights = _sample_rois(
        all_rois, gt_boxes, fg_rois_per_image,
        rois_per_image, _num_classes)

    if DEBUG:
        num_fg = int((labels > 0).sum())
        num_bg = int((labels == 0).sum())
        print('num fg: {}'.format(num_fg))
        print('num bg: {}'.format(num_bg))
        _debug_stats['count'] += 1
        _debug_stats['fg_num'] += num_fg
        _debug_stats['bg_num'] += num_bg
        count = _debug_stats['count']
        fg_total = _debug_stats['fg_num']
        bg_total = _debug_stats['bg_num']
        print('num fg avg: {}'.format(float(fg_total) / count))
        print('num bg avg: {}'.format(float(bg_total) / count))
        # Avoid ZeroDivisionError when no background RoI has been seen yet.
        ratio = float(fg_total) / bg_total if bg_total else float('inf')
        print('ratio: {:.3f}'.format(ratio))

    bbox_outside_weights = (bbox_inside_weights > 0).astype(np.float32)

    return rois, labels, bbox_targets, bbox_inside_weights, bbox_outside_weights


def backward(top, propagate_down, bottom):
    """This layer does not propagate gradients."""
    pass


def reshape(bottom, top):
    """Reshaping happens during the call to forward."""
    pass


def _get_bbox_regression_labels(bbox_target_data, num_classes,
                                train_agnostic=True):
    """Expand compact regression targets into the per-class layout.

    ``bbox_target_data`` is stored in a compact form
    N x (class, tx, ty, tw, th).  Only rows whose class is > 0 receive
    non-zero targets and weights.

    Args:
        bbox_target_data: N x 5 array as described above.
        num_classes: number of classes; only used when ``train_agnostic``
            is False.
        train_agnostic: when True (the default, and the only mode the
            original code ever ran) the output has 2 slots of 4 columns
            and every foreground row is written to columns 4:8.  When
            False the output has ``num_classes`` slots and each row is
            written to the slot of its own class.

    Returns:
        bbox_targets (ndarray): N x 8 (agnostic) or N x 4*num_classes
            float32 array of regression targets.
        bbox_inside_weights (ndarray): loss weights of the same shape.
    """
    clss = bbox_target_data[:, 0]
    num_slots = 2 if train_agnostic else num_classes
    bbox_targets = np.zeros((clss.size, 4 * num_slots), dtype=np.float32)
    bbox_inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32)
    inds = np.where(clss > 0)[0]

    if train_agnostic:
        # Every selected row has class > 0, so it always lands in slot 1.
        bbox_targets[inds, 4:8] = bbox_target_data[inds, 1:]
        bbox_inside_weights[inds, 4:8] = cfg.TRAIN.BBOX_INSIDE_WEIGHTS
    else:
        for ind in inds:
            # Labels are stored as floats; slice bounds must be ints.
            start = 4 * int(clss[ind])
            end = start + 4
            bbox_targets[ind, start:end] = bbox_target_data[ind, 1:]
            bbox_inside_weights[ind, start:end] = cfg.TRAIN.BBOX_INSIDE_WEIGHTS

    return bbox_targets, bbox_inside_weights


def _compute_targets(ex_rois, gt_rois, labels):
    """Compute bounding-box regression targets for an image.

    Returns an N x 5 float32 array whose first column is ``labels`` and
    whose remaining columns are the (optionally normalized) output of
    ``bbox_transform(ex_rois, gt_rois)``.
    """
    assert ex_rois.shape[0] == gt_rois.shape[0]
    assert ex_rois.shape[1] == 4
    assert gt_rois.shape[1] == 4

    targets = bbox_transform(ex_rois, gt_rois)
    if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
        # Optionally normalize targets by a precomputed mean and stdev
        targets = ((targets - np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS))
                   / np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS))
    return np.hstack(
        (labels[:, np.newaxis], targets)).astype(np.float32, copy=False)


def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image,
                 num_classes):
    """Generate a random sample of RoIs comprising foreground and background
    examples.

    Returns:
        Tuple ``(labels, rois, bbox_targets, bbox_inside_weights)`` for the
        sampled RoIs; foreground RoIs come first and every background RoI
        has its label clamped to 0.
    """
    # overlaps: (rois x gt_boxes)
    # np.float was only an alias of the builtin float (float64) and has
    # been removed from NumPy, so name the dtype explicitly.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_boxes[gt_assignment, 4]

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # Guard against the case when an image has fewer than fg_rois_per_image
    # foreground RoIs
    fg_rois_per_this_image = int(min(fg_rois_per_image, fg_inds.size))
    # Sample foreground regions without replacement
    if fg_inds.size > 0:
        fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image,
                             replace=False)

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
                       (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
    # Compute number of background RoIs to take from this image (guarding
    # against there being fewer than desired)
    bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
    bg_rois_per_this_image = int(min(bg_rois_per_this_image, bg_inds.size))
    # Sample background regions without replacement
    if bg_inds.size > 0:
        bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image,
                             replace=False)

    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from various arrays:
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0
    labels[fg_rois_per_this_image:] = 0
    rois = all_rois[keep_inds]

    bbox_target_data = _compute_targets(
        rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)

    bbox_targets, bbox_inside_weights = \
        _get_bbox_regression_labels(bbox_target_data, num_classes)

    return labels, rois, bbox_targets, bbox_inside_weights
_num_classes = 21

# Running totals for the DEBUG statistics printed by
# forward_proposal_target_op.  These used to be read from ``self``, which
# does not exist inside a module-level function.
_debug_stats = {'count': 0, 'fg_num': 0, 'bg_num': 0}


def forward_proposal_target_op(bottom_0, bottom_1):
    """Sample RoIs and build their classification / regression targets.

    Args:
        bottom_0: proposal RoIs, rows of (0, x1, y1, x2, y2).
        bottom_1: ground-truth boxes, rows of (x1, y1, x2, y2, label).

    Returns:
        Tuple ``(rois, labels, bbox_targets, bbox_inside_weights,
        bbox_outside_weights)``.  ``bbox_outside_weights`` is 1.0 wherever
        ``bbox_inside_weights`` is positive and 0.0 elsewhere.

    Raises:
        AssertionError: if any RoI has a non-zero batch index.
    """
    # Proposal ROIs (0, x1, y1, x2, y2) coming from RPN or any other source
    all_rois = bottom_0
    # GT boxes (x1, y1, x2, y2, label)
    gt_boxes = bottom_1

    # Include ground-truth boxes in the set of candidate rois
    zeros = np.zeros((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype)
    all_rois = np.vstack(
        (all_rois, np.hstack((zeros, gt_boxes[:, :-1])))
    )

    # Sanity check: single batch only
    assert np.all(all_rois[:, 0] == 0), \
        'Only single item batches are supported'

    num_images = 1
    # Keep the counts as Python ints: they are later used as the sample
    # size for npr.choice and as a slice bound, neither of which accepts
    # a float.
    rois_per_image = int(cfg.TRAIN.BATCH_SIZE // num_images)
    fg_rois_per_image = int(np.round(cfg.TRAIN.FG_FRACTION * rois_per_image))

    # Sample rois with classification labels and bounding box regression
    # targets
    labels, rois, bbox_targets, bbox_inside_weights = _sample_rois(
        all_rois, gt_boxes, fg_rois_per_image,
        rois_per_image, _num_classes)

    if DEBUG:
        num_fg = int((labels > 0).sum())
        num_bg = int((labels == 0).sum())
        print('num fg: {}'.format(num_fg))
        print('num bg: {}'.format(num_bg))
        _debug_stats['count'] += 1
        _debug_stats['fg_num'] += num_fg
        _debug_stats['bg_num'] += num_bg
        count = _debug_stats['count']
        fg_total = _debug_stats['fg_num']
        bg_total = _debug_stats['bg_num']
        print('num fg avg: {}'.format(float(fg_total) / count))
        print('num bg avg: {}'.format(float(bg_total) / count))
        # Avoid ZeroDivisionError when no background RoI has been seen yet.
        ratio = float(fg_total) / bg_total if bg_total else float('inf')
        print('ratio: {:.3f}'.format(ratio))

    bbox_outside_weights = (bbox_inside_weights > 0).astype(np.float32)

    return rois, labels, bbox_targets, bbox_inside_weights, bbox_outside_weights


def backward(top, propagate_down, bottom):
    """This layer does not propagate gradients."""
    pass


def reshape(bottom, top):
    """Reshaping happens during the call to forward."""
    pass


def _get_bbox_regression_labels(bbox_target_data, num_classes,
                                train_agnostic=True):
    """Expand compact regression targets into the per-class layout.

    ``bbox_target_data`` is stored in a compact form
    N x (class, tx, ty, tw, th).  Only rows whose class is > 0 receive
    non-zero targets and weights.

    Args:
        bbox_target_data: N x 5 array as described above.
        num_classes: number of classes; only used when ``train_agnostic``
            is False.
        train_agnostic: when True (the default, and the only mode the
            original code ever ran) the output has 2 slots of 4 columns
            and every foreground row is written to columns 4:8.  When
            False the output has ``num_classes`` slots and each row is
            written to the slot of its own class.

    Returns:
        bbox_targets (ndarray): N x 8 (agnostic) or N x 4*num_classes
            float32 array of regression targets.
        bbox_inside_weights (ndarray): loss weights of the same shape.
    """
    clss = bbox_target_data[:, 0]
    num_slots = 2 if train_agnostic else num_classes
    bbox_targets = np.zeros((clss.size, 4 * num_slots), dtype=np.float32)
    bbox_inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32)
    inds = np.where(clss > 0)[0]

    if train_agnostic:
        # Every selected row has class > 0, so it always lands in slot 1.
        bbox_targets[inds, 4:8] = bbox_target_data[inds, 1:]
        bbox_inside_weights[inds, 4:8] = cfg.TRAIN.BBOX_INSIDE_WEIGHTS
    else:
        for ind in inds:
            # Labels are stored as floats; slice bounds must be ints.
            start = 4 * int(clss[ind])
            end = start + 4
            bbox_targets[ind, start:end] = bbox_target_data[ind, 1:]
            bbox_inside_weights[ind, start:end] = cfg.TRAIN.BBOX_INSIDE_WEIGHTS

    return bbox_targets, bbox_inside_weights


def _compute_targets(ex_rois, gt_rois, labels):
    """Compute bounding-box regression targets for an image.

    Returns an N x 5 float32 array whose first column is ``labels`` and
    whose remaining columns are the (optionally normalized) output of
    ``bbox_transform(ex_rois, gt_rois)``.
    """
    assert ex_rois.shape[0] == gt_rois.shape[0]
    assert ex_rois.shape[1] == 4
    assert gt_rois.shape[1] == 4

    targets = bbox_transform(ex_rois, gt_rois)
    if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
        # Optionally normalize targets by a precomputed mean and stdev
        targets = ((targets - np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS))
                   / np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS))
    return np.hstack(
        (labels[:, np.newaxis], targets)).astype(np.float32, copy=False)


def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image,
                 num_classes):
    """Generate a random sample of RoIs comprising foreground and background
    examples.

    Returns:
        Tuple ``(labels, rois, bbox_targets, bbox_inside_weights)`` for the
        sampled RoIs; foreground RoIs come first and every background RoI
        has its label clamped to 0.
    """
    # overlaps: (rois x gt_boxes)
    # np.float was only an alias of the builtin float (float64) and has
    # been removed from NumPy, so name the dtype explicitly.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_boxes[gt_assignment, 4]

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # Guard against the case when an image has fewer than fg_rois_per_image
    # foreground RoIs
    fg_rois_per_this_image = int(min(fg_rois_per_image, fg_inds.size))
    # Sample foreground regions without replacement
    if fg_inds.size > 0:
        fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image,
                             replace=False)

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
                       (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
    # Compute number of background RoIs to take from this image (guarding
    # against there being fewer than desired)
    bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
    bg_rois_per_this_image = int(min(bg_rois_per_this_image, bg_inds.size))
    # Sample background regions without replacement
    if bg_inds.size > 0:
        bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image,
                             replace=False)

    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from various arrays:
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0
    labels[fg_rois_per_this_image:] = 0
    rois = all_rois[keep_inds]

    bbox_target_data = _compute_targets(
        rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)

    bbox_targets, bbox_inside_weights = \
        _get_bbox_regression_labels(bbox_target_data, num_classes)

    return labels, rois, bbox_targets, bbox_inside_weights
# np.float was only an alias of the builtin float (i.e. float64) and has been
# removed from NumPy; spell the dtype out so the module still imports.
DTYPE = np.float64
ctypedef np.float64_t DTYPE_t


def bbox_overlaps(
        np.ndarray[DTYPE_t, ndim=2] boxes,
        np.ndarray[DTYPE_t, ndim=2] query_boxes):
    """Compute the pairwise overlap between two sets of boxes.

    Box extents are measured inclusively (``x2 - x1 + 1``).  The overlap of
    a pair is intersection area divided by union area; pairs that do not
    intersect keep the initial value 0.

    Parameters
    ----------
    boxes: (N, 4) ndarray of float
    query_boxes: (K, 4) ndarray of float

    Returns
    -------
    overlaps: (N, K) ndarray of overlap between boxes and query_boxes
    """
    cdef unsigned int N = boxes.shape[0]
    cdef unsigned int K = query_boxes.shape[0]
    cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE)
    cdef DTYPE_t iw, ih, box_area
    cdef DTYPE_t ua
    cdef unsigned int k, n
    for k in range(K):
        # Area of the query box is invariant over the inner loop.
        box_area = (
            (query_boxes[k, 2] - query_boxes[k, 0] + 1) *
            (query_boxes[k, 3] - query_boxes[k, 1] + 1)
        )
        for n in range(N):
            # Intersection width; non-positive means no horizontal overlap.
            iw = (
                min(boxes[n, 2], query_boxes[k, 2]) -
                max(boxes[n, 0], query_boxes[k, 0]) + 1
            )
            if iw > 0:
                # Intersection height; non-positive means no vertical overlap.
                ih = (
                    min(boxes[n, 3], query_boxes[k, 3]) -
                    max(boxes[n, 1], query_boxes[k, 1]) + 1
                )
                if ih > 0:
                    # Union area = area(box) + area(query) - intersection.
                    ua = float(
                        (boxes[n, 2] - boxes[n, 0] + 1) *
                        (boxes[n, 3] - boxes[n, 1] + 1) +
                        box_area - iw * ih
                    )
                    overlaps[n, k] = iw * ih / ua
    return overlaps
def im_list_to_blob(ims):
    """Convert a list of images into a network input.

    Assumes images are already prepared (means subtracted, BGR order, ...).
    Each image is copied into the top-left corner of a zero-filled slot
    sized to the largest height and width in the list.

    Returns:
        float32 ndarray with axis order (batch elem, channel, height, width).
    """
    max_shape = np.array([im.shape for im in ims]).max(axis=0)
    num_images = len(ims)
    blob = np.zeros((num_images, max_shape[0], max_shape[1], 3),
                    dtype=np.float32)
    for i, im in enumerate(ims):
        blob[i, 0:im.shape[0], 0:im.shape[1], :] = im
    # Move channels (axis 3) to axis 1
    # Axis order will become: (batch elem, channel, height, width)
    channel_swap = (0, 3, 1, 2)
    blob = blob.transpose(channel_swap)
    return blob


def _compute_im_scale(im_size_min, im_size_max, target_size, max_size):
    """Return the factor that scales the short side to ``target_size``,
    reduced if needed so the long side does not exceed ``max_size``."""
    im_scale = float(target_size) / float(im_size_min)
    # Prevent the biggest axis from being more than max_size
    if np.round(im_scale * im_size_max) > max_size:
        im_scale = float(max_size) / float(im_size_max)
    return im_scale


def prep_im_for_blob(im, pixel_means, target_size, max_size):
    """Mean subtract and scale an image for use in a blob.

    Returns:
        Tuple ``(im, im_scale)``: the resized float32 image and the scale
        factor that was applied to both axes.
    """
    # Always work on a copy (as prep_im_for_blob_cfm does): with copy=False
    # a float32 input would have the means subtracted from the caller's
    # own array.
    im = im.astype(np.float32, copy=True)
    im -= pixel_means
    im_shape = im.shape
    im_size_min = np.min(im_shape[0:2])
    im_size_max = np.max(im_shape[0:2])
    im_scale = _compute_im_scale(im_size_min, im_size_max,
                                 target_size, max_size)
    im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale,
                    interpolation=cv2.INTER_LINEAR)

    return im, im_scale


def prep_im_for_blob_cfm(im, input_scales):
    """Converts an image into a network input.

    Arguments:
        im (ndarray): a color image in BGR order
        input_scales: iterable of target sizes for the short image side
    Returns:
        blob (ndarray): a data blob holding an image pyramid
        im_scale_factors (ndarray): image scales (relative to im) used
            in the image pyramid
    """
    im_orig = im.astype(np.float32, copy=True)
    im_orig -= cfg.PIXEL_MEANS

    im_shape = im_orig.shape
    im_size_min = np.min(im_shape[0:2])
    im_size_max = np.max(im_shape[0:2])

    processed_ims = []
    im_scale_factors = []

    for target_size in input_scales:
        im_scale = _compute_im_scale(im_size_min, im_size_max,
                                     target_size, cfg.TEST.MAX_SIZE)
        im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale,
                        interpolation=cv2.INTER_LINEAR)
        im_scale_factors.append(im_scale)
        processed_ims.append(im)

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)

    return blob, np.array(im_scale_factors)


def pred_rois_for_blob(im_rois, im_scales):
    """Convert rois to network input; supports multi-scale testing.

    With more than one scale, each RoI is assigned the scale at which its
    scaled area is closest to 224 * 224.

    Args:
        im_rois: (R, 4) array of boxes.
        im_scales: 1-D ndarray of image scale factors.

    Returns:
        (R, 5) float array; column 0 is the index of the chosen scale and
        columns 1-4 are the box multiplied by that scale.
    """
    # The builtin float / int are what the removed np.float / np.int
    # aliases referred to.
    im_rois = im_rois.astype(float, copy=False)
    if len(im_scales) > 1:
        widths = im_rois[:, 2] - im_rois[:, 0] + 1
        heights = im_rois[:, 3] - im_rois[:, 1] + 1

        areas = widths * heights
        scaled_areas = areas[:, np.newaxis] * (im_scales[np.newaxis, :] ** 2)
        diff_areas = np.abs(scaled_areas - 224 * 224)
        levels = diff_areas.argmin(axis=1)[:, np.newaxis]
    else:
        levels = np.zeros((im_rois.shape[0], 1), dtype=int)
    im_rois = im_rois * im_scales[levels]
    rois_blob = np.hstack((levels.astype(float), im_rois))
    return rois_blob
class Timer(object):
    """A simple wall-clock timer that accumulates tic/toc intervals."""

    def __init__(self):
        self.total_time = 0.    # sum of all measured intervals
        self.calls = 0          # number of completed toc() calls
        self.start_time = 0.    # timestamp recorded by the last tic()
        self.diff = 0.          # length of the most recent interval
        self.average_time = 0.  # total_time / calls

    def tic(self):
        """Record the start of an interval."""
        # using time.time instead of time.clock because time.clock
        # does not normalize for multithreading
        self.start_time = time.time()

    def toc(self, average=True):
        """Close the interval opened by the last tic().

        Returns the running average interval when ``average`` is true,
        otherwise the length of the interval just measured.
        """
        self.diff = time.time() - self.start_time
        self.total_time += self.diff
        self.calls += 1
        self.average_time = self.total_time / self.calls
        return self.average_time if average else self.diff


def unmap(data, count, inds, fill=0):
    """Unmap a subset of items (data) back to the original set of items
    (of size count).

    Args:
        data: array holding the values of the subset; its first axis
            indexes the items.
        count: number of items in the original set.
        inds: positions of the subset's items within the original set.
        fill: value given to every item that is not in the subset.

    Returns:
        float32 ndarray of shape ``(count,) + data.shape[1:]``.
    """
    # For 1-D data the trailing shape is empty, so one code path serves
    # both the vector and the N-D case.
    ret = np.full((count, ) + data.shape[1:], fill, dtype=np.float32)
    ret[inds] = data
    return ret
def vis_seg(img_names, cls_names, output_dir, gt_dir):
    """Plot segmentation results to a specific directory.

    For every image this writes an instance map to ``SegInst``, a class
    map to ``SegCls`` and a blend of the class map with the original
    image to ``SegRes``; the three directories are created under
    ``output_dir`` when missing.

    Args:
        img_names: list of image names (without extension).
        cls_names: list of class names, forwarded to ``_prepare_dict``.
        output_dir: existing directory that holds the result pickles and
            receives the rendered images.
        gt_dir: directory whose ``img`` sub-directory holds the source
            ``.jpg`` images.

    Raises:
        AssertionError: if ``output_dir`` does not exist.
        IOError: if a source image cannot be read.
    """
    assert os.path.exists(output_dir)
    inst_dir = os.path.join(output_dir, 'SegInst')
    cls_dir = os.path.join(output_dir, 'SegCls')
    res_dir = os.path.join(output_dir, 'SegRes')
    for directory in (inst_dir, cls_dir, res_dir):
        if not os.path.isdir(directory):
            os.mkdir(directory)

    # a list of dictionary
    res_list = _prepare_dict(img_names, cls_names, output_dir)
    # The colour table does not depend on the image; build it once.
    color_map = _get_voc_color_map()
    for img_ind, image_name in enumerate(img_names):
        target_inst_file = os.path.join(inst_dir, image_name + '.jpg')
        target_cls_file = os.path.join(cls_dir, image_name + '.jpg')
        print(image_name)
        gt_image = gt_dir + '/img/' + image_name + '.jpg'
        img_data = cv2.imread(gt_image)
        if img_data is None:
            # cv2.imread signals failure by returning None, not by raising.
            raise IOError('Cannot read image: ' + gt_image)
        img_height, img_width = img_data.shape[:2]
        pred_dict = res_list[img_ind]
        inst_img, cls_img = _convert_pred_to_image(img_width, img_height,
                                                   pred_dict)
        # Look up every pixel's colour in one indexing operation.  Rows of
        # color_map are (r, g, b); reverse them to (b, g, r) for cv2.
        inst_out_img = np.ascontiguousarray(color_map[inst_img][:, :, ::-1])
        cls_out_img = np.ascontiguousarray(color_map[cls_img][:, :, ::-1])

        cv2.imwrite(target_inst_file, inst_out_img)
        cv2.imwrite(target_cls_file, cls_out_img)
        background = Image.open(gt_image)
        mask = Image.open(target_cls_file)
        background = background.convert('RGBA')
        mask = mask.convert('RGBA')
        superimpose_image = Image.blend(background, mask, 0.8)
        name = os.path.join(res_dir, image_name + '.png')
        superimpose_image.save(name, 'PNG')


def _prepare_dict(img_names, cls_names, cache_dir, vis_thresh=0.5):
    """Collect, per image, the detections scoring at least ``vis_thresh``.

    Reads ``res_boxes.pkl`` and ``res_masks.pkl`` from ``cache_dir``; both
    are indexed as ``[class index][image index]``.

    Returns:
        list with one dictionary per image holding the image name, the
        class indices, the box list and the mask list.
    """
    res_list = []
    # NOTE(review): unpickling executes arbitrary code; these files must
    # only ever come from this project's own test run.
    det_file = os.path.join(cache_dir, 'res_boxes.pkl')
    with open(det_file, 'rb') as f:
        det_pkl = cPickle.load(f)
    seg_file = os.path.join(cache_dir, 'res_masks.pkl')
    with open(seg_file, 'rb') as f:
        seg_pkl = cPickle.load(f)

    for img_ind, image_name in enumerate(img_names):
        box_for_img = []
        mask_for_img = []
        cls_for_img = []
        for cls_ind, cls_name in enumerate(cls_names):
            if cls_name == '__background__' or len(det_pkl[cls_ind][img_ind]) == 0:
                continue
            det_for_img = det_pkl[cls_ind][img_ind]
            seg_for_img = seg_pkl[cls_ind][img_ind]
            # The last column of a detection row is compared to the threshold.
            keep_inds = np.where(det_for_img[:, -1] >= vis_thresh)[0]
            for keep in keep_inds:
                box_for_img.append(det_for_img[keep])
                # TODO: remove this annoying 0
                mask_for_img.append(seg_for_img[keep][0])
                cls_for_img.append(cls_ind)
        res_dict = {'image_name': image_name,
                    'cls_name': cls_for_img,
                    'boxes': box_for_img,
                    'masks': mask_for_img}
        res_list.append(res_dict)

    return res_list


def _convert_pred_to_image(img_width, img_height, pred_dict):
    """Rasterize the predictions of one image.

    Returns:
        Tuple ``(inst_img, cls_img)`` of int arrays shaped
        (img_height, img_width).  ``inst_img`` holds ``i + 1`` inside the
        mask of instance ``i``; ``cls_img`` holds the class index inside
        each mask and 150 on the box outlines.  Later instances overwrite
        earlier ones where their masks overlap.
    """
    num_inst = len(pred_dict['boxes'])
    inst_img = np.zeros((img_height, img_width))
    cls_img = np.zeros((img_height, img_width))
    for i in range(num_inst):
        box = np.round(pred_dict['boxes'][i]).astype(int)
        mask = pred_dict['masks'][i]
        cls_num = pred_dict['cls_name'][i]
        # clip box into image space
        x1 = int(min(max(box[0], 0), img_width - 1))
        y1 = int(min(max(box[1], 0), img_height - 1))
        x2 = int(min(max(box[2], 0), img_width - 1))
        y2 = int(min(max(box[3], 0), img_height - 1))
        mask = cv2.resize(mask.astype(np.float32),
                          (x2 - x1 + 1, y2 - y1 + 1))
        mask = mask >= cfg.BINARIZE_THRESH

        rows = slice(y1, y2 + 1)
        cols = slice(x1, x2 + 1)
        outside = np.logical_not(mask)
        # Inside the mask take the new value, elsewhere keep what is there.
        inst_img[rows, cols] = ((i + 1) * mask.astype(np.float32) +
                                np.multiply(outside, inst_img[rows, cols]))
        cls_img[rows, cols] = (cls_num * mask.astype(np.float32) +
                               np.multiply(outside, cls_img[rows, cols]))
        # Plot bounding boxes simultaneously.  Clamp the slice start at 0:
        # a start of -1 would wrap around and select nothing, silently
        # dropping the outline of any box that touches row/column 0.
        cls_img[rows, max(x1 - 1, 0):x1 + 1] = 150
        cls_img[rows, max(x2 - 1, 0):x2 + 1] = 150
        cls_img[max(y1 - 1, 0):y1 + 1, cols] = 150
        cls_img[max(y2 - 1, 0):y2 + 1, cols] = 150

    inst_img = inst_img.astype(int)
    cls_img = cls_img.astype(int)
    return inst_img, cls_img


def _get_voc_color_map(n=256):
    """Build an (n, 3) float array whose row i is the (r, g, b) colour
    of label i.

    The bits of the label are consumed three at a time (one each for r,
    g and b) and placed from the most significant colour bit downwards.
    """
    color_map = np.zeros((n, 3))
    for i in range(n):
        r = g = b = 0
        cid = i
        for j in range(8):
            shift = 7 - j
            r |= (cid & 1) << shift
            g |= ((cid >> 1) & 1) << shift
            b |= ((cid >> 2) & 1) << shift
            cid >>= 3

        color_map[i][0] = r
        color_map[i][1] = g
        color_map[i][2] = b
    return color_map
@tf.RegisterShape("PSROIPool")
def _psroi_pool_shape(op):
    """Shape function for the PSROIPool op.

    Both outputs have shape
    ``[num_rois, output_dim, group_size, group_size]``, where ``num_rois`` is
    the first dimension of the second input and the other values are read
    from the op attributes.
    """
    num_rois = op.inputs[1].get_shape().as_list()[0]

    output_dim = op.get_attr('output_dim')
    group_size = op.get_attr('group_size')

    output_shape = tf.TensorShape([num_rois, output_dim, group_size, group_size])
    return [output_shape, output_shape]


@ops.RegisterGradient("PSROIPool")
def _psroi_pool_grad(op, grad, _):
    """Gradient for the PSROIPool op.

    Args:
      op: The PSROIPool `Operation` being differentiated; its inputs are the
        feature data and the rois, its second output is the channel mapping.
      grad: Gradient with respect to the first output of the op.
      _: Gradient with respect to the second output (ignored).
    Returns:
      ``[data_grad, None]`` -- one entry per op input; only the feature data
      receives a gradient.
    """
    data = op.inputs[0]
    rois = op.inputs[1]
    mapping_channel = op.outputs[1]
    spatial_scale = op.get_attr('spatial_scale')

    data_grad = psroi_pooling_op.psroi_pool_grad(data, rois, mapping_channel, grad, spatial_scale)

    return [data_grad, None]
import tensorflow as tf
import numpy as np
from tools import anchor_target_layer


def weight_variable(shape):
    """Return a variable of *shape* initialised from a truncated normal (stddev 0.1)."""
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)


def bias_variable(shape):
    """Return a variable of *shape* initialised to the constant 0.1."""
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)


def conv2d(x, W):
    """Stride-1, SAME-padded 2-D convolution of *x* with filter *W*."""
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')


def max_pool_2x2(x):
    """2x2 max pooling with stride 2 and SAME padding."""
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')


def process_anno(x):
    """Parse a whitespace-separated annotation string.

    The integers are reshaped to rows of five values; column 0 is returned as
    the labels and columns 1-4 as the boxes.
    """
    x = np.fromstring(x, dtype=int, sep=" ")
    x = np.reshape(x, (-1, 5))
    l = x[:, 0]
    gt = x[:, 1:]
    return l, gt


# Input pipeline: every CSV row is "<image path>,<annotation path>".
num_labels = 25
batch_size = 1
reader = tf.TextLineReader()
filename_queue = tf.train.string_input_producer(["train_rcnn_files.csv"], num_epochs=None, shuffle=False)
key, value = reader.read(filename_queue)
image_path, anno_path = tf.decode_csv(value, record_defaults=[[""], [""]], field_delim=",")
my_img = tf.image.decode_png(tf.read_file(image_path), channels=3)
my_img = tf.cast(my_img, tf.float32) / 255
my_img = tf.image.resize_images(my_img, 224, 224)
anno = tf.read_file(anno_path)
labels, gt_box = tf.py_func(process_anno, [anno], [tf.int64, tf.int64])
labels = tf.reshape(tf.concat(1, labels), [-1, 1])
gt_box = tf.reshape(tf.concat(1, gt_box), [-1, 4])

init = tf.initialize_all_variables()
with tf.Session() as sess:
    sess.run(init)
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    try:
        for i in range(10000000):
            print(labels.eval())
    finally:
        # Stop the queue-runner threads even when the loop is interrupted.
        coord.request_stop()
        coord.join(threads)
def bbox_transform(ex_rois, gt_rois):
    """Compute regression targets that map *ex_rois* onto *gt_rois*.

    Boxes are rows of ``(x1, y1, x2, y2)`` with inclusive pixel coordinates.
    Returns an ``(N, 4)`` array of ``(dx, dy, dw, dh)``: centre offsets
    normalised by the source size, and log size ratios.
    """
    def _size_and_centre(rois):
        w = rois[:, 2] - rois[:, 0] + 1.0
        h = rois[:, 3] - rois[:, 1] + 1.0
        return w, h, rois[:, 0] + 0.5 * w, rois[:, 1] + 0.5 * h

    src_w, src_h, src_cx, src_cy = _size_and_centre(ex_rois)
    dst_w, dst_h, dst_cx, dst_cy = _size_and_centre(gt_rois)

    return np.vstack((
        (dst_cx - src_cx) / src_w,
        (dst_cy - src_cy) / src_h,
        np.log(dst_w / src_w),
        np.log(dst_h / src_h),
    )).transpose()


def bbox_transform_inv(boxes, deltas):
    """Apply regression *deltas* to *boxes* and return the predicted boxes.

    *deltas* holds groups of four columns ``(dx, dy, dw, dh)``; the result
    has the same shape and dtype as *deltas*.  With no boxes an empty
    ``(0, deltas.shape[1])`` array is returned.
    """
    if boxes.shape[0] == 0:
        return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype)

    boxes = boxes.astype(deltas.dtype, copy=False)

    # Column vectors so they broadcast across every group of four deltas.
    box_w = (boxes[:, 2] - boxes[:, 0] + 1.0)[:, np.newaxis]
    box_h = (boxes[:, 3] - boxes[:, 1] + 1.0)[:, np.newaxis]
    box_cx = boxes[:, 0][:, np.newaxis] + 0.5 * box_w
    box_cy = boxes[:, 1][:, np.newaxis] + 0.5 * box_h

    new_cx = deltas[:, 0::4] * box_w + box_cx
    new_cy = deltas[:, 1::4] * box_h + box_cy
    new_w = np.exp(deltas[:, 2::4]) * box_w
    new_h = np.exp(deltas[:, 3::4]) * box_h

    out = np.zeros(deltas.shape, dtype=deltas.dtype)
    out[:, 0::4] = new_cx - 0.5 * new_w  # x1
    out[:, 1::4] = new_cy - 0.5 * new_h  # y1
    out[:, 2::4] = new_cx + 0.5 * new_w  # x2
    out[:, 3::4] = new_cy + 0.5 * new_h  # y2
    return out


def clip_boxes(boxes, im_shape):
    """Clip *boxes* in place to the image bounds and return them.

    ``im_shape`` is ``(height, width, ...)``; x coordinates end up in
    ``[0, width - 1]`` and y coordinates in ``[0, height - 1]``.
    """
    x_max = im_shape[1] - 1
    y_max = im_shape[0] - 1
    # Columns repeat as x1, y1, x2, y2 for every group of four.
    for offset, upper in enumerate((x_max, y_max, x_max, y_max)):
        boxes[:, offset::4] = np.maximum(np.minimum(boxes[:, offset::4], upper), 0)
    return boxes
cdef inline np.float32_t max(np.float32_t a, np.float32_t b):
    return a if a >= b else b

cdef inline np.float32_t min(np.float32_t a, np.float32_t b):
    return a if a <= b else b

def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, float thresh):
    """Greedy non-maximum suppression on the CPU.

    dets:   float32 array whose rows are (x1, y1, x2, y2, score).
    thresh: a lower-scoring box is suppressed when its IoU with an already
            kept box is >= thresh.

    Returns the list of kept row indices, highest score first.
    """
    cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0]
    cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1]
    cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2]
    cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3]
    cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4]

    cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    # argsort returns intp, which is not the same C type as int_ everywhere
    cdef np.ndarray[np.intp_t, ndim=1] order = scores.argsort()[::-1]

    cdef int ndets = dets.shape[0]
    # one flag per box; an explicit dtype avoids the removed np.int alias
    cdef np.ndarray[np.uint8_t, ndim=1] suppressed = \
            np.zeros(ndets, dtype=np.uint8)

    # nominal indices
    cdef int _i, _j
    # sorted indices
    cdef int i, j
    # temp variables for box i's (the box currently under consideration)
    cdef np.float32_t ix1, iy1, ix2, iy2, iarea
    # variables for computing overlap with box j (lower scoring box)
    cdef np.float32_t xx1, yy1, xx2, yy2
    cdef np.float32_t w, h
    cdef np.float32_t inter, ovr

    keep = []
    for _i in range(ndets):
        i = order[_i]
        if suppressed[i] == 1:
            continue
        keep.append(i)
        ix1 = x1[i]
        iy1 = y1[i]
        ix2 = x2[i]
        iy2 = y2[i]
        iarea = areas[i]
        for _j in range(_i + 1, ndets):
            j = order[_j]
            if suppressed[j] == 1:
                continue
            xx1 = max(ix1, x1[j])
            yy1 = max(iy1, y1[j])
            xx2 = min(ix2, x2[j])
            yy2 = min(iy2, y2[j])
            w = max(0.0, xx2 - xx1 + 1)
            h = max(0.0, yy2 - yy1 + 1)
            inter = w * h
            ovr = inter / (iarea + areas[j] - inter)
            if ovr >= thresh:
                suppressed[j] = 1

    return keep
assert sizeof(int) == sizeof(np.int32_t)

cdef extern from "gpu_mv.hpp":
    void _mv(np.float32_t* all_boxes, np.float32_t* all_masks, np.int32_t all_boxes_num,
             np.int32_t* candidate_inds, np.int32_t* candidate_start,
             np.float32_t* candidate_weights, np.int32_t candidate_num,
             np.int32_t image_height, np.int32_t image_width, np.int32_t box_dim,
             np.int32_t mask_size, np.int32_t result_num,
             np.float32_t* result_mask, np.int32_t* result_box, np.int32_t device_id)

cdef extern from "gpu_nms.hpp":
    void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int)

# boxes: n * 4
# masks: n * 1 * 21 * 21
# scores: n * 21
def mv(np.ndarray[np.float32_t, ndim=2] all_boxes,
       np.ndarray[np.float32_t, ndim=4] all_masks,
       np.ndarray[np.int32_t, ndim=1] candidate_inds,
       np.ndarray[np.int32_t, ndim=1] candidate_start,
       np.ndarray[np.float32_t, ndim=1] candidate_weights,
       np.int32_t image_height,
       np.int32_t image_width,
       np.int32_t device_id = 0):
    """Run the GPU `_mv` routine from gpu_mv.hpp.

    Allocates one result mask and one result box per entry of
    `candidate_start` and hands raw pointers to the inputs and outputs to
    `_mv`.  All input arrays must be non-empty, because the address of
    their first element is taken.

    Returns (result_mask, result_box).
    """
    cdef int all_box_num = all_boxes.shape[0]
    cdef int boxes_dim = all_boxes.shape[1]
    cdef int mask_size = all_masks.shape[3]
    cdef int candidate_num = candidate_inds.shape[0]
    cdef int result_num = candidate_start.shape[0]
    cdef np.ndarray[np.float32_t, ndim=4] \
        result_mask = np.zeros((result_num, 1, all_masks.shape[2], all_masks.shape[3]), dtype=np.float32)
    cdef np.ndarray[np.int32_t, ndim=2] \
        result_box = np.zeros((result_num, boxes_dim), dtype=np.int32)
    _mv(&all_boxes[0, 0], &all_masks[0, 0, 0, 0], all_box_num,
        &candidate_inds[0], &candidate_start[0], &candidate_weights[0], candidate_num,
        image_height, image_width, boxes_dim, mask_size, result_num,
        &result_mask[0, 0, 0, 0], &result_box[0, 0], device_id)
    return result_mask, result_box

def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, float thresh,
            np.int32_t device_id=0):
    """Non-maximum suppression through the GPU `_nms` routine.

    dets:   float32 array whose rows are (x1, y1, x2, y2, score).
    thresh: overlap threshold forwarded to `_nms`.

    The rows are sorted by descending score before the call; the returned
    list holds indices into the original `dets`.  An empty `dets` yields
    an empty list without touching the GPU.
    """
    cdef int boxes_num = dets.shape[0]
    cdef int boxes_dim = dets.shape[1]
    cdef int num_out
    cdef np.ndarray[np.int32_t, ndim=1] keep
    cdef np.ndarray[np.float32_t, ndim=1] scores
    # argsort returns intp, which is not the same C type as int_ everywhere
    cdef np.ndarray[np.intp_t, ndim=1] order
    cdef np.ndarray[np.float32_t, ndim=2] sorted_dets

    if boxes_num == 0:
        # &keep[0] / &sorted_dets[0, 0] would index past an empty buffer
        return []

    keep = np.zeros(boxes_num, dtype=np.int32)
    scores = dets[:, 4]
    order = scores.argsort()[::-1]
    sorted_dets = dets[order, :]
    _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id)
    keep = keep[:num_out]
    return list(order[keep])
// --------------------------------------------------------
// Multitask Network Cascade
// Modified from MATLAB Faster R-CNN (https://github.com/shaoqingren/faster_rcnn)
// Copyright (c) 2016, Haozhi Qi
// Licensed under The MIT License [see LICENSE for details]
// --------------------------------------------------------

#include "gpu_nms.hpp"
#include <vector>
#include <iostream>

#define CUDA_CHECK(condition) \
  /* Code block avoids redefinition of cudaError_t error */ \
  do { \
    cudaError_t error = condition; \
    if (error != cudaSuccess) { \
      std::cout << cudaGetErrorString(error) << std::endl; \
    } \
  } while (0)

#define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
// One bit of an unsigned long long per box in a block.
int const threadsPerBlock = sizeof(unsigned long long) * 8;

// Intersection-over-union of two boxes stored as (x1, y1, x2, y2, ...),
// using inclusive pixel coordinates (hence the +1).
__device__ inline float devIoU(float const * const a, float const * const b) {
  float left = max(a[0], b[0]), right = min(a[2], b[2]);
  float top = max(a[1], b[1]), bottom = min(a[3], b[3]);
  float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f);
  float interS = width * height;
  float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
  float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
  return interS / (Sa + Sb - interS);
}

// For every (row block, column block) pair, each thread takes one row box and
// writes a bit mask of the column-block boxes whose IoU with it exceeds the
// threshold.  Boxes are read with a fixed stride of 5 floats.
__global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
                           const float *dev_boxes, unsigned long long *dev_mask) {
  const int row_start = blockIdx.y;
  const int col_start = blockIdx.x;

  // if (row_start > col_start) return;

  const int row_size =
        min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
  const int col_size =
        min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);

  // Stage this column block's boxes in shared memory.
  __shared__ float block_boxes[threadsPerBlock * 5];
  if (threadIdx.x < col_size) {
    const float *src =
        dev_boxes + (threadsPerBlock * col_start + threadIdx.x) * 5;
    for (int k = 0; k < 5; k++) {
      block_boxes[threadIdx.x * 5 + k] = src[k];
    }
  }
  __syncthreads();

  if (threadIdx.x < row_size) {
    const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
    const float *cur_box = dev_boxes + cur_box_idx * 5;
    unsigned long long t = 0;
    // On the diagonal only compare against later boxes of the same block.
    int start = 0;
    if (row_start == col_start) {
      start = threadIdx.x + 1;
    }
    for (int i = start; i < col_size; i++) {
      if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
        t |= 1ULL << i;
      }
    }
    const int col_blocks = DIVUP(n_boxes, threadsPerBlock);
    dev_mask[cur_box_idx * col_blocks + col_start] = t;
  }
}

// Make device_id the current CUDA device if it is not already.
void _set_device(int device_id) {
  int current_device;
  CUDA_CHECK(cudaGetDevice(&current_device));
  if (current_device == device_id) {
    return;
  }
  // The call to cudaSetDevice must come before any calls to Get, which
  // may perform initialization using the GPU.
  CUDA_CHECK(cudaSetDevice(device_id));
}

// Host entry point.  boxes_host holds boxes_num rows of boxes_dim floats,
// already sorted by descending score.  Indices of the kept rows are written
// to keep_out and their count to *num_out.
// NOTE(review): nms_kernel indexes boxes with a stride of 5, so boxes_dim is
// presumably always 5 -- confirm against callers.
void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
          int boxes_dim, float nms_overlap_thresh, int device_id) {
  if (boxes_num <= 0) {
    // Nothing to do; avoids a zero-sized launch and &mask_host[0] on an
    // empty vector.
    *num_out = 0;
    return;
  }

  _set_device(device_id);

  float* boxes_dev = NULL;
  unsigned long long* mask_dev = NULL;

  const int col_blocks = DIVUP(boxes_num, threadsPerBlock);

  CUDA_CHECK(cudaMalloc(&boxes_dev,
                        boxes_num * boxes_dim * sizeof(float)));
  CUDA_CHECK(cudaMemcpy(boxes_dev,
                        boxes_host,
                        boxes_num * boxes_dim * sizeof(float),
                        cudaMemcpyHostToDevice));

  CUDA_CHECK(cudaMalloc(&mask_dev,
                        boxes_num * col_blocks * sizeof(unsigned long long)));

  dim3 blocks(col_blocks, col_blocks);
  dim3 threads(threadsPerBlock);
  nms_kernel<<<blocks, threads>>>(boxes_num,
                                  nms_overlap_thresh,
                                  boxes_dev,
                                  mask_dev);
  // Surface launch-configuration errors instead of silently reading garbage.
  CUDA_CHECK(cudaGetLastError());

  std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
  CUDA_CHECK(cudaMemcpy(&mask_host[0],
                        mask_dev,
                        sizeof(unsigned long long) * boxes_num * col_blocks,
                        cudaMemcpyDeviceToHost));

  // Bit set of boxes already suppressed; the vector starts zero-filled.
  std::vector<unsigned long long> remv(col_blocks);

  int num_to_keep = 0;
  for (int i = 0; i < boxes_num; i++) {
    int nblock = i / threadsPerBlock;
    int inblock = i % threadsPerBlock;

    if (!(remv[nblock] & (1ULL << inblock))) {
      keep_out[num_to_keep++] = i;
      unsigned long long *p = &mask_host[0] + i * col_blocks;
      for (int j = nblock; j < col_blocks; j++) {
        remv[j] |= p[j];
      }
    }
  }
  *num_out = num_to_keep;

  CUDA_CHECK(cudaFree(boxes_dev));
  CUDA_CHECK(cudaFree(mask_dev));
}
-------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Multitask Network Cascade 3 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 4 | # Copyright (c) 2016, Haozhi Qi 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # -------------------------------------------------------- 7 | 8 | from mnc_config import cfg 9 | from gpu_nms import gpu_nms 10 | from cpu_nms import cpu_nms 11 | 12 | 13 | def nms(dets, thresh): 14 | """Dispatch to either CPU or GPU NMS implementations.""" 15 | 16 | if dets.shape[0] == 0: 17 | return [] 18 | if cfg.USE_GPU_NMS: 19 | return gpu_nms(dets, thresh, device_id=cfg.GPU_ID) 20 | else: 21 | return cpu_nms(dets, thresh) 22 | 23 | 24 | def apply_nms(all_boxes, thresh): 25 | """Apply non-maximum suppression to all predicted boxes output by the 26 | test_net method. 27 | """ 28 | num_classes = len(all_boxes) 29 | num_images = len(all_boxes[0]) 30 | nms_boxes = [[[] for _ in xrange(num_images)] 31 | for _ in xrange(num_classes)] 32 | for cls_ind in xrange(num_classes): 33 | for im_ind in xrange(num_images): 34 | dets = all_boxes[cls_ind][im_ind] 35 | if dets == []: 36 | continue 37 | keep = nms(dets, thresh) 38 | if len(keep) == 0: 39 | continue 40 | nms_boxes[cls_ind][im_ind] = dets[keep, :].copy() 41 | return nms_boxes 42 | 43 | 44 | def apply_nms_mask(all_boxes, all_masks, thresh): 45 | num_classes = len(all_boxes) 46 | num_images = len(all_boxes[0]) 47 | nms_boxes = [[[] for _ in xrange(num_images)] 48 | for _ in xrange(num_classes)] 49 | nms_masks = [[[] for _ in xrange(num_images)] 50 | for _ in xrange(num_classes)] 51 | for cls_ind in xrange(num_classes): 52 | for im_ind in xrange(num_images): 53 | dets = all_boxes[cls_ind][im_ind] 54 | masks = all_masks[cls_ind][im_ind] 55 | if dets == []: 56 | continue 57 | keep = nms(dets, thresh) 58 | if len(keep) == 0: 59 | continue 60 | 
def apply_nms_mask_single(box, mask, thresh):
    """Run NMS on one set of boxes and filter the masks alongside them.

    Args:
        box: empty list, or a 2-D detections array accepted by ``nms``.
        mask: masks aligned row-for-row with ``box``.
        thresh: overlap threshold forwarded to ``nms``.

    Returns:
        ``(box, mask)`` unchanged when there is nothing to filter or ``nms``
        keeps nothing; otherwise copies of the kept rows of both inputs.
    """
    # len() works for both lists and ndarrays; ``box == []`` is an
    # element-wise comparison on ndarrays, not an emptiness check.
    if len(box) == 0:
        return box, mask
    keep = nms(box, thresh)
    if len(keep) == 0:
        return box, mask
    return box[keep, :].copy(), mask[keep, :].copy()
def nms(dets, thresh, force_cpu=False):
    """Run non-maximum suppression on the CPU or the GPU.

    Returns an empty list when ``dets`` has no rows. Otherwise the GPU
    implementation is used when ``cfg.USE_GPU_NMS`` is set and ``force_cpu``
    is false; in every other case the CPU implementation is used.
    """
    if dets.shape[0] == 0:
        return []

    use_gpu = cfg.USE_GPU_NMS and not force_cpu
    if not use_gpu:
        return cpu_nms(dets, thresh)
    return gpu_nms(dets, thresh, device_id=cfg.GPU_ID)
class SolverWrapper(object):
    """A simple wrapper around Caffe's solver.

    This wrapper gives us control over the snapshotting process, which we
    use to unnormalize the learned bounding-box regression weights.
    """

    def __init__(self, solver_prototxt, roidb, output_dir,
                 pretrained_model=None):
        """Initialize the SolverWrapper.

        Args:
            solver_prototxt: path to the solver definition; it is given to
                ``caffe.SGDSolver`` and also parsed into ``self.solver_param``.
            roidb: training roidb; handed to the first layer of the net via
                ``set_roidb``.
            output_dir: directory that snapshots are written to.
            pretrained_model: optional weights file copied into the net.
        """
        self.output_dir = output_dir

        if (cfg.TRAIN.HAS_RPN and cfg.TRAIN.BBOX_REG and
                cfg.TRAIN.BBOX_NORMALIZE_TARGETS):
            # RPN can only use precomputed normalization because there are no
            # fixed statistics to compute a priori
            assert cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED

        if cfg.TRAIN.BBOX_REG:
            print('Computing bounding-box regression targets...')
            self.bbox_means, self.bbox_stds = \
                rdl_roidb.add_bbox_regression_targets(roidb)
            print('done')

        self.solver = caffe.SGDSolver(solver_prototxt)
        if pretrained_model is not None:
            # Format first, then print: valid as a Python 2 print statement
            # and as a Python 3 print call.
            print('Loading pretrained model '
                  'weights from {:s}'.format(pretrained_model))
            self.solver.net.copy_from(pretrained_model)

        self.solver_param = caffe_pb2.SolverParameter()
        with open(solver_prototxt, 'rt') as f:
            pb2.text_format.Merge(f.read(), self.solver_param)

        self.solver.net.layers[0].set_roidb(roidb)

    def snapshot(self):
        """Take a snapshot of the network after unnormalizing the learned
        bounding-box regression weights. This enables easy use at test-time.

        Returns:
            The path of the written ``.caffemodel`` file.
        """
        net = self.solver.net

        scale_bbox_params = (cfg.TRAIN.BBOX_REG and
                             cfg.TRAIN.BBOX_NORMALIZE_TARGETS and
                             'bbox_pred' in net.params)

        if scale_bbox_params:
            # save original values
            orig_0 = net.params['bbox_pred'][0].data.copy()
            orig_1 = net.params['bbox_pred'][1].data.copy()

            # scale and shift with bbox reg unnormalization; then save snapshot
            net.params['bbox_pred'][0].data[...] = \
                (net.params['bbox_pred'][0].data *
                 self.bbox_stds[:, np.newaxis])
            net.params['bbox_pred'][1].data[...] = \
                (net.params['bbox_pred'][1].data *
                 self.bbox_stds + self.bbox_means)

        try:
            infix = ('_' + cfg.TRAIN.SNAPSHOT_INFIX
                     if cfg.TRAIN.SNAPSHOT_INFIX != '' else '')
            filename = (self.solver_param.snapshot_prefix + infix +
                        '_iter_{:d}'.format(self.solver.iter) + '.caffemodel')
            filename = os.path.join(self.output_dir, filename)

            net.save(str(filename))
            print('Wrote snapshot to: {:s}'.format(filename))
        finally:
            if scale_bbox_params:
                # restore net to original state, even if saving failed, so
                # training never continues on unnormalized weights
                net.params['bbox_pred'][0].data[...] = orig_0
                net.params['bbox_pred'][1].data[...] = orig_1
        return filename

    def train_model(self, max_iters):
        """Network training loop.

        Steps the solver one iteration at a time until ``max_iters`` is
        reached, snapshotting every ``cfg.TRAIN.SNAPSHOT_ITERS`` iterations
        and once more at the end if the last iteration was not snapshotted.

        Returns:
            List of snapshot file paths, in the order they were written.
        """
        last_snapshot_iter = -1
        timer = Timer()
        model_paths = []
        # A display interval of 0 would otherwise cause a modulo-by-zero.
        display = self.solver_param.display
        while self.solver.iter < max_iters:
            # Make one SGD update
            timer.tic()
            self.solver.step(1)
            timer.toc()
            if display and self.solver.iter % (10 * display) == 0:
                print('speed: {:.3f}s / iter'.format(timer.average_time))

            if self.solver.iter % cfg.TRAIN.SNAPSHOT_ITERS == 0:
                last_snapshot_iter = self.solver.iter
                model_paths.append(self.snapshot())

        if last_snapshot_iter != self.solver.iter:
            model_paths.append(self.snapshot())
        return model_paths
def filter_roidb(roidb):
    """Remove roidb entries that have no usable RoIs.

    Args:
        roidb: sequence of entries; each entry's ``'max_overlaps'`` array is
            compared against the thresholds in ``cfg.TRAIN``.

    Returns:
        A new list with only the entries that have at least one foreground
        or one background RoI. A one-line summary is printed.
    """

    def is_valid(entry):
        # Valid images have:
        #   (1) At least one foreground RoI OR
        #   (2) At least one background RoI
        overlaps = entry['max_overlaps']
        # find boxes with sufficient overlap
        fg_inds = np.where(overlaps >= cfg.TRAIN.FG_THRESH)[0]
        # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
        bg_inds = np.where((overlaps < cfg.TRAIN.BG_THRESH_HI) &
                           (overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
        # image is only valid if such boxes exist
        return len(fg_inds) > 0 or len(bg_inds) > 0

    num = len(roidb)
    filtered_roidb = [entry for entry in roidb if is_valid(entry)]
    num_after = len(filtered_roidb)
    # Single pre-formatted argument: valid as a Python 2 print statement and
    # as a Python 3 print call.
    print('Filtered {} roidb entries: {} -> {}'.format(num - num_after,
                                                       num, num_after))
    return filtered_roidb
def generate_anchors(base_size=16, ratios=[0.5, 1, 2],
                     scales=2**np.arange(3, 6)):
    """
    Generate anchor (reference) windows by enumerating aspect ratios X
    scales wrt a reference (0, 0, 15, 15) window.

    The default arguments are only read, never modified. The result has one
    row ``[x1, y1, x2, y2]`` per (ratio, scale) pair, ratio-major. With the
    defaults the first row is ``[-84, -40, 99, 55]``: the coordinates are
    0-based, i.e. one less than the 1-based MATLAB reference values.
    """

    base_anchor = np.array([1, 1, base_size, base_size]) - 1
    ratio_anchors = _ratio_enum(base_anchor, ratios)
    # Iterate over rows directly; avoids the Python-2-only xrange.
    anchors = np.vstack([_scale_enum(ratio_anchor, scales)
                         for ratio_anchor in ratio_anchors])
    return anchors

def _whctrs(anchor):
    """
    Return width, height, x center, and y center for an anchor (window).
    """

    w = anchor[2] - anchor[0] + 1
    h = anchor[3] - anchor[1] + 1
    x_ctr = anchor[0] + 0.5 * (w - 1)
    y_ctr = anchor[1] + 0.5 * (h - 1)
    return w, h, x_ctr, y_ctr

def _mkanchors(ws, hs, x_ctr, y_ctr):
    """
    Given a vector of widths (ws) and heights (hs) around a center
    (x_ctr, y_ctr), output a set of anchors (windows).
    """

    # Column vectors so that hstack yields one [x1, y1, x2, y2] row per size.
    ws = ws[:, np.newaxis]
    hs = hs[:, np.newaxis]
    anchors = np.hstack((x_ctr - 0.5 * (ws - 1),
                         y_ctr - 0.5 * (hs - 1),
                         x_ctr + 0.5 * (ws - 1),
                         y_ctr + 0.5 * (hs - 1)))
    return anchors

def _ratio_enum(anchor, ratios):
    """
    Enumerate a set of anchors for each aspect ratio wrt an anchor.
    """

    w, h, x_ctr, y_ctr = _whctrs(anchor)
    size = w * h
    size_ratios = size / ratios
    # Widths are rounded first; heights are derived from the rounded widths.
    ws = np.round(np.sqrt(size_ratios))
    hs = np.round(ws * ratios)
    anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
    return anchors

def _scale_enum(anchor, scales):
    """
    Enumerate a set of anchors for each scale wrt an anchor.
    """

    w, h, x_ctr, y_ctr = _whctrs(anchor)
    ws = w * scales
    hs = h * scales
    anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
    return anchors

if __name__ == '__main__':
    import time
    t = time.time()
    a = generate_anchors()
    # Single-argument print: valid as a Python 2 statement and Python 3 call.
    print(time.time() - t)
    print(a)
    from IPython import embed; embed()
cdef inline np.float32_t max(np.float32_t a, np.float32_t b):
    # float32 maximum, used for intersection corners and clamping to zero.
    return a if a >= b else b

cdef inline np.float32_t min(np.float32_t a, np.float32_t b):
    # float32 minimum, used for intersection corners.
    return a if a <= b else b

def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh):
    """Greedy non-maximum suppression.

    ``dets`` is a 2-D float32 array whose columns 0-3 are read as
    x1, y1, x2, y2 and column 4 as the score. Boxes are visited in order of
    descending score; a visited box that is not yet suppressed is kept and
    suppresses every later box whose overlap ratio with it is >= ``thresh``.

    Returns a list of the kept row indices of ``dets``, highest score first.
    """
    cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0]
    cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1]
    cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2]
    cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3]
    cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4]

    # The +1 treats coordinates as inclusive pixel indices.
    cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1]

    cdef int ndets = dets.shape[0]
    # np.int_ replaces the ``np.int`` alias, which newer NumPy releases no
    # longer provide.
    cdef np.ndarray[np.int_t, ndim=1] suppressed = \
            np.zeros((ndets), dtype=np.int_)

    # nominal indices
    cdef int _i, _j
    # sorted indices
    cdef int i, j
    # temp variables for box i's (the box currently under consideration)
    cdef np.float32_t ix1, iy1, ix2, iy2, iarea
    # variables for computing overlap with box j (lower scoring box)
    cdef np.float32_t xx1, yy1, xx2, yy2
    cdef np.float32_t w, h
    cdef np.float32_t inter, ovr

    keep = []
    for _i in range(ndets):
        i = order[_i]
        if suppressed[i] == 1:
            continue
        keep.append(i)
        ix1 = x1[i]
        iy1 = y1[i]
        ix2 = x2[i]
        iy2 = y2[i]
        iarea = areas[i]
        for _j in range(_i + 1, ndets):
            j = order[_j]
            if suppressed[j] == 1:
                continue
            xx1 = max(ix1, x1[j])
            yy1 = max(iy1, y1[j])
            xx2 = min(ix2, x2[j])
            yy2 = min(iy2, y2[j])
            w = max(0.0, xx2 - xx1 + 1)
            h = max(0.0, yy2 - yy1 + 1)
            inter = w * h
            ovr = inter / (iarea + areas[j] - inter)
            if ovr >= thresh:
                suppressed[j] = 1

    return keep
def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh,
            np.int32_t device_id=0):
    """Run non-maximum suppression through the external C function ``_nms``.

    ``dets`` is a 2-D float32 array; column 4 is read as the score. The rows
    are sorted by descending score before being handed to ``_nms``.

    Returns a list of indices into the original (unsorted) ``dets``.

    NOTE(review): ``&keep[0]`` and ``&sorted_dets[0, 0]`` are taken
    unconditionally, so this presumably must not be called with zero rows --
    confirm that every caller guards the empty case.
    """
    cdef int boxes_num = dets.shape[0]
    cdef int boxes_dim = dets.shape[1]
    # Filled in by _nms with the number of valid entries in ``keep``.
    cdef int num_out
    # Output buffer sized for the worst case where every box is kept.
    cdef np.ndarray[np.int32_t, ndim=1] \
        keep = np.zeros(boxes_num, dtype=np.int32)
    cdef np.ndarray[np.float32_t, ndim=1] \
        scores = dets[:, 4]
    # Row indices of ``dets`` from highest to lowest score.
    cdef np.ndarray[np.int_t, ndim=1] \
        order = scores.argsort()[::-1]
    # Indexing with an index array produces a new array in sorted order.
    cdef np.ndarray[np.float32_t, ndim=2] \
        sorted_dets = dets[order, :]
    _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id)
    keep = keep[:num_out]
    # ``keep`` indexes the sorted array; map back to original row indices.
    return list(order[keep])
// Computes the pairwise suppression bitmask for one (row tile, column tile)
// pair of boxes. blockIdx.y selects the row tile, blockIdx.x the column tile;
// each tile covers up to threadsPerBlock boxes.
//
// For row box r and column tile c, bit i of dev_mask[r * col_blocks + c] is
// set when the IoU of box r and box (c * threadsPerBlock + i) exceeds
// nms_overlap_thresh. Boxes are read with a fixed stride of 5 floats.
__global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
                           const float *dev_boxes, unsigned long long *dev_mask) {
  const int row_start = blockIdx.y;
  const int col_start = blockIdx.x;

  // if (row_start > col_start) return;

  // The last tile in each direction may be only partially filled.
  const int row_size =
        min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
  const int col_size =
        min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);

  // Stage the column tile's boxes in shared memory; each thread copies the
  // five floats of one box.
  __shared__ float block_boxes[threadsPerBlock * 5];
  if (threadIdx.x < col_size) {
    block_boxes[threadIdx.x * 5 + 0] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
    block_boxes[threadIdx.x * 5 + 1] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
    block_boxes[threadIdx.x * 5 + 2] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
    block_boxes[threadIdx.x * 5 + 3] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
    block_boxes[threadIdx.x * 5 + 4] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
  }
  // All copies must be complete before any thread reads block_boxes.
  __syncthreads();

  if (threadIdx.x < row_size) {
    // Each thread owns one row box and compares it with the staged tile.
    const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
    const float *cur_box = dev_boxes + cur_box_idx * 5;
    int i = 0;
    unsigned long long t = 0;
    int start = 0;
    // On a diagonal tile, compare only against boxes after this one, so a
    // box never marks itself or an earlier box in the same tile.
    if (row_start == col_start) {
      start = threadIdx.x + 1;
    }
    for (i = start; i < col_size; i++) {
      if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
        t |= 1ULL << i;
      }
    }
    const int col_blocks = DIVUP(n_boxes, threadsPerBlock);
    dev_mask[cur_box_idx * col_blocks + col_start] = t;
  }
}
// Host-side driver for GPU non-maximum suppression.
//
//   keep_out           output buffer for the indices of the kept boxes; at most
//                      boxes_num entries are written.
//   num_out            output: number of valid entries in keep_out.
//   boxes_host         host pointer to boxes_num * boxes_dim floats.
//   boxes_num          number of boxes.
//   boxes_dim          floats per box.
//                      NOTE(review): the kernel in this dump indexes boxes with
//                      a fixed stride of 5 floats, so boxes_dim != 5 is
//                      presumably unsupported -- confirm.
//   nms_overlap_thresh IoU threshold forwarded to the kernel.
//   device_id          CUDA device to run on.
//
// Boxes are visited in index order and an earlier box suppresses later ones,
// so callers are presumably expected to pass boxes sorted by descending score.
void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
          int boxes_dim, float nms_overlap_thresh, int device_id) {
  _set_device(device_id);

  // Nothing to do for an empty input; also avoids zero-sized allocations and
  // taking the address of element 0 of an empty vector below.
  if (boxes_num <= 0) {
    *num_out = 0;
    return;
  }

  float* boxes_dev = NULL;
  unsigned long long* mask_dev = NULL;

  const int col_blocks = DIVUP(boxes_num, threadsPerBlock);
  const size_t boxes_bytes =
      static_cast<size_t>(boxes_num) * boxes_dim * sizeof(float);
  const size_t mask_len = static_cast<size_t>(boxes_num) * col_blocks;
  const size_t mask_bytes = mask_len * sizeof(unsigned long long);

  CUDA_CHECK(cudaMalloc(&boxes_dev, boxes_bytes));
  CUDA_CHECK(cudaMemcpy(boxes_dev,
                        boxes_host,
                        boxes_bytes,
                        cudaMemcpyHostToDevice));

  CUDA_CHECK(cudaMalloc(&mask_dev, mask_bytes));

  // One thread block per (row tile, column tile) pair of boxes.
  dim3 blocks(col_blocks, col_blocks);
  dim3 threads(threadsPerBlock);
  nms_kernel<<<blocks, threads>>>(boxes_num,
                                  nms_overlap_thresh,
                                  boxes_dev,
                                  mask_dev);
  // Launch failures are not reported by the launch expression itself.
  CUDA_CHECK(cudaGetLastError());

  std::vector<unsigned long long> mask_host(mask_len);
  CUDA_CHECK(cudaMemcpy(&mask_host[0],
                        mask_dev,
                        mask_bytes,
                        cudaMemcpyDeviceToHost));

  // remv holds one bit per box: set once the box has been suppressed.
  std::vector<unsigned long long> remv(col_blocks, 0ULL);

  int num_to_keep = 0;
  for (int i = 0; i < boxes_num; i++) {
    const int nblock = i / threadsPerBlock;
    const int inblock = i % threadsPerBlock;

    if (!(remv[nblock] & (1ULL << inblock))) {
      keep_out[num_to_keep++] = i;
      // Merge the suppression mask of the kept box; blocks before nblock
      // only describe boxes that were already decided.
      const unsigned long long* p = &mask_host[0] + i * col_blocks;
      for (int j = nblock; j < col_blocks; j++) {
        remv[j] |= p[j];
      }
    }
  }
  *num_out = num_to_keep;

  CUDA_CHECK(cudaFree(boxes_dev));
  CUDA_CHECK(cudaFree(mask_dev));
}
def nms(dets, thresh):
    """Run non-maximum suppression on the CPU or the GPU.

    Returns an empty list when ``dets`` has no rows; otherwise the backend
    is selected by ``cfg.USE_GPU_NMS``.
    """
    if dets.shape[0] == 0:
        return []

    if not cfg.USE_GPU_NMS:
        return cpu_nms(dets, thresh)
    return gpu_nms(dets, thresh, device_id=cfg.GPU_ID)
def py_cpu_nms(dets, thresh):
    """Pure Python NMS baseline.

    Columns 0-3 of ``dets`` are read as x1, y1, x2, y2 and column 4 as the
    score. The highest-scoring remaining box is kept and every remaining box
    whose overlap ratio with it is greater than ``thresh`` is dropped; this
    repeats until no box remains. Returns the kept row indices in the order
    they were selected.
    """
    left, top, right, bottom, conf = (dets[:, col] for col in range(5))

    # The +1 treats coordinates as inclusive pixel indices.
    box_area = (right - left + 1) * (bottom - top + 1)
    remaining = conf.argsort()[::-1]

    kept = []
    while remaining.size > 0:
        best, rest = remaining[0], remaining[1:]
        kept.append(best)

        # Intersection rectangle of the best box with each remaining box.
        ix1 = np.maximum(left[best], left[rest])
        iy1 = np.maximum(top[best], top[rest])
        ix2 = np.minimum(right[best], right[rest])
        iy2 = np.minimum(bottom[best], bottom[rest])

        iw = np.maximum(0.0, ix2 - ix1 + 1)
        ih = np.maximum(0.0, iy2 - iy1 + 1)
        overlap_area = iw * ih
        iou = overlap_area / (box_area[best] + box_area[rest] - overlap_area)

        # Survivors are the boxes that do not overlap the kept box too much.
        remaining = rest[iou <= thresh]

    return kept
-------------------------------------------------------------------------------- /tf_rfcn_fixed/rpn_tools/proposal_layer_modified.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_fixed/rpn_tools/proposal_layer_modified.pyc -------------------------------------------------------------------------------- /tf_rfcn_fixed/rpn_tools/proposal_target_layer_modified.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_fixed/rpn_tools/proposal_target_layer_modified.pyc -------------------------------------------------------------------------------- /tf_rfcn_fixed/rpn_tools/roi_pooling_op_grad.py~: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.python.framework import ops 3 | 4 | @tf.RegisterShape("RoiPool") 5 | def _roi_pool_shape(op): 6 | """Shape function for the RoiPool op. 7 | 8 | """ 9 | dims_data = op.inputs[0].get_shape().as_list() 10 | channels = dims_data[3] 11 | 12 | dims_rois = op.inputs[1].get_shape().as_list() 13 | num_rois = dims_rois[0] 14 | 15 | pooled_height = op.get_attr('pooled_height') 16 | pooled_width = op.get_attr('pooled_width') 17 | 18 | output_shape = tf.TensorShape([num_rois, pooled_height, pooled_width, channels]) 19 | return [output_shape, output_shape] 20 | 21 | @ops.RegisterGradient("RoiPool") 22 | def _roi_pool_grad(op, grad, _): 23 | """The gradients for `roi_pool`. 24 | Args: 25 | op: The `roi_pool` `Operation` that we are differentiating, which we can use 26 | to find the inputs and outputs of the original op. 27 | grad: Gradient with respect to the output of the `roi_pool` op. 28 | Returns: 29 | Gradients with respect to the input of `zero_out`. 
30 | """ 31 | data = op.inputs[0] 32 | rois = op.inputs[1] 33 | argmax = op.outputs[1] 34 | pooled_height = op.get_attr('pooled_height') 35 | pooled_width = op.get_attr('pooled_width') 36 | spatial_scale = op.get_attr('spatial_scale') 37 | 38 | # load module 39 | module = tf.load_op_library('/home/alfonso/tensorflow/bazel-bin/tensorflow/core/user_ops/roi_pooling.so') 40 | 41 | # compute gradient 42 | data_grad = module.roi_pool_grad(data, rois, argmax, grad, pooled_height, pooled_width, spatial_scale) 43 | 44 | return [data_grad, None] # List of one Tensor, since we have one input 45 | -------------------------------------------------------------------------------- /tf_rfcn_fixed/rpn_tools/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_fixed/rpn_tools/utils/__init__.py -------------------------------------------------------------------------------- /tf_rfcn_fixed/rpn_tools/utils/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_fixed/rpn_tools/utils/__init__.pyc -------------------------------------------------------------------------------- /tf_rfcn_fixed/rpn_tools/utils/bbox.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Multitask Network Cascade 3 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 4 | # Copyright (c) 2016, Haozhi Qi 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # -------------------------------------------------------- 7 | 8 | cimport cython 9 | import numpy as np 10 | cimport numpy as np 11 | 12 | DTYPE = np.float 13 | ctypedef np.float_t DTYPE_t 14 | 15 | def bbox_overlaps( 16 | 
np.ndarray[DTYPE_t, ndim=2] boxes, 17 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 18 | """ 19 | Parameters 20 | ---------- 21 | boxes: (N, 4) ndarray of float 22 | query_boxes: (K, 4) ndarray of float 23 | Returns 24 | ------- 25 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 26 | """ 27 | cdef unsigned int N = boxes.shape[0] 28 | cdef unsigned int K = query_boxes.shape[0] 29 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 30 | cdef DTYPE_t iw, ih, box_area 31 | cdef DTYPE_t ua 32 | cdef unsigned int k, n 33 | for k in range(K): 34 | box_area = ( 35 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 36 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 37 | ) 38 | for n in range(N): 39 | iw = ( 40 | min(boxes[n, 2], query_boxes[k, 2]) - 41 | max(boxes[n, 0], query_boxes[k, 0]) + 1 42 | ) 43 | if iw > 0: 44 | ih = ( 45 | min(boxes[n, 3], query_boxes[k, 3]) - 46 | max(boxes[n, 1], query_boxes[k, 1]) + 1 47 | ) 48 | if ih > 0: 49 | ua = float( 50 | (boxes[n, 2] - boxes[n, 0] + 1) * 51 | (boxes[n, 3] - boxes[n, 1] + 1) + 52 | box_area - iw * ih 53 | ) 54 | overlaps[n, k] = iw * ih / ua 55 | return overlaps 56 | -------------------------------------------------------------------------------- /tf_rfcn_fixed/rpn_tools/utils/blob.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Multitask Network Cascade 3 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 4 | # Copyright (c) 2016, Haozhi Qi 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # -------------------------------------------------------- 7 | 8 | """Blob helper functions.""" 9 | 10 | import numpy as np 11 | import random 12 | import cv2 13 | from utils.cython_bbox import bbox_overlaps 14 | from mnc_config import cfg 15 | 16 | 17 | def im_list_to_blob(ims): 18 | """ 19 | Convert a list of images into a network input. 
20 | Assumes images are already prepared (means subtracted, BGR order, ...). 21 | """ 22 | max_shape = np.array([im.shape for im in ims]).max(axis=0) 23 | num_images = len(ims) 24 | blob = np.zeros((num_images, max_shape[0], max_shape[1], 3), 25 | dtype=np.float32) 26 | for i in xrange(num_images): 27 | im = ims[i] 28 | blob[i, 0:im.shape[0], 0:im.shape[1], :] = im 29 | # Move channels (axis 3) to axis 1 30 | # Axis order will become: (batch elem, channel, height, width) 31 | channel_swap = (0, 3, 1, 2) 32 | blob = blob.transpose(channel_swap) 33 | return blob 34 | 35 | 36 | def prep_im_for_blob(im, pixel_means, target_size, max_size): 37 | """Mean subtract and scale an image for use in a blob.""" 38 | im = im.astype(np.float32, copy=False) 39 | im -= pixel_means 40 | im_shape = im.shape 41 | im_size_min = np.min(im_shape[0:2]) 42 | im_size_max = np.max(im_shape[0:2]) 43 | im_scale = float(target_size) / float(im_size_min) 44 | # Prevent the biggest axis from being more than MAX_SIZE 45 | if np.round(im_scale * im_size_max) > max_size: 46 | im_scale = float(max_size) / float(im_size_max) 47 | im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale, 48 | interpolation=cv2.INTER_LINEAR) 49 | 50 | return im, im_scale 51 | 52 | 53 | def prep_im_for_blob_cfm(im, input_scales): 54 | """Converts an image into a network input. 
55 | Arguments: 56 | im (ndarray): a color image in BGR order 57 | Returns: 58 | blob (ndarray): a data blob holding an image pyramid 59 | im_scale_factors (list): list of image scales (relative to im) used 60 | in the image pyramid 61 | """ 62 | im_orig = im.astype(np.float32, copy=True) 63 | im_orig -= cfg.PIXEL_MEANS 64 | 65 | im_shape = im_orig.shape 66 | im_size_min = np.min(im_shape[0:2]) 67 | im_size_max = np.max(im_shape[0:2]) 68 | 69 | processed_ims = [] 70 | im_scale_factors = [] 71 | 72 | for target_size in input_scales: 73 | im_scale = float(target_size) / float(im_size_min) 74 | # Prevent the biggest axis from being more than MAX_SIZE 75 | if np.round(im_scale * im_size_max) > cfg.TEST.MAX_SIZE: 76 | im_scale = float(cfg.TEST.MAX_SIZE) / float(im_size_max) 77 | im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale, 78 | interpolation=cv2.INTER_LINEAR) 79 | im_scale_factors.append(im_scale) 80 | processed_ims.append(im) 81 | 82 | # Create a blob to hold the input images 83 | blob = im_list_to_blob(processed_ims) 84 | 85 | return blob, np.array(im_scale_factors) 86 | 87 | 88 | def pred_rois_for_blob(im_rois, im_scales): 89 | """ 90 | Convert rois to network input 91 | support multi-scale testing 92 | """ 93 | im_rois = im_rois.astype(np.float, copy=False) 94 | if len(im_scales) > 1: 95 | widths = im_rois[:, 2] - im_rois[:, 0] + 1 96 | heights = im_rois[:, 3] - im_rois[:, 1] + 1 97 | 98 | areas = widths * heights 99 | scaled_areas = areas[:, np.newaxis] * (im_scales[np.newaxis, :] ** 2) 100 | diff_areas = np.abs(scaled_areas - 224 * 224) 101 | levels = diff_areas.argmin(axis=1)[:, np.newaxis] 102 | else: 103 | levels = np.zeros((im_rois.shape[0], 1), dtype=np.int) 104 | im_rois = im_rois * im_scales[levels] 105 | rois_blob = np.hstack((levels.astype(np.float), im_rois)) 106 | return rois_blob 107 | 108 | -------------------------------------------------------------------------------- /tf_rfcn_fixed/rpn_tools/utils/blob.pyc: 
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_fixed/rpn_tools/utils/blob.pyc
# --------------------------------------------------------------------------------
# /tf_rfcn_fixed/rpn_tools/utils/cython_bbox.so:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_fixed/rpn_tools/utils/cython_bbox.so
# --------------------------------------------------------------------------------
# /tf_rfcn_fixed/rpn_tools/utils/mnc_config.pyc:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_fixed/rpn_tools/utils/mnc_config.pyc
# --------------------------------------------------------------------------------
# /tf_rfcn_fixed/rpn_tools/utils/timer.py:
# --------------------------------------------------------------------------------
# --------------------------------------------------------
# Multitask Network Cascade
# Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn)
# Copyright (c) 2016, Haozhi Qi
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------

import time


class Timer(object):
    """A simple timer.

    Call `tic()` to start an interval and `toc()` to end it. The timer keeps
    the last interval (`diff`), the number of completed intervals (`calls`),
    their sum (`total_time`) and their mean (`average_time`).
    """
    def __init__(self):
        self.total_time = 0.
        self.calls = 0
        self.start_time = 0.
        self.diff = 0.
        self.average_time = 0.

    def tic(self):
        """Record the start of an interval."""
        # using time.time instead of time.clock because time.clock
        # does not normalize for multithreading
        self.start_time = time.time()

    def toc(self, average=True):
        """End the interval started by `tic()` and update the statistics.

        Returns the running average over all intervals when `average` is
        true, otherwise the duration of the interval just ended.
        """
        self.diff = time.time() - self.start_time
        self.total_time += self.diff
        self.calls += 1
        self.average_time = self.total_time / self.calls
        if average:
            return self.average_time
        else:
            return self.diff

# --------------------------------------------------------------------------------
# /tf_rfcn_fixed/rpn_tools/utils/timer.pyc:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_fixed/rpn_tools/utils/timer.pyc
# --------------------------------------------------------------------------------
# /tf_rfcn_fixed/rpn_tools/utils/unmap.py:
# --------------------------------------------------------------------------------
# --------------------------------------------------------
# Multitask Network Cascade
# Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn)
# Copyright (c) 2016, Haozhi Qi
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------

import numpy as np


def unmap(data, count, inds, fill=0):
    """ Unmap a subset of item (data) back to the original set of items (of
    size count)

    Returns a float32 array with `count` entries along the first axis (and
    `data`'s trailing dimensions, if any). Positions listed in `inds` receive
    the corresponding entries of `data`; all other positions hold `fill`.
    """
    # (count,) + data.shape[1:] is just (count,) for 1-D data, so one code
    # path covers both the vector and the multi-dimensional case.
    ret = np.full((count, ) + data.shape[1:], fill, dtype=np.float32)
    ret[inds] = data
    return ret

# --------------------------------------------------------------------------------
# /tf_rfcn_fixed/rpn_tools/utils/unmap.pyc:
# --------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_fixed/rpn_tools/utils/unmap.pyc -------------------------------------------------------------------------------- /tf_rfcn_fixed/rpn_tools/utils/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /tf_rfcn_fixed/rpn_tools/utils/utils/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_fixed/rpn_tools/utils/utils/__init__.pyc -------------------------------------------------------------------------------- /tf_rfcn_fixed/rpn_tools/utils/utils/bbox.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Sergey Karayev 6 | # -------------------------------------------------------- 7 | 8 | cimport cython 9 | import numpy as np 10 | cimport numpy as np 11 | 12 | DTYPE = np.float 13 | ctypedef np.float_t DTYPE_t 14 | 15 | def bbox_overlaps( 16 | np.ndarray[DTYPE_t, ndim=2] boxes, 17 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 18 | """ 19 | Parameters 20 | ---------- 21 | boxes: (N, 4) ndarray of float 22 | query_boxes: (K, 4) ndarray of float 23 | Returns 24 | ------- 25 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 26 | """ 27 | cdef unsigned int N = boxes.shape[0] 28 | cdef unsigned int 
K = query_boxes.shape[0] 29 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 30 | cdef DTYPE_t iw, ih, box_area 31 | cdef DTYPE_t ua 32 | cdef unsigned int k, n 33 | for k in range(K): 34 | box_area = ( 35 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 36 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 37 | ) 38 | for n in range(N): 39 | iw = ( 40 | min(boxes[n, 2], query_boxes[k, 2]) - 41 | max(boxes[n, 0], query_boxes[k, 0]) + 1 42 | ) 43 | if iw > 0: 44 | ih = ( 45 | min(boxes[n, 3], query_boxes[k, 3]) - 46 | max(boxes[n, 1], query_boxes[k, 1]) + 1 47 | ) 48 | if ih > 0: 49 | ua = float( 50 | (boxes[n, 2] - boxes[n, 0] + 1) * 51 | (boxes[n, 3] - boxes[n, 1] + 1) + 52 | box_area - iw * ih 53 | ) 54 | overlaps[n, k] = iw * ih / ua 55 | return overlaps 56 | -------------------------------------------------------------------------------- /tf_rfcn_fixed/rpn_tools/utils/utils/blob.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Blob helper functions.""" 9 | 10 | import numpy as np 11 | import cv2 12 | 13 | def im_list_to_blob(ims): 14 | """Convert a list of images into a network input. 15 | 16 | Assumes images are already prepared (means subtracted, BGR order, ...). 
17 | """ 18 | max_shape = np.array([im.shape for im in ims]).max(axis=0) 19 | num_images = len(ims) 20 | blob = np.zeros((num_images, max_shape[0], max_shape[1], 3), 21 | dtype=np.float32) 22 | for i in xrange(num_images): 23 | im = ims[i] 24 | blob[i, 0:im.shape[0], 0:im.shape[1], :] = im 25 | # Move channels (axis 3) to axis 1 26 | # Axis order will become: (batch elem, channel, height, width) 27 | channel_swap = (0, 3, 1, 2) 28 | blob = blob.transpose(channel_swap) 29 | return blob 30 | 31 | def prep_im_for_blob(im, pixel_means, target_size, max_size): 32 | """Mean subtract and scale an image for use in a blob.""" 33 | im = im.astype(np.float32, copy=False) 34 | im -= pixel_means 35 | im_shape = im.shape 36 | im_size_min = np.min(im_shape[0:2]) 37 | im_size_max = np.max(im_shape[0:2]) 38 | im_scale = float(target_size) / float(im_size_min) 39 | # Prevent the biggest axis from being more than MAX_SIZE 40 | if np.round(im_scale * im_size_max) > max_size: 41 | im_scale = float(max_size) / float(im_size_max) 42 | im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale, 43 | interpolation=cv2.INTER_LINEAR) 44 | 45 | return im, im_scale 46 | -------------------------------------------------------------------------------- /tf_rfcn_fixed/rpn_tools/utils/utils/blob.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_fixed/rpn_tools/utils/utils/blob.pyc -------------------------------------------------------------------------------- /tf_rfcn_fixed/rpn_tools/utils/utils/timer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | 
import time

class Timer(object):
    """A simple timer.

    Call `tic()` to start an interval and `toc()` to end it. The timer keeps
    the last interval (`diff`), the number of completed intervals (`calls`),
    their sum (`total_time`) and their mean (`average_time`).
    """
    def __init__(self):
        self.total_time = 0.
        self.calls = 0
        self.start_time = 0.
        self.diff = 0.
        self.average_time = 0.

    def tic(self):
        """Record the start of an interval."""
        # using time.time instead of time.clock because time.clock
        # does not normalize for multithreading
        self.start_time = time.time()

    def toc(self, average=True):
        """End the interval started by `tic()` and update the statistics.

        Returns the running average over all intervals when `average` is
        true, otherwise the duration of the interval just ended.
        """
        self.diff = time.time() - self.start_time
        self.total_time += self.diff
        self.calls += 1
        self.average_time = self.total_time / self.calls
        if average:
            return self.average_time
        else:
            return self.diff

# --------------------------------------------------------------------------------
# /tf_rfcn_fixed/rpn_tools/utils/utils/timer.pyc:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_fixed/rpn_tools/utils/utils/timer.pyc
# --------------------------------------------------------------------------------
# /tf_rfcn_fixed/rpn_tools/utils/vis_seg.py:
# --------------------------------------------------------------------------------
# --------------------------------------------------------
# Multitask Network Cascade
# Written by Haozhi Qi
# Copyright (c) 2016, Haozhi Qi
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------

import numpy as np
try:
    import cPickle
except ImportError:  # Python 3: cPickle was merged into pickle
    import pickle as cPickle
import os
import cv2
try:
    import Image
except ImportError:  # Pillow only exposes the module as PIL.Image
    from PIL import Image
from mnc_config import cfg


def vis_seg(img_names, cls_names, output_dir, gt_dir):
    """
    This function plot segmentation results to specific directory
    Args:
        img_names: list of image names (without extension)
        cls_names: list of class names, indexed like the cached results
        output_dir: existing directory that holds 'res_boxes.pkl' and
            'res_masks.pkl'; the 'SegInst', 'SegCls' and 'SegRes'
            sub-directories are created in it if missing
        gt_dir: directory whose 'img' sub-directory holds '<name>.jpg'

    For every image, writes an instance map and a class map as JPEGs and a
    PNG that blends the class map over the original image.
    """
    assert os.path.exists(output_dir)
    # a list of dictionary
    inst_dir = os.path.join(output_dir, 'SegInst')
    cls_dir = os.path.join(output_dir, 'SegCls')
    res_dir = os.path.join(output_dir, 'SegRes')
    for out_dir in (inst_dir, cls_dir, res_dir):
        if not os.path.isdir(out_dir):
            os.mkdir(out_dir)

    res_list = _prepare_dict(img_names, cls_names, output_dir)
    # The palette does not depend on the image, so build it once. Columns are
    # reversed up front (RGB -> BGR, the channel order cv2.imwrite expects).
    color_map_bgr = _get_voc_color_map()[:, ::-1]
    for img_ind, image_name in enumerate(img_names):
        target_inst_file = os.path.join(inst_dir, image_name + '.jpg')
        target_cls_file = os.path.join(cls_dir, image_name + '.jpg')
        print(image_name)
        gt_image = gt_dir + '/img/' + image_name + '.jpg'
        img_data = cv2.imread(gt_image)
        if img_data is None:
            # cv2.imread signals failure by returning None.
            raise IOError('could not read image: ' + gt_image)
        img_width = img_data.shape[1]
        img_height = img_data.shape[0]
        pred_dict = res_list[img_ind]
        inst_img, cls_img = _convert_pred_to_image(img_width, img_height, pred_dict)
        # One table lookup per map replaces the per-pixel Python loop;
        # indexing with an (H, W) int array yields an (H, W, 3) image.
        inst_out_img = color_map_bgr[inst_img]
        cls_out_img = color_map_bgr[cls_img]

        cv2.imwrite(target_inst_file, inst_out_img)
        cv2.imwrite(target_cls_file, cls_out_img)
        background = Image.open(gt_image)
        mask = Image.open(target_cls_file)
        background = background.convert('RGBA')
        mask = mask.convert('RGBA')
        superimpose_image = Image.blend(background, mask, 0.8)
        name = os.path.join(res_dir, image_name + '.png')
        superimpose_image.save(name, 'PNG')


def _prepare_dict(img_names, cls_names, cache_dir, vis_thresh=0.5):
    """
    Load the cached detections and masks and group them per image.

    Reads 'res_boxes.pkl' and 'res_masks.pkl' from `cache_dir` (both indexed
    as [cls_ind][img_ind]) and keeps, for every non-background class, the
    detections whose last column is at least `vis_thresh`.

    Returns:
        list, each list is a dictionary contains mask list, box list
        (keys: 'image_name', 'cls_name', 'boxes', 'masks')
    """
    res_list = []
    # NOTE(review): unpickling executes arbitrary code; only point this at
    # result files produced by a trusted run.
    det_file = os.path.join(cache_dir, 'res_boxes.pkl')
    with open(det_file, 'rb') as f:
        det_pkl = cPickle.load(f)
    seg_file = os.path.join(cache_dir, 'res_masks.pkl')
    with open(seg_file, 'rb') as f:
        seg_pkl = cPickle.load(f)

    for img_ind, image_name in enumerate(img_names):
        box_for_img = []
        mask_for_img = []
        cls_for_img = []
        for cls_ind, cls_name in enumerate(cls_names):
            if cls_name == '__background__' or len(det_pkl[cls_ind][img_ind]) == 0:
                continue
            det_for_img = det_pkl[cls_ind][img_ind]
            seg_for_img = seg_pkl[cls_ind][img_ind]
            keep_inds = np.where(det_for_img[:, -1] >= vis_thresh)[0]
            for keep in keep_inds:
                box_for_img.append(det_for_img[keep])
                # TODO: remove this annoying 0
                mask_for_img.append(seg_for_img[keep][0])
                cls_for_img.append(cls_ind)
        res_dict = {'image_name': image_name,
                    'cls_name': cls_for_img,
                    'boxes': box_for_img,
                    'masks': mask_for_img}
        res_list.append(res_dict)

    return res_list


def _convert_pred_to_image(img_width, img_height, pred_dict):
    """Rasterize one image's predictions into label maps.

    Returns `(inst_img, cls_img)`, two (img_height, img_width) int arrays.
    `inst_img` holds i + 1 where instance i's mask is set; `cls_img` holds
    the class index there and the value 150 along box borders. Later
    instances overwrite earlier ones where they overlap.
    """
    num_inst = len(pred_dict['boxes'])
    inst_img = np.zeros((img_height, img_width))
    cls_img = np.zeros((img_height, img_width))
    for i in range(num_inst):
        box = np.round(pred_dict['boxes'][i]).astype(int)
        mask = pred_dict['masks'][i]
        cls_num = pred_dict['cls_name'][i]
        # clip box into image space (plain ints so cv2 accepts them as a size)
        x1 = int(min(max(box[0], 0), img_width - 1))
        y1 = int(min(max(box[1], 0), img_height - 1))
        x2 = int(min(max(box[2], 0), img_width - 1))
        y2 = int(min(max(box[3], 0), img_height - 1))
        mask = cv2.resize(mask.astype(np.float32), (x2 - x1 + 1, y2 - y1 + 1))
        mask = mask >= cfg.BINARIZE_THRESH

        # The slices are views, so this writes into the full-size maps:
        # masked pixels take the new label, the rest keep their value.
        inst_img[y1:y2 + 1, x1:x2 + 1][mask] = i + 1
        cls_img[y1:y2 + 1, x1:x2 + 1][mask] = cls_num
        # Plot bounding boxes simultaneously. The lower slice bound is
        # clamped at 0: a start of -1 would wrap around and select nothing.
        cls_img[y1:y2 + 1, max(x1 - 1, 0):x1 + 1] = 150
        cls_img[y1:y2 + 1, max(x2 - 1, 0):x2 + 1] = 150
        cls_img[max(y1 - 1, 0):y1 + 1, x1:x2 + 1] = 150
        cls_img[max(y2 - 1, 0):y2 + 1, x1:x2 + 1] = 150

    inst_img = inst_img.astype(int)
    cls_img = cls_img.astype(int)
    return inst_img, cls_img


def _get_voc_color_map(n=256):
    """Build an (n, 3) table mapping a label index to an (r, g, b) colour.

    For each index, successive 3-bit groups (lowest first) contribute one bit
    each to r, g and b, filling the channels from their highest bit down.
    """
    color_map = np.zeros((n, 3))
    for i in range(n):
        r = g = b = 0
        cid = i
        for j in range(8):
            shift = 7 - j
            r |= (cid & 1) << shift
            g |= ((cid >> 1) & 1) << shift
            b |= ((cid >> 2) & 1) << shift
            cid >>= 3

        color_map[i][0] = r
        color_map[i][1] = g
        color_map[i][2] = b
    return color_map

# --------------------------------------------------------------------------------