├── README.md
├── tf_rfcn_dynamic
├── checkpoint
├── cnn_tools
│ ├── __init__.py
│ ├── __init__.pyc
│ ├── tools.py
│ └── tools.pyc
├── psroi_pool_tools
│ ├── __init__.py
│ ├── __init__.pyc
│ ├── psroi_pooling_op.py
│ ├── psroi_pooling_op.pyc
│ ├── psroi_pooling_op.py~
│ ├── psroi_pooling_op_grad.py
│ └── psroi_pooling_op_grad.pyc
├── resnet_rfcn_v2.py
└── rpn_tools
│ ├── __init__.py
│ ├── __init__.pyc
│ ├── __init__.py~
│ ├── anchor_target_layer.py
│ ├── anchor_target_layer.pyc
│ ├── anchor_target_layer.py~
│ ├── fast_rcnn
│ ├── __init__.py
│ ├── __init__.pyc
│ ├── bbox_transform.py
│ ├── bbox_transform.pyc
│ ├── config.py
│ ├── config.pyc
│ ├── nms
│ │ ├── __init__.py
│ │ ├── __init__.pyc
│ │ ├── cpu_nms.c
│ │ ├── cpu_nms.pyx
│ │ ├── cpu_nms.so
│ │ ├── gpu_mv.cpp
│ │ ├── gpu_mv.hpp
│ │ ├── gpu_mv.pyx
│ │ ├── gpu_nms.cpp
│ │ ├── gpu_nms.hpp
│ │ ├── gpu_nms.pyx
│ │ ├── gpu_nms.so
│ │ ├── mnc_config.py
│ │ ├── mnc_config.pyc
│ │ ├── mv.so
│ │ ├── mv_kernel.cu
│ │ ├── nms_kernel.cu
│ │ ├── nms_wrapper.py
│ │ ├── nms_wrapper.pyc
│ │ └── py_cpu_nms.py
│ ├── nms_wrapper.py
│ ├── nms_wrapper.pyc
│ ├── nms_wrapper.py~
│ ├── test.py
│ ├── test.pyc
│ ├── train.py
│ └── train.pyc
│ ├── generate.py
│ ├── generate_anchors.py
│ ├── generate_anchors.pyc
│ ├── mnc_data_layer.py
│ ├── mnc_data_layer.pyc
│ ├── mnc_data_layer.py~
│ ├── nms
│ ├── __init__.py
│ ├── __init__.pyc
│ ├── cpu_nms.c
│ ├── cpu_nms.pyx
│ ├── cpu_nms.so
│ ├── gpu_mv.cpp
│ ├── gpu_mv.hpp
│ ├── gpu_mv.pyx
│ ├── gpu_nms.cpp
│ ├── gpu_nms.hpp
│ ├── gpu_nms.pyx
│ ├── gpu_nms.so
│ ├── mnc_config.py
│ ├── mnc_config.pyc
│ ├── mv.so
│ ├── mv_kernel.cu
│ ├── nms_kernel.cu
│ ├── nms_wrapper.py
│ ├── nms_wrapper.pyc
│ └── py_cpu_nms.py
│ ├── proposal_layer.py
│ ├── proposal_layer.pyc
│ ├── proposal_layer.py~
│ ├── proposal_target_layer.py
│ ├── proposal_target_layer.pyc
│ ├── proposal_target_layer.py~
│ └── utils
│ ├── __init__.py
│ ├── __init__.pyc
│ ├── bbox.c
│ ├── bbox.pyx
│ ├── blob.py
│ ├── blob.pyc
│ ├── cython_bbox.so
│ ├── mnc_config.py
│ ├── mnc_config.pyc
│ ├── timer.py
│ ├── timer.pyc
│ ├── unmap.py
│ ├── unmap.pyc
│ ├── vis_seg.py
│ └── voc_eval.py
└── tf_rfcn_fixed
├── cnn_tools
├── __init__.py
├── __init__.pyc
├── tools.py
└── tools.pyc
├── psroi_pool_tools
├── __init__.py
├── __init__.pyc
├── psroi_pooling_op.py
├── psroi_pooling_op.pyc
├── psroi_pooling_op_grad.py
└── psroi_pooling_op_grad.pyc
├── resnet_rfcn.py
└── rpn_tools
├── __init__.py
├── __init__.pyc
├── __init__.py~
├── anchor_target_layer.py~
├── anchor_target_layer_modified.py
├── anchor_target_layer_modified.pyc
├── anchor_target_layer_modified3.py~
├── fast_rcnn
├── __init__.py
├── __init__.pyc
├── bbox_transform.py
├── bbox_transform.pyc
├── config.py
├── config.pyc
├── nms
│ ├── __init__.py
│ ├── __init__.pyc
│ ├── cpu_nms.c
│ ├── cpu_nms.pyx
│ ├── cpu_nms.so
│ ├── gpu_mv.cpp
│ ├── gpu_mv.hpp
│ ├── gpu_mv.pyx
│ ├── gpu_nms.cpp
│ ├── gpu_nms.hpp
│ ├── gpu_nms.pyx
│ ├── gpu_nms.so
│ ├── mnc_config.py
│ ├── mnc_config.pyc
│ ├── mv.so
│ ├── mv_kernel.cu
│ ├── nms_kernel.cu
│ ├── nms_wrapper.py
│ ├── nms_wrapper.pyc
│ └── py_cpu_nms.py
├── nms_wrapper.py
├── nms_wrapper.pyc
├── nms_wrapper.py~
├── test.py
├── test.pyc
├── train.py
└── train.pyc
├── generate_anchors.py
├── generate_anchors.pyc
├── my_anchor_target_layer_modified.py
├── my_anchor_target_layer_modified.pyc
├── nms
├── __init__.py
├── __init__.pyc
├── cpu_nms.c
├── cpu_nms.pyx
├── cpu_nms.so
├── gpu_mv.cpp
├── gpu_mv.hpp
├── gpu_mv.pyx
├── gpu_nms.cpp
├── gpu_nms.hpp
├── gpu_nms.pyx
├── gpu_nms.so
├── mnc_config.py
├── mnc_config.pyc
├── mv.so
├── mv_kernel.cu
├── nms_kernel.cu
├── nms_wrapper.py
├── nms_wrapper.pyc
└── py_cpu_nms.py
├── proposal_layer_modified.py
├── proposal_layer_modified.pyc
├── proposal_target_layer_modified.py
├── proposal_target_layer_modified.pyc
├── proposal_target_layer_modified.py~
├── roi_pooling_op_grad.py~
└── utils
├── __init__.py
├── __init__.pyc
├── bbox.c
├── bbox.pyx
├── blob.py
├── blob.pyc
├── cython_bbox.so
├── mnc_config.py
├── mnc_config.pyc
├── timer.py
├── timer.pyc
├── unmap.py
├── unmap.pyc
├── utils
├── __init__.py
├── __init__.pyc
├── bbox.pyx
├── blob.py
├── blob.pyc
├── timer.py
└── timer.pyc
├── vis_seg.py
└── voc_eval.py
/README.md:
--------------------------------------------------------------------------------
1 | # tf_rfcn
2 |
3 | This is an experimental tensorflow implementation of R-FCN by: Dai, Jifeng, et al. "R-FCN: Object Detection via Region-based Fully Convolutional Networks." arXiv preprint arXiv:1605.06409 (2016).
4 |
5 | Base trunk is a ResNet (can be 50-101-152 layers). Training is done end-to-end.
6 |
7 | Anchor, proposal, and proposal target layers are based on Ross Girshick's py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn), with some modifications by Orpine in (https://github.com/Orpine/py-R-FCN) for the proposal target layer.
8 |
9 | Training only for the moment, no testing phase yet.
10 |
11 | created by A. Labao under Pros Naval of CVMIG Lab, University of the Philippines
12 |
13 | # Specs
14 | tf_rfcn_fixed : accepts any image (as specified in the source folder) and rescales it to 600 x 1000; input is a JPEG image
15 | tf_rfcn_dynamic : accepts any image size; tensors are adjusted to a size of 600 for the shorter side, and the input is a roidb pkl file. Sample code for making the imdb is [here](https://github.com/alfonsolink/roidb_maker), adapted from the original MNC [code](https://github.com/daijifeng001/MNC) - the code has to be modified to point to your local PASCAL VOC dataset folders.
16 |
17 | # Performance
18 | In terms of end cls accuracy, tf_rfcn_dynamic has an accuracy of 93% after ~70k iterations, with an anchor accuracy of 99% given the PASCAL VOC 2012 SDS dataset, and a 101-layer ResNet trunk. Results are obtained with ImageNet pretrained [weights](https://1drv.ms/f/s!AtPFjf_hfC81kUrPD2Kazg1Gtkz6), which can be called using saver_all_trunkrcnn.restore() -- which sets the base trunk and "rcnn" layers to ImageNet weights ("fc" layers are not included in ImageNet initialization)
19 |
20 | # Requirements
21 | GTX 1070
22 | OpenCV 3.1
23 | Cuda 7.5+
24 | Cudnn 5.0+
25 | TensorFlow v0.10+
26 | and psroi_pooling_op.so installed (see my other git repository [here](https://github.com/alfonsolink/tensorflow_user_ops) for the psroi_pooling TensorFlow wrapper)
27 |
28 |
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/checkpoint:
--------------------------------------------------------------------------------
1 | model_checkpoint_path: "rfcn_end_to_end.ckpt"
2 | all_model_checkpoint_paths: "rfcn_end_to_end.ckpt"
3 |
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/cnn_tools/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_dynamic/cnn_tools/__init__.py
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/cnn_tools/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_dynamic/cnn_tools/__init__.pyc
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/cnn_tools/tools.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_dynamic/cnn_tools/tools.pyc
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/psroi_pool_tools/__init__.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/psroi_pool_tools/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_dynamic/psroi_pool_tools/__init__.pyc
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/psroi_pool_tools/psroi_pooling_op.py:
--------------------------------------------------------------------------------
import os
import os.path as osp

import tensorflow as tf

# Location of the compiled PSROI pooling op library. The default is the
# original author's build path; set the PSROI_POOLING_SO environment variable
# to load the library from elsewhere without editing this file.
_DEFAULT_PSROI_POOLING_SO = '/home/cvmig_core/tensorflow/bazel-bin/tensorflow/core/user_ops/ps_roipool/psroi_pooling.so'

filename = os.environ.get('PSROI_POOLING_SO', _DEFAULT_PSROI_POOLING_SO)
_psroi_pooling_module = tf.load_op_library(filename)

# Re-export the forward op and its gradient op under short names.
psroi_pool = _psroi_pooling_module.psroi_pool
psroi_pool_grad = _psroi_pooling_module.psroi_pool_grad
8 |
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/psroi_pool_tools/psroi_pooling_op.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_dynamic/psroi_pool_tools/psroi_pooling_op.pyc
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/psroi_pool_tools/psroi_pooling_op.py~:
--------------------------------------------------------------------------------
# Editor backup (`.py~`) of psroi_pooling_op.py.
import tensorflow as tf
import os.path as osp

# Absolute path to the compiled PSROI pooling op library on the author's machine.
filename = '/home/alfonso/tensorflow/bazel-bin/tensorflow/core/user_ops/ps_roipool/psroi_pooling.so'
# Load the custom op library and re-export the forward op and its gradient op.
_psroi_pooling_module = tf.load_op_library(filename)
psroi_pool = _psroi_pooling_module.psroi_pool
psroi_pool_grad = _psroi_pooling_module.psroi_pool_grad
8 |
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/psroi_pool_tools/psroi_pooling_op_grad.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from tensorflow.python.framework import ops
3 | import psroi_pooling_op
4 |
@tf.RegisterShape("PSROIPool")
def _psroi_pool_shape(op):
    """Shape function for the PSROIPool op.

    Both outputs of the op are given the same static shape,
    [num_rois, output_dim, group_size, group_size], where num_rois is the
    first dimension of the rois input (op.inputs[1]) and output_dim /
    group_size are read from the op's attributes.

    Args:
      op: The `PSROIPool` `Operation` whose output shapes are requested.
    Returns:
      A list with one `TensorShape` per op output (two identical entries).
    """
    num_rois = op.inputs[1].get_shape().as_list()[0]

    output_dim = op.get_attr('output_dim')
    # The pooled grid is square: group_size bins along each spatial axis.
    group_size = op.get_attr('group_size')

    output_shape = tf.TensorShape([num_rois, output_dim, group_size, group_size])
    return [output_shape, output_shape]
23 |
@ops.RegisterGradient("PSROIPool")
def _psroi_pool_grad(op, grad, _):
    """The gradients for `PSROIPool`.
    Args:
      op: The `PSROIPool` `Operation` that we are differentiating, which we can use
        to find the inputs and outputs of the original op.
      grad: Gradient with respect to the first output of the op.
      _: Gradient with respect to the second output (op.outputs[1]); ignored.
    Returns:
      A two-element list: the gradient with respect to the data input, and
      None for the rois input.
    """
    data = op.inputs[0]
    rois = op.inputs[1]
    # Second forward output; it is passed back to the gradient kernel.
    mapping_channel = op.outputs[1]
    spatial_scale = op.get_attr('spatial_scale')

    # compute gradient
    data_grad = psroi_pooling_op.psroi_pool_grad(data, rois, mapping_channel, grad, spatial_scale)

    return [data_grad, None]  # one entry per op input: data, rois
44 |
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/psroi_pool_tools/psroi_pooling_op_grad.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_dynamic/psroi_pool_tools/psroi_pooling_op_grad.pyc
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/rpn_tools/__init__.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick and Sean Bell
6 | # --------------------------------------------------------
7 |
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/rpn_tools/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_dynamic/rpn_tools/__init__.pyc
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/rpn_tools/__init__.py~:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 | from tools import anchor_target_layer
4 |
def weight_variable(shape):
    """Return a tf.Variable of `shape` initialised from a truncated normal (stddev 0.1)."""
    starting_values = tf.truncated_normal(shape, stddev=0.1)
    weights = tf.Variable(starting_values)
    return weights
8 |
def bias_variable(shape):
    """Return a tf.Variable of `shape` with every element initialised to 0.1."""
    starting_values = tf.constant(0.1, shape=shape)
    biases = tf.Variable(starting_values)
    return biases
12 |
def conv2d(x, W):
    """Convolve `x` with filter `W` using unit strides and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    convolved = tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')
    return convolved
15 |
def max_pool_2x2(x):
    """Max-pool `x` with a 2x2 window and stride 2, using 'SAME' padding."""
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    pooled = tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')
    return pooled
18 |
def process_anno(x):
    """Parse a whitespace-separated annotation string into labels and boxes.

    The text is read as a flat list of integers and grouped five per row;
    column 0 of each row is returned as the label and columns 1-4 as the box.

    Args:
      x: Annotation text holding a multiple of five space-separated integers.
    Returns:
      A tuple (l, gt): `l` is a 1-D int64 array with one label per row and
      `gt` is an (N, 4) int64 array with the remaining four values of each row.
    """
    # int64 is pinned explicitly: the tf.py_func call that wraps this function
    # declares both outputs as tf.int64, while plain `int` maps to a
    # platform-dependent width.
    x = np.fromstring(x, dtype=np.int64, sep=" ")
    x = np.reshape(x, (-1, 5))
    l = x[:, 0]
    gt = x[:, 1:]
    return l, gt
25 |
26 | '''
27 | num_labels = 2
28 | batch_size = 10
29 | filename_queue = tf.train.string_input_producer(["train_files.csv"], num_epochs=None, shuffle=False)
30 | reader = tf.TextLineReader()
31 | key, value = reader.read(filename_queue)
32 | record_defaults = [[""],[0]]
33 | image_path, label = tf.decode_csv(value, field_delim=",", record_defaults=record_defaults)
34 | my_img = tf.image.decode_png(tf.read_file(image_path), channels=3)
35 | my_img = tf.cast(my_img,tf.float32) / 255
36 | my_img = tf.image.resize_images(my_img,224,224)
37 | min_after_dequeue = 5
38 | capacity = min_after_dequeue + 3 * batch_size
39 | im_batch, lb_batch = tf.train.batch([my_img,label],batch_size=batch_size,capacity=capacity)
40 | '''
41 |
# Input pipeline: read (image path, annotation path) pairs from a CSV file and
# turn each pair into an image tensor plus label / box tensors.
# NOTE(review): num_labels and batch_size are not referenced by the live code
# visible in this file — confirm before relying on them.
num_labels = 25
batch_size = 1
reader = tf.TextLineReader()
# Single-file queue, repeated indefinitely (num_epochs=None) and in file order.
filename_queue = tf.train.string_input_producer(["train_rcnn_files.csv"], num_epochs=None, shuffle=False)
key, value = reader.read(filename_queue)
# Each CSV line holds two string fields: the image path and the annotation path.
image_path, anno_path = tf.decode_csv(value, record_defaults=[[""],[""]], field_delim=",")
# Decode as a 3-channel PNG, convert to float32, divide by 255, then resize to 224 x 224.
my_img = tf.image.decode_png(tf.read_file(image_path), channels=3)
my_img = tf.cast(my_img,tf.float32) / 255
my_img = tf.image.resize_images(my_img,224,224)
# The annotation file is read as one string and parsed by process_anno
# through tf.py_func; both results are declared as int64.
anno = tf.read_file(anno_path)
labels, gt_box = tf.py_func(process_anno,[anno],[tf.int64,tf.int64])
# NOTE(review): tf.concat(1, ...) passes the axis first — presumably the
# pre-1.0 TensorFlow argument order; confirm against the installed version.
labels = tf.reshape(tf.concat(1, labels), [-1,1])
gt_box = tf.reshape(tf.concat(1, gt_box), [-1,4])
55 |
56 |
57 | '''
58 | x, y1_ = im_batch, lb_batch
59 | l_b = tf.to_int64(y1_)
60 | l = tf.one_hot(indices=l_b,depth=num_labels,on_value=1.0,off_value=0.0,axis=-1)
61 | l = tf.cast(l,tf.float32)
62 |
63 | W_conv1 = weight_variable([3,3,3,64])
64 | b_conv1 = bias_variable([64])
65 | h_conv1 = tf.nn.relu(conv2d(x, W_conv1) + b_conv1)
66 |
67 | W_conv2 = weight_variable([3,3,64,64])
68 | b_conv2 = bias_variable([64])
69 | h_conv2 = tf.nn.relu(conv2d(h_conv1, W_conv2) + b_conv2)
70 |
71 | h_max1 = max_pool_2x2(h_conv2)
72 |
73 | W_conv3 = weight_variable([3,3,64,128])
74 | b_conv3 = bias_variable([128])
75 | h_conv3 = tf.nn.relu(conv2d(h_max1, W_conv3) + b_conv3)
76 |
77 | W_conv4 = weight_variable([3,3,128,128])
78 | b_conv4 = bias_variable([128])
79 | h_conv4 = tf.nn.relu(conv2d(h_conv3, W_conv4) + b_conv4)
80 |
81 | h_max2 = max_pool_2x2(h_conv4)
82 |
83 | W_conv5 = weight_variable([3,3,128,256])
84 | b_conv5 = bias_variable([256])
85 | h_conv5 = tf.nn.relu(conv2d(h_max2, W_conv5) + b_conv5)
86 |
87 | W_conv6 = weight_variable([3,3,256,256])
88 | b_conv6 = bias_variable([256])
89 | h_conv6 = tf.nn.relu(conv2d(h_conv5, W_conv6) + b_conv6)
90 |
91 | W_conv7 = weight_variable([3,3,256,256])
92 | b_conv7 = bias_variable([256])
93 | h_conv7 = tf.nn.relu(conv2d(h_conv6, W_conv7) + b_conv7)
94 |
95 | h_max3 = max_pool_2x2(h_conv7)
96 |
97 | W_conv7 = weight_variable([3,3,256,512])
98 | b_conv7 = bias_variable([512])
99 | h_conv7 = tf.nn.relu(conv2d(h_max3, W_conv7) + b_conv7)
100 |
101 | W_conv8 = weight_variable([3,3,512,512])
102 | b_conv8 = bias_variable([512])
103 | h_conv8 = tf.nn.relu(conv2d(h_conv7, W_conv8) + b_conv8)
104 |
105 | W_conv9 = weight_variable([3,3,512,512])
106 | b_conv9 = bias_variable([512])
107 | h_conv9 = tf.nn.relu(conv2d(h_conv8, W_conv9) + b_conv9)
108 |
109 | h_max4 = max_pool_2x2(h_conv9)
110 |
111 | W_conv10 = weight_variable([3,3,512,512])
112 | b_conv10 = bias_variable([512])
113 | h_conv10 = tf.nn.relu(conv2d(h_max4, W_conv10) + b_conv10)
114 |
115 | W_conv11 = weight_variable([3,3,512,512])
116 | b_conv11 = bias_variable([512])
117 | h_conv11 = tf.nn.relu(conv2d(h_conv10, W_conv11) + b_conv11)
118 |
119 | W_conv12 = weight_variable([3,3,512,512])
120 | b_conv12 = bias_variable([512])
121 | h_conv12 = tf.nn.relu(conv2d(h_conv11, W_conv12) + b_conv12)
122 |
123 | #RPN
124 |
125 | W_rpn3 = weight_variable([3,3,512,512])
126 | b_rpn3 = bias_variable([512])
127 | h_rpn3 = tf.nn.relu(conv2d(h_conv12, W_rpn3) + b_rpn3)
128 |
129 | W_cls_score = weight_variable([1,1,512,18])
130 | b_cls_score = bias_variable([18])
131 | h_cls_score = tf.nn.relu(conv2d(h_rpn3, W_cls_score) + b_cls_score)
132 |
133 | W_bbox_pred = weight_variable([1,1,512,36])
134 | b_bbox_pred = bias_variable([36])
135 | h_bbox_pred = tf.nn.relu(conv2d(h_rpn3, W_cls_score) + b_cls_score)
136 |
137 | h_cls_score_reshape = tf.reshape(h_cls_score, [2,-1])
138 |
139 |
140 |
141 | #print h_cls_score
142 |
143 | h_fc1 = tf.reshape(h_cls_score_reshape, [-1, 14*14*18])
144 | W_fc1 = weight_variable([14*14*18,2])
145 | b_fc1 = bias_variable([2])
146 | y_conv = tf.matmul(h_fc1, W_fc1) + b_fc1
147 |
148 | cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(y_conv, y1_)
149 | loss = tf.reduce_mean(cross_entropy)
150 | train_step = tf.train.AdamOptimizer(1e-4).minimize(loss)
151 | correct_prediction = tf.equal(tf.argmax(y_conv,1), tf.argmax(l,1))
152 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
153 |
154 |
155 | '''
# Debug driver: start the queue runners and repeatedly print the parsed labels.
init = tf.initialize_all_variables()
with tf.Session() as sess:
    sess.run(init)
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    try:
        for i in range(10000000):
            print(labels.eval())
            # Training and loss/accuracy reporting are disabled here; the ops
            # they need (train_step, loss, accuracy) only exist in the
            # disabled model definition above.
    finally:
        # Always stop and join the queue-runner threads, including when the
        # loop is interrupted or raises; otherwise this cleanup is never
        # reached. The enclosing `with` block closes the session.
        coord.request_stop()
        coord.join(threads)
174 |
175 |
176 |
177 |
178 |
179 |
180 |
181 |
182 |
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/rpn_tools/anchor_target_layer.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_dynamic/rpn_tools/anchor_target_layer.pyc
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/rpn_tools/fast_rcnn/__init__.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/rpn_tools/fast_rcnn/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_dynamic/rpn_tools/fast_rcnn/__init__.pyc
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/rpn_tools/fast_rcnn/bbox_transform.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 |
def bbox_transform(ex_rois, gt_rois):
    """Compute the (dx, dy, dw, dh) regression targets from ex_rois to gt_rois.

    Each input row is read as (x1, y1, x2, y2). Widths and heights are
    computed as x2 - x1 + 1 and y2 - y1 + 1. The result has one row per box:
    centre offsets divided by the ex box size, then the log of the size ratios.
    """
    def _size_and_centre(rois):
        # Width, height and centre coordinates of every box.
        w = rois[:, 2] - rois[:, 0] + 1.0
        h = rois[:, 3] - rois[:, 1] + 1.0
        cx = rois[:, 0] + 0.5 * w
        cy = rois[:, 1] + 0.5 * h
        return w, h, cx, cy

    src_w, src_h, src_cx, src_cy = _size_and_centre(ex_rois)
    dst_w, dst_h, dst_cx, dst_cy = _size_and_centre(gt_rois)

    per_coordinate = (
        (dst_cx - src_cx) / src_w,
        (dst_cy - src_cy) / src_h,
        np.log(dst_w / src_w),
        np.log(dst_h / src_h),
    )
    # Stack as rows (4, N), then flip to one row per box (N, 4).
    return np.vstack(per_coordinate).transpose()
29 |
def bbox_transform_inv(boxes, deltas):
    """Apply (dx, dy, dw, dh) deltas to boxes and return the predicted boxes.

    `deltas` holds groups of four columns; every group is applied to the
    matching row of `boxes`. The output has the shape and dtype of `deltas`,
    with each group holding (x1, y1, x2, y2). With no boxes, an empty
    (0, deltas.shape[1]) array is returned.
    """
    if boxes.shape[0] == 0:
        return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype)

    boxes = boxes.astype(deltas.dtype, copy=False)

    box_w = boxes[:, 2] - boxes[:, 0] + 1.0
    box_h = boxes[:, 3] - boxes[:, 1] + 1.0
    centre_x = boxes[:, 0] + 0.5 * box_w
    centre_y = boxes[:, 1] + 0.5 * box_h

    # Column vectors so that they broadcast against every group of deltas.
    w_col = box_w[:, np.newaxis]
    h_col = box_h[:, np.newaxis]

    new_cx = deltas[:, 0::4] * w_col + centre_x[:, np.newaxis]
    new_cy = deltas[:, 1::4] * h_col + centre_y[:, np.newaxis]
    new_w = np.exp(deltas[:, 2::4]) * w_col
    new_h = np.exp(deltas[:, 3::4]) * h_col

    result = np.zeros(deltas.shape, dtype=deltas.dtype)
    result[:, 0::4] = new_cx - 0.5 * new_w  # x1
    result[:, 1::4] = new_cy - 0.5 * new_h  # y1
    result[:, 2::4] = new_cx + 0.5 * new_w  # x2
    result[:, 3::4] = new_cy + 0.5 * new_h  # y2
    return result
62 |
def clip_boxes(boxes, im_shape):
    """
    Clip boxes to image boundaries.

    `boxes` is modified in place and also returned. Columns are handled in
    groups of four (x1, y1, x2, y2): x values are limited to
    [0, im_shape[1] - 1] and y values to [0, im_shape[0] - 1].
    """
    x_limit = im_shape[1] - 1
    y_limit = im_shape[0] - 1

    # Column offsets 0..3 within each group map to x1, y1, x2, y2.
    for offset, limit in enumerate((x_limit, y_limit, x_limit, y_limit)):
        capped = np.minimum(boxes[:, offset::4], limit)
        boxes[:, offset::4] = np.maximum(capped, 0)
    return boxes
77 |
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/rpn_tools/fast_rcnn/bbox_transform.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_dynamic/rpn_tools/fast_rcnn/bbox_transform.pyc
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/rpn_tools/fast_rcnn/config.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_dynamic/rpn_tools/fast_rcnn/config.pyc
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/rpn_tools/fast_rcnn/nms/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_dynamic/rpn_tools/fast_rcnn/nms/__init__.py
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/rpn_tools/fast_rcnn/nms/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_dynamic/rpn_tools/fast_rcnn/nms/__init__.pyc
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/rpn_tools/fast_rcnn/nms/cpu_nms.pyx:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Multitask Network Cascade
3 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn)
4 | # Copyright (c) 2016, Haozhi Qi
5 | # Licensed under The MIT License [see LICENSE for details]
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 | cimport numpy as np
10 |
cdef inline np.float32_t max(np.float32_t a, np.float32_t b):
    # Larger of two float32 values; `a` is returned when the comparison holds.
    if a >= b:
        return a
    return b
13 |
cdef inline np.float32_t min(np.float32_t a, np.float32_t b):
    # Smaller of two float32 values; `a` is returned when the comparison holds.
    if a <= b:
        return a
    return b
16 |
def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh):
    """Greedy non-maximum suppression on the CPU.

    Args:
      dets: float32 array with one detection per row; columns 0-3 are read as
        x1, y1, x2, y2 and column 4 as the score.
      thresh: a lower-scoring box is suppressed when its overlap ratio
        (intersection over union) with a kept box is >= thresh.
    Returns:
      List of row indices into `dets` that are kept, in descending score order.
    """
    cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0]
    cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1]
    cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2]
    cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3]
    cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4]

    cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    # argsort() yields intp indices; declaring the buffer as intp keeps the
    # typed assignment valid on platforms where C long is narrower than intp.
    cdef np.ndarray[np.intp_t, ndim=1] order = scores.argsort()[::-1]

    cdef int ndets = dets.shape[0]
    # np.intp replaces the removed `np.int` alias and matches the buffer type.
    cdef np.ndarray[np.intp_t, ndim=1] suppressed = \
        np.zeros((ndets), dtype=np.intp)

    # nominal indices
    cdef int _i, _j
    # sorted indices
    cdef int i, j
    # temp variables for box i's (the box currently under consideration)
    cdef np.float32_t ix1, iy1, ix2, iy2, iarea
    # variables for computing overlap with box j (lower scoring box)
    cdef np.float32_t xx1, yy1, xx2, yy2
    cdef np.float32_t w, h
    cdef np.float32_t inter, ovr

    keep = []
    for _i in range(ndets):
        i = order[_i]
        if suppressed[i] == 1:
            continue
        keep.append(i)
        ix1 = x1[i]
        iy1 = y1[i]
        ix2 = x2[i]
        iy2 = y2[i]
        iarea = areas[i]
        for _j in range(_i + 1, ndets):
            j = order[_j]
            if suppressed[j] == 1:
                continue
            xx1 = max(ix1, x1[j])
            yy1 = max(iy1, y1[j])
            xx2 = min(ix2, x2[j])
            yy2 = min(iy2, y2[j])
            w = max(0.0, xx2 - xx1 + 1)
            h = max(0.0, yy2 - yy1 + 1)
            inter = w * h
            ovr = inter / (iarea + areas[j] - inter)
            if ovr >= thresh:
                suppressed[j] = 1

    return keep
69 |
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/rpn_tools/fast_rcnn/nms/cpu_nms.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_dynamic/rpn_tools/fast_rcnn/nms/cpu_nms.so
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/rpn_tools/fast_rcnn/nms/gpu_mv.hpp:
--------------------------------------------------------------------------------
// Declaration of the _mv routine exposed to the Cython wrapper (gpu_mv.pyx
// declares it via `cdef extern from "gpu_mv.hpp"`).
// As called from that wrapper: all_boxes / all_masks are float buffers with
// all_boxes_num entries; candidate_inds / candidate_weights hold
// candidate_num entries; candidate_start holds result_num entries;
// finalize_output_mask and finalize_output_box are caller-allocated outputs
// with result_num rows each.
// NOTE(review): the implementation is not visible here — semantics of the
// candidate arrays and of device_id should be confirmed against mv_kernel.cu.
void _mv(const float* all_boxes, const float* all_masks, const int all_boxes_num,
         const int* candidate_inds, const int* candidate_start, const float* candidate_weights, const int candidate_num,
         const int image_height, const int image_width, const int box_dim, const int mask_size, const int result_num,
         float* finalize_output_mask, int* finalize_output_box, const int device_id);
5 |
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/rpn_tools/fast_rcnn/nms/gpu_mv.pyx:
--------------------------------------------------------------------------------
1 |
2 | import numpy as np
3 | cimport numpy as np
4 |
5 | assert sizeof(int) == sizeof(np.int32_t)
6 |
7 | cdef extern from "gpu_mv.hpp":
8 | void _mv(np.float32_t* all_boxes, np.float32_t* all_masks, np.int32_t all_boxes_num, np.int32_t* candidate_inds, np.int32_t* candidate_start, np.float32_t* candidate_weights, np.int32_t candidate_num, np.int32_t image_height, np.int32_t image_width, np.int32_t box_dim, np.int32_t mask_size, np.int32_t result_num, np.float32_t* result_mask, np.int32_t* result_box, np.int32_t device_id);
9 |
# Array layouts noted by the original author:
# boxes: n * 4
# masks: n * 1 * 21 * 21
# scores: n * 21
# NOTE(review): mv() takes no scores argument, so the "scores" line above
# looks stale — confirm.
#
# Thin wrapper around the external _mv routine: allocates the two result
# arrays, passes raw pointers to every buffer, and returns
# (result_mask, result_box).
def mv(np.ndarray[np.float32_t, ndim=2] all_boxes,
       np.ndarray[np.float32_t, ndim=4] all_masks,
       np.ndarray[np.int32_t, ndim=1] candidate_inds,
       np.ndarray[np.int32_t, ndim=1] candidate_start,
       np.ndarray[np.float32_t, ndim=1] candidate_weights,
       np.int32_t image_height,
       np.int32_t image_width,
       np.int32_t device_id = 0):
    cdef int all_box_num = all_boxes.shape[0]
    cdef int boxes_dim = all_boxes.shape[1]
    # Only the last mask dimension is forwarded as mask_size.
    cdef int mask_size = all_masks.shape[3]
    cdef int candidate_num = candidate_inds.shape[0]
    # One result row per entry of candidate_start.
    cdef int result_num = candidate_start.shape[0]
    cdef np.ndarray[np.float32_t, ndim=4] \
        result_mask = np.zeros((result_num, 1, all_masks.shape[2], all_masks.shape[3]), dtype=np.float32)
    cdef np.ndarray[np.int32_t, ndim=2] \
        result_box = np.zeros((result_num, boxes_dim), dtype=np.int32)
    # Element [0, ...] of every array is addressed to obtain its data pointer,
    # so all inputs must be non-empty.
    _mv(&all_boxes[0, 0], &all_masks[0, 0, 0, 0], all_box_num, &candidate_inds[0], &candidate_start[0], &candidate_weights[0], candidate_num, image_height, image_width, boxes_dim, mask_size, candidate_start.shape[0], &result_mask[0,0,0,0], &result_box[0,0], device_id)
    return result_mask, result_box
32 |
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/rpn_tools/fast_rcnn/nms/gpu_nms.hpp:
--------------------------------------------------------------------------------
// Declaration of the _nms routine exposed to the Cython wrapper (gpu_nms.pyx
// declares it via `cdef extern from "gpu_nms.hpp"`).
// As called from that wrapper: keep_out is a caller-allocated buffer of
// boxes_num ints, *num_out receives how many leading entries of keep_out are
// valid, and boxes_host points to boxes_num rows of boxes_dim floats that the
// wrapper has already sorted by descending score.
// NOTE(review): device_id presumably selects the CUDA device — confirm in
// nms_kernel.cu.
void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
          int boxes_dim, float nms_overlap_thresh, int device_id);
3 |
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/rpn_tools/fast_rcnn/nms/gpu_nms.pyx:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Multitask Network Cascade
3 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn)
4 | # Copyright (c) 2016, Haozhi Qi
5 | # Licensed under The MIT License [see LICENSE for details]
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 | cimport numpy as np
10 |
11 | assert sizeof(int) == sizeof(np.int32_t)
12 |
13 | cdef extern from "gpu_nms.hpp":
14 | void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int)
15 |
def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh,
            np.int32_t device_id=0):
    """Run non-maximum suppression through the external _nms routine.

    Args:
      dets: float32 array with one detection per row; column 4 is read as the
        score used for ordering.
      thresh: overlap threshold forwarded to _nms.
      device_id: device index forwarded to _nms.
    Returns:
      List of row indices into `dets` that are kept. An empty `dets` yields
      an empty list without calling _nms.
    """
    cdef int boxes_num = dets.shape[0]
    cdef int boxes_dim = dets.shape[1]
    cdef int num_out
    cdef np.ndarray[np.int32_t, ndim=1] keep
    cdef np.ndarray[np.float32_t, ndim=1] scores
    # argsort() yields intp indices; an intp buffer keeps the typed
    # assignment valid where C long is narrower than intp.
    cdef np.ndarray[np.intp_t, ndim=1] order
    cdef np.ndarray[np.float32_t, ndim=2] sorted_dets

    # Taking &keep[0] / &sorted_dets[0, 0] on empty arrays is invalid.
    if boxes_num == 0:
        return []

    keep = np.zeros(boxes_num, dtype=np.int32)
    scores = dets[:, 4]
    order = scores.argsort()[::-1]
    sorted_dets = dets[order, :]
    _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id)
    # keep holds positions within the sorted array; map them back through order.
    keep = keep[:num_out]
    return list(order[keep])
32 |
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/rpn_tools/fast_rcnn/nms/gpu_nms.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_dynamic/rpn_tools/fast_rcnn/nms/gpu_nms.so
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/rpn_tools/fast_rcnn/nms/mnc_config.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_dynamic/rpn_tools/fast_rcnn/nms/mnc_config.pyc
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/rpn_tools/fast_rcnn/nms/mv.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_dynamic/rpn_tools/fast_rcnn/nms/mv.so
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/rpn_tools/fast_rcnn/nms/nms_kernel.cu:
--------------------------------------------------------------------------------
1 | // --------------------------------------------------------
2 | // Multitask Network Cascade
3 | // Modified from MATLAB Faster R-CNN (https://github.com/shaoqingren/faster_rcnn)
4 | // Copyright (c) 2016, Haozhi Qi
5 | // Licensed under The MIT License [see LICENSE for details]
6 | // --------------------------------------------------------
7 |
8 | #include "gpu_nms.hpp"
9 | #include
10 | #include
11 |
12 | #define CUDA_CHECK(condition) \
13 | /* Code block avoids redefinition of cudaError_t error */ \
14 | do { \
15 | cudaError_t error = condition; \
16 | if (error != cudaSuccess) { \
17 | std::cout << cudaGetErrorString(error) << std::endl; \
18 | } \
19 | } while (0)
20 |
21 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
22 | int const threadsPerBlock = sizeof(unsigned long long) * 8;
23 |
24 | __device__ inline float devIoU(float const * const a, float const * const b) {
25 | float left = max(a[0], b[0]), right = min(a[2], b[2]);
26 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]);
27 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f);
28 | float interS = width * height;
29 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
30 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
31 | return interS / (Sa + Sb - interS);
32 | }
33 |
34 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
35 | const float *dev_boxes, unsigned long long *dev_mask) {
36 | const int row_start = blockIdx.y;
37 | const int col_start = blockIdx.x;
38 |
39 | // if (row_start > col_start) return;
40 |
41 | const int row_size =
42 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
43 | const int col_size =
44 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);
45 |
46 | __shared__ float block_boxes[threadsPerBlock * 5];
47 | if (threadIdx.x < col_size) {
48 | block_boxes[threadIdx.x * 5 + 0] =
49 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
50 | block_boxes[threadIdx.x * 5 + 1] =
51 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
52 | block_boxes[threadIdx.x * 5 + 2] =
53 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
54 | block_boxes[threadIdx.x * 5 + 3] =
55 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
56 | block_boxes[threadIdx.x * 5 + 4] =
57 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
58 | }
59 | __syncthreads();
60 |
61 | if (threadIdx.x < row_size) {
62 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
63 | const float *cur_box = dev_boxes + cur_box_idx * 5;
64 | int i = 0;
65 | unsigned long long t = 0;
66 | int start = 0;
67 | if (row_start == col_start) {
68 | start = threadIdx.x + 1;
69 | }
70 | for (i = start; i < col_size; i++) {
71 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
72 | t |= 1ULL << i;
73 | }
74 | }
75 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock);
76 | dev_mask[cur_box_idx * col_blocks + col_start] = t;
77 | }
78 | }
79 |
80 | void _set_device(int device_id) {
81 | int current_device;
82 | CUDA_CHECK(cudaGetDevice(¤t_device));
83 | if (current_device == device_id) {
84 | return;
85 | }
86 | // The call to cudaSetDevice must come before any calls to Get, which
87 | // may perform initialization using the GPU.
88 | CUDA_CHECK(cudaSetDevice(device_id));
89 | }
90 |
91 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
92 | int boxes_dim, float nms_overlap_thresh, int device_id) {
93 | _set_device(device_id);
94 |
95 | float* boxes_dev = NULL;
96 | unsigned long long* mask_dev = NULL;
97 |
98 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock);
99 |
100 | CUDA_CHECK(cudaMalloc(&boxes_dev,
101 | boxes_num * boxes_dim * sizeof(float)));
102 | CUDA_CHECK(cudaMemcpy(boxes_dev,
103 | boxes_host,
104 | boxes_num * boxes_dim * sizeof(float),
105 | cudaMemcpyHostToDevice));
106 |
107 | CUDA_CHECK(cudaMalloc(&mask_dev,
108 | boxes_num * col_blocks * sizeof(unsigned long long)));
109 |
110 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock),
111 | DIVUP(boxes_num, threadsPerBlock));
112 | dim3 threads(threadsPerBlock);
113 | nms_kernel<<>>(boxes_num,
114 | nms_overlap_thresh,
115 | boxes_dev,
116 | mask_dev);
117 |
118 | std::vector mask_host(boxes_num * col_blocks);
119 | CUDA_CHECK(cudaMemcpy(&mask_host[0],
120 | mask_dev,
121 | sizeof(unsigned long long) * boxes_num * col_blocks,
122 | cudaMemcpyDeviceToHost));
123 |
124 | std::vector remv(col_blocks);
125 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);
126 |
127 | int num_to_keep = 0;
128 | for (int i = 0; i < boxes_num; i++) {
129 | int nblock = i / threadsPerBlock;
130 | int inblock = i % threadsPerBlock;
131 |
132 | if (!(remv[nblock] & (1ULL << inblock))) {
133 | keep_out[num_to_keep++] = i;
134 | unsigned long long *p = &mask_host[0] + i * col_blocks;
135 | for (int j = nblock; j < col_blocks; j++) {
136 | remv[j] |= p[j];
137 | }
138 | }
139 | }
140 | *num_out = num_to_keep;
141 |
142 | CUDA_CHECK(cudaFree(boxes_dev));
143 | CUDA_CHECK(cudaFree(mask_dev));
144 | }
145 |
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/rpn_tools/fast_rcnn/nms/nms_wrapper.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Multitask Network Cascade
3 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn)
4 | # Copyright (c) 2016, Haozhi Qi
5 | # Licensed under The MIT License [see LICENSE for details]
6 | # --------------------------------------------------------
7 |
8 | from mnc_config import cfg
9 | from gpu_nms import gpu_nms
10 | from cpu_nms import cpu_nms
11 |
12 |
def nms(dets, thresh):
    """Run NMS on ``dets`` with the backend selected by ``cfg.USE_GPU_NMS``.

    Returns an empty list when ``dets`` has no rows; otherwise returns
    whatever the chosen backend (``gpu_nms`` or ``cpu_nms``) returns.
    """
    num_dets = dets.shape[0]
    if num_dets == 0:
        return []

    if not cfg.USE_GPU_NMS:
        return cpu_nms(dets, thresh)
    return gpu_nms(dets, thresh, device_id=cfg.GPU_ID)
22 |
23 |
def apply_nms(all_boxes, thresh):
    """Apply non-maximum suppression to all predicted boxes output by the
    test_net method.

    Arguments:
        all_boxes: nested list indexed as ``all_boxes[cls][image]``; each
            entry is either an empty list or an array of detections.
        thresh: overlap threshold forwarded to ``nms``.

    Returns:
        nested list with the same layout; entries that had no detections
        (or for which ``nms`` kept nothing) are empty lists, the others are
        copies of the kept rows.
    """
    num_classes = len(all_boxes)
    num_images = len(all_boxes[0]) if num_classes else 0
    nms_boxes = [[[] for _ in range(num_images)]
                 for _ in range(num_classes)]
    for cls_ind in range(num_classes):
        for im_ind in range(num_images):
            dets = all_boxes[cls_ind][im_ind]
            # `dets == []` would be an elementwise comparison for arrays;
            # len() works for both the list placeholder and arrays.
            if len(dets) == 0:
                continue
            keep = nms(dets, thresh)
            if len(keep) == 0:
                continue
            nms_boxes[cls_ind][im_ind] = dets[keep, :].copy()
    return nms_boxes
42 |
43 |
def apply_nms_mask(all_boxes, all_masks, thresh):
    """Apply NMS to boxes and keep the masks of the surviving boxes.

    Arguments:
        all_boxes: nested list indexed as ``all_boxes[cls][image]``; each
            entry is either an empty list or an array of detections.
        all_masks: nested list with the same layout holding one mask per
            detection row.
        thresh: overlap threshold forwarded to ``nms``.

    Returns:
        ``(nms_boxes, nms_masks)`` with the same layout as the inputs;
        entries without kept detections are empty lists.
    """
    num_classes = len(all_boxes)
    num_images = len(all_boxes[0]) if num_classes else 0
    nms_boxes = [[[] for _ in range(num_images)]
                 for _ in range(num_classes)]
    nms_masks = [[[] for _ in range(num_images)]
                 for _ in range(num_classes)]
    for cls_ind in range(num_classes):
        for im_ind in range(num_images):
            dets = all_boxes[cls_ind][im_ind]
            masks = all_masks[cls_ind][im_ind]
            # `dets == []` would be an elementwise comparison for arrays;
            # len() works for both the list placeholder and arrays.
            if len(dets) == 0:
                continue
            keep = nms(dets, thresh)
            if len(keep) == 0:
                continue
            nms_boxes[cls_ind][im_ind] = dets[keep, :].copy()
            nms_masks[cls_ind][im_ind] = masks[keep, :].copy()
    return nms_boxes, nms_masks
63 |
64 |
def apply_nms_mask_single(box, mask, thresh):
    """Apply NMS to one set of boxes and select the matching masks.

    Arguments:
        box: empty list or array of detections.
        mask: masks, one per row of ``box``.
        thresh: overlap threshold forwarded to ``nms``.

    Returns:
        ``(box, mask)`` unchanged when there are no boxes or ``nms`` keeps
        nothing; otherwise copies of the kept rows of both.
    """
    # `box == []` would be an elementwise comparison for arrays; len() works
    # for both the list placeholder and arrays.
    if len(box) == 0:
        return box, mask
    keep = nms(box, thresh)
    if len(keep) == 0:
        return box, mask
    return box[keep, :].copy(), mask[keep, :].copy()
72 |
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/rpn_tools/fast_rcnn/nms/nms_wrapper.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_dynamic/rpn_tools/fast_rcnn/nms/nms_wrapper.pyc
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/rpn_tools/fast_rcnn/nms/py_cpu_nms.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Multitask Network Cascade
3 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn)
4 | # Copyright (c) 2016, Haozhi Qi
5 | # Licensed under The MIT License [see LICENSE for details]
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 |
def py_cpu_nms(dets, thresh):
    """Pure Python NMS baseline.

    ``dets`` holds one box per row as ``[x1, y1, x2, y2, score]``. Boxes are
    visited in descending score order; each visited box is kept and every
    remaining box whose IoU with it is greater than ``thresh`` is dropped.
    Returns the list of kept row indices.
    """
    left = dets[:, 0]
    top = dets[:, 1]
    right = dets[:, 2]
    bottom = dets[:, 3]
    scores = dets[:, 4]

    # Inclusive pixel convention: a box from x1 to x2 is x2 - x1 + 1 wide.
    box_areas = (right - left + 1) * (bottom - top + 1)
    remaining = scores.argsort()[::-1]

    selected = []
    while remaining.size > 0:
        best = remaining[0]
        rest = remaining[1:]
        selected.append(best)

        # Intersection rectangle of the best box with every other candidate.
        inter_left = np.maximum(left[best], left[rest])
        inter_top = np.maximum(top[best], top[rest])
        inter_right = np.minimum(right[best], right[rest])
        inter_bottom = np.minimum(bottom[best], bottom[rest])

        inter_w = np.maximum(0.0, inter_right - inter_left + 1)
        inter_h = np.maximum(0.0, inter_bottom - inter_top + 1)
        inter = inter_w * inter_h
        iou = inter / (box_areas[best] + box_areas[rest] - inter)

        # Only candidates that do not overlap too much stay in the queue.
        remaining = rest[iou <= thresh]

    return selected
39 |
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/rpn_tools/fast_rcnn/nms_wrapper.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | from config import cfg
9 | from nms.gpu_nms import gpu_nms
10 | from nms.cpu_nms import cpu_nms
11 |
def nms(dets, thresh, force_cpu=False):
    """Run NMS on ``dets`` using the GPU or the CPU implementation.

    The GPU path is taken only when ``cfg.USE_GPU_NMS`` is set and
    ``force_cpu`` is false. An input without rows yields an empty list.
    """
    if dets.shape[0] == 0:
        return []

    use_cpu = force_cpu or not cfg.USE_GPU_NMS
    if use_cpu:
        return cpu_nms(dets, thresh)
    return gpu_nms(dets, thresh, device_id=cfg.GPU_ID)
21 |
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/rpn_tools/fast_rcnn/nms_wrapper.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_dynamic/rpn_tools/fast_rcnn/nms_wrapper.pyc
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/rpn_tools/fast_rcnn/nms_wrapper.py~:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import cfg
9 | from nms.gpu_nms import gpu_nms
10 | from nms.cpu_nms import cpu_nms
11 |
def nms(dets, thresh, force_cpu=False):
    """Pick the CPU or GPU NMS routine and run it on ``dets``.

    Returns ``[]`` for an input with zero rows. The GPU routine is used when
    ``cfg.USE_GPU_NMS`` is truthy and ``force_cpu`` is not set.
    """
    has_no_dets = dets.shape[0] == 0
    if has_no_dets:
        return []

    run_on_gpu = cfg.USE_GPU_NMS and not force_cpu
    if run_on_gpu:
        return gpu_nms(dets, thresh, device_id=cfg.GPU_ID)
    return cpu_nms(dets, thresh)
21 |
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/rpn_tools/fast_rcnn/test.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_dynamic/rpn_tools/fast_rcnn/test.pyc
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/rpn_tools/fast_rcnn/train.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | """Train a Fast R-CNN network."""
9 |
10 | import caffe
11 | from fast_rcnn.config import cfg
12 | import roi_data_layer.roidb as rdl_roidb
13 | from utils.timer import Timer
14 | import numpy as np
15 | import os
16 |
17 | from caffe.proto import caffe_pb2
18 | import google.protobuf as pb2
19 |
class SolverWrapper(object):
    """A simple wrapper around Caffe's solver.

    This wrapper gives us control over the snapshotting process, which we
    use to unnormalize the learned bounding-box regression weights.
    """

    def __init__(self, solver_prototxt, roidb, output_dir,
                 pretrained_model=None):
        """Initialize the SolverWrapper.

        Arguments:
            solver_prototxt: path to the solver definition; used to build
                the solver and re-read to obtain the solver parameters.
            roidb: training roidb, handed to the network's first layer.
            output_dir: directory that snapshots are written to.
            pretrained_model: optional weights file to initialize from.
        """
        self.output_dir = output_dir

        if (cfg.TRAIN.HAS_RPN and cfg.TRAIN.BBOX_REG and
                cfg.TRAIN.BBOX_NORMALIZE_TARGETS):
            # RPN can only use precomputed normalization because there are no
            # fixed statistics to compute a priori
            assert cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED

        if cfg.TRAIN.BBOX_REG:
            print('Computing bounding-box regression targets...')
            self.bbox_means, self.bbox_stds = \
                rdl_roidb.add_bbox_regression_targets(roidb)
            print('done')

        self.solver = caffe.SGDSolver(solver_prototxt)
        if pretrained_model is not None:
            print('Loading pretrained model '
                  'weights from {:s}'.format(pretrained_model))
            self.solver.net.copy_from(pretrained_model)

        self.solver_param = caffe_pb2.SolverParameter()
        with open(solver_prototxt, 'rt') as f:
            pb2.text_format.Merge(f.read(), self.solver_param)

        self.solver.net.layers[0].set_roidb(roidb)

    def snapshot(self):
        """Take a snapshot of the network after unnormalizing the learned
        bounding-box regression weights. This enables easy use at test-time.

        Returns the path of the written snapshot file.
        """
        net = self.solver.net

        scale_bbox_params = (cfg.TRAIN.BBOX_REG and
                             cfg.TRAIN.BBOX_NORMALIZE_TARGETS and
                             'bbox_pred' in net.params)

        if scale_bbox_params:
            # save original values
            orig_0 = net.params['bbox_pred'][0].data.copy()
            orig_1 = net.params['bbox_pred'][1].data.copy()

        try:
            if scale_bbox_params:
                # scale and shift with bbox reg unnormalization; then save
                # snapshot
                net.params['bbox_pred'][0].data[...] = \
                    (net.params['bbox_pred'][0].data *
                     self.bbox_stds[:, np.newaxis])
                net.params['bbox_pred'][1].data[...] = \
                    (net.params['bbox_pred'][1].data *
                     self.bbox_stds + self.bbox_means)

            infix = ('_' + cfg.TRAIN.SNAPSHOT_INFIX
                     if cfg.TRAIN.SNAPSHOT_INFIX != '' else '')
            filename = (self.solver_param.snapshot_prefix + infix +
                        '_iter_{:d}'.format(self.solver.iter) + '.caffemodel')
            filename = os.path.join(self.output_dir, filename)

            net.save(str(filename))
        finally:
            # Restore the net to its original state even when saving fails,
            # so training never continues with unnormalized weights.
            if scale_bbox_params:
                net.params['bbox_pred'][0].data[...] = orig_0
                net.params['bbox_pred'][1].data[...] = orig_1

        print('Wrote snapshot to: {:s}'.format(filename))
        return filename

    def train_model(self, max_iters):
        """Network training loop.

        Steps the solver one iteration at a time until ``max_iters`` is
        reached, snapshotting every ``cfg.TRAIN.SNAPSHOT_ITERS`` iterations
        and once more at the end if the last iteration was not snapshotted.
        Returns the list of snapshot paths.
        """
        last_snapshot_iter = -1
        timer = Timer()
        model_paths = []
        # A display interval of 0 would make the modulo below divide by
        # zero; skip the speed report in that case.
        report_every = 10 * self.solver_param.display
        while self.solver.iter < max_iters:
            # Make one SGD update
            timer.tic()
            self.solver.step(1)
            timer.toc()
            if report_every and self.solver.iter % report_every == 0:
                print('speed: {:.3f}s / iter'.format(timer.average_time))

            if self.solver.iter % cfg.TRAIN.SNAPSHOT_ITERS == 0:
                last_snapshot_iter = self.solver.iter
                model_paths.append(self.snapshot())

        if last_snapshot_iter != self.solver.iter:
            model_paths.append(self.snapshot())
        return model_paths
113 |
def get_training_roidb(imdb):
    """Prepare ``imdb`` for training and return its roidb.

    When ``cfg.TRAIN.USE_FLIPPED`` is set, flipped images are appended to
    the imdb first; then ``rdl_roidb.prepare_roidb`` is run on it.
    """
    use_flipped = cfg.TRAIN.USE_FLIPPED
    if use_flipped:
        print('Appending horizontally-flipped training examples...')
        imdb.append_flipped_images()
        print('done')

    print('Preparing training data...')
    rdl_roidb.prepare_roidb(imdb)
    print('done')

    roidb = imdb.roidb
    return roidb
126 |
def filter_roidb(roidb):
    """Drop roidb entries that contain neither foreground nor background RoIs.

    Prints how many entries were removed and returns the filtered list.
    """

    def is_valid(entry):
        # An entry is usable when at least one RoI counts as foreground
        # (overlap >= FG_THRESH) or as background (overlap within
        # [BG_THRESH_LO, BG_THRESH_HI)).
        overlaps = entry['max_overlaps']
        fg_inds = np.where(overlaps >= cfg.TRAIN.FG_THRESH)[0]
        in_bg_range = ((overlaps < cfg.TRAIN.BG_THRESH_HI) &
                       (overlaps >= cfg.TRAIN.BG_THRESH_LO))
        bg_inds = np.where(in_bg_range)[0]
        return len(fg_inds) > 0 or len(bg_inds) > 0

    num_before = len(roidb)
    kept = []
    for entry in roidb:
        if is_valid(entry):
            kept.append(entry)
    num_after = len(kept)
    print('Filtered {} roidb entries: {} -> {}'.format(
        num_before - num_after, num_before, num_after))
    return kept
150 |
def train_net(solver_prototxt, roidb, output_dir,
              pretrained_model=None, max_iters=40000):
    """Train a Fast R-CNN network.

    Filters the roidb, builds a ``SolverWrapper`` around the solver and runs
    its training loop for ``max_iters`` iterations. Returns the snapshot
    paths reported by the training loop.
    """
    usable_roidb = filter_roidb(roidb)
    wrapper = SolverWrapper(solver_prototxt, usable_roidb, output_dir,
                            pretrained_model=pretrained_model)

    print('Solving...')
    snapshot_paths = wrapper.train_model(max_iters)
    print('done solving')
    return snapshot_paths
163 |
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/rpn_tools/fast_rcnn/train.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_dynamic/rpn_tools/fast_rcnn/train.pyc
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/rpn_tools/generate.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Faster R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | from fast_rcnn.config import cfg
9 | from utils.blob import im_list_to_blob
10 | from utils.timer import Timer
11 | import numpy as np
12 | import cv2
13 |
14 | def _vis_proposals(im, dets, thresh=0.5):
15 | """Draw detected bounding boxes."""
16 | inds = np.where(dets[:, -1] >= thresh)[0]
17 | if len(inds) == 0:
18 | return
19 |
20 | class_name = 'obj'
21 | im = im[:, :, (2, 1, 0)]
22 | fig, ax = plt.subplots(figsize=(12, 12))
23 | ax.imshow(im, aspect='equal')
24 | for i in inds:
25 | bbox = dets[i, :4]
26 | score = dets[i, -1]
27 |
28 | ax.add_patch(
29 | plt.Rectangle((bbox[0], bbox[1]),
30 | bbox[2] - bbox[0],
31 | bbox[3] - bbox[1], fill=False,
32 | edgecolor='red', linewidth=3.5)
33 | )
34 | ax.text(bbox[0], bbox[1] - 2,
35 | '{:s} {:.3f}'.format(class_name, score),
36 | bbox=dict(facecolor='blue', alpha=0.5),
37 | fontsize=14, color='white')
38 |
39 | ax.set_title(('{} detections with '
40 | 'p({} | box) >= {:.1f}').format(class_name, class_name,
41 | thresh),
42 | fontsize=14)
43 | plt.axis('off')
44 | plt.tight_layout()
45 | plt.draw()
46 |
def _get_image_blob(im):
    """Converts an image into a network input.

    Arguments:
        im (ndarray): a color image in BGR order

    Returns:
        blob (ndarray): the result of im_list_to_blob on the single
            mean-subtracted, rescaled image
        im_info (ndarray): one row holding the first two dimensions of the
            rescaled image followed by the scale factor that was applied
    """
    mean_subtracted = im.astype(np.float32, copy=True)
    mean_subtracted -= cfg.PIXEL_MEANS

    spatial_dims = mean_subtracted.shape[0:2]
    short_side = np.min(spatial_dims)
    long_side = np.max(spatial_dims)

    # Only a single test scale is supported.
    assert len(cfg.TEST.SCALES) == 1
    target_size = cfg.TEST.SCALES[0]

    # Scale the short side to target_size ...
    im_scale = float(target_size) / float(short_side)
    # ... unless that would push the long side beyond MAX_SIZE.
    if np.round(im_scale * long_side) > cfg.TEST.MAX_SIZE:
        im_scale = float(cfg.TEST.MAX_SIZE) / float(long_side)

    resized = cv2.resize(mean_subtracted, None, None,
                         fx=im_scale, fy=im_scale,
                         interpolation=cv2.INTER_LINEAR)
    im_info = np.hstack((resized.shape[:2], im_scale))[np.newaxis, :]

    # Create a blob to hold the input image
    blob = im_list_to_blob([resized])

    return blob, im_info
83 |
def im_proposals(net, im):
    """Generate RPN proposals on a single image.

    Runs ``net`` forward on the preprocessed image and returns
    ``(boxes, scores)``: the ``rois`` output without its first column,
    divided by the image scale, and a copy of the ``scores`` output.
    """
    data_blob, im_info = _get_image_blob(im)

    # Resize the network inputs to match this image before the forward pass.
    net.blobs['data'].reshape(*(data_blob.shape))
    net.blobs['im_info'].reshape(*(im_info.shape))
    outputs = net.forward(
        data=data_blob.astype(np.float32, copy=False),
        im_info=im_info.astype(np.float32, copy=False))

    # Column 2 of im_info is the scale applied during preprocessing; dividing
    # by it maps the boxes back to the input image's coordinates.
    im_scale = im_info[0, 2]
    proposals = outputs['rois'][:, 1:].copy() / im_scale
    proposal_scores = outputs['scores'].copy()
    return proposals, proposal_scores
98 |
def imdb_proposals(net, imdb):
    """Generate RPN proposals on all images in an imdb.

    Arguments:
        net: network passed through to ``im_proposals``.
        imdb: object providing ``num_images`` and ``image_path_at(i)``.

    Returns:
        list with one entry per image holding the boxes returned by
        ``im_proposals`` for that image.
    """
    _t = Timer()
    num_images = imdb.num_images
    imdb_boxes = [[] for _ in range(num_images)]
    for i in range(num_images):
        im = cv2.imread(imdb.image_path_at(i))
        _t.tic()
        # Only the boxes are collected; the scores are not used here.
        imdb_boxes[i], _ = im_proposals(net, im)
        _t.toc()
        print('im_proposals: {:d}/{:d} {:.3f}s'
              .format(i + 1, num_images, _t.average_time))

    return imdb_boxes
118 |
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/rpn_tools/generate_anchors.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Faster R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick and Sean Bell
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 |
10 | # Verify that we compute the same anchors as Shaoqing's matlab implementation:
11 | #
12 | # >> load output/rpn_cachedir/faster_rcnn_VOC2007_ZF_stage1_rpn/anchors.mat
13 | # >> anchors
14 | #
15 | # anchors =
16 | #
17 | # -83 -39 100 56
18 | # -175 -87 192 104
19 | # -359 -183 376 200
20 | # -55 -55 72 72
21 | # -119 -119 136 136
22 | # -247 -247 264 264
23 | # -35 -79 52 96
24 | # -79 -167 96 184
25 | # -167 -343 184 360
26 |
27 | #array([[ -83., -39., 100., 56.],
28 | # [-175., -87., 192., 104.],
29 | # [-359., -183., 376., 200.],
30 | # [ -55., -55., 72., 72.],
31 | # [-119., -119., 136., 136.],
32 | # [-247., -247., 264., 264.],
33 | # [ -35., -79., 52., 96.],
34 | # [ -79., -167., 96., 184.],
35 | # [-167., -343., 184., 360.]])
36 |
def generate_anchors(base_size=16, ratios=[0.5, 1, 2],
                     scales=2**np.arange(3, 6)):
    """
    Build the anchor (reference) windows: every aspect ratio in `ratios`
    combined with every scale in `scales`, relative to a
    (0, 0, base_size - 1, base_size - 1) window. Rows are grouped by ratio.
    """

    reference = np.array([1, 1, base_size, base_size]) - 1
    per_ratio = _ratio_enum(reference, ratios)
    # Expand each ratio anchor into one anchor per scale and stack the groups.
    per_scale = [_scale_enum(ratio_anchor, scales)
                 for ratio_anchor in per_ratio]
    return np.vstack(per_scale)
49 |
50 | def _whctrs(anchor):
51 | """
52 | Return width, height, x center, and y center for an anchor (window).
53 | """
54 |
55 | w = anchor[2] - anchor[0] + 1
56 | h = anchor[3] - anchor[1] + 1
57 | x_ctr = anchor[0] + 0.5 * (w - 1)
58 | y_ctr = anchor[1] + 0.5 * (h - 1)
59 | return w, h, x_ctr, y_ctr
60 |
61 | def _mkanchors(ws, hs, x_ctr, y_ctr):
62 | """
63 | Given a vector of widths (ws) and heights (hs) around a center
64 | (x_ctr, y_ctr), output a set of anchors (windows).
65 | """
66 |
67 | ws = ws[:, np.newaxis]
68 | hs = hs[:, np.newaxis]
69 | anchors = np.hstack((x_ctr - 0.5 * (ws - 1),
70 | y_ctr - 0.5 * (hs - 1),
71 | x_ctr + 0.5 * (ws - 1),
72 | y_ctr + 0.5 * (hs - 1)))
73 | return anchors
74 |
def _ratio_enum(anchor, ratios):
    """
    Produce one anchor per aspect ratio, each centered on `anchor`, with the
    rounded width derived from the anchor's area and the rounded height
    derived from that width.
    """

    w, h, x_ctr, y_ctr = _whctrs(anchor)
    area = w * h
    widths = np.round(np.sqrt(area / ratios))
    heights = np.round(widths * ratios)
    return _mkanchors(widths, heights, x_ctr, y_ctr)
87 |
def _scale_enum(anchor, scales):
    """
    Produce one anchor per scale by multiplying the width and height of
    `anchor` by each scale while keeping its center.
    """

    w, h, x_ctr, y_ctr = _whctrs(anchor)
    scaled_widths = w * scales
    scaled_heights = h * scales
    return _mkanchors(scaled_widths, scaled_heights, x_ctr, y_ctr)
98 |
if __name__ == '__main__':
    # Manual check: time anchor generation, print the anchors, then open an
    # interactive IPython shell for inspection.
    import time
    t = time.time()
    a = generate_anchors()
    print(time.time() - t)
    print(a)
    from IPython import embed
    embed()
106 |
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/rpn_tools/generate_anchors.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_dynamic/rpn_tools/generate_anchors.pyc
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/rpn_tools/mnc_data_layer.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 | import yaml
4 |
5 | #import caffe
6 | from mnc_config import cfg
7 | from utils.blob import prep_im_for_blob, im_list_to_blob
8 |
9 | MNC_MODE = False
10 |
class MNCDataLayer():
    """
    Provide image, image w/h/scale, gt boxes/masks and mask info to upper layers

    The layer walks a shuffled permutation of the roidb and returns one image
    per call to forward() as a dict of blobs.
    """

    def setup(self, roidb):
        """Initialize the layer state for the given roidb.

        Raises AssertionError when cfg.TRAIN.HAS_RPN is false.
        """
        self._cur = 0
        self.set_roidb(roidb)
        self._num_classes = 21
        # Maps blob names to their positional index.
        self._name_to_top_map = {}
        self._name_to_top_map['data'] = 0
        # Note: `assert(cond, msg)` asserts a non-empty tuple and can never
        # fail, so the condition and message must not be parenthesized.
        assert cfg.TRAIN.HAS_RPN, 'Use RPN for this project'
        self._name_to_top_map['im_info'] = 1
        self._name_to_top_map['gt_boxes'] = 2
        if MNC_MODE:
            # There is no `top` blob vector in this class, so only the name
            # mapping is recorded for the mask blobs.
            self._name_to_top_map['gt_masks'] = 3
            self._name_to_top_map['mask_info'] = 4

    def forward(self):
        """Return the blobs (a dict) of the next minibatch."""
        blobs = self._get_next_minibatch()
        return blobs

    def backward(self, top, propagate_down, bottom):
        """This layer does not propagate gradients."""
        pass

    def set_roidb(self, roidb):
        """Set the roidb to be used by this layer during training."""
        self._roidb = roidb
        self._shuffle_roidb_inds()

    def set_maskdb(self, maskdb):
        """Set the maskdb used when MNC_MODE is enabled and reshuffle."""
        self._maskdb = maskdb
        self._shuffle_roidb_inds()

    def _shuffle_roidb_inds(self):
        """Randomly permute the training roidb."""
        if cfg.TRAIN.ASPECT_GROUPING:
            # Pair up images with the same orientation (wide vs. tall), then
            # shuffle the pairs. The reshape to (-1, 2) requires an even
            # number of roidb entries.
            widths = np.array([r['width'] for r in self._roidb])
            heights = np.array([r['height'] for r in self._roidb])
            horz = (widths >= heights)
            vert = np.logical_not(horz)
            horz_inds = np.where(horz)[0]
            vert_inds = np.where(vert)[0]
            inds = np.hstack((
                np.random.permutation(horz_inds),
                np.random.permutation(vert_inds)))
            inds = np.reshape(inds, (-1, 2))
            row_perm = np.random.permutation(np.arange(inds.shape[0]))
            inds = np.reshape(inds[row_perm, :], (-1,))
            self._perm = inds
        else:
            self._perm = np.random.permutation(np.arange(len(self._roidb)))
        self._cur = 0

    def _get_image_blob(self, roidb, scale_inds):
        """Builds an input blob from the image of a single roidb entry at the
        specified scale.

        Returns (blob, im_scales) where im_scales lists the scale factor
        applied to the image.
        """
        # `roidb` is a single entry here, so exactly one image is processed.
        num_images = 1
        processed_ims = []
        im_scales = []
        for i in range(num_images):
            im = cv2.imread(roidb['image'])
            if roidb['flipped']:
                # Mirror the second axis of the image array.
                im = im[:, ::-1, :]
            target_size = cfg.TRAIN.SCALES[scale_inds[i]]
            im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                                            cfg.TRAIN.MAX_SIZE)
            im_scales.append(im_scale)
            processed_ims.append(im)
        # Create a blob to hold the input images
        blob = im_list_to_blob(processed_ims)
        return blob, im_scales

    def _get_next_minibatch(self):
        """
        Return the blobs to be used for the next minibatch.

        The dict holds 'data' (image blob), 'gt_boxes' (scaled boxes of all
        entries with a non-zero class, followed by the class column) and
        'im_info' (blob dimensions 2 and 3 plus the image scale). With
        MNC_MODE it also holds 'gt_masks' and 'mask_info'.
        """
        assert cfg.TRAIN.IMS_PER_BATCH == 1, 'Only single batch forwarding is supported'

        # Start a new shuffled pass once the permutation is (nearly) used up.
        if self._cur + cfg.TRAIN.IMS_PER_BATCH >= len(self._roidb):
            self._shuffle_roidb_inds()
        db_inds = self._perm[self._cur]
        self._cur += 1
        roidb = self._roidb[db_inds]

        random_scale_inds = np.random.randint(0, high=len(cfg.TRAIN.SCALES), size=1)
        im_blob, im_scales = self._get_image_blob(roidb, random_scale_inds)

        # Keep only boxes with a non-zero class label.
        gt_label = np.where(roidb['gt_classes'] != 0)[0]
        gt_boxes = np.hstack((roidb['boxes'][gt_label, :] * im_scales[0],
                              roidb['gt_classes'][gt_label, np.newaxis])).astype(np.float32)
        blobs = {
            'data': im_blob,
            'gt_boxes': gt_boxes,
            'im_info': np.array([[im_blob.shape[2], im_blob.shape[3], im_scales[0]]], dtype=np.float32)
        }

        if MNC_MODE:
            maskdb = self._maskdb[db_inds]
            mask_list = maskdb['gt_masks']
            mask_max_x = maskdb['mask_max'][0]
            mask_max_y = maskdb['mask_max'][1]
            # Masks are zero-padded into a common (mask_max_y, mask_max_x)
            # canvas; mask_info records each mask's own height and width.
            gt_masks = np.zeros((len(mask_list), mask_max_y, mask_max_x))
            mask_info = np.zeros((len(mask_list), 2))
            for j, mask in enumerate(mask_list):
                mask_x = mask.shape[1]
                mask_y = mask.shape[0]
                gt_masks[j, 0:mask_y, 0:mask_x] = mask
                mask_info[j, 0] = mask_y
                mask_info[j, 1] = mask_x
            blobs['gt_masks'] = gt_masks
            blobs['mask_info'] = mask_info

        return blobs
150 |
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/rpn_tools/mnc_data_layer.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_dynamic/rpn_tools/mnc_data_layer.pyc
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/rpn_tools/mnc_data_layer.py~:
--------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 | import yaml
4 |
5 | import caffe
6 | from mnc_config import cfg
7 | from utils.blob import prep_im_for_blob, im_list_to_blob
8 |
9 | MNC_MODE = False
10 |
class MNCDataLayer(object):
    """
    Provide image, image w/h/scale, gt boxes/masks and mask info to upper layers.

    This is a plain-Python feeder (no Caffe ``top``/``bottom`` blobs are
    touched): call ``setup(roidb)`` once, then ``forward()`` repeatedly to
    obtain a dict of numpy blobs for one image at a time.
    """

    def setup(self, roidb):
        """Initialise the layer state and register the roidb.

        Args:
            roidb: sequence of roidb records; stored and shuffled via
                ``set_roidb``.
        """
        self._cur = 0
        self.set_roidb(roidb)
        self._num_classes = 21
        self._name_to_top_map = {}
        self._name_to_top_map['data'] = 0
        # NOTE: the previous `assert(cond, msg)` asserted a non-empty tuple,
        # which is always true, so the check never ran.
        assert cfg.TRAIN.HAS_RPN, 'Use RPN for this project'
        self._name_to_top_map['im_info'] = 1
        self._name_to_top_map['gt_boxes'] = 2
        if MNC_MODE:
            # There is no Caffe `top` vector in this port, so only the
            # name -> index bookkeeping is kept (the old `top[i].reshape`
            # calls raised NameError).
            self._name_to_top_map['gt_masks'] = 3
            self._name_to_top_map['mask_info'] = 4

    def forward(self):
        """Return the dict of blobs for the next minibatch."""
        return self._get_next_minibatch()

    def backward(self, top, propagate_down, bottom):
        """This layer does not propagate gradients."""
        pass

    def set_roidb(self, roidb):
        """Set the roidb to be used by this layer during training."""
        self._roidb = roidb
        self._shuffle_roidb_inds()

    def set_maskdb(self, maskdb):
        """Set the mask database (indexed like the roidb) and reshuffle."""
        self._maskdb = maskdb
        self._shuffle_roidb_inds()

    def _shuffle_roidb_inds(self):
        """Randomly permute the training roidb and reset the cursor."""
        if cfg.TRAIN.ASPECT_GROUPING:
            widths = np.array([r['width'] for r in self._roidb])
            heights = np.array([r['height'] for r in self._roidb])
            horz = (widths >= heights)
            vert = np.logical_not(horz)
            horz_inds = np.where(horz)[0]
            vert_inds = np.where(vert)[0]
            inds = np.hstack((
                np.random.permutation(horz_inds),
                np.random.permutation(vert_inds)))
            # Shuffle in pairs of neighbouring indices.
            # NOTE(review): the (-1, 2) reshape requires an even number of
            # roidb entries - confirm callers always provide that.
            inds = np.reshape(inds, (-1, 2))
            row_perm = np.random.permutation(np.arange(inds.shape[0]))
            inds = np.reshape(inds[row_perm, :], (-1,))
            self._perm = inds
        else:
            self._perm = np.random.permutation(np.arange(len(self._roidb)))
        self._cur = 0

    def _get_image_blob(self, roidb, scale_inds):
        """Build the input blob for a single roidb entry.

        Args:
            roidb: one roidb record; reads ``'image'`` and ``'flipped'``.
            scale_inds: indexable of indices into ``cfg.TRAIN.SCALES``;
                only element 0 is used.

        Returns:
            ``(blob, im_scales)`` where ``im_scales`` is a one-element list.

        Raises:
            IOError: if the image file cannot be read.
        """
        im = cv2.imread(roidb['image'])
        if im is None:
            # cv2.imread returns None on failure instead of raising.
            raise IOError('Failed to read image: {}'.format(roidb['image']))
        if roidb['flipped']:
            im = im[:, ::-1, :]
        target_size = cfg.TRAIN.SCALES[scale_inds[0]]
        im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                                        cfg.TRAIN.MAX_SIZE)
        # Create a blob to hold the (single) input image
        blob = im_list_to_blob([im])
        return blob, [im_scale]

    def _get_next_minibatch(self):
        """
        Return the blobs to be used for the next minibatch.

        The dict always holds ``'data'``, ``'gt_boxes'`` and ``'im_info'``;
        ``'gt_masks'`` and ``'mask_info'`` are added when ``MNC_MODE`` is set.
        """
        assert cfg.TRAIN.IMS_PER_BATCH == 1, 'Only single batch forwarding is supported'

        # Reshuffle only when the permutation is exhausted; the former `>=`
        # test skipped the last entry of every permutation.
        if self._cur + cfg.TRAIN.IMS_PER_BATCH > len(self._roidb):
            self._shuffle_roidb_inds()
        db_inds = self._perm[self._cur]
        self._cur += 1
        roidb = self._roidb[db_inds]

        random_scale_inds = np.random.randint(0, high=len(cfg.TRAIN.SCALES), size=1)
        im_blob, im_scales = self._get_image_blob(roidb, random_scale_inds)

        # Keep only objects whose class id is non-zero.
        gt_label = np.where(roidb['gt_classes'] != 0)[0]
        gt_boxes = np.hstack((roidb['boxes'][gt_label, :] * im_scales[0],
                              roidb['gt_classes'][gt_label, np.newaxis])).astype(np.float32)
        blobs = {
            'data': im_blob,
            'gt_boxes': gt_boxes,
            'im_info': np.array([[im_blob.shape[2], im_blob.shape[3], im_scales[0]]], dtype=np.float32)
        }

        if MNC_MODE:
            maskdb = self._maskdb[db_inds]
            mask_list = maskdb['gt_masks']
            mask_max_x = maskdb['mask_max'][0]
            mask_max_y = maskdb['mask_max'][1]
            # Zero-pad every mask into a common canvas and remember its size.
            gt_masks = np.zeros((len(mask_list), mask_max_y, mask_max_x))
            mask_info = np.zeros((len(mask_list), 2))
            for j, mask in enumerate(mask_list):
                mask_y, mask_x = mask.shape[0], mask.shape[1]
                gt_masks[j, 0:mask_y, 0:mask_x] = mask
                mask_info[j, 0] = mask_y
                mask_info[j, 1] = mask_x
            blobs['gt_masks'] = gt_masks
            blobs['mask_info'] = mask_info

        return blobs
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/rpn_tools/nms/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_dynamic/rpn_tools/nms/__init__.py
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/rpn_tools/nms/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_dynamic/rpn_tools/nms/__init__.pyc
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/rpn_tools/nms/cpu_nms.pyx:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Multitask Network Cascade
3 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn)
4 | # Copyright (c) 2016, Haozhi Qi
5 | # Licensed under The MIT License [see LICENSE for details]
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 | cimport numpy as np
10 |
cdef inline np.float32_t max(np.float32_t a, np.float32_t b):
    # C-level float32 maximum; returns `a` when the two compare equal.
    if a >= b:
        return a
    return b
13 |
cdef inline np.float32_t min(np.float32_t a, np.float32_t b):
    # C-level float32 minimum; returns `a` when the two compare equal.
    if a <= b:
        return a
    return b
16 |
def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh):
    """Greedy non-maximum suppression on the CPU.

    Args:
        dets: float32 array whose columns 0-3 are x1, y1, x2, y2 and whose
            column 4 is the score.
        thresh: a lower-scoring box is suppressed when its IoU with an
            already kept box is >= thresh.

    Returns:
        list of row indices into ``dets`` that are kept, in descending
        score order.
    """
    cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0]
    cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1]
    cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2]
    cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3]
    cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4]

    # Box areas use the inclusive-pixel (+1) convention.
    cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    # argsort yields np.intp indices; declaring the buffer as intp_t avoids
    # a "Buffer dtype mismatch" on platforms where C long != intptr.
    cdef np.ndarray[np.intp_t, ndim=1] order = scores.argsort()[::-1]

    cdef int ndets = dets.shape[0]
    # np.int was removed from NumPy; use np.intp to match the buffer type.
    cdef np.ndarray[np.intp_t, ndim=1] suppressed = \
            np.zeros((ndets), dtype=np.intp)

    # nominal indices
    cdef int _i, _j
    # sorted indices
    cdef int i, j
    # temp variables for box i's (the box currently under consideration)
    cdef np.float32_t ix1, iy1, ix2, iy2, iarea
    # variables for computing overlap with box j (lower scoring box)
    cdef np.float32_t xx1, yy1, xx2, yy2
    cdef np.float32_t w, h
    cdef np.float32_t inter, ovr

    keep = []
    for _i in range(ndets):
        i = order[_i]
        if suppressed[i] == 1:
            continue
        keep.append(i)
        ix1 = x1[i]
        iy1 = y1[i]
        ix2 = x2[i]
        iy2 = y2[i]
        iarea = areas[i]
        # Only boxes with a lower score can be suppressed by box i.
        for _j in range(_i + 1, ndets):
            j = order[_j]
            if suppressed[j] == 1:
                continue
            xx1 = max(ix1, x1[j])
            yy1 = max(iy1, y1[j])
            xx2 = min(ix2, x2[j])
            yy2 = min(iy2, y2[j])
            w = max(0.0, xx2 - xx1 + 1)
            h = max(0.0, yy2 - yy1 + 1)
            inter = w * h
            ovr = inter / (iarea + areas[j] - inter)
            if ovr >= thresh:
                suppressed[j] = 1

    return keep
69 |
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/rpn_tools/nms/cpu_nms.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_dynamic/rpn_tools/nms/cpu_nms.so
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/rpn_tools/nms/gpu_mv.hpp:
--------------------------------------------------------------------------------
// Host entry point of the GPU mask-voting routine (implementation lives
// outside this header). Inputs are read-only; results are written to the
// caller-allocated finalize_output_mask / finalize_output_box buffers.
// NOTE(review): buffer layouts are not visible here - the Cython wrapper
// passes boxes as (all_boxes_num, box_dim) float32 and masks as a 4-D
// float32 array whose last dimension is mask_size; confirm against the
// kernel before relying on this.
void _mv(const float* all_boxes, const float* all_masks, const int all_boxes_num,
         const int* candidate_inds, const int* candidate_start, const float* candidate_weights, const int candidate_num,
         const int image_height, const int image_width, const int box_dim, const int mask_size, const int result_num,
         float* finalize_output_mask, int* finalize_output_box, const int device_id);
5 |
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/rpn_tools/nms/gpu_mv.pyx:
--------------------------------------------------------------------------------
1 |
2 | import numpy as np
3 | cimport numpy as np
4 |
5 | assert sizeof(int) == sizeof(np.int32_t)
6 |
# C declaration of the GPU mask-voting entry point from gpu_mv.hpp.
# Pointer arguments are raw views into numpy buffers; the last two pointers
# (result_mask, result_box) are outputs filled by the callee.
cdef extern from "gpu_mv.hpp":
    void _mv(np.float32_t* all_boxes, np.float32_t* all_masks, np.int32_t all_boxes_num, np.int32_t* candidate_inds, np.int32_t* candidate_start, np.float32_t* candidate_weights, np.int32_t candidate_num, np.int32_t image_height, np.int32_t image_width, np.int32_t box_dim, np.int32_t mask_size, np.int32_t result_num, np.float32_t* result_mask, np.int32_t* result_box, np.int32_t device_id);
9 |
10 | # boxes: n * 4
11 | # masks: n * 1 * 21 * 21
12 | # scores: n * 21
def mv(np.ndarray[np.float32_t, ndim=2] all_boxes,
       np.ndarray[np.float32_t, ndim=4] all_masks,
       np.ndarray[np.int32_t, ndim=1] candidate_inds,
       np.ndarray[np.int32_t, ndim=1] candidate_start,
       np.ndarray[np.float32_t, ndim=1] candidate_weights,
       np.int32_t image_height,
       np.int32_t image_width,
       np.int32_t device_id = 0):
    """Run the GPU mask-voting routine ``_mv`` and return its outputs.

    One result is produced per entry of ``candidate_start``.

    Returns:
        ``(result_mask, result_box)``: a float32 array of shape
        ``(len(candidate_start), 1, all_masks.shape[2], all_masks.shape[3])``
        and an int32 array of shape
        ``(len(candidate_start), all_boxes.shape[1])``, both allocated here
        as zeros and filled in place by ``_mv``.
    """
    cdef int n_boxes = all_boxes.shape[0]
    cdef int box_dim = all_boxes.shape[1]
    cdef int mask_side = all_masks.shape[3]
    cdef int n_candidates = candidate_inds.shape[0]
    cdef int n_results = candidate_start.shape[0]
    # Output buffers handed to the C routine by pointer.
    cdef np.ndarray[np.float32_t, ndim=4] result_mask = np.zeros(
        (n_results, 1, all_masks.shape[2], all_masks.shape[3]),
        dtype=np.float32)
    cdef np.ndarray[np.int32_t, ndim=2] result_box = np.zeros(
        (n_results, box_dim), dtype=np.int32)
    _mv(&all_boxes[0, 0], &all_masks[0, 0, 0, 0], n_boxes,
        &candidate_inds[0], &candidate_start[0], &candidate_weights[0],
        n_candidates, image_height, image_width, box_dim, mask_side,
        n_results, &result_mask[0, 0, 0, 0], &result_box[0, 0], device_id)
    return result_mask, result_box
32 |
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/rpn_tools/nms/gpu_nms.hpp:
--------------------------------------------------------------------------------
// GPU non-maximum suppression entry point (defined in nms_kernel.cu).
//   keep_out           : output, indices of kept boxes (caller allocates
//                        room for boxes_num ints)
//   num_out            : output, number of valid entries in keep_out
//   boxes_host         : host array of boxes_num * boxes_dim floats
//   nms_overlap_thresh : IoU above which a later box is suppressed
//   device_id          : CUDA device to run on
void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
          int boxes_dim, float nms_overlap_thresh, int device_id);
3 |
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/rpn_tools/nms/gpu_nms.pyx:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Multitask Network Cascade
3 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn)
4 | # Copyright (c) 2016, Haozhi Qi
5 | # Licensed under The MIT License [see LICENSE for details]
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 | cimport numpy as np
10 |
11 | assert sizeof(int) == sizeof(np.int32_t)
12 |
# C declaration of _nms from gpu_nms.hpp; argument order is
# (keep_out, num_out, boxes_host, boxes_num, boxes_dim, thresh, device_id).
cdef extern from "gpu_nms.hpp":
    void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int)
15 |
def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh,
            np.int32_t device_id=0):
    """Non-maximum suppression on the GPU.

    Args:
        dets: float32 array of detections; column 4 holds the score.
        thresh: IoU threshold forwarded to the CUDA routine ``_nms``.
        device_id: CUDA device to use.

    Returns:
        list of row indices into ``dets`` that survive suppression, in
        descending score order. An empty input yields an empty list.
    """
    cdef int boxes_num = dets.shape[0]
    cdef int boxes_dim = dets.shape[1]
    cdef int num_out
    cdef np.ndarray[np.int32_t, ndim=1] \
        keep = np.zeros(boxes_num, dtype=np.int32)
    cdef np.ndarray[np.float32_t, ndim=1] \
        scores = dets[:, 4]
    # argsort yields np.intp indices; intp_t avoids a buffer dtype mismatch
    # on platforms where C long and intptr differ in size.
    cdef np.ndarray[np.intp_t, ndim=1] \
        order = scores.argsort()[::-1]
    # The CUDA routine expects boxes already sorted by descending score.
    cdef np.ndarray[np.float32_t, ndim=2] \
        sorted_dets = dets[order, :]
    if boxes_num == 0:
        # Taking &keep[0] / &sorted_dets[0, 0] of an empty buffer is invalid.
        return []
    _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id)
    keep = keep[:num_out]
    # Map positions in the sorted array back to rows of `dets`.
    return list(order[keep])
32 |
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/rpn_tools/nms/gpu_nms.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_dynamic/rpn_tools/nms/gpu_nms.so
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/rpn_tools/nms/mnc_config.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_dynamic/rpn_tools/nms/mnc_config.pyc
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/rpn_tools/nms/mv.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_dynamic/rpn_tools/nms/mv.so
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/rpn_tools/nms/nms_kernel.cu:
--------------------------------------------------------------------------------
1 | // --------------------------------------------------------
2 | // Multitask Network Cascade
3 | // Modified from MATLAB Faster R-CNN (https://github.com/shaoqingren/faster_rcnn)
4 | // Copyright (c) 2016, Haozhi Qi
5 | // Licensed under The MIT License [see LICENSE for details]
6 | // --------------------------------------------------------
7 |
#include "gpu_nms.hpp"
#include <vector>
#include <iostream>
11 |
// Evaluate a CUDA runtime call and print the error string on failure.
// NOTE: this only logs to stdout; execution continues after an error.
#define CUDA_CHECK(condition) \
  /* Code block avoids redefinition of cudaError_t error */ \
  do { \
    cudaError_t error = condition; \
    if (error != cudaSuccess) { \
      std::cout << cudaGetErrorString(error) << std::endl; \
    } \
  } while (0)

// Integer ceiling division: number of n-sized chunks needed to cover m.
#define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
// One thread per bit of an unsigned long long suppression-mask word.
int const threadsPerBlock = sizeof(unsigned long long) * 8;
23 |
// Intersection-over-union of two boxes given as [x1, y1, x2, y2, ...],
// using the inclusive-pixel (+1) convention for widths and heights.
__device__ inline float devIoU(float const * const a, float const * const b) {
  const float ix1 = max(a[0], b[0]);
  const float ix2 = min(a[2], b[2]);
  const float iy1 = max(a[1], b[1]);
  const float iy2 = min(a[3], b[3]);
  // Clamp to zero when the boxes do not overlap along an axis.
  const float iw = max(ix2 - ix1 + 1, 0.f);
  const float ih = max(iy2 - iy1 + 1, 0.f);
  const float inter = iw * ih;
  const float area_a = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
  const float area_b = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
  return inter / (area_a + area_b - inter);
}
33 |
// Each (row, col) block compares up to threadsPerBlock "row" boxes against
// up to threadsPerBlock "column" boxes. Thread t of the block writes one
// 64-bit word to dev_mask: bit i is set when column box i overlaps the
// thread's row box by more than nms_overlap_thresh.
// Boxes are read with a stride of 5 floats per box.
__global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
                           const float *dev_boxes, unsigned long long *dev_mask) {
  const int row_start = blockIdx.y;
  const int col_start = blockIdx.x;

  // if (row_start > col_start) return;

  // The last block along each axis may be only partially filled.
  const int row_size =
      min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
  const int col_size =
      min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);

  // Stage this block's column boxes in shared memory, one box per thread.
  __shared__ float block_boxes[threadsPerBlock * 5];
  if (threadIdx.x < col_size) {
    const float *src =
        dev_boxes + (threadsPerBlock * col_start + threadIdx.x) * 5;
    float *dst = block_boxes + threadIdx.x * 5;
    for (int k = 0; k < 5; ++k) {
      dst[k] = src[k];
    }
  }
  __syncthreads();

  // No further barriers below, so idle threads can leave early.
  if (threadIdx.x >= row_size) {
    return;
  }

  const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
  const float *cur_box = dev_boxes + cur_box_idx * 5;
  // On the diagonal block, compare only against later boxes so a box is
  // never tested against itself or an earlier box.
  const int first = (row_start == col_start) ? (int)threadIdx.x + 1 : 0;
  unsigned long long bits = 0;
  for (int i = first; i < col_size; i++) {
    if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
      bits |= 1ULL << i;
    }
  }
  const int col_blocks = DIVUP(n_boxes, threadsPerBlock);
  dev_mask[cur_box_idx * col_blocks + col_start] = bits;
}
79 |
// Make `device_id` the current CUDA device unless it already is.
void _set_device(int device_id) {
  int current_device;
  // Restored `&current_device`: the source text had been mangled into a
  // currency-sign character by an HTML-entity mis-decoding of "&curren".
  CUDA_CHECK(cudaGetDevice(&current_device));
  if (current_device == device_id) {
    return;
  }
  // The call to cudaSetDevice must come before any calls to Get, which
  // may perform initialization using the GPU.
  CUDA_CHECK(cudaSetDevice(device_id));
}
90 |
// Host driver for GPU non-maximum suppression.
//   keep_out   : output, indices (into boxes_host) of the kept boxes; the
//                caller must provide room for boxes_num ints
//   num_out    : output, number of valid entries written to keep_out
//   boxes_host : boxes_num * boxes_dim floats, expected to be ordered by
//                descending score (earlier boxes suppress later ones)
// The kernel indexes boxes with a fixed stride of 5 floats.
void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
          int boxes_dim, float nms_overlap_thresh, int device_id) {
  _set_device(device_id);

  // Nothing to suppress; also avoids a zero-sized launch and taking the
  // address of element 0 of an empty vector.
  if (boxes_num <= 0) {
    *num_out = 0;
    return;
  }

  float* boxes_dev = NULL;
  unsigned long long* mask_dev = NULL;

  const int col_blocks = DIVUP(boxes_num, threadsPerBlock);

  CUDA_CHECK(cudaMalloc(&boxes_dev,
                        boxes_num * boxes_dim * sizeof(float)));
  CUDA_CHECK(cudaMemcpy(boxes_dev,
                        boxes_host,
                        boxes_num * boxes_dim * sizeof(float),
                        cudaMemcpyHostToDevice));

  // One 64-bit word per (box, column block) pair.
  CUDA_CHECK(cudaMalloc(&mask_dev,
                        boxes_num * col_blocks * sizeof(unsigned long long)));

  dim3 blocks(DIVUP(boxes_num, threadsPerBlock),
              DIVUP(boxes_num, threadsPerBlock));
  dim3 threads(threadsPerBlock);
  // Launch configuration restored (the <<<...>>> contents had been lost).
  nms_kernel<<<blocks, threads>>>(boxes_num,
                                  nms_overlap_thresh,
                                  boxes_dev,
                                  mask_dev);

  std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
  CUDA_CHECK(cudaMemcpy(&mask_host[0],
                        mask_dev,
                        sizeof(unsigned long long) * boxes_num * col_blocks,
                        cudaMemcpyDeviceToHost));

  // remv accumulates the suppression bits of every box kept so far.
  std::vector<unsigned long long> remv(col_blocks, 0ULL);

  int num_to_keep = 0;
  for (int i = 0; i < boxes_num; i++) {
    int nblock = i / threadsPerBlock;
    int inblock = i % threadsPerBlock;

    if (!(remv[nblock] & (1ULL << inblock))) {
      keep_out[num_to_keep++] = i;
      unsigned long long *p = &mask_host[0] + i * col_blocks;
      // Box i can only suppress boxes at or after its own block.
      for (int j = nblock; j < col_blocks; j++) {
        remv[j] |= p[j];
      }
    }
  }
  *num_out = num_to_keep;

  CUDA_CHECK(cudaFree(boxes_dev));
  CUDA_CHECK(cudaFree(mask_dev));
}
145 |
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/rpn_tools/nms/nms_wrapper.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Multitask Network Cascade
3 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn)
4 | # Copyright (c) 2016, Haozhi Qi
5 | # Licensed under The MIT License [see LICENSE for details]
6 | # --------------------------------------------------------
7 |
8 | from mnc_config import cfg
9 | from gpu_nms import gpu_nms
10 | from cpu_nms import cpu_nms
11 |
12 |
def nms(dets, thresh):
    """Run NMS on ``dets`` with the backend selected by ``cfg.USE_GPU_NMS``.

    Returns an empty list straight away when ``dets`` has no rows;
    otherwise forwards to ``gpu_nms`` (on device ``cfg.GPU_ID``) or
    ``cpu_nms`` and returns whatever that backend returns.
    """
    if dets.shape[0] == 0:
        return []
    if not cfg.USE_GPU_NMS:
        return cpu_nms(dets, thresh)
    return gpu_nms(dets, thresh, device_id=cfg.GPU_ID)
22 |
23 |
def apply_nms(all_boxes, thresh):
    """Apply non-maximum suppression to all predicted boxes output by the
    test_net method.

    Args:
        all_boxes: nested list indexed as ``all_boxes[cls][image]``; each
            entry is either an empty list or a detections array accepted
            by ``nms``.
        thresh: overlap threshold forwarded to ``nms``.

    Returns:
        nested list with the same ``[cls][image]`` layout holding a copy of
        the kept rows, or ``[]`` where there was nothing to keep.
    """
    num_classes = len(all_boxes)
    num_images = len(all_boxes[0])
    nms_boxes = [[[] for _ in range(num_images)]
                 for _ in range(num_classes)]
    for cls_ind in range(num_classes):
        for im_ind in range(num_images):
            dets = all_boxes[cls_ind][im_ind]
            # `dets == []` is not a valid emptiness test for ndarrays
            # (elementwise comparison); len() works for lists and arrays.
            if len(dets) == 0:
                continue
            keep = nms(dets, thresh)
            if len(keep) == 0:
                continue
            nms_boxes[cls_ind][im_ind] = dets[keep, :].copy()
    return nms_boxes
42 |
43 |
def apply_nms_mask(all_boxes, all_masks, thresh):
    """Apply NMS per class and image, filtering boxes and masks together.

    Args:
        all_boxes: nested list ``all_boxes[cls][image]`` of detection
            arrays (or empty lists).
        all_masks: nested list with the same layout; rows correspond to the
            rows of the matching ``all_boxes`` entry.
        thresh: overlap threshold forwarded to ``nms``.

    Returns:
        ``(nms_boxes, nms_masks)``: nested lists in the same layout holding
        copies of the kept rows, or ``[]`` where nothing was kept.
    """
    num_classes = len(all_boxes)
    num_images = len(all_boxes[0])
    nms_boxes = [[[] for _ in range(num_images)]
                 for _ in range(num_classes)]
    nms_masks = [[[] for _ in range(num_images)]
                 for _ in range(num_classes)]
    for cls_ind in range(num_classes):
        for im_ind in range(num_images):
            dets = all_boxes[cls_ind][im_ind]
            masks = all_masks[cls_ind][im_ind]
            # len() is a safe emptiness test for both lists and ndarrays;
            # `dets == []` compares elementwise on arrays.
            if len(dets) == 0:
                continue
            keep = nms(dets, thresh)
            if len(keep) == 0:
                continue
            nms_boxes[cls_ind][im_ind] = dets[keep, :].copy()
            nms_masks[cls_ind][im_ind] = masks[keep, :].copy()
    return nms_boxes, nms_masks
63 |
64 |
def apply_nms_mask_single(box, mask, thresh):
    """Apply NMS to one set of boxes and filter the matching masks.

    Args:
        box: detections array accepted by ``nms`` (or an empty list).
        mask: array whose rows correspond to the rows of ``box``.
        thresh: overlap threshold forwarded to ``nms``.

    Returns:
        ``(box, mask)`` restricted to the kept rows (as copies). When the
        input is empty, or ``nms`` keeps nothing, the inputs are returned
        unchanged.
    """
    # len() is a safe emptiness test for both lists and ndarrays;
    # `box == []` compares elementwise on arrays.
    if len(box) == 0:
        return box, mask
    keep = nms(box, thresh)
    if len(keep) == 0:
        return box, mask
    return box[keep, :].copy(), mask[keep, :].copy()
72 |
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/rpn_tools/nms/nms_wrapper.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_dynamic/rpn_tools/nms/nms_wrapper.pyc
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/rpn_tools/nms/py_cpu_nms.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Multitask Network Cascade
3 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn)
4 | # Copyright (c) 2016, Haozhi Qi
5 | # Licensed under The MIT License [see LICENSE for details]
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 |
def py_cpu_nms(dets, thresh):
    """Pure Python NMS baseline.

    ``dets`` has one detection per row: x1, y1, x2, y2, score. Boxes are
    visited in descending score order; each visited box is kept and every
    remaining box whose IoU with it exceeds ``thresh`` is discarded.
    Returns the list of kept row indices.
    """
    left, top, right, bottom, scores = (dets[:, col] for col in range(5))

    # Inclusive-pixel (+1) convention for box extents.
    areas = (right - left + 1) * (bottom - top + 1)
    remaining = scores.argsort()[::-1]

    keep = []
    while remaining.size > 0:
        best, rest = remaining[0], remaining[1:]
        keep.append(best)

        # Intersection of the best box with every other remaining box.
        ix1 = np.maximum(left[best], left[rest])
        iy1 = np.maximum(top[best], top[rest])
        ix2 = np.minimum(right[best], right[rest])
        iy2 = np.minimum(bottom[best], bottom[rest])
        inter_w = np.maximum(0.0, ix2 - ix1 + 1)
        inter_h = np.maximum(0.0, iy2 - iy1 + 1)
        inter = inter_w * inter_h
        iou = inter / (areas[best] + areas[rest] - inter)

        # Survivors are those not overlapping the best box too much.
        remaining = rest[iou <= thresh]

    return keep
39 |
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/rpn_tools/proposal_layer.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Faster R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick and Sean Bell
6 | # --------------------------------------------------------
7 |
8 | #import caffe
9 | import numpy as np
10 | import yaml
11 | from fast_rcnn.config import cfg
12 | from generate_anchors import generate_anchors
13 | from fast_rcnn.bbox_transform import bbox_transform_inv, clip_boxes
14 | from fast_rcnn.nms_wrapper import nms
15 |
16 | DEBUG = False
17 |
18 |
19 | """
20 | Outputs object detection proposals by applying estimated bounding-box
21 | transformations to a set of regular boxes (called "anchors").
22 | """
23 |
24 |
# Multiplier that converts feature-map cell indices into image-space
# anchor shifts in forward_proposal_op.
_feat_stride = 16
# Base anchors from generate_anchors() with its default arguments; they are
# reshaped to (1, _num_anchors, 4) below, so the two must stay in sync.
_anchors = generate_anchors()
# Number of anchors per feature-map cell; also the channel offset at which
# the foreground scores start in the score input.
_num_anchors = 9
# Selects the cfg section ('TRAIN' or 'TEST') that supplies the RPN settings.
phase = 'TRAIN'
29 |
30 |
def forward_proposal_op(bottom_0, bottom_1, bottom_2):
    """Turn RPN outputs into a ranked, NMS-filtered set of RoIs.

    Args:
        bottom_0: class scores, (1, 2 * A, H, W); the first A channels are
            background probabilities, the last A foreground probabilities.
        bottom_1: predicted bbox deltas, (1, 4 * A, H, W).
        bottom_2: image info ``[height, width, scale]``; either a flat
            3-vector or a (1, 3) array is accepted.

    Returns:
        float array of shape (N, 5); column 0 is the batch index (always 0),
        columns 1-4 are the proposal boxes.
    """
    # Algorithm:
    #
    # for each (H, W) location i
    #   generate A anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the A anchors
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before NMS
    # apply NMS with threshold 0.7 to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals (-> RoIs top, scores top)

    assert bottom_0.shape[0] == 1, \
        'Only single item batches are supported'

    cfg_key = str(phase)  # either 'TRAIN' or 'TEST'
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
    min_size = cfg[cfg_key].RPN_MIN_SIZE

    # the first set of _num_anchors channels are bg probs
    # the second set are the fg probs, which we want
    scores = bottom_0[:, _num_anchors:, :, :]
    bbox_deltas = bottom_1
    # Flatten so both a (3,) vector and a (1, 3) row can be indexed as
    # im_info[0], im_info[1], im_info[2].
    im_info = np.asarray(bottom_2).reshape(-1)

    if DEBUG:
        # Parenthesised single-argument print works on Python 2 and 3.
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))

    # 1. Generate proposals from bbox deltas and shifted anchors
    height, width = scores.shape[-2:]

    if DEBUG:
        print('score map size: {}'.format(scores.shape))

    # Enumerate all shifts
    shift_x = np.arange(0, width) * _feat_stride
    shift_y = np.arange(0, height) * _feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()

    # Enumerate all shifted anchors:
    #
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors
    K = shifts.shape[0]
    anchors = _anchors.reshape((1, A, 4)) + \
        shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((K * A, 4))

    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    #
    # bbox deltas will be (1, 4 * A, H, W) format
    # transpose to (1, H, W, 4 * A)
    # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
    # in slowest to fastest order
    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

    # Same story for the scores:
    #
    # scores are (1, A, H, W) format
    # transpose to (1, H, W, A)
    # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
    scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

    # Convert anchors into proposals via bbox transformations
    proposals = bbox_transform_inv(anchors, bbox_deltas)

    # 2. clip predicted boxes to image
    proposals = clip_boxes(proposals, im_info[:2])

    # 3. remove predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to input image scale stored in im_info[2])
    keep = _filter_boxes(proposals, min_size * im_info[2])
    proposals = proposals[keep, :]
    scores = scores[keep]

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    keep = nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))

    return blob
141 |
142 | # [Optional] output scores blob
143 |
144 |
def backward(top, propagate_down, bottom):
    """No-op: the proposal layer does not propagate gradients.

    All arguments are ignored and ``None`` is returned.
    """
148 |
def reshape(bottom, top):
    """No-op: output shapes are determined during the forward call.

    Both arguments are ignored and ``None`` is returned.
    """
152 |
153 | def _filter_boxes(boxes, min_size):
154 | """Remove all boxes with any side smaller than min_size."""
155 | ws = boxes[:, 2] - boxes[:, 0] + 1
156 | hs = boxes[:, 3] - boxes[:, 1] + 1
157 | keep = np.where((ws >= min_size) & (hs >= min_size))[0]
158 | return keep
159 |
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/rpn_tools/proposal_layer.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_dynamic/rpn_tools/proposal_layer.pyc
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/rpn_tools/proposal_layer.py~:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Faster R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick and Sean Bell
6 | # --------------------------------------------------------
7 |
8 | import caffe
9 | import numpy as np
10 | import yaml
11 | from fast_rcnn.config import cfg
12 | from generate_anchors import generate_anchors
13 | from fast_rcnn.bbox_transform import bbox_transform_inv, clip_boxes
14 | from fast_rcnn.nms_wrapper import nms
15 |
16 | DEBUG = False
17 |
18 |
19 | """
20 | Outputs object detection proposals by applying estimated bounding-box
21 | transformations to a set of regular boxes (called "anchors").
22 | """
23 |
24 |
# Multiplier that converts feature-map cell indices into image-space
# anchor shifts in forward_proposal_op.
_feat_stride = 16
# Base anchors from generate_anchors() with its default arguments.
_anchors = generate_anchors()
# Number of anchors per feature-map cell; also the channel offset at which
# the foreground scores start in the score input.
_num_anchors = 9
# Selects the cfg section ('TRAIN' or 'TEST') that supplies the RPN settings.
phase = 'TRAIN'
29 |
30 |
def forward_proposal_op(bottom_0, bottom_1, bottom_2):
    """Turn RPN outputs into a blob of region proposals.

    Pipeline:
      1. shift the base anchors to every (H, W) cell of the score map
      2. apply the predicted bbox deltas to the shifted anchors
      3. clip the resulting boxes to the image
      4. drop boxes with a side below RPN_MIN_SIZE (scaled by im_info[2])
      5. sort by score and keep the top RPN_PRE_NMS_TOP_N
      6. run NMS with RPN_NMS_THRESH and keep the top RPN_POST_NMS_TOP_N

    Args:
        bottom_0: class scores; channels [_num_anchors:] are read as the
            foreground probabilities. Leading dimension must be 1.
        bottom_1: predicted bbox deltas; transposed with (0, 2, 3, 1) and
            flattened to rows of 4.
        bottom_2: image info; [:2] is handed to clip_boxes and [2] is used
            as the image scale.

    Returns:
        float32 ndarray with one row per kept proposal: a zero batch index
        followed by the proposal's box coordinates.
    """
    assert bottom_0.shape[0] == 1, \
        'Only single item batches are supported'

    # Per-phase RPN settings; phase is either 'TRAIN' or 'TEST'.
    phase_cfg = cfg[str(phase)]
    pre_nms_topN = phase_cfg.RPN_PRE_NMS_TOP_N
    post_nms_topN = phase_cfg.RPN_POST_NMS_TOP_N
    nms_thresh = phase_cfg.RPN_NMS_THRESH
    min_size = phase_cfg.RPN_MIN_SIZE

    # The first _num_anchors channels are bg probs, the rest are the fg
    # probs, which are the ones we rank proposals by.
    scores = bottom_0[:, _num_anchors:, :, :]
    bbox_deltas = bottom_1
    im_info = bottom_2

    if DEBUG:
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))

    # 1. Generate proposals from bbox deltas and shifted anchors
    height, width = scores.shape[-2:]

    if DEBUG:
        print('score map size: {}'.format(scores.shape))

    # One (x, y, x, y) shift per score-map cell, in row-major (h, w) order.
    grid_x, grid_y = np.meshgrid(np.arange(0, width) * _feat_stride,
                                 np.arange(0, height) * _feat_stride)
    flat_x = grid_x.ravel()
    flat_y = grid_y.ravel()
    shifts = np.vstack((flat_x, flat_y, flat_x, flat_y)).transpose()

    # Broadcast (1, A, 4) anchors against (K, 1, 4) shifts -> (K, A, 4),
    # then flatten to (K * A, 4).
    num_anchors = _num_anchors
    num_cells = shifts.shape[0]
    anchors = (_anchors.reshape((1, num_anchors, 4)) +
               shifts.reshape((1, num_cells, 4)).transpose((1, 0, 2)))
    anchors = anchors.reshape((num_cells * num_anchors, 4))

    # Move the channel axis last so that flattening yields rows ordered by
    # (h, w, a), matching the anchor order built above.
    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))
    scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

    # Convert anchors into proposals via bbox transformations
    proposals = bbox_transform_inv(anchors, bbox_deltas)

    # 2. clip predicted boxes to image
    proposals = clip_boxes(proposals, im_info[:2])

    # 3. remove predicted boxes with either height or width < threshold
    # (min_size is converted to the input image scale stored in im_info[2])
    valid = _filter_boxes(proposals, min_size * im_info[2])
    proposals = proposals[valid, :]
    scores = scores[valid]

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    ranking = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        ranking = ranking[:pre_nms_topN]
    proposals = proposals[ranking, :]
    scores = scores[ranking]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    survivors = nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        survivors = survivors[:post_nms_topN]
    proposals = proposals[survivors, :]
    scores = scores[survivors]

    # Only a single input image is supported, so all batch inds are 0.
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    return np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
141 |
142 | # [Optional] output scores blob
143 |
144 |
def backward(top, propagate_down, bottom):
    """No-op backward pass: this layer does not propagate gradients.

    All three arguments are accepted and ignored.
    """
    return None
148 |
def reshape(bottom, top):
    """No-op reshape hook; reshaping happens during the call to forward.

    Both arguments are accepted and ignored.
    """
    return None
152 |
153 | def _filter_boxes(boxes, min_size):
154 | """Remove all boxes with any side smaller than min_size."""
155 | ws = boxes[:, 2] - boxes[:, 0] + 1
156 | hs = boxes[:, 3] - boxes[:, 1] + 1
157 | keep = np.where((ws >= min_size) & (hs >= min_size))[0]
158 | return keep
159 |
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/rpn_tools/proposal_target_layer.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Faster R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick and Sean Bell
6 | # --------------------------------------------------------
7 |
8 | #import caffe
9 | import yaml
10 | import numpy as np
11 | import numpy.random as npr
12 | from fast_rcnn.config import cfg
13 | from fast_rcnn.bbox_transform import bbox_transform
14 | from utils.cython_bbox import bbox_overlaps
15 |
16 | DEBUG = False
17 |
18 | """
19 | Assign object detection proposals to ground-truth targets. Produces proposal
20 | classification labels and bounding-box regression targets.
21 | """
22 |
23 | _num_classes = 21
24 |
25 |
# Running totals used only by the DEBUG printout of
# forward_proposal_target_op (there is no layer object to hold them).
_debug_stats = {'count': 0, 'fg_num': 0, 'bg_num': 0}


def forward_proposal_target_op(bottom_0, bottom_1):
    """Assign proposals to ground-truth targets and sample a training batch.

    Args:
        bottom_0: proposal RoIs, rows of (0, x1, y1, x2, y2).
        bottom_1: ground-truth boxes, rows of (x1, y1, x2, y2, label).

    Returns:
        Tuple ``(rois, labels, bbox_targets, bbox_inside_weights,
        bbox_outside_weights)``. ``bbox_outside_weights`` is a float32 array
        that is 1 wherever ``bbox_inside_weights`` is positive, else 0.

    Raises:
        AssertionError: if any RoI carries a non-zero batch index.
    """
    all_rois = bottom_0
    # TODO(rbg): it's annoying that sometimes I have extra info before
    # and other times after box coordinates -- normalize to one format
    gt_boxes = bottom_1

    # Include ground-truth boxes in the set of candidate rois: drop the label
    # column and prepend a zero batch index so rows match the RoI layout.
    zeros = np.zeros((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype)
    all_rois = np.vstack(
        (all_rois, np.hstack((zeros, gt_boxes[:, :-1])))
    )

    # Sanity check: single batch only
    assert np.all(all_rois[:, 0] == 0), \
        'Only single item batches are supported'

    num_images = 1
    # Keep both counts as plain ints: they end up as a sample size and a
    # slice bound in _sample_rois, where floats are rejected by NumPy.
    rois_per_image = int(cfg.TRAIN.BATCH_SIZE // num_images)
    fg_rois_per_image = int(np.round(cfg.TRAIN.FG_FRACTION * rois_per_image))

    # Sample rois with classification labels and bounding box regression
    # targets
    labels, rois, bbox_targets, bbox_inside_weights = _sample_rois(
        all_rois, gt_boxes, fg_rois_per_image,
        rois_per_image, _num_classes)

    if DEBUG:
        num_fg = (labels > 0).sum()
        num_bg = (labels == 0).sum()
        print('num fg: {}'.format(num_fg))
        print('num bg: {}'.format(num_bg))
        _debug_stats['count'] += 1
        _debug_stats['fg_num'] += num_fg
        _debug_stats['bg_num'] += num_bg
        calls = float(_debug_stats['count'])
        print('num fg avg: {}'.format(_debug_stats['fg_num'] / calls))
        print('num bg avg: {}'.format(_debug_stats['bg_num'] / calls))
        # Guard the ratio: no background RoI may have been seen yet.
        if _debug_stats['bg_num'] > 0:
            ratio = float(_debug_stats['fg_num']) / float(_debug_stats['bg_num'])
        else:
            ratio = float('inf')
        print('ratio: {:.3f}'.format(ratio))

    bbox_outside_weights = np.array(bbox_inside_weights > 0).astype(np.float32)

    return rois, labels, bbox_targets, bbox_inside_weights, bbox_outside_weights
68 |
69 |
def backward(top, propagate_down, bottom):
    """No-op backward pass: this layer does not propagate gradients.

    All three arguments are accepted and ignored.
    """
    return None
73 |
def reshape(bottom, top):
    """No-op reshape hook; reshaping happens during the call to forward.

    Both arguments are accepted and ignored.
    """
    return None
77 |
78 |
def _get_bbox_regression_labels(bbox_target_data, num_classes):
    """Expand compact regression targets into per-slot target/weight blobs.

    ``bbox_target_data`` is N x (class, tx, ty, tw, th). Each foreground row
    (class > 0) gets its four targets written into one 4-wide slot of the
    output; all other entries stay zero.

    Class-agnostic mode is hard-wired on (``train_agnostic = True``): the
    output has two slots (background, foreground) and every foreground row
    uses slot 1, so the result is N x 8. The class-specific path is kept for
    reference; it sizes the output as N x 4*num_classes and uses the integer
    class id as the slot.

    Returns:
        bbox_targets (ndarray): float32 blob of regression targets
        bbox_inside_weights (ndarray): float32 blob of loss weights, set to
            cfg.TRAIN.BBOX_INSIDE_WEIGHTS in the slot that holds targets
    """
    clss = bbox_target_data[:, 0]
    train_agnostic = True
    num_slots = 2 if train_agnostic else int(num_classes)

    bbox_targets = np.zeros((clss.size, 4 * num_slots), dtype=np.float32)
    bbox_inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32)

    for ind in np.where(clss > 0)[0]:
        # clss is a float column, so the class id must be cast before it can
        # be used as a slice bound.
        slot = 1 if train_agnostic else int(clss[ind])
        start = 4 * slot
        end = start + 4
        bbox_targets[ind, start:end] = bbox_target_data[ind, 1:]
        bbox_inside_weights[ind, start:end] = cfg.TRAIN.BBOX_INSIDE_WEIGHTS

    return bbox_targets, bbox_inside_weights
113 |
114 |
def _compute_targets(ex_rois, gt_rois, labels):
    """Build regression targets for an image, prefixed by each RoI's label.

    ``ex_rois`` and ``gt_rois`` must both be N x 4. The targets come from
    ``bbox_transform`` and, when cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED
    is set, are normalized with the configured means and stds.

    Returns:
        float32 ndarray whose first column is ``labels`` and whose remaining
        columns are the targets.
    """
    assert ex_rois.shape[0] == gt_rois.shape[0]
    assert ex_rois.shape[1] == 4
    assert gt_rois.shape[1] == 4

    targets = bbox_transform(ex_rois, gt_rois)
    if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
        # Optionally normalize targets by a precomputed mean and stdev
        means = np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
        stds = np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS)
        targets = (targets - means) / stds

    label_column = labels[:, np.newaxis]
    return np.hstack((label_column, targets)).astype(np.float32, copy=False)
129 |
def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_classes):
    """Generate a random sample of RoIs comprising foreground and background
    examples.

    Args:
        all_rois: candidate RoIs; columns 1..4 are the box coordinates.
        gt_boxes: ground-truth boxes; columns 0..3 are the box, column 4 the
            label.
        fg_rois_per_image: upper bound on sampled foreground RoIs.
        rois_per_image: upper bound on the total number of sampled RoIs.
        num_classes: forwarded to _get_bbox_regression_labels.

    Returns:
        Tuple ``(labels, rois, bbox_targets, bbox_inside_weights)`` for the
        sampled RoIs, foreground first; background labels are clamped to 0.
    """
    # overlaps: (rois x gt_boxes)
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_boxes[gt_assignment, 4]

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # Guard against the case when an image has fewer than fg_rois_per_image
    # foreground RoIs. Callers may pass a float count (e.g. from np.round);
    # cast to int because it is used as a sample size and a slice bound.
    fg_rois_per_this_image = int(min(fg_rois_per_image, fg_inds.size))
    # Sample foreground regions without replacement
    if fg_inds.size > 0:
        fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False)

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
                       (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
    # Compute number of background RoIs to take from this image (guarding
    # against there being fewer than desired)
    bg_rois_per_this_image = int(
        min(rois_per_image - fg_rois_per_this_image, bg_inds.size))
    # Sample background regions without replacement
    if bg_inds.size > 0:
        bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False)

    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from various arrays:
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0
    labels[fg_rois_per_this_image:] = 0
    rois = all_rois[keep_inds]

    bbox_target_data = _compute_targets(
        rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)

    bbox_targets, bbox_inside_weights = \
        _get_bbox_regression_labels(bbox_target_data, num_classes)

    return labels, rois, bbox_targets, bbox_inside_weights
177 |
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/rpn_tools/proposal_target_layer.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_dynamic/rpn_tools/proposal_target_layer.pyc
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/rpn_tools/proposal_target_layer.py~:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Faster R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick and Sean Bell
6 | # --------------------------------------------------------
7 |
8 | import caffe
9 | import yaml
10 | import numpy as np
11 | import numpy.random as npr
12 | from fast_rcnn.config import cfg
13 | from fast_rcnn.bbox_transform import bbox_transform
14 | from utils.cython_bbox import bbox_overlaps
15 |
16 | DEBUG = False
17 |
18 | """
19 | Assign object detection proposals to ground-truth targets. Produces proposal
20 | classification labels and bounding-box regression targets.
21 | """
22 |
23 | _num_classes = 21
24 |
25 |
# Running totals used only by the DEBUG printout of
# forward_proposal_target_op (there is no layer object to hold them).
_debug_stats = {'count': 0, 'fg_num': 0, 'bg_num': 0}


def forward_proposal_target_op(bottom_0, bottom_1):
    """Assign proposals to ground-truth targets and sample a training batch.

    Args:
        bottom_0: proposal RoIs, rows of (0, x1, y1, x2, y2).
        bottom_1: ground-truth boxes, rows of (x1, y1, x2, y2, label).

    Returns:
        Tuple ``(rois, labels, bbox_targets, bbox_inside_weights,
        bbox_outside_weights)``. ``bbox_outside_weights`` is a float32 array
        that is 1 wherever ``bbox_inside_weights`` is positive, else 0.

    Raises:
        AssertionError: if any RoI carries a non-zero batch index.
    """
    all_rois = bottom_0
    # TODO(rbg): it's annoying that sometimes I have extra info before
    # and other times after box coordinates -- normalize to one format
    gt_boxes = bottom_1

    # Include ground-truth boxes in the set of candidate rois: drop the label
    # column and prepend a zero batch index so rows match the RoI layout.
    zeros = np.zeros((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype)
    all_rois = np.vstack(
        (all_rois, np.hstack((zeros, gt_boxes[:, :-1])))
    )

    # Sanity check: single batch only
    assert np.all(all_rois[:, 0] == 0), \
        'Only single item batches are supported'

    num_images = 1
    # Keep both counts as plain ints: they end up as a sample size and a
    # slice bound in _sample_rois, where floats are rejected by NumPy.
    rois_per_image = int(cfg.TRAIN.BATCH_SIZE // num_images)
    fg_rois_per_image = int(np.round(cfg.TRAIN.FG_FRACTION * rois_per_image))

    # Sample rois with classification labels and bounding box regression
    # targets
    labels, rois, bbox_targets, bbox_inside_weights = _sample_rois(
        all_rois, gt_boxes, fg_rois_per_image,
        rois_per_image, _num_classes)

    if DEBUG:
        num_fg = (labels > 0).sum()
        num_bg = (labels == 0).sum()
        print('num fg: {}'.format(num_fg))
        print('num bg: {}'.format(num_bg))
        _debug_stats['count'] += 1
        _debug_stats['fg_num'] += num_fg
        _debug_stats['bg_num'] += num_bg
        calls = float(_debug_stats['count'])
        print('num fg avg: {}'.format(_debug_stats['fg_num'] / calls))
        print('num bg avg: {}'.format(_debug_stats['bg_num'] / calls))
        # Guard the ratio: no background RoI may have been seen yet.
        if _debug_stats['bg_num'] > 0:
            ratio = float(_debug_stats['fg_num']) / float(_debug_stats['bg_num'])
        else:
            ratio = float('inf')
        print('ratio: {:.3f}'.format(ratio))

    bbox_outside_weights = np.array(bbox_inside_weights > 0).astype(np.float32)

    return rois, labels, bbox_targets, bbox_inside_weights, bbox_outside_weights
68 |
69 |
def backward(top, propagate_down, bottom):
    """No-op backward pass: this layer does not propagate gradients.

    All three arguments are accepted and ignored.
    """
    return None
73 |
def reshape(bottom, top):
    """No-op reshape hook; reshaping happens during the call to forward.

    Both arguments are accepted and ignored.
    """
    return None
77 |
78 |
def _get_bbox_regression_labels(bbox_target_data, num_classes):
    """Expand compact regression targets into per-slot target/weight blobs.

    ``bbox_target_data`` is N x (class, tx, ty, tw, th). Each foreground row
    (class > 0) gets its four targets written into one 4-wide slot of the
    output; all other entries stay zero.

    Class-agnostic mode is hard-wired on (``train_agnostic = True``): the
    output has two slots (background, foreground) and every foreground row
    uses slot 1, so the result is N x 8. The class-specific path is kept for
    reference; it sizes the output as N x 4*num_classes and uses the integer
    class id as the slot.

    Returns:
        bbox_targets (ndarray): float32 blob of regression targets
        bbox_inside_weights (ndarray): float32 blob of loss weights, set to
            cfg.TRAIN.BBOX_INSIDE_WEIGHTS in the slot that holds targets
    """
    clss = bbox_target_data[:, 0]
    train_agnostic = True
    num_slots = 2 if train_agnostic else int(num_classes)

    bbox_targets = np.zeros((clss.size, 4 * num_slots), dtype=np.float32)
    bbox_inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32)

    for ind in np.where(clss > 0)[0]:
        # clss is a float column, so the class id must be cast before it can
        # be used as a slice bound.
        slot = 1 if train_agnostic else int(clss[ind])
        start = 4 * slot
        end = start + 4
        bbox_targets[ind, start:end] = bbox_target_data[ind, 1:]
        bbox_inside_weights[ind, start:end] = cfg.TRAIN.BBOX_INSIDE_WEIGHTS

    return bbox_targets, bbox_inside_weights
113 |
114 |
def _compute_targets(ex_rois, gt_rois, labels):
    """Build regression targets for an image, prefixed by each RoI's label.

    ``ex_rois`` and ``gt_rois`` must both be N x 4. The targets come from
    ``bbox_transform`` and, when cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED
    is set, are normalized with the configured means and stds.

    Returns:
        float32 ndarray whose first column is ``labels`` and whose remaining
        columns are the targets.
    """
    assert ex_rois.shape[0] == gt_rois.shape[0]
    assert ex_rois.shape[1] == 4
    assert gt_rois.shape[1] == 4

    targets = bbox_transform(ex_rois, gt_rois)
    if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
        # Optionally normalize targets by a precomputed mean and stdev
        means = np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
        stds = np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS)
        targets = (targets - means) / stds

    label_column = labels[:, np.newaxis]
    return np.hstack((label_column, targets)).astype(np.float32, copy=False)
129 |
def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_classes):
    """Generate a random sample of RoIs comprising foreground and background
    examples.

    Args:
        all_rois: candidate RoIs; columns 1..4 are the box coordinates.
        gt_boxes: ground-truth boxes; columns 0..3 are the box, column 4 the
            label.
        fg_rois_per_image: upper bound on sampled foreground RoIs.
        rois_per_image: upper bound on the total number of sampled RoIs.
        num_classes: forwarded to _get_bbox_regression_labels.

    Returns:
        Tuple ``(labels, rois, bbox_targets, bbox_inside_weights)`` for the
        sampled RoIs, foreground first; background labels are clamped to 0.
    """
    # overlaps: (rois x gt_boxes)
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_boxes[gt_assignment, 4]

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # Guard against the case when an image has fewer than fg_rois_per_image
    # foreground RoIs. Callers may pass a float count (e.g. from np.round);
    # cast to int because it is used as a sample size and a slice bound.
    fg_rois_per_this_image = int(min(fg_rois_per_image, fg_inds.size))
    # Sample foreground regions without replacement
    if fg_inds.size > 0:
        fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False)

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
                       (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
    # Compute number of background RoIs to take from this image (guarding
    # against there being fewer than desired)
    bg_rois_per_this_image = int(
        min(rois_per_image - fg_rois_per_this_image, bg_inds.size))
    # Sample background regions without replacement
    if bg_inds.size > 0:
        bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False)

    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from various arrays:
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0
    labels[fg_rois_per_this_image:] = 0
    rois = all_rois[keep_inds]

    bbox_target_data = _compute_targets(
        rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)

    bbox_targets, bbox_inside_weights = \
        _get_bbox_regression_labels(bbox_target_data, num_classes)

    return labels, rois, bbox_targets, bbox_inside_weights
177 |
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/rpn_tools/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_dynamic/rpn_tools/utils/__init__.py
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/rpn_tools/utils/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_dynamic/rpn_tools/utils/__init__.pyc
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/rpn_tools/utils/bbox.pyx:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Multitask Network Cascade
3 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn)
4 | # Copyright (c) 2016, Haozhi Qi
5 | # Licensed under The MIT License [see LICENSE for details]
6 | # --------------------------------------------------------
7 |
8 | cimport cython
9 | import numpy as np
10 | cimport numpy as np
11 |
12 | DTYPE = np.float
13 | ctypedef np.float_t DTYPE_t
14 |
def bbox_overlaps(
        np.ndarray[DTYPE_t, ndim=2] boxes,
        np.ndarray[DTYPE_t, ndim=2] query_boxes):
    """
    Compute the pairwise overlap (intersection area / union area) between
    two sets of boxes. Box columns 0..3 are read as (x1, y1, x2, y2) and
    side lengths are inclusive (x2 - x1 + 1).

    Parameters
    ----------
    boxes: (N, 4) ndarray of float
    query_boxes: (K, 4) ndarray of float
    Returns
    -------
    overlaps: (N, K) ndarray of overlap between boxes and query_boxes;
        entries stay 0 for pairs that do not intersect
    """
    cdef unsigned int N = boxes.shape[0]
    cdef unsigned int K = query_boxes.shape[0]
    cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE)
    cdef DTYPE_t iw, ih, box_area
    cdef DTYPE_t ua
    cdef unsigned int k, n
    for k in range(K):
        # Area of the query box, computed once per column.
        box_area = (
            (query_boxes[k, 2] - query_boxes[k, 0] + 1) *
            (query_boxes[k, 3] - query_boxes[k, 1] + 1)
        )
        for n in range(N):
            # Intersection width; non-positive means no horizontal overlap.
            iw = (
                min(boxes[n, 2], query_boxes[k, 2]) -
                max(boxes[n, 0], query_boxes[k, 0]) + 1
            )
            if iw > 0:
                # Intersection height; only computed when widths overlap.
                ih = (
                    min(boxes[n, 3], query_boxes[k, 3]) -
                    max(boxes[n, 1], query_boxes[k, 1]) + 1
                )
                if ih > 0:
                    # Union area = area(box) + area(query) - intersection.
                    ua = float(
                        (boxes[n, 2] - boxes[n, 0] + 1) *
                        (boxes[n, 3] - boxes[n, 1] + 1) +
                        box_area - iw * ih
                    )
                    overlaps[n, k] = iw * ih / ua
    return overlaps
56 |
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/rpn_tools/utils/blob.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Multitask Network Cascade
3 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn)
4 | # Copyright (c) 2016, Haozhi Qi
5 | # Licensed under The MIT License [see LICENSE for details]
6 | # --------------------------------------------------------
7 |
8 | """Blob helper functions."""
9 |
10 | import numpy as np
11 | import random
12 | import cv2
13 | from utils.cython_bbox import bbox_overlaps
14 | from mnc_config import cfg
15 |
16 |
def im_list_to_blob(ims):
    """Convert a list of images into a network input.

    Assumes images are already prepared (means subtracted, BGR order, ...).
    Each image is copied into the top-left corner of a zero-filled
    (max_height, max_width, 3) canvas, so smaller images are zero-padded.

    Args:
        ims: non-empty list of (H, W, 3) arrays.

    Returns:
        float32 ndarray with axis order (batch elem, channel, height, width).
    """
    max_shape = np.array([im.shape for im in ims]).max(axis=0)
    blob = np.zeros((len(ims), max_shape[0], max_shape[1], 3),
                    dtype=np.float32)
    for i, im in enumerate(ims):
        blob[i, 0:im.shape[0], 0:im.shape[1], :] = im
    # Move channels (axis 3) to axis 1.
    return blob.transpose((0, 3, 1, 2))
34 |
35 |
def prep_im_for_blob(im, pixel_means, target_size, max_size):
    """Mean subtract and scale an image for use in a blob.

    The shorter image side is scaled to ``target_size`` unless that would
    push the longer side past ``max_size``, in which case the longer side is
    scaled to ``max_size`` instead.

    Args:
        im: input image; its first two shape entries are used as the sides.
        pixel_means: value(s) subtracted from the image.
        target_size: desired length of the shorter side.
        max_size: cap on the length of the longer side.

    Returns:
        Tuple ``(im, im_scale)``: the resized float32 image and the scale
        factor that was applied.
    """
    # Always work on a private copy: with copy=False an input that is
    # already float32 would be mean-subtracted in place in the caller.
    im = im.astype(np.float32, copy=True)
    im -= pixel_means
    im_shape = im.shape
    im_size_min = np.min(im_shape[0:2])
    im_size_max = np.max(im_shape[0:2])
    im_scale = float(target_size) / float(im_size_min)
    # Prevent the biggest axis from being more than MAX_SIZE
    if np.round(im_scale * im_size_max) > max_size:
        im_scale = float(max_size) / float(im_size_max)
    im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale,
                    interpolation=cv2.INTER_LINEAR)

    return im, im_scale
51 |
52 |
def prep_im_for_blob_cfm(im, input_scales):
    """Build an image-pyramid blob from a single image.

    The image is copied to float32, cfg.PIXEL_MEANS is subtracted, and one
    resized copy is produced per entry of ``input_scales``. For each entry
    the shorter side is scaled to that size unless the longer side would
    exceed cfg.TEST.MAX_SIZE, in which case the longer side is capped.

    Arguments:
        im (ndarray): a color image in BGR order
        input_scales: iterable of target sizes for the shorter side
    Returns:
        blob (ndarray): a data blob holding an image pyramid
        im_scale_factors (ndarray): image scales (relative to im) used
            in the image pyramid
    """
    im_orig = im.astype(np.float32, copy=True)
    im_orig -= cfg.PIXEL_MEANS

    short_side = np.min(im_orig.shape[0:2])
    long_side = np.max(im_orig.shape[0:2])

    processed_ims = []
    im_scale_factors = []
    for target_size in input_scales:
        im_scale = float(target_size) / float(short_side)
        # Prevent the biggest axis from being more than MAX_SIZE
        if np.round(im_scale * long_side) > cfg.TEST.MAX_SIZE:
            im_scale = float(cfg.TEST.MAX_SIZE) / float(long_side)
        resized = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale,
                             interpolation=cv2.INTER_LINEAR)
        im_scale_factors.append(im_scale)
        processed_ims.append(resized)

    # Create a blob to hold the input images
    return im_list_to_blob(processed_ims), np.array(im_scale_factors)
86 |
87 |
def pred_rois_for_blob(im_rois, im_scales):
    """Convert rois to network input, with support for multi-scale testing.

    With more than one scale, each RoI is assigned the scale whose scaled
    area is closest to 224 x 224; with a single scale every RoI uses level 0.

    Args:
        im_rois: (R, 4) array of boxes (x1, y1, x2, y2).
        im_scales: 1-D ndarray of image scale factors.

    Returns:
        (R, 5) float ndarray: the scale level followed by the box scaled by
        the factor of that level.
    """
    # The builtin float/int are what the removed np.float/np.int aliases
    # pointed to, so dtypes are unchanged.
    im_rois = im_rois.astype(float, copy=False)
    if len(im_scales) > 1:
        widths = im_rois[:, 2] - im_rois[:, 0] + 1
        heights = im_rois[:, 3] - im_rois[:, 1] + 1

        areas = widths * heights
        scaled_areas = areas[:, np.newaxis] * (im_scales[np.newaxis, :] ** 2)
        diff_areas = np.abs(scaled_areas - 224 * 224)
        levels = diff_areas.argmin(axis=1)[:, np.newaxis]
    else:
        levels = np.zeros((im_rois.shape[0], 1), dtype=int)
    # levels is (R, 1), so the gathered scales broadcast across the 4 coords.
    im_rois = im_rois * im_scales[levels]
    rois_blob = np.hstack((levels.astype(float), im_rois))
    return rois_blob
107 |
108 |
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/rpn_tools/utils/blob.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_dynamic/rpn_tools/utils/blob.pyc
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/rpn_tools/utils/cython_bbox.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_dynamic/rpn_tools/utils/cython_bbox.so
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/rpn_tools/utils/mnc_config.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_dynamic/rpn_tools/utils/mnc_config.pyc
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/rpn_tools/utils/timer.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Multitask Network Cascade
3 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn)
4 | # Copyright (c) 2016, Haozhi Qi
5 | # Licensed under The MIT License [see LICENSE for details]
6 | # --------------------------------------------------------
7 |
8 | import time
9 |
10 |
class Timer(object):
    """Accumulating stopwatch based on time.time().

    Attributes:
        total_time: sum of all measured intervals.
        calls: number of completed tic/toc pairs.
        start_time: timestamp recorded by the last tic().
        diff: length of the most recent interval.
        average_time: total_time / calls after the last toc().
    """

    def __init__(self):
        self.start_time = 0.
        self.diff = 0.
        self.total_time = 0.
        self.average_time = 0.
        self.calls = 0

    def tic(self):
        """Record the start of an interval."""
        # time.time is used instead of time.clock because time.clock
        # does not normalize for multithreading
        self.start_time = time.time()

    def toc(self, average=True):
        """Close the current interval and update the running statistics.

        Returns the average interval length when ``average`` is true,
        otherwise the length of the interval just closed.
        """
        now = time.time()
        self.diff = now - self.start_time
        self.total_time += self.diff
        self.calls += 1
        self.average_time = self.total_time / self.calls
        return self.average_time if average else self.diff
34 |
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/rpn_tools/utils/timer.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_dynamic/rpn_tools/utils/timer.pyc
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/rpn_tools/utils/unmap.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Multitask Network Cascade
3 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn)
4 | # Copyright (c) 2016, Haozhi Qi
5 | # Licensed under The MIT License [see LICENSE for details]
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 |
10 |
def unmap(data, count, inds, fill=0):
    """Scatter a subset of items back into an array covering the full set.

    Allocates a float32 array with ``count`` rows (trailing dimensions taken
    from ``data``), fills it with ``fill`` and writes ``data`` at ``inds``.
    """
    out_shape = (count, ) + data.shape[1:]
    ret = np.empty(out_shape, dtype=np.float32)
    ret.fill(fill)
    if len(data.shape) == 1:
        ret[inds] = data
    else:
        ret[inds, :] = data
    return ret
23 |
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/rpn_tools/utils/unmap.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_dynamic/rpn_tools/utils/unmap.pyc
--------------------------------------------------------------------------------
/tf_rfcn_dynamic/rpn_tools/utils/vis_seg.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Multitask Network Cascade
3 | # Written by Haozhi Qi
4 | # Copyright (c) 2016, Haozhi Qi
5 | # Licensed under The MIT License [see LICENSE for details]
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 | import cPickle
10 | import os
11 | import cv2
12 | import Image
13 | from mnc_config import cfg
14 |
15 |
def vis_seg(img_names, cls_names, output_dir, gt_dir):
    """Render segmentation results as images under ``output_dir``.

    For every image name three files are written:
      * SegInst/<name>.jpg -- instance map, colored via the VOC color map
      * SegCls/<name>.jpg  -- class map, colored via the VOC color map
      * SegRes/<name>.png  -- the source image blended with the class map
        (blend factor 0.8)

    Args:
        img_names: list of image names (without extension)
        cls_names: class names, forwarded to _prepare_dict
        output_dir: existing directory holding the cached results; the three
            output sub-directories are created inside it when missing
        gt_dir: directory whose ``img`` sub-folder holds the source JPEGs
    """
    assert os.path.exists(output_dir)
    inst_dir = os.path.join(output_dir, 'SegInst')
    cls_dir = os.path.join(output_dir, 'SegCls')
    res_dir = os.path.join(output_dir, 'SegRes')
    for sub_dir in (inst_dir, cls_dir, res_dir):
        if not os.path.isdir(sub_dir):
            os.mkdir(sub_dir)

    # One prediction dictionary per image, in img_names order.
    res_list = _prepare_dict(img_names, cls_names, output_dir)
    for img_ind, image_name in enumerate(img_names):
        jpg_name = image_name + '.jpg'
        target_inst_file = os.path.join(inst_dir, jpg_name)
        target_cls_file = os.path.join(cls_dir, jpg_name)
        print(image_name)
        gt_image = gt_dir + '/img/' + jpg_name
        img_height, img_width = cv2.imread(gt_image).shape[:2]
        inst_img, cls_img = _convert_pred_to_image(
            img_width, img_height, res_list[img_ind])
        color_map = _get_voc_color_map()
        inst_out_img = np.zeros((img_height, img_width, 3))
        cls_out_img = np.zeros((img_height, img_width, 3))
        for row in xrange(img_height):
            for col in xrange(img_width):
                # Color-map entries are reversed before being stored.
                inst_out_img[row][col] = color_map[inst_img[row][col]][::-1]
                cls_out_img[row][col] = color_map[cls_img[row][col]][::-1]

        cv2.imwrite(target_inst_file, inst_out_img)
        cv2.imwrite(target_cls_file, cls_out_img)

        # Overlay the class map that was just written on the source image.
        background = Image.open(gt_image).convert('RGBA')
        mask = Image.open(target_cls_file).convert('RGBA')
        superimpose_image = Image.blend(background, mask, 0.8)
        superimpose_image.save(os.path.join(res_dir, image_name + '.png'), 'PNG')
62 |
63 |
def _prepare_dict(img_names, cls_names, cache_dir, vis_thresh=0.5):
    """
    Collect, per image, the detections whose score reaches ``vis_thresh``.

    Detections are read from ``res_boxes.pkl`` and masks from
    ``res_masks.pkl`` inside ``cache_dir``; both are indexed as
    ``[class index][image index]``. The class named '__background__' and
    classes without detections for an image are skipped.

    Returns:
        list with one dictionary per image, holding the image name and the
        parallel lists 'cls_name' (class indices), 'boxes' and 'masks'
    """
    # NOTE: pickle executes arbitrary code on load; only use trusted caches.
    loaded = []
    for pkl_name in ('res_boxes.pkl', 'res_masks.pkl'):
        with open(os.path.join(cache_dir, pkl_name), 'rb') as pkl_file:
            loaded.append(cPickle.load(pkl_file))
    all_dets, all_segs = loaded

    results = []
    for img_idx, img_name in enumerate(img_names):
        boxes, masks, classes = [], [], []
        for cls_idx, cls_name in enumerate(cls_names):
            if cls_name == '__background__':
                continue
            dets = all_dets[cls_idx][img_idx]
            if len(dets) == 0:
                continue
            segs = all_segs[cls_idx][img_idx]
            # the last column of a detection row is compared to the threshold
            for det_idx in np.where(dets[:, -1] >= vis_thresh)[0]:
                boxes.append(dets[det_idx])
                # TODO: remove this annoying 0
                masks.append(segs[det_idx][0])
                classes.append(cls_idx)
        results.append({'image_name': img_name,
                        'cls_name': classes,
                        'boxes': boxes,
                        'masks': masks})

    return results
99 |
100 |
def _convert_pred_to_image(img_width, img_height, pred_dict):
    """
    Rasterise the predictions of one image into two label maps.

    Args:
        img_width: image width in pixels
        img_height: image height in pixels
        pred_dict: dictionary with the parallel lists 'boxes', 'masks' and
            'cls_name' (class indices), one entry per instance
    Returns:
        inst_img: int array (img_height, img_width); pixels covered by the
            i-th instance hold i + 1, uncovered pixels hold 0
        cls_img: int array (img_height, img_width); covered pixels hold the
            class index, and box outlines are drawn with the value 150
    """
    num_inst = len(pred_dict['boxes'])
    inst_img = np.zeros((img_height, img_width))
    cls_img = np.zeros((img_height, img_width))
    for i in range(num_inst):
        box = np.round(pred_dict['boxes'][i]).astype(int)
        mask = pred_dict['masks'][i]
        cls_num = pred_dict['cls_name'][i]
        # clip box into image space
        x1 = int(min(max(box[0], 0), img_width - 1))
        y1 = int(min(max(box[1], 0), img_height - 1))
        x2 = int(min(max(box[2], 0), img_width - 1))
        y2 = int(min(max(box[3], 0), img_height - 1))
        # stretch the mask to the size of the clipped box, then binarize it
        mask = cv2.resize(mask.astype(np.float32), (x2 - x1 + 1, y2 - y1 + 1))
        mask = mask >= cfg.BINARIZE_THRESH

        # pixels outside the mask keep the value written by earlier instances
        outside = np.logical_not(mask)
        part1 = (i+1) * mask.astype(np.float32)
        part2 = np.multiply(outside, inst_img[y1:y2+1, x1:x2+1])
        part3 = np.multiply(outside, cls_img[y1:y2+1, x1:x2+1])
        inst_img[y1:y2+1, x1:x2+1] = part1 + part2
        cls_img[y1:y2+1, x1:x2+1] = cls_num * mask.astype(np.float32) + part3
        # Plot bounding boxes simultaneously. The slice start is clamped to 0:
        # a start of -1 would address the last row/column and yield an empty
        # slice, silently dropping the outline of boxes touching the border.
        cls_img[y1:y2+1, max(x1-1, 0):x1+1] = 150
        cls_img[y1:y2+1, max(x2-1, 0):x2+1] = 150
        cls_img[max(y1-1, 0):y1+1, x1:x2+1] = 150
        cls_img[max(y2-1, 0):y2+1, x1:x2+1] = 150

    inst_img = inst_img.astype(int)
    cls_img = cls_img.astype(int)
    return inst_img, cls_img
131 |
132 |
def _get_voc_color_map(n=256):
    """
    Build the label color map used to render segmentation results.

    The bits of the label id are consumed three at a time (lowest first);
    within each triple the bits go to the red, green and blue channel
    respectively, filling each channel from its most significant bit down.

    Args:
        n: number of labels to generate colors for
    Returns:
        float array of shape (n, 3); row i is the (r, g, b) color of label i
    """
    color_map = np.zeros((n, 3))
    for i in range(n):
        r = g = b = 0
        cid = i
        for j in range(8):
            shift = 7 - j
            # Plain integer bit tests replace the repeated uint8 unpacking;
            # they give the same bits and do not rely on cid fitting in 8 bits.
            r |= (cid & 1) << shift
            g |= ((cid >> 1) & 1) << shift
            b |= ((cid >> 2) & 1) << shift
            cid >>= 3

        color_map[i] = (r, g, b)
    return color_map
148 |
--------------------------------------------------------------------------------
/tf_rfcn_fixed/cnn_tools/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_fixed/cnn_tools/__init__.py
--------------------------------------------------------------------------------
/tf_rfcn_fixed/cnn_tools/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_fixed/cnn_tools/__init__.pyc
--------------------------------------------------------------------------------
/tf_rfcn_fixed/cnn_tools/tools.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_fixed/cnn_tools/tools.pyc
--------------------------------------------------------------------------------
/tf_rfcn_fixed/psroi_pool_tools/__init__.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
--------------------------------------------------------------------------------
/tf_rfcn_fixed/psroi_pool_tools/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_fixed/psroi_pool_tools/__init__.pyc
--------------------------------------------------------------------------------
/tf_rfcn_fixed/psroi_pool_tools/psroi_pooling_op.py:
--------------------------------------------------------------------------------
import os
import os.path as osp

import tensorflow as tf

# Location of the compiled PSROI pooling op library. The default is the path
# used by the original author; set the PSROI_POOLING_LIB environment variable
# to point at a local build instead of editing this file.
filename = os.environ.get(
    'PSROI_POOLING_LIB',
    '/home/alfonso/tensorflow/bazel-bin/tensorflow/core/user_ops/ps_roipool/psroi_pooling.so')
_psroi_pooling_module = tf.load_op_library(filename)
# Re-export the forward op and its gradient op from the loaded library.
psroi_pool = _psroi_pooling_module.psroi_pool
psroi_pool_grad = _psroi_pooling_module.psroi_pool_grad
8 |
--------------------------------------------------------------------------------
/tf_rfcn_fixed/psroi_pool_tools/psroi_pooling_op.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_fixed/psroi_pool_tools/psroi_pooling_op.pyc
--------------------------------------------------------------------------------
/tf_rfcn_fixed/psroi_pool_tools/psroi_pooling_op_grad.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from tensorflow.python.framework import ops
3 | import psroi_pooling_op
4 |
@tf.RegisterShape("PSROIPool")
def _psroi_pool_shape(op):
    """Shape function for the PSROIPool op.

    Both outputs share the shape
    [num_rois, output_dim, group_size, group_size], where num_rois is the
    first dimension of the second input (the rois) and output_dim and
    group_size are attributes of the op.
    """
    # Only the rois input contributes to the output shape; the shape of the
    # data input was previously fetched but never used.
    num_rois = op.inputs[1].get_shape().as_list()[0]

    output_dim = op.get_attr('output_dim')
    group_size = op.get_attr('group_size')

    output_shape = tf.TensorShape([num_rois, output_dim, group_size, group_size])
    return [output_shape, output_shape]
23 |
@ops.RegisterGradient("PSROIPool")
def _psroi_pool_grad(op, grad, _):
    """The gradients for `psroi_pool`.
    Args:
      op: The `PSROIPool` `Operation` that we are differentiating, which we can use
        to find the inputs and outputs of the original op.
      grad: Gradient with respect to the first output of the op.
      _: Gradient with respect to the second output (the mapping channel); unused.
    Returns:
      A two-element list: the gradient with respect to the data input, and
      None for the rois input.
    """
    data = op.inputs[0]
    rois = op.inputs[1]
    mapping_channel = op.outputs[1]
    spatial_scale = op.get_attr('spatial_scale')

    # compute gradient
    data_grad = psroi_pooling_op.psroi_pool_grad(data, rois, mapping_channel, grad, spatial_scale)

    return [data_grad, None]  # one entry per op input; nothing is propagated to rois
44 |
--------------------------------------------------------------------------------
/tf_rfcn_fixed/psroi_pool_tools/psroi_pooling_op_grad.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_fixed/psroi_pool_tools/psroi_pooling_op_grad.pyc
--------------------------------------------------------------------------------
/tf_rfcn_fixed/rpn_tools/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_fixed/rpn_tools/__init__.py
--------------------------------------------------------------------------------
/tf_rfcn_fixed/rpn_tools/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_fixed/rpn_tools/__init__.pyc
--------------------------------------------------------------------------------
/tf_rfcn_fixed/rpn_tools/__init__.py~:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 | from tools import anchor_target_layer
4 |
def weight_variable(shape):
    """Return a variable of the given shape drawn from a truncated normal (stddev 0.1)."""
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))
8 |
def bias_variable(shape):
    """Return a variable of the given shape with every element set to 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))
12 |
def conv2d(x, W):
    """Convolve x with the filter W using unit strides and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')
15 |
def max_pool_2x2(x):
    """Max-pool x with a [1, 2, 2, 1] window and matching strides, 'SAME' padding."""
    window = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=list(window), padding='SAME')
18 |
def process_anno(x):
    """Parse an annotation string into labels and boxes.

    The text is read as a flat sequence of space-separated integers and
    grouped into rows of five values: the first value of a row is the
    label, the remaining four are returned as the box of that row.

    Returns:
        (labels, boxes): arrays of shape (N,) and (N, 4)
    """
    records = np.fromstring(x, dtype=int, sep=" ").reshape(-1, 5)
    return records[:, 0], records[:, 1:]
25 |
26 | '''
27 | num_labels = 2
28 | batch_size = 10
29 | filename_queue = tf.train.string_input_producer(["train_files.csv"], num_epochs=None, shuffle=False)
30 | reader = tf.TextLineReader()
31 | key, value = reader.read(filename_queue)
32 | record_defaults = [[""],[0]]
33 | image_path, label = tf.decode_csv(value, field_delim=",", record_defaults=record_defaults)
34 | my_img = tf.image.decode_png(tf.read_file(image_path), channels=3)
35 | my_img = tf.cast(my_img,tf.float32) / 255
36 | my_img = tf.image.resize_images(my_img,224,224)
37 | min_after_dequeue = 5
38 | capacity = min_after_dequeue + 3 * batch_size
39 | im_batch, lb_batch = tf.train.batch([my_img,label],batch_size=batch_size,capacity=capacity)
40 | '''
41 |
# Input pipeline: read (image path, annotation path) pairs line by line from
# a CSV file and turn each pair into an image tensor plus labels and boxes.
num_labels = 25
batch_size = 1
reader = tf.TextLineReader()
filename_queue = tf.train.string_input_producer(["train_rcnn_files.csv"], num_epochs=None, shuffle=False)
key, value = reader.read(filename_queue)
# every CSV line holds two string fields, both defaulting to ""
image_path, anno_path = tf.decode_csv(value, record_defaults=[[""],[""]], field_delim=",")
# decode as a 3-channel PNG, convert to float and divide by 255
my_img = tf.image.decode_png(tf.read_file(image_path), channels=3)
my_img = tf.cast(my_img,tf.float32) / 255
my_img = tf.image.resize_images(my_img,224,224)
anno = tf.read_file(anno_path)
# process_anno runs as a Python function inside the graph and yields
# (labels, boxes); both outputs are declared as int64
labels, gt_box = tf.py_func(process_anno,[anno],[tf.int64,tf.int64])
# NOTE(review): tf.concat is called with the axis as first argument, which
# looks like the pre-1.0 TensorFlow signature - confirm against the
# installed version.
labels = tf.reshape(tf.concat(1, labels), [-1,1])
gt_box = tf.reshape(tf.concat(1, gt_box), [-1,4])
55 |
56 |
57 | '''
58 | x, y1_ = im_batch, lb_batch
59 | l_b = tf.to_int64(y1_)
60 | l = tf.one_hot(indices=l_b,depth=num_labels,on_value=1.0,off_value=0.0,axis=-1)
61 | l = tf.cast(l,tf.float32)
62 |
63 | W_conv1 = weight_variable([3,3,3,64])
64 | b_conv1 = bias_variable([64])
65 | h_conv1 = tf.nn.relu(conv2d(x, W_conv1) + b_conv1)
66 |
67 | W_conv2 = weight_variable([3,3,64,64])
68 | b_conv2 = bias_variable([64])
69 | h_conv2 = tf.nn.relu(conv2d(h_conv1, W_conv2) + b_conv2)
70 |
71 | h_max1 = max_pool_2x2(h_conv2)
72 |
73 | W_conv3 = weight_variable([3,3,64,128])
74 | b_conv3 = bias_variable([128])
75 | h_conv3 = tf.nn.relu(conv2d(h_max1, W_conv3) + b_conv3)
76 |
77 | W_conv4 = weight_variable([3,3,128,128])
78 | b_conv4 = bias_variable([128])
79 | h_conv4 = tf.nn.relu(conv2d(h_conv3, W_conv4) + b_conv4)
80 |
81 | h_max2 = max_pool_2x2(h_conv4)
82 |
83 | W_conv5 = weight_variable([3,3,128,256])
84 | b_conv5 = bias_variable([256])
85 | h_conv5 = tf.nn.relu(conv2d(h_max2, W_conv5) + b_conv5)
86 |
87 | W_conv6 = weight_variable([3,3,256,256])
88 | b_conv6 = bias_variable([256])
89 | h_conv6 = tf.nn.relu(conv2d(h_conv5, W_conv6) + b_conv6)
90 |
91 | W_conv7 = weight_variable([3,3,256,256])
92 | b_conv7 = bias_variable([256])
93 | h_conv7 = tf.nn.relu(conv2d(h_conv6, W_conv7) + b_conv7)
94 |
95 | h_max3 = max_pool_2x2(h_conv7)
96 |
97 | W_conv7 = weight_variable([3,3,256,512])
98 | b_conv7 = bias_variable([512])
99 | h_conv7 = tf.nn.relu(conv2d(h_max3, W_conv7) + b_conv7)
100 |
101 | W_conv8 = weight_variable([3,3,512,512])
102 | b_conv8 = bias_variable([512])
103 | h_conv8 = tf.nn.relu(conv2d(h_conv7, W_conv8) + b_conv8)
104 |
105 | W_conv9 = weight_variable([3,3,512,512])
106 | b_conv9 = bias_variable([512])
107 | h_conv9 = tf.nn.relu(conv2d(h_conv8, W_conv9) + b_conv9)
108 |
109 | h_max4 = max_pool_2x2(h_conv9)
110 |
111 | W_conv10 = weight_variable([3,3,512,512])
112 | b_conv10 = bias_variable([512])
113 | h_conv10 = tf.nn.relu(conv2d(h_max4, W_conv10) + b_conv10)
114 |
115 | W_conv11 = weight_variable([3,3,512,512])
116 | b_conv11 = bias_variable([512])
117 | h_conv11 = tf.nn.relu(conv2d(h_conv10, W_conv11) + b_conv11)
118 |
119 | W_conv12 = weight_variable([3,3,512,512])
120 | b_conv12 = bias_variable([512])
121 | h_conv12 = tf.nn.relu(conv2d(h_conv11, W_conv12) + b_conv12)
122 |
123 | #RPN
124 |
125 | W_rpn3 = weight_variable([3,3,512,512])
126 | b_rpn3 = bias_variable([512])
127 | h_rpn3 = tf.nn.relu(conv2d(h_conv12, W_rpn3) + b_rpn3)
128 |
129 | W_cls_score = weight_variable([1,1,512,18])
130 | b_cls_score = bias_variable([18])
131 | h_cls_score = tf.nn.relu(conv2d(h_rpn3, W_cls_score) + b_cls_score)
132 |
133 | W_bbox_pred = weight_variable([1,1,512,36])
134 | b_bbox_pred = bias_variable([36])
135 | h_bbox_pred = tf.nn.relu(conv2d(h_rpn3, W_cls_score) + b_cls_score)
136 |
137 | h_cls_score_reshape = tf.reshape(h_cls_score, [2,-1])
138 |
139 |
140 |
141 | #print h_cls_score
142 |
143 | h_fc1 = tf.reshape(h_cls_score_reshape, [-1, 14*14*18])
144 | W_fc1 = weight_variable([14*14*18,2])
145 | b_fc1 = bias_variable([2])
146 | y_conv = tf.matmul(h_fc1, W_fc1) + b_fc1
147 |
148 | cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(y_conv, y1_)
149 | loss = tf.reduce_mean(cross_entropy)
150 | train_step = tf.train.AdamOptimizer(1e-4).minimize(loss)
151 | correct_prediction = tf.equal(tf.argmax(y_conv,1), tf.argmax(l,1))
152 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
153 |
154 |
155 | '''
# Debug driver: start the input queues and print the parsed labels repeatedly.
# The training step and its reporting are commented out.
init = tf.initialize_all_variables()
with tf.Session() as sess:
    sess.run(init)
    # queue runners must be started before tensors fed by the queues are evaluated
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess,coord=coord)

    for i in range(10000000):
        print labels.eval()
        #sess.run(train_step)
        #if i%10 == 0:
            #print "Iteration " + str(i)
            #print "Loss: " + str(loss.eval())
            #print "Accuracy: " + str(accuracy.eval())
            #print ""

    # stop the queue-runner threads and wait for them to finish
    coord.request_stop()
    coord.join(threads)
    # NOTE(review): explicit close in addition to the enclosing `with` block
    sess.close()
174 |
175 |
176 |
177 |
178 |
179 |
180 |
181 |
182 |
--------------------------------------------------------------------------------
/tf_rfcn_fixed/rpn_tools/anchor_target_layer_modified.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_fixed/rpn_tools/anchor_target_layer_modified.pyc
--------------------------------------------------------------------------------
/tf_rfcn_fixed/rpn_tools/fast_rcnn/__init__.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
--------------------------------------------------------------------------------
/tf_rfcn_fixed/rpn_tools/fast_rcnn/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_fixed/rpn_tools/fast_rcnn/__init__.pyc
--------------------------------------------------------------------------------
/tf_rfcn_fixed/rpn_tools/fast_rcnn/bbox_transform.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 |
def bbox_transform(ex_rois, gt_rois):
    """Compute the regression targets that map ex_rois onto gt_rois.

    Both inputs are arrays whose columns 0-3 hold x1, y1, x2, y2. Widths and
    heights are measured with the "+ 1" convention. The result has one row
    per box with the columns (dx, dy, dw, dh): center offsets scaled by the
    ex box size, and log ratios of the sizes.
    """
    def _size_and_center(rois):
        # width, height, center x, center y of every box
        w = rois[:, 2] - rois[:, 0] + 1.0
        h = rois[:, 3] - rois[:, 1] + 1.0
        return w, h, rois[:, 0] + 0.5 * w, rois[:, 1] + 0.5 * h

    ex_w, ex_h, ex_cx, ex_cy = _size_and_center(ex_rois)
    gt_w, gt_h, gt_cx, gt_cy = _size_and_center(gt_rois)

    deltas = [
        (gt_cx - ex_cx) / ex_w,
        (gt_cy - ex_cy) / ex_h,
        np.log(gt_w / ex_w),
        np.log(gt_h / ex_h),
    ]
    return np.vstack(deltas).transpose()
29 |
def bbox_transform_inv(boxes, deltas):
    """Apply regression deltas to boxes and return the predicted boxes.

    boxes holds x1, y1, x2, y2 in columns 0-3. deltas holds groups of four
    columns (dx, dy, dw, dh); every group is applied to the same box, so
    the result has the shape and dtype of deltas. With no boxes, an empty
    array with as many columns as deltas is returned.
    """
    if boxes.shape[0] == 0:
        return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype)

    boxes = boxes.astype(deltas.dtype, copy=False)

    # box geometry as column vectors so it broadcasts over the delta groups
    box_w = boxes[:, 2] - boxes[:, 0] + 1.0
    box_h = boxes[:, 3] - boxes[:, 1] + 1.0
    center_x = (boxes[:, 0] + 0.5 * box_w)[:, np.newaxis]
    center_y = (boxes[:, 1] + 0.5 * box_h)[:, np.newaxis]
    box_w = box_w[:, np.newaxis]
    box_h = box_h[:, np.newaxis]

    new_cx = deltas[:, 0::4] * box_w + center_x
    new_cy = deltas[:, 1::4] * box_h + center_y
    new_w = np.exp(deltas[:, 2::4]) * box_w
    new_h = np.exp(deltas[:, 3::4]) * box_h

    result = np.zeros(deltas.shape, dtype=deltas.dtype)
    result[:, 0::4] = new_cx - 0.5 * new_w  # x1
    result[:, 1::4] = new_cy - 0.5 * new_h  # y1
    result[:, 2::4] = new_cx + 0.5 * new_w  # x2
    result[:, 3::4] = new_cy + 0.5 * new_h  # y2

    return result
62 |
def clip_boxes(boxes, im_shape):
    """
    Clip boxes to image boundaries.

    boxes holds groups of four columns (x1, y1, x2, y2); im_shape starts
    with (height, width). x values are limited to [0, width - 1] and y
    values to [0, height - 1]. The array is modified in place and returned.
    """
    max_x = im_shape[1] - 1
    max_y = im_shape[0] - 1
    # column offsets 0..3 within each group: x1, y1, x2, y2
    for offset, upper in enumerate((max_x, max_y, max_x, max_y)):
        boxes[:, offset::4] = np.maximum(np.minimum(boxes[:, offset::4], upper), 0)
    return boxes
77 |
--------------------------------------------------------------------------------
/tf_rfcn_fixed/rpn_tools/fast_rcnn/bbox_transform.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_fixed/rpn_tools/fast_rcnn/bbox_transform.pyc
--------------------------------------------------------------------------------
/tf_rfcn_fixed/rpn_tools/fast_rcnn/config.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_fixed/rpn_tools/fast_rcnn/config.pyc
--------------------------------------------------------------------------------
/tf_rfcn_fixed/rpn_tools/fast_rcnn/nms/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_fixed/rpn_tools/fast_rcnn/nms/__init__.py
--------------------------------------------------------------------------------
/tf_rfcn_fixed/rpn_tools/fast_rcnn/nms/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_fixed/rpn_tools/fast_rcnn/nms/__init__.pyc
--------------------------------------------------------------------------------
/tf_rfcn_fixed/rpn_tools/fast_rcnn/nms/cpu_nms.pyx:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Multitask Network Cascade
3 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn)
4 | # Copyright (c) 2016, Haozhi Qi
5 | # Licensed under The MIT License [see LICENSE for details]
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 | cimport numpy as np
10 |
# Larger of two float32 values; b is returned whenever `a >= b` is false.
cdef inline np.float32_t max(np.float32_t a, np.float32_t b):
    return a if a >= b else b
13 |
# Smaller of two float32 values; b is returned whenever `a <= b` is false.
cdef inline np.float32_t min(np.float32_t a, np.float32_t b):
    return a if a <= b else b
16 |
def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh):
    """
    Greedy non-maximum suppression on the CPU.

    Args:
        dets: float32 array whose rows hold x1, y1, x2, y2, score in
            columns 0-4
        thresh: a lower-scoring box is suppressed when its overlap
            (intersection over union) with a kept box is >= thresh
    Returns:
        list of row indices into dets that are kept, ordered by
        descending score
    """
    cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0]
    cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1]
    cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2]
    cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3]
    cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4]

    cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1]

    cdef int ndets = dets.shape[0]
    # np.int_ replaces the `np.int` alias, which recent NumPy releases no
    # longer provide; it names the same default integer type.
    cdef np.ndarray[np.int_t, ndim=1] suppressed = \
            np.zeros((ndets), dtype=np.int_)

    # nominal indices
    cdef int _i, _j
    # sorted indices
    cdef int i, j
    # temp variables for box i's (the box currently under consideration)
    cdef np.float32_t ix1, iy1, ix2, iy2, iarea
    # variables for computing overlap with box j (lower scoring box)
    cdef np.float32_t xx1, yy1, xx2, yy2
    cdef np.float32_t w, h
    cdef np.float32_t inter, ovr

    keep = []
    for _i in range(ndets):
        i = order[_i]
        if suppressed[i] == 1:
            continue
        keep.append(i)
        ix1 = x1[i]
        iy1 = y1[i]
        ix2 = x2[i]
        iy2 = y2[i]
        iarea = areas[i]
        # only boxes that score lower than box i can be suppressed by it
        for _j in range(_i + 1, ndets):
            j = order[_j]
            if suppressed[j] == 1:
                continue
            xx1 = max(ix1, x1[j])
            yy1 = max(iy1, y1[j])
            xx2 = min(ix2, x2[j])
            yy2 = min(iy2, y2[j])
            w = max(0.0, xx2 - xx1 + 1)
            h = max(0.0, yy2 - yy1 + 1)
            inter = w * h
            ovr = inter / (iarea + areas[j] - inter)
            if ovr >= thresh:
                suppressed[j] = 1

    return keep
69 |
--------------------------------------------------------------------------------
/tf_rfcn_fixed/rpn_tools/fast_rcnn/nms/cpu_nms.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_fixed/rpn_tools/fast_rcnn/nms/cpu_nms.so
--------------------------------------------------------------------------------
/tf_rfcn_fixed/rpn_tools/fast_rcnn/nms/gpu_mv.hpp:
--------------------------------------------------------------------------------
1 | void _mv(const float* all_boxes, const float* all_masks, const int all_boxes_num,
2 | const int* candidate_inds, const int* candidate_start, const float* candidate_weights, const int candidate_num,
3 | const int image_height, const int image_width, const int box_dim, const int mask_size, const int result_num,
4 | float* finalize_output_mask, int* finalize_output_box, const int device_id);
5 |
--------------------------------------------------------------------------------
/tf_rfcn_fixed/rpn_tools/fast_rcnn/nms/gpu_mv.pyx:
--------------------------------------------------------------------------------
1 |
2 | import numpy as np
3 | cimport numpy as np
4 |
5 | assert sizeof(int) == sizeof(np.int32_t)
6 |
7 | cdef extern from "gpu_mv.hpp":
8 | void _mv(np.float32_t* all_boxes, np.float32_t* all_masks, np.int32_t all_boxes_num, np.int32_t* candidate_inds, np.int32_t* candidate_start, np.float32_t* candidate_weights, np.int32_t candidate_num, np.int32_t image_height, np.int32_t image_width, np.int32_t box_dim, np.int32_t mask_size, np.int32_t result_num, np.float32_t* result_mask, np.int32_t* result_box, np.int32_t device_id);
9 |
# Shape notes from the original author:
# boxes: n * 4
# masks: n * 1 * 21 * 21
# scores: n * 21
# NOTE(review): mv() takes no scores argument, so the last note does not
# correspond to any parameter below.
#
# Thin wrapper around the external _mv routine declared in gpu_mv.hpp. It
# allocates zero-initialised result buffers with one entry per element of
# candidate_start, passes raw pointers to _mv and returns
# (result_mask, result_box).
def mv(np.ndarray[np.float32_t, ndim=2] all_boxes,
       np.ndarray[np.float32_t, ndim=4] all_masks,
       np.ndarray[np.int32_t, ndim=1] candidate_inds,
       np.ndarray[np.int32_t, ndim=1] candidate_start,
       np.ndarray[np.float32_t, ndim=1] candidate_weights,
       np.int32_t image_height,
       np.int32_t image_width,
       np.int32_t device_id = 0):
    cdef int all_box_num = all_boxes.shape[0]
    cdef int boxes_dim = all_boxes.shape[1]
    # only the last mask dimension is forwarded as the mask size
    cdef int mask_size = all_masks.shape[3]
    cdef int candidate_num = candidate_inds.shape[0]
    cdef int result_num = candidate_start.shape[0]
    # output buffers: one mask (with the spatial size of the input masks)
    # and one box per result
    cdef np.ndarray[np.float32_t, ndim=4] \
        result_mask = np.zeros((result_num, 1, all_masks.shape[2], all_masks.shape[3]), dtype=np.float32)
    cdef np.ndarray[np.int32_t, ndim=2] \
        result_box = np.zeros((result_num, boxes_dim), dtype=np.int32)
    # candidate_start.shape[0] is the same value as result_num above
    _mv(&all_boxes[0, 0], &all_masks[0, 0, 0, 0], all_box_num, &candidate_inds[0], &candidate_start[0], &candidate_weights[0], candidate_num, image_height, image_width, boxes_dim, mask_size, candidate_start.shape[0], &result_mask[0,0,0,0], &result_box[0,0], device_id)
    return result_mask, result_box
32 |
--------------------------------------------------------------------------------
/tf_rfcn_fixed/rpn_tools/fast_rcnn/nms/gpu_nms.hpp:
--------------------------------------------------------------------------------
1 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
2 | int boxes_dim, float nms_overlap_thresh, int device_id);
3 |
--------------------------------------------------------------------------------
/tf_rfcn_fixed/rpn_tools/fast_rcnn/nms/gpu_nms.pyx:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Multitask Network Cascade
3 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn)
4 | # Copyright (c) 2016, Haozhi Qi
5 | # Licensed under The MIT License [see LICENSE for details]
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 | cimport numpy as np
10 |
11 | assert sizeof(int) == sizeof(np.int32_t)
12 |
13 | cdef extern from "gpu_nms.hpp":
14 | void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int)
15 |
# Non-maximum suppression through the external _nms routine declared in
# gpu_nms.hpp. dets rows carry the score in column 4. Returns a list of
# row indices into dets.
def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh,
            np.int32_t device_id=0):
    cdef int boxes_num = dets.shape[0]
    cdef int boxes_dim = dets.shape[1]
    cdef int num_out
    # output buffer for _nms, sized for the case that every box is kept
    cdef np.ndarray[np.int32_t, ndim=1] \
        keep = np.zeros(boxes_num, dtype=np.int32)
    cdef np.ndarray[np.float32_t, ndim=1] \
        scores = dets[:, 4]
    # _nms receives the boxes sorted by descending score
    cdef np.ndarray[np.int_t, ndim=1] \
        order = scores.argsort()[::-1]
    cdef np.ndarray[np.float32_t, ndim=2] \
        sorted_dets = dets[order, :]
    _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id)
    # only the first num_out entries of keep are used
    keep = keep[:num_out]
    # keep indexes the sorted array; map back to rows of dets through order
    return list(order[keep])
32 |
--------------------------------------------------------------------------------
/tf_rfcn_fixed/rpn_tools/fast_rcnn/nms/gpu_nms.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_fixed/rpn_tools/fast_rcnn/nms/gpu_nms.so
--------------------------------------------------------------------------------
/tf_rfcn_fixed/rpn_tools/fast_rcnn/nms/mnc_config.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_fixed/rpn_tools/fast_rcnn/nms/mnc_config.pyc
--------------------------------------------------------------------------------
/tf_rfcn_fixed/rpn_tools/fast_rcnn/nms/mv.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_fixed/rpn_tools/fast_rcnn/nms/mv.so
--------------------------------------------------------------------------------
/tf_rfcn_fixed/rpn_tools/fast_rcnn/nms/nms_kernel.cu:
--------------------------------------------------------------------------------
1 | // --------------------------------------------------------
2 | // Multitask Network Cascade
3 | // Modified from MATLAB Faster R-CNN (https://github.com/shaoqingren/faster_rcnn)
4 | // Copyright (c) 2016, Haozhi Qi
5 | // Licensed under The MIT License [see LICENSE for details]
6 | // --------------------------------------------------------
7 |
8 | #include "gpu_nms.hpp"
9 | #include
10 | #include
11 |
12 | #define CUDA_CHECK(condition) \
13 | /* Code block avoids redefinition of cudaError_t error */ \
14 | do { \
15 | cudaError_t error = condition; \
16 | if (error != cudaSuccess) { \
17 | std::cout << cudaGetErrorString(error) << std::endl; \
18 | } \
19 | } while (0)
20 |
21 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
22 | int const threadsPerBlock = sizeof(unsigned long long) * 8;
23 |
// Intersection over union of two boxes stored as [x1, y1, x2, y2, ...].
// Widths and heights are measured with the "+ 1" convention, and the
// intersection extent is clamped at 0 so disjoint boxes yield 0.
__device__ inline float devIoU(float const * const a, float const * const b) {
  float left = max(a[0], b[0]), right = min(a[2], b[2]);
  float top = max(a[1], b[1]), bottom = min(a[3], b[3]);
  float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f);
  float interS = width * height;
  float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
  float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
  // union = area(a) + area(b) - intersection
  return interS / (Sa + Sb - interS);
}
33 |
// Computes pairwise overlap bit masks between tiles of boxes.
//
// Boxes are processed in tiles of threadsPerBlock. blockIdx.y selects the
// tile of "row" boxes (one per thread) and blockIdx.x the tile of "column"
// boxes they are compared against. For row box r and column tile c, bit i of
// dev_mask[r * col_blocks + c] is set when the IoU of r with the i-th box of
// tile c exceeds nms_overlap_thresh.
//
// NOTE(review): boxes are addressed with a fixed stride of 5 floats, while
// the host-side _nms takes a boxes_dim argument - confirm callers always
// pass 5-column boxes.
__global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
                           const float *dev_boxes, unsigned long long *dev_mask) {
  const int row_start = blockIdx.y;
  const int col_start = blockIdx.x;

  // if (row_start > col_start) return;

  // number of valid boxes in the row / column tile (the last tile may be partial)
  const int row_size =
        min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
  const int col_size =
        min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);

  // stage the column tile in shared memory, one box per thread
  __shared__ float block_boxes[threadsPerBlock * 5];
  if (threadIdx.x < col_size) {
    block_boxes[threadIdx.x * 5 + 0] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
    block_boxes[threadIdx.x * 5 + 1] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
    block_boxes[threadIdx.x * 5 + 2] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
    block_boxes[threadIdx.x * 5 + 3] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
    block_boxes[threadIdx.x * 5 + 4] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
  }
  // all loads must be complete before any thread reads block_boxes
  __syncthreads();

  if (threadIdx.x < row_size) {
    const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
    const float *cur_box = dev_boxes + cur_box_idx * 5;
    int i = 0;
    unsigned long long t = 0;
    int start = 0;
    // on the diagonal tile a box is only compared with the boxes after it,
    // which skips the comparison with itself and with earlier boxes
    if (row_start == col_start) {
      start = threadIdx.x + 1;
    }
    for (i = start; i < col_size; i++) {
      if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
        t |= 1ULL << i;
      }
    }
    const int col_blocks = DIVUP(n_boxes, threadsPerBlock);
    dev_mask[cur_box_idx * col_blocks + col_start] = t;
  }
}
79 |
// Make `device_id` the current CUDA device, skipping the cudaSetDevice
// call when the runtime reports that this device is already current.
void _set_device(int device_id) {
  // Initialise to a value that is not a valid device index: if
  // cudaGetDevice fails (CUDA_CHECK only prints the error) the comparison
  // below must not read an indeterminate value.
  int current_device = -1;
  CUDA_CHECK(cudaGetDevice(&current_device));
  if (current_device == device_id) {
    return;
  }
  CUDA_CHECK(cudaSetDevice(device_id));
}
90 |
// Host entry point for GPU non-maximum suppression.
//
//   keep_out           receives the indices of the kept boxes; must have room
//                      for boxes_num ints.
//   num_out            receives the number of indices written to keep_out.
//   boxes_host         boxes_num * boxes_dim floats in host memory. nms_kernel
//                      reads boxes with a fixed stride of 5 floats, so
//                      boxes_dim is expected to be 5.
//   nms_overlap_thresh IoU above which a later box is suppressed.
//   device_id          CUDA device to run on.
//
// Boxes are visited in index order and a kept box suppresses the boxes after
// it, so the caller decides priority through the order of the rows
// (gpu_nms.pyx passes them sorted by descending score).
void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
          int boxes_dim, float nms_overlap_thresh, int device_id) {
  // Nothing to do for an empty input. Returning here also avoids a kernel
  // launch with a zero-sized grid and `&v[0]` on empty vectors below.
  if (boxes_num <= 0) {
    *num_out = 0;
    return;
  }

  _set_device(device_id);

  float* boxes_dev = NULL;
  unsigned long long* mask_dev = NULL;

  const int col_blocks = DIVUP(boxes_num, threadsPerBlock);

  // Compute sizes in size_t so that large inputs do not overflow int.
  const size_t boxes_bytes =
      static_cast<size_t>(boxes_num) * boxes_dim * sizeof(float);
  const size_t mask_words = static_cast<size_t>(boxes_num) * col_blocks;
  const size_t mask_bytes = mask_words * sizeof(unsigned long long);

  CUDA_CHECK(cudaMalloc(&boxes_dev, boxes_bytes));
  CUDA_CHECK(cudaMemcpy(boxes_dev,
                        boxes_host,
                        boxes_bytes,
                        cudaMemcpyHostToDevice));

  CUDA_CHECK(cudaMalloc(&mask_dev, mask_bytes));

  dim3 blocks(col_blocks, col_blocks);
  dim3 threads(threadsPerBlock);
  nms_kernel<<<blocks, threads>>>(boxes_num,
                                  nms_overlap_thresh,
                                  boxes_dev,
                                  mask_dev);
  // A kernel launch returns no status; query it so that a failed launch is
  // reported instead of silently producing a garbage mask.
  CUDA_CHECK(cudaGetLastError());

  std::vector<unsigned long long> mask_host(mask_words);
  CUDA_CHECK(cudaMemcpy(&mask_host[0],
                        mask_dev,
                        mask_bytes,
                        cudaMemcpyDeviceToHost));

  // One bit per box, set once the box has been suppressed. The vector
  // constructor value-initialises every word to zero.
  std::vector<unsigned long long> remv(col_blocks);

  int num_to_keep = 0;
  for (int i = 0; i < boxes_num; i++) {
    const int nblock = i / threadsPerBlock;
    const int inblock = i % threadsPerBlock;

    if (!(remv[nblock] & (1ULL << inblock))) {
      keep_out[num_to_keep++] = i;
      // Row i of the mask lists the later boxes that overlap box i.
      const unsigned long long* p =
          &mask_host[0] + static_cast<size_t>(i) * col_blocks;
      for (int j = nblock; j < col_blocks; j++) {
        remv[j] |= p[j];
      }
    }
  }
  *num_out = num_to_keep;

  CUDA_CHECK(cudaFree(boxes_dev));
  CUDA_CHECK(cudaFree(mask_dev));
}
145 |
--------------------------------------------------------------------------------
/tf_rfcn_fixed/rpn_tools/fast_rcnn/nms/nms_wrapper.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Multitask Network Cascade
3 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn)
4 | # Copyright (c) 2016, Haozhi Qi
5 | # Licensed under The MIT License [see LICENSE for details]
6 | # --------------------------------------------------------
7 |
8 | from mnc_config import cfg
9 | from gpu_nms import gpu_nms
10 | from cpu_nms import cpu_nms
11 |
12 |
def nms(dets, thresh):
    """Run non-maximum suppression with the configured backend.

    Returns an empty list when ``dets`` has no rows. Otherwise the call is
    forwarded to ``gpu_nms`` (on device ``cfg.GPU_ID``) when
    ``cfg.USE_GPU_NMS`` is truthy, and to ``cpu_nms`` when it is not.
    """
    if dets.shape[0] == 0:
        return []
    if not cfg.USE_GPU_NMS:
        return cpu_nms(dets, thresh)
    return gpu_nms(dets, thresh, device_id=cfg.GPU_ID)
22 |
23 |
def apply_nms(all_boxes, thresh):
    """Apply non-maximum suppression to all predicted boxes output by the
    test_net method.

    ``all_boxes[cls_ind][im_ind]`` holds either an empty placeholder or a
    detections array accepted by ``nms``. The result has the same
    classes-by-images layout; each entry is a copy of the kept rows, or
    ``[]`` when there was nothing to process or nothing was kept.
    """
    num_classes = len(all_boxes)
    num_images = len(all_boxes[0]) if num_classes else 0
    nms_boxes = [[[] for _ in range(num_images)]
                 for _ in range(num_classes)]
    for cls_ind in range(num_classes):
        for im_ind in range(num_images):
            dets = all_boxes[cls_ind][im_ind]
            # len() handles both the [] placeholder and ndarrays, whereas
            # `dets == []` on an ndarray is an elementwise comparison.
            if len(dets) == 0:
                continue
            keep = nms(dets, thresh)
            if len(keep) == 0:
                continue
            nms_boxes[cls_ind][im_ind] = dets[keep, :].copy()
    return nms_boxes
42 |
43 |
def apply_nms_mask(all_boxes, all_masks, thresh):
    """Apply non-maximum suppression to boxes and keep the matching masks.

    ``all_boxes`` and ``all_masks`` are indexed ``[cls_ind][im_ind]``. For
    every entry with detections, ``nms`` selects the rows to keep and the
    same rows are taken from the corresponding masks. Returns the pair
    ``(nms_boxes, nms_masks)`` in the same layout, with ``[]`` wherever
    there was nothing to process or nothing was kept.
    """
    num_classes = len(all_boxes)
    num_images = len(all_boxes[0]) if num_classes else 0
    nms_boxes = [[[] for _ in range(num_images)]
                 for _ in range(num_classes)]
    nms_masks = [[[] for _ in range(num_images)]
                 for _ in range(num_classes)]
    for cls_ind in range(num_classes):
        for im_ind in range(num_images):
            dets = all_boxes[cls_ind][im_ind]
            masks = all_masks[cls_ind][im_ind]
            # len() handles both the [] placeholder and ndarrays, whereas
            # `dets == []` on an ndarray is an elementwise comparison.
            if len(dets) == 0:
                continue
            keep = nms(dets, thresh)
            if len(keep) == 0:
                continue
            nms_boxes[cls_ind][im_ind] = dets[keep, :].copy()
            nms_masks[cls_ind][im_ind] = masks[keep, :].copy()
    return nms_boxes, nms_masks
63 |
64 |
def apply_nms_mask_single(box, mask, thresh):
    """Apply non-maximum suppression to one set of boxes and its masks.

    Returns ``(box, mask)`` unchanged when ``box`` is empty or when ``nms``
    keeps nothing; otherwise returns copies of the kept rows of both.
    """
    # len() handles both an empty list and an empty ndarray, whereas
    # `box == []` on an ndarray is an elementwise comparison.
    if len(box) == 0:
        return box, mask
    keep = nms(box, thresh)
    if len(keep) == 0:
        return box, mask
    return box[keep, :].copy(), mask[keep, :].copy()
72 |
--------------------------------------------------------------------------------
/tf_rfcn_fixed/rpn_tools/fast_rcnn/nms/nms_wrapper.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_fixed/rpn_tools/fast_rcnn/nms/nms_wrapper.pyc
--------------------------------------------------------------------------------
/tf_rfcn_fixed/rpn_tools/fast_rcnn/nms/py_cpu_nms.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Multitask Network Cascade
3 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn)
4 | # Copyright (c) 2016, Haozhi Qi
5 | # Licensed under The MIT License [see LICENSE for details]
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 |
def py_cpu_nms(dets, thresh):
    """Greedy non-maximum suppression written with NumPy only.

    ``dets`` is a 2-D array whose columns 0-3 are x1, y1, x2, y2 and whose
    column 4 is the score. Boxes are visited from highest to lowest score;
    each visited box is kept and every remaining box whose IoU with it is
    greater than ``thresh`` is dropped. Returns the list of kept row indices
    in the order they were selected.
    """
    left, top = dets[:, 0], dets[:, 1]
    right, bottom = dets[:, 2], dets[:, 3]

    box_area = (right - left + 1) * (bottom - top + 1)
    # Candidate row indices, best score first.
    pending = dets[:, 4].argsort()[::-1]

    kept = []
    while pending.size > 0:
        best, rest = pending[0], pending[1:]
        kept.append(best)

        # Intersection of the selected box with every remaining candidate.
        ix1 = np.maximum(left[best], left[rest])
        iy1 = np.maximum(top[best], top[rest])
        ix2 = np.minimum(right[best], right[rest])
        iy2 = np.minimum(bottom[best], bottom[rest])
        inter = np.maximum(0.0, ix2 - ix1 + 1) * np.maximum(0.0, iy2 - iy1 + 1)

        iou = inter / (box_area[best] + box_area[rest] - inter)

        # Only candidates that do not overlap too much stay in the queue.
        pending = rest[iou <= thresh]

    return kept
39 |
--------------------------------------------------------------------------------
/tf_rfcn_fixed/rpn_tools/fast_rcnn/nms_wrapper.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | from config import cfg
9 | from nms.gpu_nms import gpu_nms
10 | from nms.cpu_nms import cpu_nms
11 |
def nms(dets, thresh, force_cpu=False):
    """Run non-maximum suppression with the configured backend.

    Returns an empty list when ``dets`` has no rows. ``gpu_nms`` (on device
    ``cfg.GPU_ID``) is used when ``cfg.USE_GPU_NMS`` is truthy and
    ``force_cpu`` is not set; in every other case ``cpu_nms`` is used.
    """
    if dets.shape[0] == 0:
        return []
    use_gpu = cfg.USE_GPU_NMS and not force_cpu
    if use_gpu:
        return gpu_nms(dets, thresh, device_id=cfg.GPU_ID)
    return cpu_nms(dets, thresh)
21 |
--------------------------------------------------------------------------------
/tf_rfcn_fixed/rpn_tools/fast_rcnn/nms_wrapper.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_fixed/rpn_tools/fast_rcnn/nms_wrapper.pyc
--------------------------------------------------------------------------------
/tf_rfcn_fixed/rpn_tools/fast_rcnn/nms_wrapper.py~:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import cfg
9 | from nms.gpu_nms import gpu_nms
10 | from nms.cpu_nms import cpu_nms
11 |
def nms(dets, thresh, force_cpu=False):
    """Run non-maximum suppression with the configured backend.

    Returns an empty list when ``dets`` has no rows. ``gpu_nms`` (on device
    ``cfg.GPU_ID``) is used when ``cfg.USE_GPU_NMS`` is truthy and
    ``force_cpu`` is not set; in every other case ``cpu_nms`` is used.
    """
    if dets.shape[0] == 0:
        return []
    use_gpu = cfg.USE_GPU_NMS and not force_cpu
    if use_gpu:
        return gpu_nms(dets, thresh, device_id=cfg.GPU_ID)
    return cpu_nms(dets, thresh)
21 |
--------------------------------------------------------------------------------
/tf_rfcn_fixed/rpn_tools/fast_rcnn/test.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_fixed/rpn_tools/fast_rcnn/test.pyc
--------------------------------------------------------------------------------
/tf_rfcn_fixed/rpn_tools/fast_rcnn/train.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | """Train a Fast R-CNN network."""
9 |
10 | import caffe
11 | from fast_rcnn.config import cfg
12 | import roi_data_layer.roidb as rdl_roidb
13 | from utils.timer import Timer
14 | import numpy as np
15 | import os
16 |
17 | from caffe.proto import caffe_pb2
18 | import google.protobuf as pb2
19 |
class SolverWrapper(object):
    """A simple wrapper around Caffe's solver.

    This wrapper gives us control over the snapshotting process, which we
    use to unnormalize the learned bounding-box regression weights.
    """

    def __init__(self, solver_prototxt, roidb, output_dir,
                 pretrained_model=None):
        """Initialize the SolverWrapper.

        Computes bounding-box regression targets when
        ``cfg.TRAIN.BBOX_REG`` is set, builds the SGD solver from
        ``solver_prototxt``, optionally copies weights from
        ``pretrained_model``, parses the solver prototxt into
        ``self.solver_param`` and hands ``roidb`` to the net's first layer.
        """
        self.output_dir = output_dir

        if (cfg.TRAIN.HAS_RPN and cfg.TRAIN.BBOX_REG and
                cfg.TRAIN.BBOX_NORMALIZE_TARGETS):
            # RPN can only use precomputed normalization because there are no
            # fixed statistics to compute a priori
            assert cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED

        if cfg.TRAIN.BBOX_REG:
            print('Computing bounding-box regression targets...')
            self.bbox_means, self.bbox_stds = \
                rdl_roidb.add_bbox_regression_targets(roidb)
            print('done')

        self.solver = caffe.SGDSolver(solver_prototxt)
        if pretrained_model is not None:
            # Format inside the call: `print(...).format(...)` would call
            # .format on None when print is a function.
            print('Loading pretrained model '
                  'weights from {:s}'.format(pretrained_model))
            self.solver.net.copy_from(pretrained_model)

        self.solver_param = caffe_pb2.SolverParameter()
        with open(solver_prototxt, 'rt') as f:
            pb2.text_format.Merge(f.read(), self.solver_param)

        self.solver.net.layers[0].set_roidb(roidb)

    def snapshot(self):
        """Take a snapshot of the network after unnormalizing the learned
        bounding-box regression weights. This enables easy use at test-time.

        Returns the path of the written ``.caffemodel`` file. The in-memory
        weights are restored afterwards, also when saving fails.
        """
        net = self.solver.net

        scale_bbox_params = (cfg.TRAIN.BBOX_REG and
                             cfg.TRAIN.BBOX_NORMALIZE_TARGETS and
                             'bbox_pred' in net.params)

        if scale_bbox_params:
            # save original values
            orig_0 = net.params['bbox_pred'][0].data.copy()
            orig_1 = net.params['bbox_pred'][1].data.copy()

            # scale and shift with bbox reg unnormalization; then save snapshot
            net.params['bbox_pred'][0].data[...] = \
                (net.params['bbox_pred'][0].data *
                 self.bbox_stds[:, np.newaxis])
            net.params['bbox_pred'][1].data[...] = \
                (net.params['bbox_pred'][1].data *
                 self.bbox_stds + self.bbox_means)

        try:
            infix = ('_' + cfg.TRAIN.SNAPSHOT_INFIX
                     if cfg.TRAIN.SNAPSHOT_INFIX != '' else '')
            filename = (self.solver_param.snapshot_prefix + infix +
                        '_iter_{:d}'.format(self.solver.iter) + '.caffemodel')
            filename = os.path.join(self.output_dir, filename)

            net.save(str(filename))
            print('Wrote snapshot to: {:s}'.format(filename))
        finally:
            # Training must continue with the normalized weights even if
            # writing the snapshot raised.
            if scale_bbox_params:
                # restore net to original state
                net.params['bbox_pred'][0].data[...] = orig_0
                net.params['bbox_pred'][1].data[...] = orig_1
        return filename

    def train_model(self, max_iters):
        """Network training loop.

        Steps the solver one iteration at a time until ``max_iters`` is
        reached, snapshotting every ``cfg.TRAIN.SNAPSHOT_ITERS`` iterations
        and once more at the end if the last iteration was not snapshotted.
        Returns the list of snapshot paths.
        """
        last_snapshot_iter = -1
        timer = Timer()
        model_paths = []
        # A display interval of 0 would make the modulo below divide by zero;
        # treat it as "no speed report".
        display = self.solver_param.display
        while self.solver.iter < max_iters:
            # Make one SGD update
            timer.tic()
            self.solver.step(1)
            timer.toc()
            if display and self.solver.iter % (10 * display) == 0:
                print('speed: {:.3f}s / iter'.format(timer.average_time))

            if self.solver.iter % cfg.TRAIN.SNAPSHOT_ITERS == 0:
                last_snapshot_iter = self.solver.iter
                model_paths.append(self.snapshot())

        if last_snapshot_iter != self.solver.iter:
            model_paths.append(self.snapshot())
        return model_paths
113 |
def get_training_roidb(imdb):
    """Returns a roidb (Region of Interest database) for use in training.

    Appends horizontally flipped images to ``imdb`` first when
    ``cfg.TRAIN.USE_FLIPPED`` is set, then prepares the roidb and returns
    ``imdb.roidb``.
    """
    # Single-argument print(...) calls behave the same under Python 2 and 3.
    if cfg.TRAIN.USE_FLIPPED:
        print('Appending horizontally-flipped training examples...')
        imdb.append_flipped_images()
        print('done')

    print('Preparing training data...')
    rdl_roidb.prepare_roidb(imdb)
    print('done')

    return imdb.roidb
126 |
def filter_roidb(roidb):
    """Remove roidb entries that have no usable RoIs.

    An entry is kept when its ``'max_overlaps'`` array contains at least one
    foreground value (``>= cfg.TRAIN.FG_THRESH``) or at least one background
    value (in ``[cfg.TRAIN.BG_THRESH_LO, cfg.TRAIN.BG_THRESH_HI)``). Prints
    how many entries were dropped and returns the filtered list.
    """

    def is_valid(entry):
        # Valid images have:
        #   (1) At least one foreground RoI OR
        #   (2) At least one background RoI
        overlaps = entry['max_overlaps']
        # find boxes with sufficient overlap
        fg_inds = np.where(overlaps >= cfg.TRAIN.FG_THRESH)[0]
        # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
        bg_inds = np.where((overlaps < cfg.TRAIN.BG_THRESH_HI) &
                           (overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
        # image is only valid if such boxes exist
        return len(fg_inds) > 0 or len(bg_inds) > 0

    num = len(roidb)
    filtered_roidb = [entry for entry in roidb if is_valid(entry)]
    num_after = len(filtered_roidb)
    # Single-argument print(...) behaves the same under Python 2 and 3.
    print('Filtered {} roidb entries: {} -> {}'.format(num - num_after,
                                                       num, num_after))
    return filtered_roidb
150 |
def train_net(solver_prototxt, roidb, output_dir,
              pretrained_model=None, max_iters=40000):
    """Train a Fast R-CNN network.

    Filters ``roidb`` with ``filter_roidb``, builds a ``SolverWrapper`` and
    trains for ``max_iters`` iterations. Returns the list of snapshot paths
    produced by ``SolverWrapper.train_model``.
    """

    roidb = filter_roidb(roidb)
    sw = SolverWrapper(solver_prototxt, roidb, output_dir,
                       pretrained_model=pretrained_model)

    # Single-argument print(...) behaves the same under Python 2 and 3.
    print('Solving...')
    model_paths = sw.train_model(max_iters)
    print('done solving')
    return model_paths
163 |
--------------------------------------------------------------------------------
/tf_rfcn_fixed/rpn_tools/fast_rcnn/train.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_fixed/rpn_tools/fast_rcnn/train.pyc
--------------------------------------------------------------------------------
/tf_rfcn_fixed/rpn_tools/generate_anchors.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Faster R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick and Sean Bell
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 |
10 | # Verify that we compute the same anchors as Shaoqing's matlab implementation:
11 | #
12 | # >> load output/rpn_cachedir/faster_rcnn_VOC2007_ZF_stage1_rpn/anchors.mat
13 | # >> anchors
14 | #
15 | # anchors =
16 | #
17 | # -83 -39 100 56
18 | # -175 -87 192 104
19 | # -359 -183 376 200
20 | # -55 -55 72 72
21 | # -119 -119 136 136
22 | # -247 -247 264 264
23 | # -35 -79 52 96
24 | # -79 -167 96 184
25 | # -167 -343 184 360
26 |
27 | #array([[ -83., -39., 100., 56.],
28 | # [-175., -87., 192., 104.],
29 | # [-359., -183., 376., 200.],
30 | # [ -55., -55., 72., 72.],
31 | # [-119., -119., 136., 136.],
32 | # [-247., -247., 264., 264.],
33 | # [ -35., -79., 52., 96.],
34 | # [ -79., -167., 96., 184.],
35 | # [-167., -343., 184., 360.]])
36 |
def generate_anchors(base_size=16, ratios=[0.5, 1, 2],
                     scales=2**np.arange(3, 6)):
    """
    Generate anchor (reference) windows by enumerating aspect ratios X
    scales wrt a reference (0, 0, 15, 15) window.

    Returns one (x1, y1, x2, y2) row per ratio/scale combination, grouped
    by ratio. The defaults are only read, never modified.
    """

    base_anchor = np.array([1, 1, base_size, base_size]) - 1
    ratio_anchors = _ratio_enum(base_anchor, ratios)
    # Iterate the rows directly; `xrange` does not exist in Python 3.
    anchors = np.vstack([_scale_enum(ratio_anchor, scales)
                         for ratio_anchor in ratio_anchors])
    return anchors
49 |
50 | def _whctrs(anchor):
51 | """
52 | Return width, height, x center, and y center for an anchor (window).
53 | """
54 |
55 | w = anchor[2] - anchor[0] + 1
56 | h = anchor[3] - anchor[1] + 1
57 | x_ctr = anchor[0] + 0.5 * (w - 1)
58 | y_ctr = anchor[1] + 0.5 * (h - 1)
59 | return w, h, x_ctr, y_ctr
60 |
61 | def _mkanchors(ws, hs, x_ctr, y_ctr):
62 | """
63 | Given a vector of widths (ws) and heights (hs) around a center
64 | (x_ctr, y_ctr), output a set of anchors (windows).
65 | """
66 |
67 | ws = ws[:, np.newaxis]
68 | hs = hs[:, np.newaxis]
69 | anchors = np.hstack((x_ctr - 0.5 * (ws - 1),
70 | y_ctr - 0.5 * (hs - 1),
71 | x_ctr + 0.5 * (ws - 1),
72 | y_ctr + 0.5 * (hs - 1)))
73 | return anchors
74 |
def _ratio_enum(anchor, ratios):
    """
    Produce one anchor per aspect ratio: widths are the rounded square root
    of area / ratio, heights the rounded width * ratio, and all anchors
    share the center of the given anchor.
    """

    w, h, x_ctr, y_ctr = _whctrs(anchor)
    area = w * h
    widths = np.round(np.sqrt(area / ratios))
    heights = np.round(widths * ratios)
    return _mkanchors(widths, heights, x_ctr, y_ctr)
87 |
def _scale_enum(anchor, scales):
    """
    Produce one anchor per scale by multiplying the width and height of the
    given anchor by each scale while keeping its center.
    """

    w, h, x_ctr, y_ctr = _whctrs(anchor)
    return _mkanchors(w * scales, h * scales, x_ctr, y_ctr)
98 |
if __name__ == '__main__':
    # Manual check: time one call, print the anchors, then open an
    # interactive IPython shell for inspection.
    import time
    t = time.time()
    a = generate_anchors()
    # Single-argument print(...) behaves the same under Python 2 and 3.
    print(time.time() - t)
    print(a)
    from IPython import embed
    embed()
106 |
--------------------------------------------------------------------------------
/tf_rfcn_fixed/rpn_tools/generate_anchors.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_fixed/rpn_tools/generate_anchors.pyc
--------------------------------------------------------------------------------
/tf_rfcn_fixed/rpn_tools/my_anchor_target_layer_modified.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_fixed/rpn_tools/my_anchor_target_layer_modified.pyc
--------------------------------------------------------------------------------
/tf_rfcn_fixed/rpn_tools/nms/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_fixed/rpn_tools/nms/__init__.py
--------------------------------------------------------------------------------
/tf_rfcn_fixed/rpn_tools/nms/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_fixed/rpn_tools/nms/__init__.pyc
--------------------------------------------------------------------------------
/tf_rfcn_fixed/rpn_tools/nms/cpu_nms.pyx:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Multitask Network Cascade
3 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn)
4 | # Copyright (c) 2016, Haozhi Qi
5 | # Licensed under The MIT License [see LICENSE for details]
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 | cimport numpy as np
10 |
cdef inline np.float32_t max(np.float32_t a, np.float32_t b):
    # Larger of the two values; `a` is returned when they compare equal.
    if a >= b:
        return a
    return b
13 |
cdef inline np.float32_t min(np.float32_t a, np.float32_t b):
    # Smaller of the two values; `a` is returned when they compare equal.
    if a <= b:
        return a
    return b
16 |
def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh):
    """Greedy non-maximum suppression over float32 detections.

    Columns 0-3 of ``dets`` are x1, y1, x2, y2 and column 4 is the score.
    Boxes are visited from highest to lowest score; each unsuppressed box is
    kept and suppresses every later box whose IoU with it is ``>= thresh``.
    Returns the list of kept row indices in selection order.
    """
    cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0]
    cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1]
    cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2]
    cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3]
    cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4]

    cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    # argsort returns intp indices; np.int_t (C long) is narrower than that
    # on platforms where long is 32-bit and would fail the buffer check.
    cdef np.ndarray[np.intp_t, ndim=1] order = scores.argsort()[::-1]

    cdef int ndets = dets.shape[0]
    # np.intp instead of the `np.int` alias, which NumPy has removed.
    cdef np.ndarray[np.intp_t, ndim=1] suppressed = \
            np.zeros((ndets), dtype=np.intp)

    # nominal indices
    cdef int _i, _j
    # sorted indices
    cdef int i, j
    # temp variables for box i's (the box currently under consideration)
    cdef np.float32_t ix1, iy1, ix2, iy2, iarea
    # variables for computing overlap with box j (lower scoring box)
    cdef np.float32_t xx1, yy1, xx2, yy2
    cdef np.float32_t w, h
    cdef np.float32_t inter, ovr

    keep = []
    for _i in range(ndets):
        i = order[_i]
        if suppressed[i] == 1:
            continue
        keep.append(i)
        ix1 = x1[i]
        iy1 = y1[i]
        ix2 = x2[i]
        iy2 = y2[i]
        iarea = areas[i]
        for _j in range(_i + 1, ndets):
            j = order[_j]
            if suppressed[j] == 1:
                continue
            xx1 = max(ix1, x1[j])
            yy1 = max(iy1, y1[j])
            xx2 = min(ix2, x2[j])
            yy2 = min(iy2, y2[j])
            w = max(0.0, xx2 - xx1 + 1)
            h = max(0.0, yy2 - yy1 + 1)
            inter = w * h
            ovr = inter / (iarea + areas[j] - inter)
            if ovr >= thresh:
                suppressed[j] = 1

    return keep
69 |
--------------------------------------------------------------------------------
/tf_rfcn_fixed/rpn_tools/nms/cpu_nms.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_fixed/rpn_tools/nms/cpu_nms.so
--------------------------------------------------------------------------------
/tf_rfcn_fixed/rpn_tools/nms/gpu_mv.hpp:
--------------------------------------------------------------------------------
// Prototype of the routine that gpu_mv.pyx declares via
// `cdef extern from "gpu_mv.hpp"` and calls from `mv`.
void _mv(const float* all_boxes,
         const float* all_masks,
         const int all_boxes_num,
         const int* candidate_inds,
         const int* candidate_start,
         const float* candidate_weights,
         const int candidate_num,
         const int image_height,
         const int image_width,
         const int box_dim,
         const int mask_size,
         const int result_num,
         float* finalize_output_mask,
         int* finalize_output_box,
         const int device_id);
5 |
--------------------------------------------------------------------------------
/tf_rfcn_fixed/rpn_tools/nms/gpu_mv.pyx:
--------------------------------------------------------------------------------
1 |
2 | import numpy as np
3 | cimport numpy as np
4 |
# gpu_mv.hpp declares the integer arguments as plain C `int`, while the
# wrapper below passes NumPy int32 buffers; both must have the same width.
assert sizeof(int) == sizeof(np.int32_t)

cdef extern from "gpu_mv.hpp":
    void _mv(np.float32_t* all_boxes,
             np.float32_t* all_masks,
             np.int32_t all_boxes_num,
             np.int32_t* candidate_inds,
             np.int32_t* candidate_start,
             np.float32_t* candidate_weights,
             np.int32_t candidate_num,
             np.int32_t image_height,
             np.int32_t image_width,
             np.int32_t box_dim,
             np.int32_t mask_size,
             np.int32_t result_num,
             np.float32_t* result_mask,
             np.int32_t* result_box,
             np.int32_t device_id)
9 |
10 | # boxes: n * 4
11 | # masks: n * 1 * 21 * 21
12 | # scores: n * 21
def mv(np.ndarray[np.float32_t, ndim=2] all_boxes,
       np.ndarray[np.float32_t, ndim=4] all_masks,
       np.ndarray[np.int32_t, ndim=1] candidate_inds,
       np.ndarray[np.int32_t, ndim=1] candidate_start,
       np.ndarray[np.float32_t, ndim=1] candidate_weights,
       np.int32_t image_height,
       np.int32_t image_width,
       np.int32_t device_id = 0):
    """Call the GPU routine ``_mv`` and return ``(result_mask, result_box)``.

    ``result_mask`` is a zero-initialised float32 array of shape
    ``(len(candidate_start), 1, all_masks.shape[2], all_masks.shape[3])`` and
    ``result_box`` a zero-initialised int32 array of shape
    ``(len(candidate_start), all_boxes.shape[1])``; both are handed to
    ``_mv`` as output buffers.
    """
    # _mv receives raw pointers to the first element of each buffer, which
    # is only meaningful for C-contiguous memory. ascontiguousarray returns
    # the input unchanged when it is already contiguous.
    all_boxes = np.ascontiguousarray(all_boxes)
    all_masks = np.ascontiguousarray(all_masks)
    candidate_inds = np.ascontiguousarray(candidate_inds)
    candidate_start = np.ascontiguousarray(candidate_start)
    candidate_weights = np.ascontiguousarray(candidate_weights)

    cdef int all_box_num = all_boxes.shape[0]
    cdef int boxes_dim = all_boxes.shape[1]
    cdef int mask_size = all_masks.shape[3]
    cdef int candidate_num = candidate_inds.shape[0]
    cdef int result_num = candidate_start.shape[0]
    cdef np.ndarray[np.float32_t, ndim=4] \
        result_mask = np.zeros((result_num, 1, all_masks.shape[2], all_masks.shape[3]), dtype=np.float32)
    cdef np.ndarray[np.int32_t, ndim=2] \
        result_box = np.zeros((result_num, boxes_dim), dtype=np.int32)
    _mv(&all_boxes[0, 0], &all_masks[0, 0, 0, 0], all_box_num,
        &candidate_inds[0], &candidate_start[0], &candidate_weights[0],
        candidate_num, image_height, image_width, boxes_dim, mask_size,
        result_num, &result_mask[0, 0, 0, 0], &result_box[0, 0], device_id)
    return result_mask, result_box
32 |
--------------------------------------------------------------------------------
/tf_rfcn_fixed/rpn_tools/nms/gpu_nms.hpp:
--------------------------------------------------------------------------------
// Prototype of the GPU NMS entry point that gpu_nms.pyx declares via
// `cdef extern from "gpu_nms.hpp"` and calls from `gpu_nms`.
void _nms(int* keep_out,
          int* num_out,
          const float* boxes_host,
          int boxes_num,
          int boxes_dim,
          float nms_overlap_thresh,
          int device_id);
3 |
--------------------------------------------------------------------------------
/tf_rfcn_fixed/rpn_tools/nms/gpu_nms.pyx:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Multitask Network Cascade
3 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn)
4 | # Copyright (c) 2016, Haozhi Qi
5 | # Licensed under The MIT License [see LICENSE for details]
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 | cimport numpy as np
10 |
# gpu_nms.hpp declares keep_out as `int*`, while the wrapper below passes a
# NumPy int32 buffer; both element types must have the same width.
assert sizeof(int) == sizeof(np.int32_t)

cdef extern from "gpu_nms.hpp":
    void _nms(np.int32_t* keep_out,
              int* num_out,
              np.float32_t* boxes_host,
              int boxes_num,
              int boxes_dim,
              float nms_overlap_thresh,
              int device_id)
15 |
def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh,
            np.int32_t device_id=0):
    """Run non-maximum suppression on the GPU through ``_nms``.

    Column 4 of ``dets`` is the score. The rows are sorted by descending
    score before being handed to ``_nms``, and the indices it reports are
    mapped back to rows of ``dets``. Returns the kept row indices as a list;
    an empty list for empty input.
    """
    cdef int boxes_num = dets.shape[0]
    cdef int boxes_dim = dets.shape[1]
    cdef int num_out = 0
    cdef np.ndarray[np.int32_t, ndim=1] \
        keep = np.zeros(boxes_num, dtype=np.int32)
    cdef np.ndarray[np.float32_t, ndim=1] \
        scores = dets[:, 4]
    # argsort returns intp indices; np.int_t (C long) is narrower than that
    # on platforms where long is 32-bit and would fail the buffer check.
    cdef np.ndarray[np.intp_t, ndim=1] \
        order = scores.argsort()[::-1]
    cdef np.ndarray[np.float32_t, ndim=2] \
        sorted_dets = dets[order, :]
    # With no boxes there is no element 0 to take the address of.
    if boxes_num == 0:
        return []
    _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id)
    keep = keep[:num_out]
    return list(order[keep])
32 |
--------------------------------------------------------------------------------
/tf_rfcn_fixed/rpn_tools/nms/gpu_nms.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_fixed/rpn_tools/nms/gpu_nms.so
--------------------------------------------------------------------------------
/tf_rfcn_fixed/rpn_tools/nms/mnc_config.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_fixed/rpn_tools/nms/mnc_config.pyc
--------------------------------------------------------------------------------
/tf_rfcn_fixed/rpn_tools/nms/mv.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_fixed/rpn_tools/nms/mv.so
--------------------------------------------------------------------------------
/tf_rfcn_fixed/rpn_tools/nms/nms_kernel.cu:
--------------------------------------------------------------------------------
1 | // --------------------------------------------------------
2 | // Multitask Network Cascade
3 | // Modified from MATLAB Faster R-CNN (https://github.com/shaoqingren/faster_rcnn)
4 | // Copyright (c) 2016, Haozhi Qi
5 | // Licensed under The MIT License [see LICENSE for details]
6 | // --------------------------------------------------------
7 |
8 | #include "gpu_nms.hpp"
9 | #include
10 | #include
11 |
// Evaluate a CUDA runtime call once and, when it does not return
// cudaSuccess, print the runtime's error string to std::cout. Execution
// continues either way; the macro neither aborts nor throws.
#define CUDA_CHECK(condition)                                       \
  /* do/while scope keeps the status local to each expansion */     \
  do {                                                              \
    const cudaError_t cuda_status_ = condition;                     \
    if (cudaSuccess != cuda_status_) {                              \
      std::cout << cudaGetErrorString(cuda_status_) << std::endl;   \
    }                                                               \
  } while (0)
20 |
// Integer division of m by n, plus one when the remainder is positive.
// Both arguments are expanded twice, so they must be free of side effects.
#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))

// Threads per block: one per bit of an unsigned long long, so that the
// comparisons a thread makes against one column tile fit in a single mask
// word (nms_kernel sets bit i with `1ULL << i`).
const int threadsPerBlock = sizeof(unsigned long long) * 8;
23 |
// Intersection-over-union of two boxes whose first four floats are
// x1, y1, x2, y2. Widths and heights use the "+1" inclusive convention.
__device__ inline float devIoU(float const * const a, float const * const b) {
  // Corners of the intersection rectangle.
  const float ix1 = max(a[0], b[0]);
  const float ix2 = min(a[2], b[2]);
  const float iy1 = max(a[1], b[1]);
  const float iy2 = min(a[3], b[3]);

  // Clamp at zero so that disjoint boxes yield an empty intersection.
  const float iw = max(ix2 - ix1 + 1, 0.f);
  const float ih = max(iy2 - iy1 + 1, 0.f);
  const float inter = iw * ih;

  const float area_a = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
  const float area_b = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
  return inter / (area_a + area_b - inter);
}
33 |
// Fills the pairwise overlap bitmask used by _nms.
//
// The grid is indexed by (column tile, row tile) of threadsPerBlock boxes.
// Each thread owns one box of the row tile, compares it with every box of
// the column tile and writes one mask word in which bit i is set when the
// IoU with column box i exceeds nms_overlap_thresh. Boxes are read with a
// fixed stride of 5 floats.
__global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
                           const float *dev_boxes, unsigned long long *dev_mask) {
  const int row_tile = blockIdx.y;
  const int col_tile = blockIdx.x;
  const int tid = threadIdx.x;

  // The last tile in either direction may be only partially filled.
  const int rows_here =
      min(n_boxes - row_tile * threadsPerBlock, threadsPerBlock);
  const int cols_here =
      min(n_boxes - col_tile * threadsPerBlock, threadsPerBlock);

  // Stage the column tile's boxes in shared memory, one box per thread.
  __shared__ float block_boxes[threadsPerBlock * 5];
  if (tid < cols_here) {
    const float *src = dev_boxes + (threadsPerBlock * col_tile + tid) * 5;
    float *dst = block_boxes + tid * 5;
    for (int k = 0; k < 5; ++k) {
      dst[k] = src[k];
    }
  }
  __syncthreads();

  // Threads beyond the end of the row tile have no box to process.
  if (tid >= rows_here) {
    return;
  }

  const int cur_box_idx = threadsPerBlock * row_tile + tid;
  const float *cur_box = dev_boxes + cur_box_idx * 5;

  // On the diagonal tile a box is only compared with the boxes after it.
  const int first = (row_tile == col_tile) ? tid + 1 : 0;

  unsigned long long bits = 0;
  for (int c = first; c < cols_here; ++c) {
    if (devIoU(cur_box, block_boxes + c * 5) > nms_overlap_thresh) {
      bits |= 1ULL << c;
    }
  }

  const int col_blocks = DIVUP(n_boxes, threadsPerBlock);
  dev_mask[cur_box_idx * col_blocks + col_tile] = bits;
}
79 |
// Makes `device_id` the current CUDA device for the calling host thread.
// Does nothing when that device is already current.
void _set_device(int device_id) {
  int current_device;
  CUDA_CHECK(cudaGetDevice(&current_device));
  if (current_device == device_id) {
    return;
  }
  // The call to cudaSetDevice must come before any calls to Get, which
  // may perform initialization using the GPU.
  CUDA_CHECK(cudaSetDevice(device_id));
}
90 |
// Host entry point for GPU non-maximum suppression.
//
//   keep_out           output: indices of the boxes that survive; up to
//                      boxes_num entries may be written
//   num_out            output: number of indices written to keep_out
//   boxes_host         host array of boxes_num * boxes_dim floats
//   boxes_num          number of boxes
//   boxes_dim          floats per box on the host side (NOTE(review): the
//                      kernel indexes boxes with a fixed stride of 5 --
//                      confirm callers always pass boxes_dim == 5)
//   nms_overlap_thresh IoU above which a later box is suppressed
//   device_id          CUDA device to run on
//
// Boxes are visited in index order, and a kept box suppresses every later box
// it overlaps (presumably the caller passes boxes sorted by descending score;
// verify against the caller).
void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
          int boxes_dim, float nms_overlap_thresh, int device_id) {
  _set_device(device_id);

  // Nothing to suppress. This also avoids a zero-sized kernel launch and
  // taking the address of element 0 of an empty vector below.
  if (boxes_num <= 0) {
    *num_out = 0;
    return;
  }

  float* boxes_dev = NULL;
  unsigned long long* mask_dev = NULL;

  const int col_blocks = DIVUP(boxes_num, threadsPerBlock);
  // Compute sizes in size_t so the products cannot overflow int.
  const size_t boxes_bytes =
      static_cast<size_t>(boxes_num) * boxes_dim * sizeof(float);
  const size_t mask_words = static_cast<size_t>(boxes_num) * col_blocks;

  CUDA_CHECK(cudaMalloc(&boxes_dev, boxes_bytes));
  CUDA_CHECK(cudaMemcpy(boxes_dev,
                        boxes_host,
                        boxes_bytes,
                        cudaMemcpyHostToDevice));

  CUDA_CHECK(cudaMalloc(&mask_dev,
                        mask_words * sizeof(unsigned long long)));

  // One block per (column tile, row tile) pair, one thread per box in a tile.
  dim3 blocks(DIVUP(boxes_num, threadsPerBlock),
              DIVUP(boxes_num, threadsPerBlock));
  dim3 threads(threadsPerBlock);
  nms_kernel<<<blocks, threads>>>(boxes_num,
                                  nms_overlap_thresh,
                                  boxes_dev,
                                  mask_dev);

  std::vector<unsigned long long> mask_host(mask_words);
  CUDA_CHECK(cudaMemcpy(&mask_host[0],
                        mask_dev,
                        sizeof(unsigned long long) * mask_words,
                        cudaMemcpyDeviceToHost));

  // remv holds one bit per box: set once the box has been suppressed.
  // std::vector value-initializes its elements, so it starts all-zero.
  std::vector<unsigned long long> remv(col_blocks);

  int num_to_keep = 0;
  for (int i = 0; i < boxes_num; i++) {
    int nblock = i / threadsPerBlock;
    int inblock = i % threadsPerBlock;

    if (!(remv[nblock] & (1ULL << inblock))) {
      keep_out[num_to_keep++] = i;
      // Fold this box's overlap row into the suppressed set. Only tiles at
      // or after the box's own tile can contain later boxes.
      unsigned long long *p =
          &mask_host[0] + static_cast<size_t>(i) * col_blocks;
      for (int j = nblock; j < col_blocks; j++) {
        remv[j] |= p[j];
      }
    }
  }
  *num_out = num_to_keep;

  CUDA_CHECK(cudaFree(boxes_dev));
  CUDA_CHECK(cudaFree(mask_dev));
}
145 |
--------------------------------------------------------------------------------
/tf_rfcn_fixed/rpn_tools/nms/nms_wrapper.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Multitask Network Cascade
3 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn)
4 | # Copyright (c) 2016, Haozhi Qi
5 | # Licensed under The MIT License [see LICENSE for details]
6 | # --------------------------------------------------------
7 |
8 | from mnc_config import cfg
9 | from gpu_nms import gpu_nms
10 | from cpu_nms import cpu_nms
11 |
12 |
def nms(dets, thresh):
    """Run non-maximum suppression with the configured backend.

    Returns an empty list when `dets` has no rows; otherwise forwards to
    `gpu_nms` (on device `cfg.GPU_ID`) when `cfg.USE_GPU_NMS` is set, and to
    `cpu_nms` when it is not.
    """
    if dets.shape[0] == 0:
        return []
    if not cfg.USE_GPU_NMS:
        return cpu_nms(dets, thresh)
    return gpu_nms(dets, thresh, device_id=cfg.GPU_ID)
22 |
23 |
def apply_nms(all_boxes, thresh):
    """Apply non-maximum suppression to every (class, image) detection set.

    Args:
        all_boxes: nested sequence indexed as all_boxes[cls][image]; each
            entry is either empty or an array of detections that `nms`
            accepts and that supports `dets[keep, :]` row selection.
        thresh: overlap threshold forwarded to `nms`.

    Returns:
        A nested list with the same [cls][image] layout. Entries with no
        input detections, or with nothing kept, are empty lists; the others
        are copies of the kept rows.
    """
    num_classes = len(all_boxes)
    num_images = len(all_boxes[0])
    nms_boxes = [[[] for _ in range(num_images)]
                 for _ in range(num_classes)]
    for cls_ind in range(num_classes):
        for im_ind in range(num_images):
            dets = all_boxes[cls_ind][im_ind]
            # len() works for both lists and ndarrays; `dets == []` on an
            # ndarray is an elementwise comparison, not an emptiness test.
            if len(dets) == 0:
                continue
            keep = nms(dets, thresh)
            if len(keep) == 0:
                continue
            nms_boxes[cls_ind][im_ind] = dets[keep, :].copy()
    return nms_boxes
42 |
43 |
def apply_nms_mask(all_boxes, all_masks, thresh):
    """Apply NMS to boxes and keep the masks of the surviving boxes.

    Args:
        all_boxes: nested sequence indexed as all_boxes[cls][image]; each
            entry is either empty or an array of detections accepted by `nms`.
        all_masks: same layout as `all_boxes`; entry rows correspond to the
            detection rows.
        thresh: overlap threshold forwarded to `nms`.

    Returns:
        (nms_boxes, nms_masks): nested lists with the same layout. Entries
        with no input detections, or with nothing kept, are empty lists.
    """
    num_classes = len(all_boxes)
    num_images = len(all_boxes[0])
    nms_boxes = [[[] for _ in range(num_images)]
                 for _ in range(num_classes)]
    nms_masks = [[[] for _ in range(num_images)]
                 for _ in range(num_classes)]
    for cls_ind in range(num_classes):
        for im_ind in range(num_images):
            dets = all_boxes[cls_ind][im_ind]
            masks = all_masks[cls_ind][im_ind]
            # len() works for both lists and ndarrays; `dets == []` on an
            # ndarray is an elementwise comparison, not an emptiness test.
            if len(dets) == 0:
                continue
            keep = nms(dets, thresh)
            if len(keep) == 0:
                continue
            nms_boxes[cls_ind][im_ind] = dets[keep, :].copy()
            nms_masks[cls_ind][im_ind] = masks[keep, :].copy()
    return nms_boxes, nms_masks
63 |
64 |
def apply_nms_mask_single(box, mask, thresh):
    """Apply NMS to one set of boxes and select the matching masks.

    Returns the inputs unchanged when `box` is empty or when `nms` keeps
    nothing; otherwise returns copies of the kept rows of `box` and `mask`.
    """
    # len() works for both lists and ndarrays; `box == []` on an ndarray is
    # an elementwise comparison, not an emptiness test.
    if len(box) == 0:
        return box, mask
    keep = nms(box, thresh)
    if len(keep) == 0:
        return box, mask
    return box[keep, :].copy(), mask[keep, :].copy()
72 |
--------------------------------------------------------------------------------
/tf_rfcn_fixed/rpn_tools/nms/nms_wrapper.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_fixed/rpn_tools/nms/nms_wrapper.pyc
--------------------------------------------------------------------------------
/tf_rfcn_fixed/rpn_tools/nms/py_cpu_nms.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Multitask Network Cascade
3 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn)
4 | # Copyright (c) 2016, Haozhi Qi
5 | # Licensed under The MIT License [see LICENSE for details]
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 |
def py_cpu_nms(dets, thresh):
    """Greedy non-maximum suppression written with NumPy only.

    Columns 0-3 of `dets` are read as x1, y1, x2, y2 and column 4 as the
    score. Boxes are visited from highest to lowest score; each visited box
    is kept and every remaining box whose IoU with it exceeds `thresh` is
    dropped. Returns the list of kept row indices in visiting order.
    """
    left, top, right, bottom, conf = (dets[:, col] for col in range(5))

    # Inclusive-pixel box areas.
    box_area = (right - left + 1) * (bottom - top + 1)
    remaining = conf.argsort()[::-1]

    selected = []
    while remaining.size > 0:
        best, rest = remaining[0], remaining[1:]
        selected.append(best)

        # Intersection of the best box with every other remaining box.
        ix1 = np.maximum(left[best], left[rest])
        iy1 = np.maximum(top[best], top[rest])
        ix2 = np.minimum(right[best], right[rest])
        iy2 = np.minimum(bottom[best], bottom[rest])
        overlap_area = (np.maximum(0.0, ix2 - ix1 + 1) *
                        np.maximum(0.0, iy2 - iy1 + 1))

        iou = overlap_area / (box_area[best] + box_area[rest] - overlap_area)

        # Only boxes that do not overlap too much survive to the next round.
        remaining = rest[np.where(iou <= thresh)[0]]

    return selected
39 |
--------------------------------------------------------------------------------
/tf_rfcn_fixed/rpn_tools/proposal_layer_modified.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_fixed/rpn_tools/proposal_layer_modified.pyc
--------------------------------------------------------------------------------
/tf_rfcn_fixed/rpn_tools/proposal_target_layer_modified.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_fixed/rpn_tools/proposal_target_layer_modified.pyc
--------------------------------------------------------------------------------
/tf_rfcn_fixed/rpn_tools/roi_pooling_op_grad.py~:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from tensorflow.python.framework import ops
3 |
@tf.RegisterShape("RoiPool")
def _roi_pool_shape(op):
    """Shape function for the RoiPool op.

    Both outputs get the shape
    [dim 0 of input 1, pooled_height, pooled_width, dim 3 of input 0],
    where input 0 is the data tensor and input 1 the rois tensor.
    """
    feature_dims = op.inputs[0].get_shape().as_list()
    roi_dims = op.inputs[1].get_shape().as_list()

    pooled_shape = tf.TensorShape([
        roi_dims[0],
        op.get_attr('pooled_height'),
        op.get_attr('pooled_width'),
        feature_dims[3],
    ])
    return [pooled_shape, pooled_shape]
20 |
@ops.RegisterGradient("RoiPool")
def _roi_pool_grad(op, grad, _):
    """Gradient of the `RoiPool` op.

    Args:
      op: The `RoiPool` `Operation` being differentiated; its inputs,
        second output and attributes are read here.
      grad: Gradient with respect to the first output of the op.
      _: Gradient with respect to the second output; ignored.
    Returns:
      A two-element list, one entry per op input: the gradient with respect
      to the data input, and None for the rois input.
    """
    features, rois = op.inputs[0], op.inputs[1]
    argmax = op.outputs[1]
    pool_attrs = [op.get_attr(key) for key in
                  ('pooled_height', 'pooled_width', 'spatial_scale')]

    # NOTE(review): absolute, machine-specific path to the compiled op
    # library; it is loaded on every call of this gradient function.
    roi_pooling = tf.load_op_library('/home/alfonso/tensorflow/bazel-bin/tensorflow/core/user_ops/roi_pooling.so')

    features_grad = roi_pooling.roi_pool_grad(features, rois, argmax, grad,
                                              *pool_attrs)

    return [features_grad, None]
45 |
--------------------------------------------------------------------------------
/tf_rfcn_fixed/rpn_tools/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_fixed/rpn_tools/utils/__init__.py
--------------------------------------------------------------------------------
/tf_rfcn_fixed/rpn_tools/utils/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_fixed/rpn_tools/utils/__init__.pyc
--------------------------------------------------------------------------------
/tf_rfcn_fixed/rpn_tools/utils/bbox.pyx:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Multitask Network Cascade
3 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn)
4 | # Copyright (c) 2016, Haozhi Qi
5 | # Licensed under The MIT License [see LICENSE for details]
6 | # --------------------------------------------------------
7 |
8 | cimport cython
9 | import numpy as np
10 | cimport numpy as np
11 |
# `np.float` was only an alias for the builtin `float` and was removed in
# NumPy 1.24; `np.float64` names the same 64-bit dtype explicitly.
DTYPE = np.float64
ctypedef np.float64_t DTYPE_t
14 |
def bbox_overlaps(
        np.ndarray[DTYPE_t, ndim=2] boxes,
        np.ndarray[DTYPE_t, ndim=2] query_boxes):
    """
    Pairwise intersection-over-union between two sets of boxes.

    Parameters
    ----------
    boxes: (N, 4) ndarray of float
    query_boxes: (K, 4) ndarray of float
    Returns
    -------
    overlaps: (N, K) ndarray of overlap between boxes and query_boxes;
        entries for non-intersecting pairs stay 0
    """
    cdef unsigned int num_boxes = boxes.shape[0]
    cdef unsigned int num_queries = query_boxes.shape[0]
    cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros(
        (num_boxes, num_queries), dtype=DTYPE)
    cdef DTYPE_t inter_w, inter_h, query_area
    cdef DTYPE_t union_area
    cdef unsigned int q, b
    for q in range(num_queries):
        # Inclusive-pixel area of the query box, reused for every box.
        query_area = (
            (query_boxes[q, 2] - query_boxes[q, 0] + 1) *
            (query_boxes[q, 3] - query_boxes[q, 1] + 1)
        )
        for b in range(num_boxes):
            inter_w = (
                min(boxes[b, 2], query_boxes[q, 2]) -
                max(boxes[b, 0], query_boxes[q, 0]) + 1
            )
            # Written as `not (x > 0)` so the test matches the positive-only
            # condition exactly, including for NaN.
            if not (inter_w > 0):
                continue
            inter_h = (
                min(boxes[b, 3], query_boxes[q, 3]) -
                max(boxes[b, 1], query_boxes[q, 1]) + 1
            )
            if not (inter_h > 0):
                continue
            union_area = float(
                (boxes[b, 2] - boxes[b, 0] + 1) *
                (boxes[b, 3] - boxes[b, 1] + 1) +
                query_area - inter_w * inter_h
            )
            overlaps[b, q] = inter_w * inter_h / union_area
    return overlaps
56 |
--------------------------------------------------------------------------------
/tf_rfcn_fixed/rpn_tools/utils/blob.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Multitask Network Cascade
3 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn)
4 | # Copyright (c) 2016, Haozhi Qi
5 | # Licensed under The MIT License [see LICENSE for details]
6 | # --------------------------------------------------------
7 |
8 | """Blob helper functions."""
9 |
10 | import numpy as np
11 | import random
12 | import cv2
13 | from utils.cython_bbox import bbox_overlaps
14 | from mnc_config import cfg
15 |
16 |
def im_list_to_blob(ims):
    """
    Convert a list of images into a network input.
    Assumes images are already prepared (means subtracted, BGR order, ...).

    Each image is copied into the top-left corner of a zero-filled float32
    slot sized to the largest height and width in the list; the remainder of
    the slot stays zero. The result has axis order
    (batch elem, channel, height, width).
    """
    max_shape = np.array([im.shape for im in ims]).max(axis=0)
    blob = np.zeros((len(ims), max_shape[0], max_shape[1], 3),
                    dtype=np.float32)
    for i, im in enumerate(ims):
        blob[i, 0:im.shape[0], 0:im.shape[1], :] = im
    # Move channels (axis 3) to axis 1.
    return blob.transpose((0, 3, 1, 2))
34 |
35 |
def prep_im_for_blob(im, pixel_means, target_size, max_size):
    """Subtract the pixel means and rescale an image for use in a blob.

    The scale maps the shorter side to `target_size`, unless that would make
    the longer side round to more than `max_size`, in which case the longer
    side is mapped to `max_size` instead. Returns (resized image, scale).
    """
    # copy=False: an input that is already float32 is modified in place by
    # the mean subtraction below.
    im = im.astype(np.float32, copy=False)
    im -= pixel_means

    side_lengths = im.shape[0:2]
    short_side = np.min(side_lengths)
    long_side = np.max(side_lengths)

    im_scale = float(target_size) / float(short_side)
    # Prevent the biggest axis from being more than MAX_SIZE
    if np.round(im_scale * long_side) > max_size:
        im_scale = float(max_size) / float(long_side)

    resized = cv2.resize(im, None, None, fx=im_scale, fy=im_scale,
                         interpolation=cv2.INTER_LINEAR)
    return resized, im_scale
51 |
52 |
def prep_im_for_blob_cfm(im, input_scales):
    """Build a multi-scale network input from one image.
    Arguments:
        im (ndarray): a color image in BGR order
        input_scales: target sizes for the shorter image side, one per
            pyramid level
    Returns:
        blob (ndarray): a data blob holding an image pyramid
        im_scale_factors (ndarray): image scales (relative to im) used
            in the image pyramid
    """
    # Work on a float32 copy so the caller's image is left untouched.
    mean_subtracted = im.astype(np.float32, copy=True)
    mean_subtracted -= cfg.PIXEL_MEANS

    short_side = np.min(mean_subtracted.shape[0:2])
    long_side = np.max(mean_subtracted.shape[0:2])

    pyramid = []
    scales = []
    for target_size in input_scales:
        scale = float(target_size) / float(short_side)
        # Prevent the biggest axis from being more than MAX_SIZE
        if np.round(scale * long_side) > cfg.TEST.MAX_SIZE:
            scale = float(cfg.TEST.MAX_SIZE) / float(long_side)
        level = cv2.resize(mean_subtracted, None, None, fx=scale, fy=scale,
                           interpolation=cv2.INTER_LINEAR)
        scales.append(scale)
        pyramid.append(level)

    # Create a blob to hold the input images
    return im_list_to_blob(pyramid), np.array(scales)
86 |
87 |
def pred_rois_for_blob(im_rois, im_scales):
    """
    Convert rois to network input
    support multi-scale testing

    Args:
        im_rois: (N, 4) array of boxes as x1, y1, x2, y2.
        im_scales: 1-D ndarray of image scales.

    Returns:
        (N, 5) float array: column 0 is the index of the scale assigned to
        each roi, columns 1-4 are the roi coordinates multiplied by that
        scale. With several scales, each roi gets the scale whose scaled
        area is closest to 224 * 224; with one scale every roi gets index 0.
    """
    # The builtins `float` / `int` are what the removed `np.float` / `np.int`
    # aliases referred to, so the dtypes are unchanged.
    im_rois = im_rois.astype(float, copy=False)
    if len(im_scales) > 1:
        widths = im_rois[:, 2] - im_rois[:, 0] + 1
        heights = im_rois[:, 3] - im_rois[:, 1] + 1

        areas = widths * heights
        scaled_areas = areas[:, np.newaxis] * (im_scales[np.newaxis, :] ** 2)
        diff_areas = np.abs(scaled_areas - 224 * 224)
        levels = diff_areas.argmin(axis=1)[:, np.newaxis]
    else:
        levels = np.zeros((im_rois.shape[0], 1), dtype=int)
    # im_scales[levels] has shape (N, 1) and broadcasts over the 4 coords.
    im_rois = im_rois * im_scales[levels]
    rois_blob = np.hstack((levels.astype(float), im_rois))
    return rois_blob
107 |
108 |
--------------------------------------------------------------------------------
/tf_rfcn_fixed/rpn_tools/utils/blob.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_fixed/rpn_tools/utils/blob.pyc
--------------------------------------------------------------------------------
/tf_rfcn_fixed/rpn_tools/utils/cython_bbox.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_fixed/rpn_tools/utils/cython_bbox.so
--------------------------------------------------------------------------------
/tf_rfcn_fixed/rpn_tools/utils/mnc_config.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_fixed/rpn_tools/utils/mnc_config.pyc
--------------------------------------------------------------------------------
/tf_rfcn_fixed/rpn_tools/utils/timer.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Multitask Network Cascade
3 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn)
4 | # Copyright (c) 2016, Haozhi Qi
5 | # Licensed under The MIT License [see LICENSE for details]
6 | # --------------------------------------------------------
7 |
8 | import time
9 |
10 |
class Timer(object):
    """Accumulating wall-clock stopwatch.

    Call `tic()` to start an interval and `toc()` to end it. The instance
    tracks the last interval (`diff`), the number of completed intervals
    (`calls`), their sum (`total_time`) and their mean (`average_time`).
    """

    def __init__(self):
        self.total_time = 0.
        self.calls = 0
        self.start_time = 0.
        self.diff = 0.
        self.average_time = 0.

    def tic(self):
        """Record the start of an interval."""
        # time.time is used instead of time.clock because time.clock
        # does not normalize for multithreading
        self.start_time = time.time()

    def toc(self, average=True):
        """End the interval; return the running mean or the last duration."""
        elapsed = time.time() - self.start_time
        self.diff = elapsed
        self.total_time += elapsed
        self.calls += 1
        self.average_time = self.total_time / self.calls
        return self.average_time if average else self.diff
34 |
--------------------------------------------------------------------------------
/tf_rfcn_fixed/rpn_tools/utils/timer.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_fixed/rpn_tools/utils/timer.pyc
--------------------------------------------------------------------------------
/tf_rfcn_fixed/rpn_tools/utils/unmap.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Multitask Network Cascade
3 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn)
4 | # Copyright (c) 2016, Haozhi Qi
5 | # Licensed under The MIT License [see LICENSE for details]
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 |
10 |
def unmap(data, count, inds, fill=0):
    """Scatter a subset of items back into an array covering the full set.

    Returns a float32 array with `count` entries along the first axis (and
    the trailing shape of `data`), filled with `fill` everywhere except at
    positions `inds`, which receive `data`.
    """
    full_shape = (count, ) + data.shape[1:]
    ret = np.full(full_shape, fill, dtype=np.float32)
    if data.ndim == 1:
        ret[inds] = data
    else:
        ret[inds, :] = data
    return ret
23 |
--------------------------------------------------------------------------------
/tf_rfcn_fixed/rpn_tools/utils/unmap.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_fixed/rpn_tools/utils/unmap.pyc
--------------------------------------------------------------------------------
/tf_rfcn_fixed/rpn_tools/utils/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
--------------------------------------------------------------------------------
/tf_rfcn_fixed/rpn_tools/utils/utils/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_fixed/rpn_tools/utils/utils/__init__.pyc
--------------------------------------------------------------------------------
/tf_rfcn_fixed/rpn_tools/utils/utils/bbox.pyx:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Sergey Karayev
6 | # --------------------------------------------------------
7 |
8 | cimport cython
9 | import numpy as np
10 | cimport numpy as np
11 |
# `np.float` was only an alias for the builtin `float` and was removed in
# NumPy 1.24; `np.float64` names the same 64-bit dtype explicitly.
DTYPE = np.float64
ctypedef np.float64_t DTYPE_t
14 |
def bbox_overlaps(
        np.ndarray[DTYPE_t, ndim=2] boxes,
        np.ndarray[DTYPE_t, ndim=2] query_boxes):
    """
    Pairwise intersection-over-union between two sets of boxes.

    Parameters
    ----------
    boxes: (N, 4) ndarray of float
    query_boxes: (K, 4) ndarray of float
    Returns
    -------
    overlaps: (N, K) ndarray of overlap between boxes and query_boxes;
        entries for non-intersecting pairs stay 0
    """
    cdef unsigned int num_boxes = boxes.shape[0]
    cdef unsigned int num_queries = query_boxes.shape[0]
    cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros(
        (num_boxes, num_queries), dtype=DTYPE)
    cdef DTYPE_t inter_w, inter_h, query_area
    cdef DTYPE_t union_area
    cdef unsigned int q, b
    for q in range(num_queries):
        # Inclusive-pixel area of the query box, reused for every box.
        query_area = (
            (query_boxes[q, 2] - query_boxes[q, 0] + 1) *
            (query_boxes[q, 3] - query_boxes[q, 1] + 1)
        )
        for b in range(num_boxes):
            inter_w = (
                min(boxes[b, 2], query_boxes[q, 2]) -
                max(boxes[b, 0], query_boxes[q, 0]) + 1
            )
            # Written as `not (x > 0)` so the test matches the positive-only
            # condition exactly, including for NaN.
            if not (inter_w > 0):
                continue
            inter_h = (
                min(boxes[b, 3], query_boxes[q, 3]) -
                max(boxes[b, 1], query_boxes[q, 1]) + 1
            )
            if not (inter_h > 0):
                continue
            union_area = float(
                (boxes[b, 2] - boxes[b, 0] + 1) *
                (boxes[b, 3] - boxes[b, 1] + 1) +
                query_area - inter_w * inter_h
            )
            overlaps[b, q] = inter_w * inter_h / union_area
    return overlaps
56 |
--------------------------------------------------------------------------------
/tf_rfcn_fixed/rpn_tools/utils/utils/blob.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | """Blob helper functions."""
9 |
10 | import numpy as np
11 | import cv2
12 |
def im_list_to_blob(ims):
    """Convert a list of images into a network input.

    Assumes images are already prepared (means subtracted, BGR order, ...).

    Each image is copied into the top-left corner of a zero-filled float32
    slot sized to the largest height and width in the list; the remainder of
    the slot stays zero. The result has axis order
    (batch elem, channel, height, width).
    """
    max_shape = np.array([im.shape for im in ims]).max(axis=0)
    blob = np.zeros((len(ims), max_shape[0], max_shape[1], 3),
                    dtype=np.float32)
    for i, im in enumerate(ims):
        blob[i, 0:im.shape[0], 0:im.shape[1], :] = im
    # Move channels (axis 3) to axis 1.
    return blob.transpose((0, 3, 1, 2))
30 |
def prep_im_for_blob(im, pixel_means, target_size, max_size):
    """Subtract the pixel means and rescale an image for use in a blob.

    The scale maps the shorter side to `target_size`, unless that would make
    the longer side round to more than `max_size`, in which case the longer
    side is mapped to `max_size` instead. Returns (resized image, scale).
    """
    # copy=False: an input that is already float32 is modified in place by
    # the mean subtraction below.
    im = im.astype(np.float32, copy=False)
    im -= pixel_means

    side_lengths = im.shape[0:2]
    short_side = np.min(side_lengths)
    long_side = np.max(side_lengths)

    im_scale = float(target_size) / float(short_side)
    # Prevent the biggest axis from being more than MAX_SIZE
    if np.round(im_scale * long_side) > max_size:
        im_scale = float(max_size) / float(long_side)

    resized = cv2.resize(im, None, None, fx=im_scale, fy=im_scale,
                         interpolation=cv2.INTER_LINEAR)
    return resized, im_scale
46 |
--------------------------------------------------------------------------------
/tf_rfcn_fixed/rpn_tools/utils/utils/blob.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_fixed/rpn_tools/utils/utils/blob.pyc
--------------------------------------------------------------------------------
/tf_rfcn_fixed/rpn_tools/utils/utils/timer.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import time
9 |
class Timer(object):
    """Accumulating wall-clock stopwatch.

    Call `tic()` to start an interval and `toc()` to end it. The instance
    tracks the last interval (`diff`), the number of completed intervals
    (`calls`), their sum (`total_time`) and their mean (`average_time`).
    """

    def __init__(self):
        self.total_time = 0.
        self.calls = 0
        self.start_time = 0.
        self.diff = 0.
        self.average_time = 0.

    def tic(self):
        """Record the start of an interval."""
        # time.time is used instead of time.clock because time.clock
        # does not normalize for multithreading
        self.start_time = time.time()

    def toc(self, average=True):
        """End the interval; return the running mean or the last duration."""
        elapsed = time.time() - self.start_time
        self.diff = elapsed
        self.total_time += elapsed
        self.calls += 1
        self.average_time = self.total_time / self.calls
        return self.average_time if average else self.diff
33 |
--------------------------------------------------------------------------------
/tf_rfcn_fixed/rpn_tools/utils/utils/timer.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alfonsolink/tf_rfcn/8c48ef65c2740388ca227872de333634cfe53328/tf_rfcn_fixed/rpn_tools/utils/utils/timer.pyc
--------------------------------------------------------------------------------
/tf_rfcn_fixed/rpn_tools/utils/vis_seg.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Multitask Network Cascade
3 | # Written by Haozhi Qi
4 | # Copyright (c) 2016, Haozhi Qi
5 | # Licensed under The MIT License [see LICENSE for details]
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 | import cPickle
10 | import os
11 | import cv2
12 | import Image
13 | from mnc_config import cfg
14 |
15 |
def vis_seg(img_names, cls_names, output_dir, gt_dir):
    """
    Plot segmentation results to a specific directory.

    For every image name this writes three files under `output_dir`:
    an instance map (SegInst/<name>.jpg), a class map (SegCls/<name>.jpg)
    and the class map blended over the source image (SegRes/<name>.png).

    Args:
        img_names: list of image names (without extension)
        cls_names: class names, forwarded to `_prepare_dict`
        output_dir: existing directory that holds the cached results read by
            `_prepare_dict` and receives the rendered images
        gt_dir: directory whose `img/` sub-directory holds `<name>.jpg`
    """
    assert os.path.exists(output_dir)
    inst_dir = os.path.join(output_dir, 'SegInst')
    cls_dir = os.path.join(output_dir, 'SegCls')
    res_dir = os.path.join(output_dir, 'SegRes')
    for sub_dir in (inst_dir, cls_dir, res_dir):
        if not os.path.isdir(sub_dir):
            os.mkdir(sub_dir)

    # a list of dictionary
    res_list = _prepare_dict(img_names, cls_names, output_dir)
    # The palette does not depend on the image, so build it once.
    color_map = _get_voc_color_map()
    for img_ind, image_name in enumerate(img_names):
        target_inst_file = os.path.join(inst_dir, image_name + '.jpg')
        target_cls_file = os.path.join(cls_dir, image_name + '.jpg')
        print(image_name)
        gt_image = gt_dir + '/img/' + image_name + '.jpg'
        img_data = cv2.imread(gt_image)
        img_width = img_data.shape[1]
        img_height = img_data.shape[0]
        pred_dict = res_list[img_ind]
        inst_img, cls_img = _convert_pred_to_image(img_width, img_height, pred_dict)

        # Look up every pixel's colour in one indexing step; the last axis is
        # reversed because the palette is stored as r, g, b and the images
        # are written with cv2.
        inst_out_img = np.ascontiguousarray(color_map[inst_img][:, :, ::-1])
        cls_out_img = np.ascontiguousarray(color_map[cls_img][:, :, ::-1])

        cv2.imwrite(target_inst_file, inst_out_img)
        cv2.imwrite(target_cls_file, cls_out_img)
        background = Image.open(gt_image)
        mask = Image.open(target_cls_file)
        background = background.convert('RGBA')
        mask = mask.convert('RGBA')
        superimpose_image = Image.blend(background, mask, 0.8)
        name = os.path.join(res_dir, image_name + '.png')
        superimpose_image.save(name, 'PNG')
62 |
63 |
def _prepare_dict(img_names, cls_names, cache_dir, vis_thresh=0.5):
    """
    Collect, per image, the detections whose score reaches `vis_thresh`.

    Reads `res_boxes.pkl` and `res_masks.pkl` from `cache_dir`; both are
    indexed as [cls_ind][img_ind]. The '__background__' class and classes
    without detections for an image are skipped.

    Returns:
        list with one dictionary per image, holding the image name and
        parallel lists of class indices, boxes and masks
    """
    def _load(file_name):
        # NOTE(review): unpickling executes arbitrary code; only point this
        # at cache files produced by a trusted run.
        with open(os.path.join(cache_dir, file_name), 'rb') as f:
            return cPickle.load(f)

    det_pkl = _load('res_boxes.pkl')
    seg_pkl = _load('res_masks.pkl')

    res_list = []
    for img_ind, image_name in enumerate(img_names):
        kept_boxes = []
        kept_masks = []
        kept_classes = []
        for cls_ind, cls_name in enumerate(cls_names):
            if cls_name == '__background__':
                continue
            det_for_img = det_pkl[cls_ind][img_ind]
            if len(det_for_img) == 0:
                continue
            seg_for_img = seg_pkl[cls_ind][img_ind]
            # The last column of a detection row is compared to the threshold.
            for keep in np.where(det_for_img[:, -1] >= vis_thresh)[0]:
                kept_boxes.append(det_for_img[keep])
                # TODO: remove this annoying 0
                kept_masks.append(seg_for_img[keep][0])
                kept_classes.append(cls_ind)
        res_list.append({'image_name': image_name,
                         'cls_name': kept_classes,
                         'boxes': kept_boxes,
                         'masks': kept_masks})

    return res_list
99 |
100 |
101 | def _convert_pred_to_image(img_width, img_height, pred_dict):
102 | num_inst = len(pred_dict['boxes'])
103 | inst_img = np.zeros((img_height, img_width))
104 | cls_img = np.zeros((img_height, img_width))
105 | for i in xrange(num_inst):
106 | box = np.round(pred_dict['boxes'][i]).astype(int)
107 | mask = pred_dict['masks'][i]
108 | cls_num = pred_dict['cls_name'][i]
109 | # clip box into image space
110 | box[0] = min(max(box[0], 0), img_width - 1)
111 | box[1] = min(max(box[1], 0), img_height - 1)
112 | box[2] = min(max(box[2], 0), img_width - 1)
113 | box[3] = min(max(box[3], 0), img_height - 1)
114 | mask = cv2.resize(mask.astype(np.float32), (box[2]-box[0]+1, box[3]-box[1]+1))
115 | mask = mask >= cfg.BINARIZE_THRESH
116 |
117 | part1 = (i+1) * mask.astype(np.float32)
118 | part2 = np.multiply(np.logical_not(mask), inst_img[box[1]:box[3]+1, box[0]:box[2]+1])
119 | part3 = np.multiply(np.logical_not(mask), cls_img[box[1]:box[3]+1, box[0]:box[2]+1])
120 | inst_img[box[1]:box[3]+1, box[0]:box[2]+1] = part1 + part2
121 | cls_img[box[1]:box[3]+1, box[0]:box[2]+1] = cls_num * mask.astype(np.float32) + part3
122 | # Plot bounding boxes simultaneously
123 | cls_img[box[1]:box[3]+1, box[0]-1:box[0]+1] = 150
124 | cls_img[box[1]:box[3]+1, box[2]-1:box[2]+1] = 150
125 | cls_img[box[1]-1:box[1]+1, box[0]:box[2]+1] = 150
126 | cls_img[box[3]-1:box[3]+1, box[0]:box[2]+1] = 150
127 |
128 | inst_img = inst_img.astype(int)
129 | cls_img = cls_img.astype(int)
130 | return inst_img, cls_img
131 |
132 |
133 | def _get_voc_color_map(n=256):
134 | color_map = np.zeros((n, 3))
135 | for i in xrange(n):
136 | r = b = g = 0
137 | cid = i
138 | for j in xrange(0, 8):
139 | r = np.bitwise_or(r, np.left_shift(np.unpackbits(np.array([cid], dtype=np.uint8))[-1], 7-j))
140 | g = np.bitwise_or(g, np.left_shift(np.unpackbits(np.array([cid], dtype=np.uint8))[-2], 7-j))
141 | b = np.bitwise_or(b, np.left_shift(np.unpackbits(np.array([cid], dtype=np.uint8))[-3], 7-j))
142 | cid = np.right_shift(cid, 3)
143 |
144 | color_map[i][0] = r
145 | color_map[i][1] = g
146 | color_map[i][2] = b
147 | return color_map
148 |
--------------------------------------------------------------------------------