├── DetNet.py
└── README.md


/DetNet.py:
--------------------------------------------------------------------------------
###################################################
# tsing
# 2018-09-01
###################################################

# -*- coding: utf-8 -*-
import collections  # built-in collections library
import tensorflow as tf

slim = tf.contrib.slim


class Block(collections.namedtuple('Block', ['scope', 'unit_fn', 'args'])):
    """A named tuple describing a DetNet/ResNet block.

    scope:   the scope of the block.
    unit_fn: the unit function that builds one residual unit.
    args:    a list of tuples, one per unit in the block.
    """


def subsample(inputs, factor, scope=None):
    """Subsamples the input along the spatial dimensions by `factor`."""
    if factor == 1:
        return inputs
    return slim.max_pool2d(inputs, [1, 1], stride=factor, scope=scope)


def conv2d_same(inputs, num_outputs, kernel_size, stride=None, rate=None, scope=None):
    """Strided or atrous 2-D convolution with 'SAME'-style output size.

    Args:
        inputs: A 4-D tensor of size [batch, height_in, width_in, channels].
        num_outputs: An integer, the number of output filters.
        kernel_size: An integer, the kernel size of the filters.
        stride: An integer, the output stride.
        rate: An integer, the rate for atrous convolution.
        scope: Scope.

    Returns:
        output: A 4-D tensor of size [batch, height_out, width_out, channels]
            with the convolution output.
    """
    if stride:
        if stride == 1:
            return slim.conv2d(inputs, num_outputs, kernel_size, stride=1,
                               padding='SAME', scope=scope)
        # For stride > 1, pad explicitly so the output size is
        # ceil(height_in / stride), independent of the input size parity.
        pad_total = kernel_size - 1
        pad_beg = pad_total // 2
        pad_end = pad_total - pad_beg
        inputs = tf.pad(inputs,
                        [[0, 0], [pad_beg, pad_end], [pad_beg, pad_end], [0, 0]])
        return slim.conv2d(inputs, num_outputs, kernel_size, stride=stride,
                           padding='VALID', scope=scope)
    if rate:
        return slim.conv2d(inputs, num_outputs, kernel_size, rate=rate,
                           padding='SAME', scope=scope)
    raise ValueError('conv2d_same expects either stride or rate to be set.')


@slim.add_arg_scope
def stack_blocks_dense(net, blocks, outputs_collections=None):
    """Stacks the given blocks and collects each block's output."""
    for block in blocks:
        with tf.variable_scope(block.scope, 'block', [net]) as sc:
            for i, unit in enumerate(block.args):
                with tf.variable_scope('unit_%d' % (i + 1), values=[net]):
                    if isinstance(unit[-1], int):
                        # ResNet-style unit: (depth, depth_bottleneck, stride).
                        unit_depth, unit_depth_bottleneck, unit_stride = unit
                        net = block.unit_fn(net,
                                            depth=unit_depth,
                                            depth_bottleneck=unit_depth_bottleneck,
                                            stride=unit_stride)
                    else:
                        # DetNet unit: (depth, depth_bottleneck, 'A' or 'B').
                        unit_depth, unit_depth_bottleneck, bottleneck_class = unit
                        net = block.unit_fn(net,
                                            depth=unit_depth,
                                            depth_bottleneck=unit_depth_bottleneck,
                                            bottleneck_class=bottleneck_class)
            net = slim.utils.collect_named_outputs(outputs_collections, sc.name, net)
    return net


def resnet_arg_scope(is_training=True,
                     weight_decay=0.0001,
                     batch_norm_decay=0.997,
                     batch_norm_epsilon=1e-5,
                     batch_norm_scale=True):
    """Default arg scope: L2 weight decay, batch norm, and 'SAME' pooling."""
    batch_norm_params = {
        'is_training': is_training,
        'decay': batch_norm_decay,
        'epsilon': batch_norm_epsilon,
        'scale': batch_norm_scale,
        'updates_collections': tf.GraphKeys.UPDATE_OPS,
    }

    with slim.arg_scope([slim.conv2d],
                        weights_regularizer=slim.l2_regularizer(weight_decay),
                        weights_initializer=slim.variance_scaling_initializer(),
                        activation_fn=tf.nn.relu,
                        normalizer_fn=slim.batch_norm,
                        normalizer_params=batch_norm_params):
        with slim.arg_scope([slim.batch_norm], **batch_norm_params):
            with slim.arg_scope([slim.max_pool2d], padding='SAME') as arg_sc:
                return arg_sc
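
# Illustrative sketch (not part of the original file): conv2d_same with a
# stride > 1 pads explicitly by kernel_size - 1 pixels so the output size is
# ceil(height_in / stride), matching 'SAME' padding for any input size. The
# placeholder shape below is an assumption for demonstration only.
def _demo_conv2d_same_shape():
    x = tf.placeholder(tf.float32, [1, 224, 224, 3])
    # 224 is padded to 230 (pad_beg = pad_end = 3); a VALID 7x7/stride-2
    # convolution then yields (230 - 7) // 2 + 1 = 112.
    y = conv2d_same(x, 64, 7, stride=2, scope='demo_conv1')
    return y.get_shape().as_list()  # [1, 112, 112, 64]
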

@slim.add_arg_scope
def bottleneck(inputs, depth, depth_bottleneck, stride,
               outputs_collections=None, scope=None):
    """Pre-activation (ResNet v2) bottleneck residual unit."""
    with tf.variable_scope(scope, 'bottleneck_v2', [inputs]) as sc:
        depth_in = slim.utils.last_dimension(inputs.get_shape(), min_rank=4)
        preact = slim.batch_norm(inputs, activation_fn=tf.nn.relu, scope='preact')

        if depth == depth_in:
            # Identity shortcut, subsampled if the unit is strided.
            shortcut = subsample(inputs, stride, 'shortcut')
        else:
            # 1x1 projection shortcut to match the output depth.
            shortcut = slim.conv2d(preact, depth, [1, 1], stride=stride,
                                   normalizer_fn=None, activation_fn=None,
                                   scope='shortcut')

        residual = slim.conv2d(preact, depth_bottleneck, [1, 1], stride=1,
                               scope='conv1')
        residual = conv2d_same(residual, depth_bottleneck, 3, stride=stride,
                               scope='conv2')
        residual = slim.conv2d(residual, depth, [1, 1], stride=1,
                               normalizer_fn=None, activation_fn=None,
                               scope='conv3')

        output = shortcut + residual
        return slim.utils.collect_named_outputs(outputs_collections,
                                                sc.name,
                                                output)


@slim.add_arg_scope
def bottleneck_detnet(inputs, depth, depth_bottleneck,
                      bottleneck_class='A',
                      outputs_collections=None, scope=None):
    """DetNet dilated bottleneck unit.

    Class 'A' uses an identity shortcut, class 'B' a 1x1 projection shortcut;
    both keep the spatial resolution and use a rate-2 (dilated) 3x3 conv.
    """
    assert depth == 4 * depth_bottleneck, 'depth must equal 4 * depth_bottleneck'
    with tf.variable_scope(scope, 'bottleneck_{}'.format(bottleneck_class), [inputs]) as sc:
        depth_in = slim.utils.last_dimension(inputs.get_shape(), min_rank=4)
        preact = slim.batch_norm(inputs, activation_fn=tf.nn.relu, scope='preact')

        if bottleneck_class == 'A':
            shortcut = subsample(inputs, 1, 'shortcut')
        elif bottleneck_class == 'B':
            shortcut = slim.conv2d(preact, depth, [1, 1], rate=2,
                                   normalizer_fn=None, activation_fn=None,
                                   scope='shortcut')
        else:
            raise ValueError("bottleneck_class must be 'A' or 'B'.")

        residual = slim.conv2d(preact, depth_bottleneck, [1, 1], stride=1,
                               scope='conv1')
        residual = conv2d_same(residual, depth_bottleneck, 3, rate=2,
                               scope='conv2')
        residual = slim.conv2d(residual, depth, [1, 1], stride=1,
                               normalizer_fn=None, activation_fn=None,
                               scope='conv3')

        output = shortcut + residual
        return slim.utils.collect_named_outputs(outputs_collections,
                                                sc.name,
                                                output)


def detnet(inputs,
           blocks,
           num_classes=None,
           global_pool=True,
           include_root_block=True,
           reuse=None,
           scope=None):
    """Builds a DetNet model from the given list of Blocks."""
    with tf.variable_scope(scope, 'detnet', [inputs], reuse=reuse) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        with slim.arg_scope([slim.conv2d, bottleneck, bottleneck_detnet, stack_blocks_dense],
                            outputs_collections=end_points_collection):
            net = inputs
            if include_root_block:
                with slim.arg_scope([slim.conv2d],
                                    activation_fn=None,
                                    normalizer_fn=None):
                    net = conv2d_same(net, 64, 7, stride=2, scope='conv1')
                net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool1')
            net = stack_blocks_dense(net, blocks)
            net = slim.batch_norm(net, activation_fn=tf.nn.relu, scope='postnorm')

            if global_pool:
                net = tf.reduce_mean(net, [1, 2], name='pool5', keep_dims=True)
            if num_classes is not None:
                net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,
                                  normalizer_fn=None, scope='logits')
            end_points = slim.utils.convert_collection_to_dict(end_points_collection)
            if num_classes is not None:
                end_points['predictions'] = slim.softmax(net, scope='predictions')
            return net, end_points
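
# Illustrative sketch (not part of the original file): each tuple in a
# Block's `args` becomes one unit. ResNet-style units are specified as
# (depth, depth_bottleneck, stride); DetNet units replace the stride with a
# class letter, and stack_blocks_dense dispatches on the type of that last
# element. The scope names below are placeholders.
_demo_resnet_block = Block('demo_res', bottleneck,
                           [(256, 64, 1)] * 2 + [(256, 64, 2)])
_demo_detnet_block = Block('demo_det', bottleneck_detnet,
                           [(1024, 256, 'B')] + [(1024, 256, 'A')] * 2)
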

def detnet59(inputs,
             num_classes=None,
             global_pool=True,
             reuse=None,
             scope='detnet59'):
    """DetNet-59: three ResNet-50-style blocks plus two dilated DetNet blocks."""
    blocks = [
        Block('block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
        Block('block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]),
        Block('block3', bottleneck, [(1024, 256, 1)] * 5 + [(1024, 256, 2)]),
        Block('block4', bottleneck_detnet, [(1024, 256, 'B')] + [(1024, 256, 'A')] * 2),
        Block('block5', bottleneck_detnet, [(1024, 256, 'B')] + [(1024, 256, 'A')] * 2),
    ]
    return detnet(inputs, blocks, num_classes, global_pool,
                  include_root_block=True, reuse=reuse, scope=scope)
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# DetNet

#### ECCV 2018, DetNet (Tsinghua University)

### DetNet: A Backbone network for Object Detection

Paper: https://arxiv.org/pdf/1804.06215.pdf
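#### Usage

A minimal sketch of building the classification variant (assumes TensorFlow 1.x
with `tf.contrib.slim`; the module name, input shape, and class count below are
illustrative):

```python
import tensorflow as tf
import DetNet

slim = tf.contrib.slim

inputs = tf.placeholder(tf.float32, [None, 224, 224, 3])
with slim.arg_scope(DetNet.resnet_arg_scope(is_training=False)):
    # net holds [batch, 1, 1, num_classes] logits after global pooling;
    # end_points maps scope names to intermediate block outputs.
    net, end_points = DetNet.detnet59(inputs, num_classes=1000)
```

Because `block4` and `block5` replace further downsampling with rate-2 dilated
convolutions, the final feature map keeps its size (7x7 for a 224x224 input,
before global pooling) while the receptive field keeps growing.
--------------------------------------------------------------------------------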