├── DetNet.py
└── README.md


/DetNet.py:
--------------------------------------------------------------------------------
###################################################
# tsing
# 2018-09-01
###################################################

# -*- coding: utf-8 -*-
import collections  # built-in collections library
import tensorflow as tf

slim = tf.contrib.slim


class Block(collections.namedtuple('Block', ['scope', 'unit_fn', 'args'])):
    """A named tuple describing a DetNet/ResNet block.

    scope:   the scope of the block.
    unit_fn: the unit function that builds one residual unit.
    args:    a list of tuples, one per unit in the block.
    """


def subsample(inputs, factor, scope=None):
    """Subsamples the input along the spatial dimensions by `factor`."""
    if factor == 1:
        return inputs
    return slim.max_pool2d(inputs, [1, 1], stride=factor, scope=scope)


def conv2d_same(inputs, num_outputs, kernel_size, stride=None, rate=None, scope=None):
    """Strided or atrous 2-D convolution with 'SAME'-style output size.

    Args:
        inputs: A 4-D tensor of size [batch, height_in, width_in, channels].
        num_outputs: An integer, the number of output filters.
        kernel_size: An integer, the kernel size of the filters.
        stride: An integer, the output stride.
        rate: An integer, the rate for atrous convolution.
        scope: Scope.

    Returns:
        output: A 4-D tensor of size [batch, height_out, width_out, channels]
            with the convolution output.
    """
    if stride:
        if stride == 1:
            return slim.conv2d(inputs, num_outputs, kernel_size, stride=1,
                               padding='SAME', scope=scope)
        # For stride > 1, pad explicitly so the output size is
        # ceil(height_in / stride), independent of the input size parity.
        pad_total = kernel_size - 1
        pad_beg = pad_total // 2
        pad_end = pad_total - pad_beg
        inputs = tf.pad(inputs,
                        [[0, 0], [pad_beg, pad_end], [pad_beg, pad_end], [0, 0]])
        return slim.conv2d(inputs, num_outputs, kernel_size, stride=stride,
                           padding='VALID', scope=scope)
    if rate:
        return slim.conv2d(inputs, num_outputs, kernel_size, rate=rate,
                           padding='SAME', scope=scope)
    raise ValueError('conv2d_same expects either stride or rate to be set.')


@slim.add_arg_scope
def stack_blocks_dense(net, blocks, outputs_collections=None):
    """Stacks the given blocks and collects each block's output."""
    for block in blocks:
        with tf.variable_scope(block.scope, 'block', [net]) as sc:
            for i, unit in enumerate(block.args):
                with tf.variable_scope('unit_%d' % (i + 1), values=[net]):
                    if isinstance(unit[-1], int):
                        # ResNet-style unit: (depth, depth_bottleneck, stride).
                        unit_depth, unit_depth_bottleneck, unit_stride = unit
                        net = block.unit_fn(net,
                                            depth=unit_depth,
                                            depth_bottleneck=unit_depth_bottleneck,
                                            stride=unit_stride)
                    else:
                        # DetNet unit: (depth, depth_bottleneck, 'A' or 'B').
                        unit_depth, unit_depth_bottleneck, bottleneck_class = unit
                        net = block.unit_fn(net,
                                            depth=unit_depth,
                                            depth_bottleneck=unit_depth_bottleneck,
                                            bottleneck_class=bottleneck_class)
            net = slim.utils.collect_named_outputs(outputs_collections, sc.name, net)
    return net


def resnet_arg_scope(is_training=True,
                     weight_decay=0.0001,
                     batch_norm_decay=0.997,
                     batch_norm_epsilon=1e-5,
                     batch_norm_scale=True):
    """Default arg scope: L2 weight decay, batch norm, and 'SAME' pooling."""
    batch_norm_params = {
        'is_training': is_training,
        'decay': batch_norm_decay,
        'epsilon': batch_norm_epsilon,
        'scale': batch_norm_scale,
        'updates_collections': tf.GraphKeys.UPDATE_OPS,
    }

    with slim.arg_scope([slim.conv2d],
                        weights_regularizer=slim.l2_regularizer(weight_decay),
                        weights_initializer=slim.variance_scaling_initializer(),
                        activation_fn=tf.nn.relu,
                        normalizer_fn=slim.batch_norm,
                        normalizer_params=batch_norm_params):
        with slim.arg_scope([slim.batch_norm], **batch_norm_params):
            with slim.arg_scope([slim.max_pool2d], padding='SAME') as arg_sc:
                return arg_sc
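
# Illustrative sketch (not part of the original file): conv2d_same with a
# stride > 1 pads explicitly by kernel_size - 1 pixels so the output size is
# ceil(height_in / stride), matching 'SAME' padding for any input size. The
# placeholder shape below is an assumption for demonstration only.
def _demo_conv2d_same_shape():
    x = tf.placeholder(tf.float32, [1, 224, 224, 3])
    # 224 is padded to 230 (pad_beg = pad_end = 3); a VALID 7x7/stride-2
    # convolution then yields (230 - 7) // 2 + 1 = 112.
    y = conv2d_same(x, 64, 7, stride=2, scope='demo_conv1')
    return y.get_shape().as_list()  # [1, 112, 112, 64]
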

@slim.add_arg_scope
def bottleneck(inputs, depth, depth_bottleneck, stride,
               outputs_collections=None, scope=None):
    """Pre-activation (ResNet v2) bottleneck residual unit."""
    with tf.variable_scope(scope, 'bottleneck_v2', [inputs]) as sc:
        depth_in = slim.utils.last_dimension(inputs.get_shape(), min_rank=4)
        preact = slim.batch_norm(inputs, activation_fn=tf.nn.relu, scope='preact')

        if depth == depth_in:
            # Identity shortcut, subsampled if the unit is strided.
            shortcut = subsample(inputs, stride, 'shortcut')
        else:
            # 1x1 projection shortcut to match the output depth.
            shortcut = slim.conv2d(preact, depth, [1, 1], stride=stride,
                                   normalizer_fn=None, activation_fn=None,
                                   scope='shortcut')

        residual = slim.conv2d(preact, depth_bottleneck, [1, 1], stride=1,
                               scope='conv1')
        residual = conv2d_same(residual, depth_bottleneck, 3, stride=stride,
                               scope='conv2')
        residual = slim.conv2d(residual, depth, [1, 1], stride=1,
                               normalizer_fn=None, activation_fn=None,
                               scope='conv3')

        output = shortcut + residual
        return slim.utils.collect_named_outputs(outputs_collections,
                                                sc.name,
                                                output)


@slim.add_arg_scope
def bottleneck_detnet(inputs, depth, depth_bottleneck,
                      bottleneck_class='A',
                      outputs_collections=None, scope=None):
    """DetNet dilated bottleneck unit.

    Class 'A' uses an identity shortcut, class 'B' a 1x1 projection shortcut;
    both keep the spatial resolution and use a rate-2 (dilated) 3x3 conv.
    """
    assert depth == 4 * depth_bottleneck, 'depth must equal 4 * depth_bottleneck'
    with tf.variable_scope(scope, 'bottleneck_{}'.format(bottleneck_class), [inputs]) as sc:
        depth_in = slim.utils.last_dimension(inputs.get_shape(), min_rank=4)
        preact = slim.batch_norm(inputs, activation_fn=tf.nn.relu, scope='preact')

        if bottleneck_class == 'A':
            shortcut = subsample(inputs, 1, 'shortcut')
        elif bottleneck_class == 'B':
            shortcut = slim.conv2d(preact, depth, [1, 1], rate=2,
                                   normalizer_fn=None, activation_fn=None,
                                   scope='shortcut')
        else:
            raise ValueError("bottleneck_class must be 'A' or 'B'.")

        residual = slim.conv2d(preact, depth_bottleneck, [1, 1], stride=1,
                               scope='conv1')
        residual = conv2d_same(residual, depth_bottleneck, 3, rate=2,
                               scope='conv2')
        residual = slim.conv2d(residual, depth, [1, 1], stride=1,
                               normalizer_fn=None, activation_fn=None,
                               scope='conv3')

        output = shortcut + residual
        return slim.utils.collect_named_outputs(outputs_collections,
                                                sc.name,
                                                output)


def detnet(inputs,
           blocks,
           num_classes=None,
           global_pool=True,
           include_root_block=True,
           reuse=None,
           scope=None):
    """Builds a DetNet model from the given list of Blocks."""
    with tf.variable_scope(scope, 'detnet', [inputs], reuse=reuse) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        with slim.arg_scope([slim.conv2d, bottleneck, bottleneck_detnet, stack_blocks_dense],
                            outputs_collections=end_points_collection):
            net = inputs
            if include_root_block:
                with slim.arg_scope([slim.conv2d],
                                    activation_fn=None,
                                    normalizer_fn=None):
                    net = conv2d_same(net, 64, 7, stride=2, scope='conv1')
                net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool1')
            net = stack_blocks_dense(net, blocks)
            net = slim.batch_norm(net, activation_fn=tf.nn.relu, scope='postnorm')

            if global_pool:
                net = tf.reduce_mean(net, [1, 2], name='pool5', keep_dims=True)
            if num_classes is not None:
                net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,
                                  normalizer_fn=None, scope='logits')
            end_points = slim.utils.convert_collection_to_dict(end_points_collection)
            if num_classes is not None:
                end_points['predictions'] = slim.softmax(net, scope='predictions')
            return net, end_points
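
# Illustrative sketch (not part of the original file): each tuple in a
# Block's `args` becomes one unit. ResNet-style units are specified as
# (depth, depth_bottleneck, stride); DetNet units replace the stride with a
# class letter, and stack_blocks_dense dispatches on the type of that last
# element. The scope names below are placeholders.
_demo_resnet_block = Block('demo_res', bottleneck,
                           [(256, 64, 1)] * 2 + [(256, 64, 2)])
_demo_detnet_block = Block('demo_det', bottleneck_detnet,
                           [(1024, 256, 'B')] + [(1024, 256, 'A')] * 2)
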

def detnet59(inputs,
             num_classes=None,
             global_pool=True,
             reuse=None,
             scope='detnet59'):
    """DetNet-59: three ResNet-50-style blocks plus two dilated DetNet blocks."""
    blocks = [
        Block('block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
        Block('block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]),
        Block('block3', bottleneck, [(1024, 256, 1)] * 5 + [(1024, 256, 2)]),
        Block('block4', bottleneck_detnet, [(1024, 256, 'B')] + [(1024, 256, 'A')] * 2),
        Block('block5', bottleneck_detnet, [(1024, 256, 'B')] + [(1024, 256, 'A')] * 2),
    ]
    return detnet(inputs, blocks, num_classes, global_pool,
                  include_root_block=True, reuse=reuse, scope=scope)
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# DetNet

#### ECCV 2018, DetNet (Tsinghua University)

### DetNet: A Backbone network for Object Detection

Paper: https://arxiv.org/pdf/1804.06215.pdf
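#### Usage

A minimal sketch of building the classification variant (assumes TensorFlow 1.x
with `tf.contrib.slim`; the module name, input shape, and class count below are
illustrative):

```python
import tensorflow as tf
import DetNet

slim = tf.contrib.slim

inputs = tf.placeholder(tf.float32, [None, 224, 224, 3])
with slim.arg_scope(DetNet.resnet_arg_scope(is_training=False)):
    # net holds [batch, 1, 1, num_classes] logits after global pooling;
    # end_points maps scope names to intermediate block outputs.
    net, end_points = DetNet.detnet59(inputs, num_classes=1000)
```

Because `block4` and `block5` replace further downsampling with rate-2 dilated
convolutions, the final feature map keeps its size (7x7 for a 224x224 input,
before global pooling) while the receptive field keeps growing.
--------------------------------------------------------------------------------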