├── .gitignore
├── LICENSE
├── README.md
├── __init__.py
├── config.py
├── convert.py
├── data
│   ├── ResNet-101-deploy.prototxt
│   ├── ResNet-152-deploy.prototxt
│   ├── ResNet-50-deploy.prototxt
│   ├── ResNet_mean.binaryproto
│   ├── cat.jpg
│   └── tensorflow-resnet-pretrained-20160509.tar.gz.torrent
├── forward.py
├── guess.py
├── image_processing.py
├── resnet.py
├── resnet_train.py
├── synset.py
├── train_cifar.py
├── train_imagenet.py
├── train_yourown.py
└── utils.py

/.gitignore:
--------------------------------------------------------------------------------
*.swp
*.pyc
.ipynb_checkpoints/
*.caffemodel
*.tfmodel
checkpoint
ResNet-L*.ckpt
ResNet-L*.meta
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
The MIT License (MIT)

Copyright (c) 2016 Ryan Dahl

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# ResNet in TensorFlow

ResNet reference material.

Implementation of [Deep Residual Learning for Image
Recognition](http://arxiv.org/abs/1512.03385).

MIT license. Contributions welcome.

## Changes made to the original code

1. Replaced calls to old, deprecated TensorFlow APIs with their counterparts in the version I use, e.g. `tf.op_scope` became `tf.name_scope`.
2. Added a script for training on your own data. The original code only handled the CIFAR and ImageNet datasets; the new script can process a training set of any form.
3. Added prediction code.
4. Modified the original code so that training can resume from a pretrained model.

## Usage

1. Prepare your own dataset.
   Create a text file listing the absolute path of each training image together with its label, one image per line, with path and label separated by a tab. Each image has exactly one label, and labels must be integers.
   Then run the `train_yourown.py` script with the path of this text file as an argument. Other settings, such as the ResNet depth and the learning rate, are configured the same way as in the original code; see the `tf.app.flags` documentation for how to pass arguments. A sample list file and invocation are sketched at the end of this README.
2. Predict with a trained model.
   Run `guess.py`; you only need to change the following parameters:
   - `data_dir`: directory containing the images to test
   - `model_dir`: directory containing the trained model
   - `ckpt_file`: which checkpoint to use
   - `target`: the prediction output, a CSV with three columns: image path, predicted label, and prediction score
   - `label_list`: your model's label names, ordered by their integer labels from smallest to largest; e.g. if male is 0 and female is 1, this is `["Male", "Female"]`.
3. The pretrained models are available via the torrent file in the original repository's `data` folder; download them and they are ready to use. My code uses the 50-layer ResNet; using another depth only requires changing a few parameters.

## To do

1. Revise the model-validation part of training.
2. Improve the argument-passing interface to make it more convenient to use.
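
## Example: training list format

A minimal sketch of a tab-separated training list and a launch command. The flag names below (`--data_path`, `--num_layers`, `--learning_rate`) are illustrative; check the `tf.app.flags` definitions in `train_yourown.py` for the actual names.

```
/data/faces/img_0001.jpg	0
/data/faces/img_0002.jpg	1
/data/faces/img_0003.jpg	0
```

```sh
python train_yourown.py --data_path /data/train_list.txt --num_layers 50 --learning_rate 0.01
```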
--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
from resnet import *
--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
# This is a variable scope aware configuration object for TensorFlow

import tensorflow as tf

FLAGS = tf.app.flags.FLAGS


class Config:
    def __init__(self):
        root = self.Scope('')
        for k, v in FLAGS.__dict__['__flags'].iteritems():
            root[k] = v
        self.stack = [root]

    def iteritems(self):
        return self.to_dict().iteritems()

    def to_dict(self):
        self._pop_stale()
        out = {}
        # Work backwards from the flags to the top of the stack,
        # overwriting keys that were found earlier.
        for cs in reversed(self.stack):
            for name in cs:
                out[name] = cs[name]
        return out

    def _pop_stale(self):
        var_scope_name = tf.get_variable_scope().name
        top = self.stack[0]
        while not top.contains(var_scope_name):
            # We aren't in this scope anymore
            self.stack.pop(0)
            top = self.stack[0]

    def __getitem__(self, name):
        self._pop_stale()
        # Recursively extract value
        for i in range(len(self.stack)):
            cs = self.stack[i]
            if name in cs:
                return cs[name]

        raise KeyError(name)

    def set_default(self, name, value):
        if name not in self:
            self[name] = value

    def __contains__(self, name):
        self._pop_stale()
        for i in range(len(self.stack)):
            cs = self.stack[i]
            if name in cs:
                return True
        return False

    def __setitem__(self, name, value):
        self._pop_stale()
        top = self.stack[0]
        var_scope_name = tf.get_variable_scope().name
        assert top.contains(var_scope_name)

        if top.name != var_scope_name:
            top = self.Scope(var_scope_name)
            self.stack.insert(0, top)

        top[name] = value

    class Scope(dict):
        def __init__(self, name):
            self.name = name

        def contains(self, var_scope_name):
            return var_scope_name.startswith(self.name)


# Test
if __name__ == '__main__':

    def assert_raises(exception, fn):
        try:
            fn()
        except exception:
            pass
        else:
            assert False, "Expected exception"

    c = Config()

    c['hello'] = 1
    assert c['hello'] == 1

    with tf.variable_scope('foo'):
        c.set_default("bar", 10)
        c['bar'] = 2
        assert c['bar'] == 2
        assert c['hello'] == 1

        c.set_default("mario", True)

        with tf.variable_scope('meow'):
            c['dog'] = 3
            assert c['dog'] == 3
            assert c['bar'] == 2
            assert c['hello'] == 1

        assert c['mario'] == True

    assert_raises(KeyError, lambda: c['dog'])
    assert c['bar'] == 2
    assert c['hello'] == 1
--------------------------------------------------------------------------------
/convert.py:
--------------------------------------------------------------------------------
import os
os.environ["GLOG_minloglevel"] = "2"
import sys
import re
import caffe
import numpy as np
import tensorflow as tf
import skimage.io
import skimage.transform  # load_image below uses skimage.transform.resize
from caffe.proto import caffe_pb2
from synset import *

import resnet
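
# Overview: this script copies the pretrained ResNet weights from Caffe into
# the TensorFlow graph built by resnet.py, checks that intermediate
# activations of the two networks agree, and writes ResNet-L{50,101,152}.ckpt
# and .meta files. It expects the Caffe deploy prototxts and .caffemodel
# weight files under data/ (the .caffemodel files are not checked in; see
# .gitignore).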

class CaffeParamProvider():
    def __init__(self, caffe_net):
        self.caffe_net = caffe_net

    def conv_kernel(self, name):
        k = self.caffe_net.params[name][0].data
        # caffe:      [out_channels, in_channels, filter_height, filter_width]
        #                   0             1              2              3
        # tensorflow: [filter_height, filter_width, in_channels, out_channels]
        #                   2             3              1              0
        return k.transpose((2, 3, 1, 0))

    def bn_gamma(self, name):
        return self.caffe_net.params[name][0].data

    def bn_beta(self, name):
        return self.caffe_net.params[name][1].data

    def bn_mean(self, name):
        return self.caffe_net.params[name][0].data

    def bn_variance(self, name):
        return self.caffe_net.params[name][1].data

    def fc_weights(self, name):
        w = self.caffe_net.params[name][0].data
        # caffe [out, in] -> tensorflow [in, out]
        w = w.transpose((1, 0))
        return w

    def fc_biases(self, name):
        b = self.caffe_net.params[name][1].data
        return b


def preprocess(img):
    """Changes RGB [0,1] valued image to BGR [0,255] with mean subtracted."""
    mean_bgr = load_mean_bgr()
    print 'mean blue', np.mean(mean_bgr[:, :, 0])
    print 'mean green', np.mean(mean_bgr[:, :, 1])
    print 'mean red', np.mean(mean_bgr[:, :, 2])
    out = np.copy(img) * 255.0
    out = out[:, :, [2, 1, 0]]  # swap channel from RGB to BGR
    out -= mean_bgr
    return out


def assert_almost_equal(caffe_tensor, tf_tensor):
    t = tf_tensor[0]
    c = caffe_tensor[0].transpose((1, 2, 0))

    #for i in range(0, t.shape[-1]):
    #    print "tf", i, t[:,i]
    #    print "caffe", i, c[:,i]

    if t.shape != c.shape:
        print "t.shape", t.shape
        print "c.shape", c.shape
        sys.exit(1)

    d = np.linalg.norm(t - c)
    print "d", d
    assert d < 500


# returns image of shape [224, 224, 3]
# [height, width, depth]
def load_image(path, size=224):
    img = skimage.io.imread(path)
    # center crop to a square on the short edge, then resize
    short_edge = min(img.shape[:2])
    yy = int((img.shape[0] - short_edge) / 2)
    xx = int((img.shape[1] - short_edge) / 2)
    crop_img = img[yy:yy + short_edge, xx:xx + short_edge]
    resized_img = skimage.transform.resize(crop_img, (size, size))
    return resized_img
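
# Shape sanity check (illustrative): load_image returns an RGB float image in
# [0, 1]; preprocess converts it to mean-subtracted BGR in [0, 255].
#
#   img = load_image("data/cat.jpg")  # (224, 224, 3), RGB, values in [0, 1]
#   img_p = preprocess(img)           # (224, 224, 3), BGR, mean subtracted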

def load_mean_bgr():
    """bgr mean pixel value image, [0, 255]. [height, width, 3]"""
    with open("data/ResNet_mean.binaryproto", mode='rb') as f:
        data = f.read()
    blob = caffe_pb2.BlobProto()
    blob.ParseFromString(data)

    mean_bgr = caffe.io.blobproto_to_array(blob)[0]
    assert mean_bgr.shape == (3, 224, 224)

    return mean_bgr.transpose((1, 2, 0))


def load_caffe(img_p, layers=50):
    caffe.set_mode_cpu()

    prototxt = "data/ResNet-%d-deploy.prototxt" % layers
    caffemodel = "data/ResNet-%d-model.caffemodel" % layers
    net = caffe.Net(prototxt, caffemodel, caffe.TEST)

    net.blobs['data'].data[0] = img_p.transpose((2, 0, 1))
    assert net.blobs['data'].data[0].shape == (3, 224, 224)
    net.forward()

    caffe_prob = net.blobs['prob'].data[0]
    print_prob(caffe_prob)

    return net


# returns the top1 string
def print_prob(prob):
    #print prob
    pred = np.argsort(prob)[::-1]

    # Get top1 label
    top1 = synset[pred[0]]
    print "Top1: ", top1
    # Get top5 label
    top5 = [synset[pred[i]] for i in range(5)]
    print "Top5: ", top5
    return top1
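
# Name-mapping sketch (illustrative): parse_tf_varnames below translates a
# TensorFlow variable name into the matching Caffe parameter name, e.g. for
# 50 layers
#   'scale2/block1/shortcut/weights' -> conv kernel 'res2a_branch1'
#   'scale3/block2/c/moving_mean'    -> bn mean     'bn3b_branch2c'
# For 101/152 layers, scale3/block2 maps to numbered block 'b1' instead,
# giving 'bn3b1_branch2c'.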

def parse_tf_varnames(p, tf_varname, num_layers):
    if tf_varname == 'scale1/weights':
        return p.conv_kernel('conv1')

    elif tf_varname == 'scale1/gamma':
        return p.bn_gamma('scale_conv1')

    elif tf_varname == 'scale1/beta':
        return p.bn_beta('scale_conv1')

    elif tf_varname == 'scale1/moving_mean':
        return p.bn_mean('bn_conv1')

    elif tf_varname == 'scale1/moving_variance':
        return p.bn_variance('bn_conv1')

    elif tf_varname == 'fc/weights':
        return p.fc_weights('fc1000')

    elif tf_varname == 'fc/biases':
        return p.fc_biases('fc1000')

    # scale2/block1/shortcut/weights
    # scale3/block2/c/moving_mean
    # scale3/block6/c/moving_variance
    # scale4/block3/c/moving_mean
    # scale4/block8/a/beta
    re1 = r'scale(\d+)/block(\d+)/(shortcut|a|b|c|A|B)'
    m = re.search(re1, tf_varname)
    if m is None:
        raise ValueError('unhandled var ' + tf_varname)

    def letter(i):
        return chr(ord('a') + i - 1)

    scale_num = int(m.group(1))

    block_num = int(m.group(2))
    if scale_num == 2:
        # scale 2 always uses block letters
        block_str = letter(block_num)
    elif scale_num == 3 or scale_num == 4:
        # scales 3 and 4 use block letters for l=50 and numbered blocks
        # for l=101 and l=152
        if num_layers == 50:
            block_str = letter(block_num)
        else:
            if block_num == 1:
                block_str = 'a'
            else:
                block_str = 'b%d' % (block_num - 1)
    elif scale_num == 5:
        # scale 5 always uses block letters
        block_str = letter(block_num)
    else:
        raise ValueError("unexpected scale_num %d" % scale_num)

    branch = m.group(3)
    if branch == "shortcut":
        branch_num = 1
        conv_letter = ''
    else:
        branch_num = 2
        conv_letter = branch.lower()

    x = (scale_num, block_str, branch_num, conv_letter)
    #print x

    if 'weights' in tf_varname:
        return p.conv_kernel('res%d%s_branch%d%s' % x)

    if 'gamma' in tf_varname:
        return p.bn_gamma('scale%d%s_branch%d%s' % x)

    if 'beta' in tf_varname:
        return p.bn_beta('scale%d%s_branch%d%s' % x)

    if 'moving_mean' in tf_varname:
        return p.bn_mean('bn%d%s_branch%d%s' % x)

    if 'moving_variance' in tf_varname:
        return p.bn_variance('bn%d%s_branch%d%s' % x)

    raise ValueError('unhandled var ' + tf_varname)


def checkpoint_fn(layers):
    return 'ResNet-L%d.ckpt' % layers


def meta_fn(layers):
    return 'ResNet-L%d.meta' % layers


def convert(graph, img, img_p, layers):
    caffe_model = load_caffe(img_p, layers)

    #for i, n in enumerate(caffe_model.params):
    #    print n

    param_provider = CaffeParamProvider(caffe_model)

    if layers == 50:
        num_blocks = [3, 4, 6, 3]
    elif layers == 101:
        num_blocks = [3, 4, 23, 3]
    elif layers == 152:
        num_blocks = [3, 8, 36, 3]
    else:
        raise ValueError("unsupported layer count %d" % layers)

    with tf.device('/cpu:0'):
        images = tf.placeholder("float32", [None, 224, 224, 3], name="images")
        logits = resnet.inference(images,
                                  is_training=False,
                                  num_blocks=num_blocks,
                                  preprocess=True,
                                  bottleneck=True)
        prob = tf.nn.softmax(logits, name='prob')

    # We write the metagraph first to avoid adding a bunch of
    # assign ops that are used to set variables from caffe.
    # The checkpoint is written to at the end.
    tf.train.export_meta_graph(filename=meta_fn(layers))

    vars_to_restore = tf.all_variables()
    saver = tf.train.Saver(vars_to_restore)

    sess = tf.Session()
    sess.run(tf.initialize_all_variables())

    assigns = []
    for var in vars_to_restore:
        #print var.op.name
        data = parse_tf_varnames(param_provider, var.op.name, layers)
        #print "caffe data shape", data.shape
        #print "tf shape", var.get_shape()
        assigns.append(var.assign(data))
    sess.run(assigns)

    #for op in tf.get_default_graph().get_operations():
    #    print op.name

    i = [
        graph.get_tensor_by_name("scale1/Relu:0"),
        graph.get_tensor_by_name("scale2/MaxPool:0"),
        graph.get_tensor_by_name("scale2/block1/Relu:0"),
        graph.get_tensor_by_name("scale2/block2/Relu:0"),
        graph.get_tensor_by_name("scale2/block3/Relu:0"),
        graph.get_tensor_by_name("scale3/block1/Relu:0"),
        graph.get_tensor_by_name("scale5/block3/Relu:0"),
        graph.get_tensor_by_name("avg_pool:0"),
        graph.get_tensor_by_name("prob:0"),
    ]

    o = sess.run(i, {images: img[np.newaxis, :]})

    assert_almost_equal(caffe_model.blobs['conv1'].data, o[0])
    assert_almost_equal(caffe_model.blobs['pool1'].data, o[1])
    assert_almost_equal(caffe_model.blobs['res2a'].data, o[2])
    assert_almost_equal(caffe_model.blobs['res2b'].data, o[3])
    assert_almost_equal(caffe_model.blobs['res2c'].data, o[4])
    assert_almost_equal(caffe_model.blobs['res3a'].data, o[5])
    assert_almost_equal(caffe_model.blobs['res5c'].data, o[6])
    #assert_almost_equal(np.squeeze(caffe_model.blobs['pool5'].data), o[7])

    print_prob(o[8][0])

    prob_dist = np.linalg.norm(caffe_model.blobs['prob'].data - o[8])
    print 'prob_dist ', prob_dist
    assert prob_dist < 0.2  # XXX can this be tightened?

    # We've already written the metagraph to avoid a bunch of assign ops.
305 | saver.save(sess, checkpoint_fn(layers), write_meta_graph=False) 306 | 307 | 308 | def save_graph(save_path): 309 | graph = tf.get_default_graph() 310 | graph_def = graph.as_graph_def() 311 | print "graph_def byte size", graph_def.ByteSize() 312 | graph_def_s = graph_def.SerializeToString() 313 | 314 | with open(save_path, "wb") as f: 315 | f.write(graph_def_s) 316 | 317 | print "saved model to %s" % save_path 318 | 319 | 320 | def main(_): 321 | img = load_image("data/cat.jpg") 322 | print img 323 | img_p = preprocess(img) 324 | 325 | for layers in [50, 101, 152]: 326 | g = tf.Graph() 327 | with g.as_default(): 328 | print "CONVERT", layers 329 | convert(g, img, img_p, layers) 330 | 331 | 332 | if __name__ == '__main__': 333 | tf.app.run() 334 | -------------------------------------------------------------------------------- /data/ResNet-50-deploy.prototxt: -------------------------------------------------------------------------------- 1 | name: "ResNet-50" 2 | input: "data" 3 | input_dim: 1 4 | input_dim: 3 5 | input_dim: 224 6 | input_dim: 224 7 | 8 | layer { 9 | bottom: "data" 10 | top: "conv1_conv" 11 | name: "conv1" 12 | type: "Convolution" 13 | convolution_param { 14 | num_output: 64 15 | kernel_size: 7 16 | pad: 3 17 | stride: 2 18 | } 19 | } 20 | 21 | layer { 22 | bottom: "conv1_conv" 23 | top: "conv1" 24 | name: "bn_conv1" 25 | type: "BatchNorm" 26 | batch_norm_param { 27 | use_global_stats: true 28 | } 29 | } 30 | 31 | layer { 32 | bottom: "conv1" 33 | top: "conv1" 34 | name: "scale_conv1" 35 | type: "Scale" 36 | scale_param { 37 | bias_term: true 38 | } 39 | } 40 | 41 | layer { 42 | bottom: "conv1" 43 | top: "conv1" 44 | name: "conv1_relu" 45 | type: "ReLU" 46 | } 47 | 48 | layer { 49 | bottom: "conv1" 50 | top: "pool1" 51 | name: "pool1" 52 | type: "Pooling" 53 | pooling_param { 54 | kernel_size: 3 55 | stride: 2 56 | pool: MAX 57 | } 58 | } 59 | 60 | layer { 61 | bottom: "pool1" 62 | top: "res2a_branch1" 63 | name: "res2a_branch1" 64 | type: "Convolution" 65 | convolution_param { 66 | num_output: 256 67 | kernel_size: 1 68 | pad: 0 69 | stride: 1 70 | bias_term: false 71 | } 72 | } 73 | 74 | layer { 75 | bottom: "res2a_branch1" 76 | top: "res2a_branch1" 77 | name: "bn2a_branch1" 78 | type: "BatchNorm" 79 | batch_norm_param { 80 | use_global_stats: true 81 | } 82 | } 83 | 84 | layer { 85 | bottom: "res2a_branch1" 86 | top: "res2a_branch1" 87 | name: "scale2a_branch1" 88 | type: "Scale" 89 | scale_param { 90 | bias_term: true 91 | } 92 | } 93 | 94 | layer { 95 | bottom: "pool1" 96 | top: "res2a_branch2a" 97 | name: "res2a_branch2a" 98 | type: "Convolution" 99 | convolution_param { 100 | num_output: 64 101 | kernel_size: 1 102 | pad: 0 103 | stride: 1 104 | bias_term: false 105 | } 106 | } 107 | 108 | layer { 109 | bottom: "res2a_branch2a" 110 | top: "res2a_branch2a" 111 | name: "bn2a_branch2a" 112 | type: "BatchNorm" 113 | batch_norm_param { 114 | use_global_stats: true 115 | } 116 | } 117 | 118 | layer { 119 | bottom: "res2a_branch2a" 120 | top: "res2a_branch2a" 121 | name: "scale2a_branch2a" 122 | type: "Scale" 123 | scale_param { 124 | bias_term: true 125 | } 126 | } 127 | 128 | layer { 129 | bottom: "res2a_branch2a" 130 | top: "res2a_branch2a" 131 | name: "res2a_branch2a_relu" 132 | type: "ReLU" 133 | } 134 | 135 | layer { 136 | bottom: "res2a_branch2a" 137 | top: "res2a_branch2b_conv" 138 | name: "res2a_branch2b" 139 | type: "Convolution" 140 | convolution_param { 141 | num_output: 64 142 | kernel_size: 3 143 | pad: 1 144 | stride: 1 145 | bias_term: false 146 | } 
147 | } 148 | 149 | layer { 150 | bottom: "res2a_branch2b_conv" 151 | top: "res2a_branch2b" 152 | name: "bn2a_branch2b" 153 | type: "BatchNorm" 154 | batch_norm_param { 155 | use_global_stats: true 156 | } 157 | } 158 | 159 | layer { 160 | bottom: "res2a_branch2b" 161 | top: "res2a_branch2b" 162 | name: "scale2a_branch2b" 163 | type: "Scale" 164 | scale_param { 165 | bias_term: true 166 | } 167 | } 168 | 169 | layer { 170 | bottom: "res2a_branch2b" 171 | top: "res2a_branch2b" 172 | name: "res2a_branch2b_relu" 173 | type: "ReLU" 174 | } 175 | 176 | layer { 177 | bottom: "res2a_branch2b" 178 | top: "res2a_branch2c" 179 | name: "res2a_branch2c" 180 | type: "Convolution" 181 | convolution_param { 182 | num_output: 256 183 | kernel_size: 1 184 | pad: 0 185 | stride: 1 186 | bias_term: false 187 | } 188 | } 189 | 190 | layer { 191 | bottom: "res2a_branch2c" 192 | top: "res2a_branch2c" 193 | name: "bn2a_branch2c" 194 | type: "BatchNorm" 195 | batch_norm_param { 196 | use_global_stats: true 197 | } 198 | } 199 | 200 | layer { 201 | bottom: "res2a_branch2c" 202 | top: "res2a_branch2c" 203 | name: "scale2a_branch2c" 204 | type: "Scale" 205 | scale_param { 206 | bias_term: true 207 | } 208 | } 209 | 210 | layer { 211 | bottom: "res2a_branch1" 212 | bottom: "res2a_branch2c" 213 | top: "res2a" 214 | name: "res2a" 215 | type: "Eltwise" 216 | } 217 | 218 | layer { 219 | bottom: "res2a" 220 | top: "res2a" 221 | name: "res2a_relu" 222 | type: "ReLU" 223 | } 224 | 225 | layer { 226 | bottom: "res2a" 227 | top: "res2b_branch2a" 228 | name: "res2b_branch2a" 229 | type: "Convolution" 230 | convolution_param { 231 | num_output: 64 232 | kernel_size: 1 233 | pad: 0 234 | stride: 1 235 | bias_term: false 236 | } 237 | } 238 | 239 | layer { 240 | bottom: "res2b_branch2a" 241 | top: "res2b_branch2a" 242 | name: "bn2b_branch2a" 243 | type: "BatchNorm" 244 | batch_norm_param { 245 | use_global_stats: true 246 | } 247 | } 248 | 249 | layer { 250 | bottom: "res2b_branch2a" 251 | top: "res2b_branch2a" 252 | name: "scale2b_branch2a" 253 | type: "Scale" 254 | scale_param { 255 | bias_term: true 256 | } 257 | } 258 | 259 | layer { 260 | bottom: "res2b_branch2a" 261 | top: "res2b_branch2a" 262 | name: "res2b_branch2a_relu" 263 | type: "ReLU" 264 | } 265 | 266 | layer { 267 | bottom: "res2b_branch2a" 268 | top: "res2b_branch2b" 269 | name: "res2b_branch2b" 270 | type: "Convolution" 271 | convolution_param { 272 | num_output: 64 273 | kernel_size: 3 274 | pad: 1 275 | stride: 1 276 | bias_term: false 277 | } 278 | } 279 | 280 | layer { 281 | bottom: "res2b_branch2b" 282 | top: "res2b_branch2b" 283 | name: "bn2b_branch2b" 284 | type: "BatchNorm" 285 | batch_norm_param { 286 | use_global_stats: true 287 | } 288 | } 289 | 290 | layer { 291 | bottom: "res2b_branch2b" 292 | top: "res2b_branch2b" 293 | name: "scale2b_branch2b" 294 | type: "Scale" 295 | scale_param { 296 | bias_term: true 297 | } 298 | } 299 | 300 | layer { 301 | bottom: "res2b_branch2b" 302 | top: "res2b_branch2b" 303 | name: "res2b_branch2b_relu" 304 | type: "ReLU" 305 | } 306 | 307 | layer { 308 | bottom: "res2b_branch2b" 309 | top: "res2b_branch2c" 310 | name: "res2b_branch2c" 311 | type: "Convolution" 312 | convolution_param { 313 | num_output: 256 314 | kernel_size: 1 315 | pad: 0 316 | stride: 1 317 | bias_term: false 318 | } 319 | } 320 | 321 | layer { 322 | bottom: "res2b_branch2c" 323 | top: "res2b_branch2c" 324 | name: "bn2b_branch2c" 325 | type: "BatchNorm" 326 | batch_norm_param { 327 | use_global_stats: true 328 | } 329 | } 330 | 331 | layer { 332 | 
bottom: "res2b_branch2c" 333 | top: "res2b_branch2c" 334 | name: "scale2b_branch2c" 335 | type: "Scale" 336 | scale_param { 337 | bias_term: true 338 | } 339 | } 340 | 341 | layer { 342 | bottom: "res2a" 343 | bottom: "res2b_branch2c" 344 | top: "res2b" 345 | name: "res2b" 346 | type: "Eltwise" 347 | } 348 | 349 | layer { 350 | bottom: "res2b" 351 | top: "res2b" 352 | name: "res2b_relu" 353 | type: "ReLU" 354 | } 355 | 356 | layer { 357 | bottom: "res2b" 358 | top: "res2c_branch2a" 359 | name: "res2c_branch2a" 360 | type: "Convolution" 361 | convolution_param { 362 | num_output: 64 363 | kernel_size: 1 364 | pad: 0 365 | stride: 1 366 | bias_term: false 367 | } 368 | } 369 | 370 | layer { 371 | bottom: "res2c_branch2a" 372 | top: "res2c_branch2a" 373 | name: "bn2c_branch2a" 374 | type: "BatchNorm" 375 | batch_norm_param { 376 | use_global_stats: true 377 | } 378 | } 379 | 380 | layer { 381 | bottom: "res2c_branch2a" 382 | top: "res2c_branch2a" 383 | name: "scale2c_branch2a" 384 | type: "Scale" 385 | scale_param { 386 | bias_term: true 387 | } 388 | } 389 | 390 | layer { 391 | bottom: "res2c_branch2a" 392 | top: "res2c_branch2a" 393 | name: "res2c_branch2a_relu" 394 | type: "ReLU" 395 | } 396 | 397 | layer { 398 | bottom: "res2c_branch2a" 399 | top: "res2c_branch2b" 400 | name: "res2c_branch2b" 401 | type: "Convolution" 402 | convolution_param { 403 | num_output: 64 404 | kernel_size: 3 405 | pad: 1 406 | stride: 1 407 | bias_term: false 408 | } 409 | } 410 | 411 | layer { 412 | bottom: "res2c_branch2b" 413 | top: "res2c_branch2b" 414 | name: "bn2c_branch2b" 415 | type: "BatchNorm" 416 | batch_norm_param { 417 | use_global_stats: true 418 | } 419 | } 420 | 421 | layer { 422 | bottom: "res2c_branch2b" 423 | top: "res2c_branch2b" 424 | name: "scale2c_branch2b" 425 | type: "Scale" 426 | scale_param { 427 | bias_term: true 428 | } 429 | } 430 | 431 | layer { 432 | bottom: "res2c_branch2b" 433 | top: "res2c_branch2b" 434 | name: "res2c_branch2b_relu" 435 | type: "ReLU" 436 | } 437 | 438 | layer { 439 | bottom: "res2c_branch2b" 440 | top: "res2c_branch2c" 441 | name: "res2c_branch2c" 442 | type: "Convolution" 443 | convolution_param { 444 | num_output: 256 445 | kernel_size: 1 446 | pad: 0 447 | stride: 1 448 | bias_term: false 449 | } 450 | } 451 | 452 | layer { 453 | bottom: "res2c_branch2c" 454 | top: "res2c_branch2c" 455 | name: "bn2c_branch2c" 456 | type: "BatchNorm" 457 | batch_norm_param { 458 | use_global_stats: true 459 | } 460 | } 461 | 462 | layer { 463 | bottom: "res2c_branch2c" 464 | top: "res2c_branch2c" 465 | name: "scale2c_branch2c" 466 | type: "Scale" 467 | scale_param { 468 | bias_term: true 469 | } 470 | } 471 | 472 | layer { 473 | bottom: "res2b" 474 | bottom: "res2c_branch2c" 475 | top: "res2c" 476 | name: "res2c" 477 | type: "Eltwise" 478 | } 479 | 480 | layer { 481 | bottom: "res2c" 482 | top: "res2c" 483 | name: "res2c_relu" 484 | type: "ReLU" 485 | } 486 | 487 | layer { 488 | bottom: "res2c" 489 | top: "res3a_branch1" 490 | name: "res3a_branch1" 491 | type: "Convolution" 492 | convolution_param { 493 | num_output: 512 494 | kernel_size: 1 495 | pad: 0 496 | stride: 2 497 | bias_term: false 498 | } 499 | } 500 | 501 | layer { 502 | bottom: "res3a_branch1" 503 | top: "res3a_branch1" 504 | name: "bn3a_branch1" 505 | type: "BatchNorm" 506 | batch_norm_param { 507 | use_global_stats: true 508 | } 509 | } 510 | 511 | layer { 512 | bottom: "res3a_branch1" 513 | top: "res3a_branch1" 514 | name: "scale3a_branch1" 515 | type: "Scale" 516 | scale_param { 517 | bias_term: true 518 | 
} 519 | } 520 | 521 | layer { 522 | bottom: "res2c" 523 | top: "res3a_branch2a" 524 | name: "res3a_branch2a" 525 | type: "Convolution" 526 | convolution_param { 527 | num_output: 128 528 | kernel_size: 1 529 | pad: 0 530 | stride: 2 531 | bias_term: false 532 | } 533 | } 534 | 535 | layer { 536 | bottom: "res3a_branch2a" 537 | top: "res3a_branch2a" 538 | name: "bn3a_branch2a" 539 | type: "BatchNorm" 540 | batch_norm_param { 541 | use_global_stats: true 542 | } 543 | } 544 | 545 | layer { 546 | bottom: "res3a_branch2a" 547 | top: "res3a_branch2a" 548 | name: "scale3a_branch2a" 549 | type: "Scale" 550 | scale_param { 551 | bias_term: true 552 | } 553 | } 554 | 555 | layer { 556 | bottom: "res3a_branch2a" 557 | top: "res3a_branch2a" 558 | name: "res3a_branch2a_relu" 559 | type: "ReLU" 560 | } 561 | 562 | layer { 563 | bottom: "res3a_branch2a" 564 | top: "res3a_branch2b" 565 | name: "res3a_branch2b" 566 | type: "Convolution" 567 | convolution_param { 568 | num_output: 128 569 | kernel_size: 3 570 | pad: 1 571 | stride: 1 572 | bias_term: false 573 | } 574 | } 575 | 576 | layer { 577 | bottom: "res3a_branch2b" 578 | top: "res3a_branch2b" 579 | name: "bn3a_branch2b" 580 | type: "BatchNorm" 581 | batch_norm_param { 582 | use_global_stats: true 583 | } 584 | } 585 | 586 | layer { 587 | bottom: "res3a_branch2b" 588 | top: "res3a_branch2b" 589 | name: "scale3a_branch2b" 590 | type: "Scale" 591 | scale_param { 592 | bias_term: true 593 | } 594 | } 595 | 596 | layer { 597 | bottom: "res3a_branch2b" 598 | top: "res3a_branch2b" 599 | name: "res3a_branch2b_relu" 600 | type: "ReLU" 601 | } 602 | 603 | layer { 604 | bottom: "res3a_branch2b" 605 | top: "res3a_branch2c" 606 | name: "res3a_branch2c" 607 | type: "Convolution" 608 | convolution_param { 609 | num_output: 512 610 | kernel_size: 1 611 | pad: 0 612 | stride: 1 613 | bias_term: false 614 | } 615 | } 616 | 617 | layer { 618 | bottom: "res3a_branch2c" 619 | top: "res3a_branch2c" 620 | name: "bn3a_branch2c" 621 | type: "BatchNorm" 622 | batch_norm_param { 623 | use_global_stats: true 624 | } 625 | } 626 | 627 | layer { 628 | bottom: "res3a_branch2c" 629 | top: "res3a_branch2c" 630 | name: "scale3a_branch2c" 631 | type: "Scale" 632 | scale_param { 633 | bias_term: true 634 | } 635 | } 636 | 637 | layer { 638 | bottom: "res3a_branch1" 639 | bottom: "res3a_branch2c" 640 | top: "res3a" 641 | name: "res3a" 642 | type: "Eltwise" 643 | } 644 | 645 | layer { 646 | bottom: "res3a" 647 | top: "res3a" 648 | name: "res3a_relu" 649 | type: "ReLU" 650 | } 651 | 652 | layer { 653 | bottom: "res3a" 654 | top: "res3b_branch2a" 655 | name: "res3b_branch2a" 656 | type: "Convolution" 657 | convolution_param { 658 | num_output: 128 659 | kernel_size: 1 660 | pad: 0 661 | stride: 1 662 | bias_term: false 663 | } 664 | } 665 | 666 | layer { 667 | bottom: "res3b_branch2a" 668 | top: "res3b_branch2a" 669 | name: "bn3b_branch2a" 670 | type: "BatchNorm" 671 | batch_norm_param { 672 | use_global_stats: true 673 | } 674 | } 675 | 676 | layer { 677 | bottom: "res3b_branch2a" 678 | top: "res3b_branch2a" 679 | name: "scale3b_branch2a" 680 | type: "Scale" 681 | scale_param { 682 | bias_term: true 683 | } 684 | } 685 | 686 | layer { 687 | bottom: "res3b_branch2a" 688 | top: "res3b_branch2a" 689 | name: "res3b_branch2a_relu" 690 | type: "ReLU" 691 | } 692 | 693 | layer { 694 | bottom: "res3b_branch2a" 695 | top: "res3b_branch2b" 696 | name: "res3b_branch2b" 697 | type: "Convolution" 698 | convolution_param { 699 | num_output: 128 700 | kernel_size: 3 701 | pad: 1 702 | stride: 1 703 | 
bias_term: false 704 | } 705 | } 706 | 707 | layer { 708 | bottom: "res3b_branch2b" 709 | top: "res3b_branch2b" 710 | name: "bn3b_branch2b" 711 | type: "BatchNorm" 712 | batch_norm_param { 713 | use_global_stats: true 714 | } 715 | } 716 | 717 | layer { 718 | bottom: "res3b_branch2b" 719 | top: "res3b_branch2b" 720 | name: "scale3b_branch2b" 721 | type: "Scale" 722 | scale_param { 723 | bias_term: true 724 | } 725 | } 726 | 727 | layer { 728 | bottom: "res3b_branch2b" 729 | top: "res3b_branch2b" 730 | name: "res3b_branch2b_relu" 731 | type: "ReLU" 732 | } 733 | 734 | layer { 735 | bottom: "res3b_branch2b" 736 | top: "res3b_branch2c" 737 | name: "res3b_branch2c" 738 | type: "Convolution" 739 | convolution_param { 740 | num_output: 512 741 | kernel_size: 1 742 | pad: 0 743 | stride: 1 744 | bias_term: false 745 | } 746 | } 747 | 748 | layer { 749 | bottom: "res3b_branch2c" 750 | top: "res3b_branch2c" 751 | name: "bn3b_branch2c" 752 | type: "BatchNorm" 753 | batch_norm_param { 754 | use_global_stats: true 755 | } 756 | } 757 | 758 | layer { 759 | bottom: "res3b_branch2c" 760 | top: "res3b_branch2c" 761 | name: "scale3b_branch2c" 762 | type: "Scale" 763 | scale_param { 764 | bias_term: true 765 | } 766 | } 767 | 768 | layer { 769 | bottom: "res3a" 770 | bottom: "res3b_branch2c" 771 | top: "res3b" 772 | name: "res3b" 773 | type: "Eltwise" 774 | } 775 | 776 | layer { 777 | bottom: "res3b" 778 | top: "res3b" 779 | name: "res3b_relu" 780 | type: "ReLU" 781 | } 782 | 783 | layer { 784 | bottom: "res3b" 785 | top: "res3c_branch2a" 786 | name: "res3c_branch2a" 787 | type: "Convolution" 788 | convolution_param { 789 | num_output: 128 790 | kernel_size: 1 791 | pad: 0 792 | stride: 1 793 | bias_term: false 794 | } 795 | } 796 | 797 | layer { 798 | bottom: "res3c_branch2a" 799 | top: "res3c_branch2a" 800 | name: "bn3c_branch2a" 801 | type: "BatchNorm" 802 | batch_norm_param { 803 | use_global_stats: true 804 | } 805 | } 806 | 807 | layer { 808 | bottom: "res3c_branch2a" 809 | top: "res3c_branch2a" 810 | name: "scale3c_branch2a" 811 | type: "Scale" 812 | scale_param { 813 | bias_term: true 814 | } 815 | } 816 | 817 | layer { 818 | bottom: "res3c_branch2a" 819 | top: "res3c_branch2a" 820 | name: "res3c_branch2a_relu" 821 | type: "ReLU" 822 | } 823 | 824 | layer { 825 | bottom: "res3c_branch2a" 826 | top: "res3c_branch2b" 827 | name: "res3c_branch2b" 828 | type: "Convolution" 829 | convolution_param { 830 | num_output: 128 831 | kernel_size: 3 832 | pad: 1 833 | stride: 1 834 | bias_term: false 835 | } 836 | } 837 | 838 | layer { 839 | bottom: "res3c_branch2b" 840 | top: "res3c_branch2b" 841 | name: "bn3c_branch2b" 842 | type: "BatchNorm" 843 | batch_norm_param { 844 | use_global_stats: true 845 | } 846 | } 847 | 848 | layer { 849 | bottom: "res3c_branch2b" 850 | top: "res3c_branch2b" 851 | name: "scale3c_branch2b" 852 | type: "Scale" 853 | scale_param { 854 | bias_term: true 855 | } 856 | } 857 | 858 | layer { 859 | bottom: "res3c_branch2b" 860 | top: "res3c_branch2b" 861 | name: "res3c_branch2b_relu" 862 | type: "ReLU" 863 | } 864 | 865 | layer { 866 | bottom: "res3c_branch2b" 867 | top: "res3c_branch2c" 868 | name: "res3c_branch2c" 869 | type: "Convolution" 870 | convolution_param { 871 | num_output: 512 872 | kernel_size: 1 873 | pad: 0 874 | stride: 1 875 | bias_term: false 876 | } 877 | } 878 | 879 | layer { 880 | bottom: "res3c_branch2c" 881 | top: "res3c_branch2c" 882 | name: "bn3c_branch2c" 883 | type: "BatchNorm" 884 | batch_norm_param { 885 | use_global_stats: true 886 | } 887 | } 888 | 889 | 
layer { 890 | bottom: "res3c_branch2c" 891 | top: "res3c_branch2c" 892 | name: "scale3c_branch2c" 893 | type: "Scale" 894 | scale_param { 895 | bias_term: true 896 | } 897 | } 898 | 899 | layer { 900 | bottom: "res3b" 901 | bottom: "res3c_branch2c" 902 | top: "res3c" 903 | name: "res3c" 904 | type: "Eltwise" 905 | } 906 | 907 | layer { 908 | bottom: "res3c" 909 | top: "res3c" 910 | name: "res3c_relu" 911 | type: "ReLU" 912 | } 913 | 914 | layer { 915 | bottom: "res3c" 916 | top: "res3d_branch2a" 917 | name: "res3d_branch2a" 918 | type: "Convolution" 919 | convolution_param { 920 | num_output: 128 921 | kernel_size: 1 922 | pad: 0 923 | stride: 1 924 | bias_term: false 925 | } 926 | } 927 | 928 | layer { 929 | bottom: "res3d_branch2a" 930 | top: "res3d_branch2a" 931 | name: "bn3d_branch2a" 932 | type: "BatchNorm" 933 | batch_norm_param { 934 | use_global_stats: true 935 | } 936 | } 937 | 938 | layer { 939 | bottom: "res3d_branch2a" 940 | top: "res3d_branch2a" 941 | name: "scale3d_branch2a" 942 | type: "Scale" 943 | scale_param { 944 | bias_term: true 945 | } 946 | } 947 | 948 | layer { 949 | bottom: "res3d_branch2a" 950 | top: "res3d_branch2a" 951 | name: "res3d_branch2a_relu" 952 | type: "ReLU" 953 | } 954 | 955 | layer { 956 | bottom: "res3d_branch2a" 957 | top: "res3d_branch2b" 958 | name: "res3d_branch2b" 959 | type: "Convolution" 960 | convolution_param { 961 | num_output: 128 962 | kernel_size: 3 963 | pad: 1 964 | stride: 1 965 | bias_term: false 966 | } 967 | } 968 | 969 | layer { 970 | bottom: "res3d_branch2b" 971 | top: "res3d_branch2b" 972 | name: "bn3d_branch2b" 973 | type: "BatchNorm" 974 | batch_norm_param { 975 | use_global_stats: true 976 | } 977 | } 978 | 979 | layer { 980 | bottom: "res3d_branch2b" 981 | top: "res3d_branch2b" 982 | name: "scale3d_branch2b" 983 | type: "Scale" 984 | scale_param { 985 | bias_term: true 986 | } 987 | } 988 | 989 | layer { 990 | bottom: "res3d_branch2b" 991 | top: "res3d_branch2b" 992 | name: "res3d_branch2b_relu" 993 | type: "ReLU" 994 | } 995 | 996 | layer { 997 | bottom: "res3d_branch2b" 998 | top: "res3d_branch2c" 999 | name: "res3d_branch2c" 1000 | type: "Convolution" 1001 | convolution_param { 1002 | num_output: 512 1003 | kernel_size: 1 1004 | pad: 0 1005 | stride: 1 1006 | bias_term: false 1007 | } 1008 | } 1009 | 1010 | layer { 1011 | bottom: "res3d_branch2c" 1012 | top: "res3d_branch2c" 1013 | name: "bn3d_branch2c" 1014 | type: "BatchNorm" 1015 | batch_norm_param { 1016 | use_global_stats: true 1017 | } 1018 | } 1019 | 1020 | layer { 1021 | bottom: "res3d_branch2c" 1022 | top: "res3d_branch2c" 1023 | name: "scale3d_branch2c" 1024 | type: "Scale" 1025 | scale_param { 1026 | bias_term: true 1027 | } 1028 | } 1029 | 1030 | layer { 1031 | bottom: "res3c" 1032 | bottom: "res3d_branch2c" 1033 | top: "res3d" 1034 | name: "res3d" 1035 | type: "Eltwise" 1036 | } 1037 | 1038 | layer { 1039 | bottom: "res3d" 1040 | top: "res3d" 1041 | name: "res3d_relu" 1042 | type: "ReLU" 1043 | } 1044 | 1045 | layer { 1046 | bottom: "res3d" 1047 | top: "res4a_branch1" 1048 | name: "res4a_branch1" 1049 | type: "Convolution" 1050 | convolution_param { 1051 | num_output: 1024 1052 | kernel_size: 1 1053 | pad: 0 1054 | stride: 2 1055 | bias_term: false 1056 | } 1057 | } 1058 | 1059 | layer { 1060 | bottom: "res4a_branch1" 1061 | top: "res4a_branch1" 1062 | name: "bn4a_branch1" 1063 | type: "BatchNorm" 1064 | batch_norm_param { 1065 | use_global_stats: true 1066 | } 1067 | } 1068 | 1069 | layer { 1070 | bottom: "res4a_branch1" 1071 | top: "res4a_branch1" 1072 | 
name: "scale4a_branch1" 1073 | type: "Scale" 1074 | scale_param { 1075 | bias_term: true 1076 | } 1077 | } 1078 | 1079 | layer { 1080 | bottom: "res3d" 1081 | top: "res4a_branch2a" 1082 | name: "res4a_branch2a" 1083 | type: "Convolution" 1084 | convolution_param { 1085 | num_output: 256 1086 | kernel_size: 1 1087 | pad: 0 1088 | stride: 2 1089 | bias_term: false 1090 | } 1091 | } 1092 | 1093 | layer { 1094 | bottom: "res4a_branch2a" 1095 | top: "res4a_branch2a" 1096 | name: "bn4a_branch2a" 1097 | type: "BatchNorm" 1098 | batch_norm_param { 1099 | use_global_stats: true 1100 | } 1101 | } 1102 | 1103 | layer { 1104 | bottom: "res4a_branch2a" 1105 | top: "res4a_branch2a" 1106 | name: "scale4a_branch2a" 1107 | type: "Scale" 1108 | scale_param { 1109 | bias_term: true 1110 | } 1111 | } 1112 | 1113 | layer { 1114 | bottom: "res4a_branch2a" 1115 | top: "res4a_branch2a" 1116 | name: "res4a_branch2a_relu" 1117 | type: "ReLU" 1118 | } 1119 | 1120 | layer { 1121 | bottom: "res4a_branch2a" 1122 | top: "res4a_branch2b" 1123 | name: "res4a_branch2b" 1124 | type: "Convolution" 1125 | convolution_param { 1126 | num_output: 256 1127 | kernel_size: 3 1128 | pad: 1 1129 | stride: 1 1130 | bias_term: false 1131 | } 1132 | } 1133 | 1134 | layer { 1135 | bottom: "res4a_branch2b" 1136 | top: "res4a_branch2b" 1137 | name: "bn4a_branch2b" 1138 | type: "BatchNorm" 1139 | batch_norm_param { 1140 | use_global_stats: true 1141 | } 1142 | } 1143 | 1144 | layer { 1145 | bottom: "res4a_branch2b" 1146 | top: "res4a_branch2b" 1147 | name: "scale4a_branch2b" 1148 | type: "Scale" 1149 | scale_param { 1150 | bias_term: true 1151 | } 1152 | } 1153 | 1154 | layer { 1155 | bottom: "res4a_branch2b" 1156 | top: "res4a_branch2b" 1157 | name: "res4a_branch2b_relu" 1158 | type: "ReLU" 1159 | } 1160 | 1161 | layer { 1162 | bottom: "res4a_branch2b" 1163 | top: "res4a_branch2c" 1164 | name: "res4a_branch2c" 1165 | type: "Convolution" 1166 | convolution_param { 1167 | num_output: 1024 1168 | kernel_size: 1 1169 | pad: 0 1170 | stride: 1 1171 | bias_term: false 1172 | } 1173 | } 1174 | 1175 | layer { 1176 | bottom: "res4a_branch2c" 1177 | top: "res4a_branch2c" 1178 | name: "bn4a_branch2c" 1179 | type: "BatchNorm" 1180 | batch_norm_param { 1181 | use_global_stats: true 1182 | } 1183 | } 1184 | 1185 | layer { 1186 | bottom: "res4a_branch2c" 1187 | top: "res4a_branch2c" 1188 | name: "scale4a_branch2c" 1189 | type: "Scale" 1190 | scale_param { 1191 | bias_term: true 1192 | } 1193 | } 1194 | 1195 | layer { 1196 | bottom: "res4a_branch1" 1197 | bottom: "res4a_branch2c" 1198 | top: "res4a" 1199 | name: "res4a" 1200 | type: "Eltwise" 1201 | } 1202 | 1203 | layer { 1204 | bottom: "res4a" 1205 | top: "res4a" 1206 | name: "res4a_relu" 1207 | type: "ReLU" 1208 | } 1209 | 1210 | layer { 1211 | bottom: "res4a" 1212 | top: "res4b_branch2a" 1213 | name: "res4b_branch2a" 1214 | type: "Convolution" 1215 | convolution_param { 1216 | num_output: 256 1217 | kernel_size: 1 1218 | pad: 0 1219 | stride: 1 1220 | bias_term: false 1221 | } 1222 | } 1223 | 1224 | layer { 1225 | bottom: "res4b_branch2a" 1226 | top: "res4b_branch2a" 1227 | name: "bn4b_branch2a" 1228 | type: "BatchNorm" 1229 | batch_norm_param { 1230 | use_global_stats: true 1231 | } 1232 | } 1233 | 1234 | layer { 1235 | bottom: "res4b_branch2a" 1236 | top: "res4b_branch2a" 1237 | name: "scale4b_branch2a" 1238 | type: "Scale" 1239 | scale_param { 1240 | bias_term: true 1241 | } 1242 | } 1243 | 1244 | layer { 1245 | bottom: "res4b_branch2a" 1246 | top: "res4b_branch2a" 1247 | name: 
"res4b_branch2a_relu" 1248 | type: "ReLU" 1249 | } 1250 | 1251 | layer { 1252 | bottom: "res4b_branch2a" 1253 | top: "res4b_branch2b" 1254 | name: "res4b_branch2b" 1255 | type: "Convolution" 1256 | convolution_param { 1257 | num_output: 256 1258 | kernel_size: 3 1259 | pad: 1 1260 | stride: 1 1261 | bias_term: false 1262 | } 1263 | } 1264 | 1265 | layer { 1266 | bottom: "res4b_branch2b" 1267 | top: "res4b_branch2b" 1268 | name: "bn4b_branch2b" 1269 | type: "BatchNorm" 1270 | batch_norm_param { 1271 | use_global_stats: true 1272 | } 1273 | } 1274 | 1275 | layer { 1276 | bottom: "res4b_branch2b" 1277 | top: "res4b_branch2b" 1278 | name: "scale4b_branch2b" 1279 | type: "Scale" 1280 | scale_param { 1281 | bias_term: true 1282 | } 1283 | } 1284 | 1285 | layer { 1286 | bottom: "res4b_branch2b" 1287 | top: "res4b_branch2b" 1288 | name: "res4b_branch2b_relu" 1289 | type: "ReLU" 1290 | } 1291 | 1292 | layer { 1293 | bottom: "res4b_branch2b" 1294 | top: "res4b_branch2c" 1295 | name: "res4b_branch2c" 1296 | type: "Convolution" 1297 | convolution_param { 1298 | num_output: 1024 1299 | kernel_size: 1 1300 | pad: 0 1301 | stride: 1 1302 | bias_term: false 1303 | } 1304 | } 1305 | 1306 | layer { 1307 | bottom: "res4b_branch2c" 1308 | top: "res4b_branch2c" 1309 | name: "bn4b_branch2c" 1310 | type: "BatchNorm" 1311 | batch_norm_param { 1312 | use_global_stats: true 1313 | } 1314 | } 1315 | 1316 | layer { 1317 | bottom: "res4b_branch2c" 1318 | top: "res4b_branch2c" 1319 | name: "scale4b_branch2c" 1320 | type: "Scale" 1321 | scale_param { 1322 | bias_term: true 1323 | } 1324 | } 1325 | 1326 | layer { 1327 | bottom: "res4a" 1328 | bottom: "res4b_branch2c" 1329 | top: "res4b" 1330 | name: "res4b" 1331 | type: "Eltwise" 1332 | } 1333 | 1334 | layer { 1335 | bottom: "res4b" 1336 | top: "res4b" 1337 | name: "res4b_relu" 1338 | type: "ReLU" 1339 | } 1340 | 1341 | layer { 1342 | bottom: "res4b" 1343 | top: "res4c_branch2a" 1344 | name: "res4c_branch2a" 1345 | type: "Convolution" 1346 | convolution_param { 1347 | num_output: 256 1348 | kernel_size: 1 1349 | pad: 0 1350 | stride: 1 1351 | bias_term: false 1352 | } 1353 | } 1354 | 1355 | layer { 1356 | bottom: "res4c_branch2a" 1357 | top: "res4c_branch2a" 1358 | name: "bn4c_branch2a" 1359 | type: "BatchNorm" 1360 | batch_norm_param { 1361 | use_global_stats: true 1362 | } 1363 | } 1364 | 1365 | layer { 1366 | bottom: "res4c_branch2a" 1367 | top: "res4c_branch2a" 1368 | name: "scale4c_branch2a" 1369 | type: "Scale" 1370 | scale_param { 1371 | bias_term: true 1372 | } 1373 | } 1374 | 1375 | layer { 1376 | bottom: "res4c_branch2a" 1377 | top: "res4c_branch2a" 1378 | name: "res4c_branch2a_relu" 1379 | type: "ReLU" 1380 | } 1381 | 1382 | layer { 1383 | bottom: "res4c_branch2a" 1384 | top: "res4c_branch2b" 1385 | name: "res4c_branch2b" 1386 | type: "Convolution" 1387 | convolution_param { 1388 | num_output: 256 1389 | kernel_size: 3 1390 | pad: 1 1391 | stride: 1 1392 | bias_term: false 1393 | } 1394 | } 1395 | 1396 | layer { 1397 | bottom: "res4c_branch2b" 1398 | top: "res4c_branch2b" 1399 | name: "bn4c_branch2b" 1400 | type: "BatchNorm" 1401 | batch_norm_param { 1402 | use_global_stats: true 1403 | } 1404 | } 1405 | 1406 | layer { 1407 | bottom: "res4c_branch2b" 1408 | top: "res4c_branch2b" 1409 | name: "scale4c_branch2b" 1410 | type: "Scale" 1411 | scale_param { 1412 | bias_term: true 1413 | } 1414 | } 1415 | 1416 | layer { 1417 | bottom: "res4c_branch2b" 1418 | top: "res4c_branch2b" 1419 | name: "res4c_branch2b_relu" 1420 | type: "ReLU" 1421 | } 1422 | 1423 | layer { 
1424 | bottom: "res4c_branch2b" 1425 | top: "res4c_branch2c" 1426 | name: "res4c_branch2c" 1427 | type: "Convolution" 1428 | convolution_param { 1429 | num_output: 1024 1430 | kernel_size: 1 1431 | pad: 0 1432 | stride: 1 1433 | bias_term: false 1434 | } 1435 | } 1436 | 1437 | layer { 1438 | bottom: "res4c_branch2c" 1439 | top: "res4c_branch2c" 1440 | name: "bn4c_branch2c" 1441 | type: "BatchNorm" 1442 | batch_norm_param { 1443 | use_global_stats: true 1444 | } 1445 | } 1446 | 1447 | layer { 1448 | bottom: "res4c_branch2c" 1449 | top: "res4c_branch2c" 1450 | name: "scale4c_branch2c" 1451 | type: "Scale" 1452 | scale_param { 1453 | bias_term: true 1454 | } 1455 | } 1456 | 1457 | layer { 1458 | bottom: "res4b" 1459 | bottom: "res4c_branch2c" 1460 | top: "res4c" 1461 | name: "res4c" 1462 | type: "Eltwise" 1463 | } 1464 | 1465 | layer { 1466 | bottom: "res4c" 1467 | top: "res4c" 1468 | name: "res4c_relu" 1469 | type: "ReLU" 1470 | } 1471 | 1472 | layer { 1473 | bottom: "res4c" 1474 | top: "res4d_branch2a" 1475 | name: "res4d_branch2a" 1476 | type: "Convolution" 1477 | convolution_param { 1478 | num_output: 256 1479 | kernel_size: 1 1480 | pad: 0 1481 | stride: 1 1482 | bias_term: false 1483 | } 1484 | } 1485 | 1486 | layer { 1487 | bottom: "res4d_branch2a" 1488 | top: "res4d_branch2a" 1489 | name: "bn4d_branch2a" 1490 | type: "BatchNorm" 1491 | batch_norm_param { 1492 | use_global_stats: true 1493 | } 1494 | } 1495 | 1496 | layer { 1497 | bottom: "res4d_branch2a" 1498 | top: "res4d_branch2a" 1499 | name: "scale4d_branch2a" 1500 | type: "Scale" 1501 | scale_param { 1502 | bias_term: true 1503 | } 1504 | } 1505 | 1506 | layer { 1507 | bottom: "res4d_branch2a" 1508 | top: "res4d_branch2a" 1509 | name: "res4d_branch2a_relu" 1510 | type: "ReLU" 1511 | } 1512 | 1513 | layer { 1514 | bottom: "res4d_branch2a" 1515 | top: "res4d_branch2b" 1516 | name: "res4d_branch2b" 1517 | type: "Convolution" 1518 | convolution_param { 1519 | num_output: 256 1520 | kernel_size: 3 1521 | pad: 1 1522 | stride: 1 1523 | bias_term: false 1524 | } 1525 | } 1526 | 1527 | layer { 1528 | bottom: "res4d_branch2b" 1529 | top: "res4d_branch2b" 1530 | name: "bn4d_branch2b" 1531 | type: "BatchNorm" 1532 | batch_norm_param { 1533 | use_global_stats: true 1534 | } 1535 | } 1536 | 1537 | layer { 1538 | bottom: "res4d_branch2b" 1539 | top: "res4d_branch2b" 1540 | name: "scale4d_branch2b" 1541 | type: "Scale" 1542 | scale_param { 1543 | bias_term: true 1544 | } 1545 | } 1546 | 1547 | layer { 1548 | bottom: "res4d_branch2b" 1549 | top: "res4d_branch2b" 1550 | name: "res4d_branch2b_relu" 1551 | type: "ReLU" 1552 | } 1553 | 1554 | layer { 1555 | bottom: "res4d_branch2b" 1556 | top: "res4d_branch2c" 1557 | name: "res4d_branch2c" 1558 | type: "Convolution" 1559 | convolution_param { 1560 | num_output: 1024 1561 | kernel_size: 1 1562 | pad: 0 1563 | stride: 1 1564 | bias_term: false 1565 | } 1566 | } 1567 | 1568 | layer { 1569 | bottom: "res4d_branch2c" 1570 | top: "res4d_branch2c" 1571 | name: "bn4d_branch2c" 1572 | type: "BatchNorm" 1573 | batch_norm_param { 1574 | use_global_stats: true 1575 | } 1576 | } 1577 | 1578 | layer { 1579 | bottom: "res4d_branch2c" 1580 | top: "res4d_branch2c" 1581 | name: "scale4d_branch2c" 1582 | type: "Scale" 1583 | scale_param { 1584 | bias_term: true 1585 | } 1586 | } 1587 | 1588 | layer { 1589 | bottom: "res4c" 1590 | bottom: "res4d_branch2c" 1591 | top: "res4d" 1592 | name: "res4d" 1593 | type: "Eltwise" 1594 | } 1595 | 1596 | layer { 1597 | bottom: "res4d" 1598 | top: "res4d" 1599 | name: "res4d_relu" 
1600 | type: "ReLU" 1601 | } 1602 | 1603 | layer { 1604 | bottom: "res4d" 1605 | top: "res4e_branch2a" 1606 | name: "res4e_branch2a" 1607 | type: "Convolution" 1608 | convolution_param { 1609 | num_output: 256 1610 | kernel_size: 1 1611 | pad: 0 1612 | stride: 1 1613 | bias_term: false 1614 | } 1615 | } 1616 | 1617 | layer { 1618 | bottom: "res4e_branch2a" 1619 | top: "res4e_branch2a" 1620 | name: "bn4e_branch2a" 1621 | type: "BatchNorm" 1622 | batch_norm_param { 1623 | use_global_stats: true 1624 | } 1625 | } 1626 | 1627 | layer { 1628 | bottom: "res4e_branch2a" 1629 | top: "res4e_branch2a" 1630 | name: "scale4e_branch2a" 1631 | type: "Scale" 1632 | scale_param { 1633 | bias_term: true 1634 | } 1635 | } 1636 | 1637 | layer { 1638 | bottom: "res4e_branch2a" 1639 | top: "res4e_branch2a" 1640 | name: "res4e_branch2a_relu" 1641 | type: "ReLU" 1642 | } 1643 | 1644 | layer { 1645 | bottom: "res4e_branch2a" 1646 | top: "res4e_branch2b" 1647 | name: "res4e_branch2b" 1648 | type: "Convolution" 1649 | convolution_param { 1650 | num_output: 256 1651 | kernel_size: 3 1652 | pad: 1 1653 | stride: 1 1654 | bias_term: false 1655 | } 1656 | } 1657 | 1658 | layer { 1659 | bottom: "res4e_branch2b" 1660 | top: "res4e_branch2b" 1661 | name: "bn4e_branch2b" 1662 | type: "BatchNorm" 1663 | batch_norm_param { 1664 | use_global_stats: true 1665 | } 1666 | } 1667 | 1668 | layer { 1669 | bottom: "res4e_branch2b" 1670 | top: "res4e_branch2b" 1671 | name: "scale4e_branch2b" 1672 | type: "Scale" 1673 | scale_param { 1674 | bias_term: true 1675 | } 1676 | } 1677 | 1678 | layer { 1679 | bottom: "res4e_branch2b" 1680 | top: "res4e_branch2b" 1681 | name: "res4e_branch2b_relu" 1682 | type: "ReLU" 1683 | } 1684 | 1685 | layer { 1686 | bottom: "res4e_branch2b" 1687 | top: "res4e_branch2c" 1688 | name: "res4e_branch2c" 1689 | type: "Convolution" 1690 | convolution_param { 1691 | num_output: 1024 1692 | kernel_size: 1 1693 | pad: 0 1694 | stride: 1 1695 | bias_term: false 1696 | } 1697 | } 1698 | 1699 | layer { 1700 | bottom: "res4e_branch2c" 1701 | top: "res4e_branch2c" 1702 | name: "bn4e_branch2c" 1703 | type: "BatchNorm" 1704 | batch_norm_param { 1705 | use_global_stats: true 1706 | } 1707 | } 1708 | 1709 | layer { 1710 | bottom: "res4e_branch2c" 1711 | top: "res4e_branch2c" 1712 | name: "scale4e_branch2c" 1713 | type: "Scale" 1714 | scale_param { 1715 | bias_term: true 1716 | } 1717 | } 1718 | 1719 | layer { 1720 | bottom: "res4d" 1721 | bottom: "res4e_branch2c" 1722 | top: "res4e" 1723 | name: "res4e" 1724 | type: "Eltwise" 1725 | } 1726 | 1727 | layer { 1728 | bottom: "res4e" 1729 | top: "res4e" 1730 | name: "res4e_relu" 1731 | type: "ReLU" 1732 | } 1733 | 1734 | layer { 1735 | bottom: "res4e" 1736 | top: "res4f_branch2a" 1737 | name: "res4f_branch2a" 1738 | type: "Convolution" 1739 | convolution_param { 1740 | num_output: 256 1741 | kernel_size: 1 1742 | pad: 0 1743 | stride: 1 1744 | bias_term: false 1745 | } 1746 | } 1747 | 1748 | layer { 1749 | bottom: "res4f_branch2a" 1750 | top: "res4f_branch2a" 1751 | name: "bn4f_branch2a" 1752 | type: "BatchNorm" 1753 | batch_norm_param { 1754 | use_global_stats: true 1755 | } 1756 | } 1757 | 1758 | layer { 1759 | bottom: "res4f_branch2a" 1760 | top: "res4f_branch2a" 1761 | name: "scale4f_branch2a" 1762 | type: "Scale" 1763 | scale_param { 1764 | bias_term: true 1765 | } 1766 | } 1767 | 1768 | layer { 1769 | bottom: "res4f_branch2a" 1770 | top: "res4f_branch2a" 1771 | name: "res4f_branch2a_relu" 1772 | type: "ReLU" 1773 | } 1774 | 1775 | layer { 1776 | bottom: "res4f_branch2a" 
1777 | top: "res4f_branch2b" 1778 | name: "res4f_branch2b" 1779 | type: "Convolution" 1780 | convolution_param { 1781 | num_output: 256 1782 | kernel_size: 3 1783 | pad: 1 1784 | stride: 1 1785 | bias_term: false 1786 | } 1787 | } 1788 | 1789 | layer { 1790 | bottom: "res4f_branch2b" 1791 | top: "res4f_branch2b" 1792 | name: "bn4f_branch2b" 1793 | type: "BatchNorm" 1794 | batch_norm_param { 1795 | use_global_stats: true 1796 | } 1797 | } 1798 | 1799 | layer { 1800 | bottom: "res4f_branch2b" 1801 | top: "res4f_branch2b" 1802 | name: "scale4f_branch2b" 1803 | type: "Scale" 1804 | scale_param { 1805 | bias_term: true 1806 | } 1807 | } 1808 | 1809 | layer { 1810 | bottom: "res4f_branch2b" 1811 | top: "res4f_branch2b" 1812 | name: "res4f_branch2b_relu" 1813 | type: "ReLU" 1814 | } 1815 | 1816 | layer { 1817 | bottom: "res4f_branch2b" 1818 | top: "res4f_branch2c" 1819 | name: "res4f_branch2c" 1820 | type: "Convolution" 1821 | convolution_param { 1822 | num_output: 1024 1823 | kernel_size: 1 1824 | pad: 0 1825 | stride: 1 1826 | bias_term: false 1827 | } 1828 | } 1829 | 1830 | layer { 1831 | bottom: "res4f_branch2c" 1832 | top: "res4f_branch2c" 1833 | name: "bn4f_branch2c" 1834 | type: "BatchNorm" 1835 | batch_norm_param { 1836 | use_global_stats: true 1837 | } 1838 | } 1839 | 1840 | layer { 1841 | bottom: "res4f_branch2c" 1842 | top: "res4f_branch2c" 1843 | name: "scale4f_branch2c" 1844 | type: "Scale" 1845 | scale_param { 1846 | bias_term: true 1847 | } 1848 | } 1849 | 1850 | layer { 1851 | bottom: "res4e" 1852 | bottom: "res4f_branch2c" 1853 | top: "res4f" 1854 | name: "res4f" 1855 | type: "Eltwise" 1856 | } 1857 | 1858 | layer { 1859 | bottom: "res4f" 1860 | top: "res4f" 1861 | name: "res4f_relu" 1862 | type: "ReLU" 1863 | } 1864 | 1865 | layer { 1866 | bottom: "res4f" 1867 | top: "res5a_branch1" 1868 | name: "res5a_branch1" 1869 | type: "Convolution" 1870 | convolution_param { 1871 | num_output: 2048 1872 | kernel_size: 1 1873 | pad: 0 1874 | stride: 2 1875 | bias_term: false 1876 | } 1877 | } 1878 | 1879 | layer { 1880 | bottom: "res5a_branch1" 1881 | top: "res5a_branch1" 1882 | name: "bn5a_branch1" 1883 | type: "BatchNorm" 1884 | batch_norm_param { 1885 | use_global_stats: true 1886 | } 1887 | } 1888 | 1889 | layer { 1890 | bottom: "res5a_branch1" 1891 | top: "res5a_branch1" 1892 | name: "scale5a_branch1" 1893 | type: "Scale" 1894 | scale_param { 1895 | bias_term: true 1896 | } 1897 | } 1898 | 1899 | layer { 1900 | bottom: "res4f" 1901 | top: "res5a_branch2a" 1902 | name: "res5a_branch2a" 1903 | type: "Convolution" 1904 | convolution_param { 1905 | num_output: 512 1906 | kernel_size: 1 1907 | pad: 0 1908 | stride: 2 1909 | bias_term: false 1910 | } 1911 | } 1912 | 1913 | layer { 1914 | bottom: "res5a_branch2a" 1915 | top: "res5a_branch2a" 1916 | name: "bn5a_branch2a" 1917 | type: "BatchNorm" 1918 | batch_norm_param { 1919 | use_global_stats: true 1920 | } 1921 | } 1922 | 1923 | layer { 1924 | bottom: "res5a_branch2a" 1925 | top: "res5a_branch2a" 1926 | name: "scale5a_branch2a" 1927 | type: "Scale" 1928 | scale_param { 1929 | bias_term: true 1930 | } 1931 | } 1932 | 1933 | layer { 1934 | bottom: "res5a_branch2a" 1935 | top: "res5a_branch2a" 1936 | name: "res5a_branch2a_relu" 1937 | type: "ReLU" 1938 | } 1939 | 1940 | layer { 1941 | bottom: "res5a_branch2a" 1942 | top: "res5a_branch2b" 1943 | name: "res5a_branch2b" 1944 | type: "Convolution" 1945 | convolution_param { 1946 | num_output: 512 1947 | kernel_size: 3 1948 | pad: 1 1949 | stride: 1 1950 | bias_term: false 1951 | } 1952 | } 1953 | 
1954 | layer { 1955 | bottom: "res5a_branch2b" 1956 | top: "res5a_branch2b" 1957 | name: "bn5a_branch2b" 1958 | type: "BatchNorm" 1959 | batch_norm_param { 1960 | use_global_stats: true 1961 | } 1962 | } 1963 | 1964 | layer { 1965 | bottom: "res5a_branch2b" 1966 | top: "res5a_branch2b" 1967 | name: "scale5a_branch2b" 1968 | type: "Scale" 1969 | scale_param { 1970 | bias_term: true 1971 | } 1972 | } 1973 | 1974 | layer { 1975 | bottom: "res5a_branch2b" 1976 | top: "res5a_branch2b" 1977 | name: "res5a_branch2b_relu" 1978 | type: "ReLU" 1979 | } 1980 | 1981 | layer { 1982 | bottom: "res5a_branch2b" 1983 | top: "res5a_branch2c" 1984 | name: "res5a_branch2c" 1985 | type: "Convolution" 1986 | convolution_param { 1987 | num_output: 2048 1988 | kernel_size: 1 1989 | pad: 0 1990 | stride: 1 1991 | bias_term: false 1992 | } 1993 | } 1994 | 1995 | layer { 1996 | bottom: "res5a_branch2c" 1997 | top: "res5a_branch2c" 1998 | name: "bn5a_branch2c" 1999 | type: "BatchNorm" 2000 | batch_norm_param { 2001 | use_global_stats: true 2002 | } 2003 | } 2004 | 2005 | layer { 2006 | bottom: "res5a_branch2c" 2007 | top: "res5a_branch2c" 2008 | name: "scale5a_branch2c" 2009 | type: "Scale" 2010 | scale_param { 2011 | bias_term: true 2012 | } 2013 | } 2014 | 2015 | layer { 2016 | bottom: "res5a_branch1" 2017 | bottom: "res5a_branch2c" 2018 | top: "res5a" 2019 | name: "res5a" 2020 | type: "Eltwise" 2021 | } 2022 | 2023 | layer { 2024 | bottom: "res5a" 2025 | top: "res5a" 2026 | name: "res5a_relu" 2027 | type: "ReLU" 2028 | } 2029 | 2030 | layer { 2031 | bottom: "res5a" 2032 | top: "res5b_branch2a" 2033 | name: "res5b_branch2a" 2034 | type: "Convolution" 2035 | convolution_param { 2036 | num_output: 512 2037 | kernel_size: 1 2038 | pad: 0 2039 | stride: 1 2040 | bias_term: false 2041 | } 2042 | } 2043 | 2044 | layer { 2045 | bottom: "res5b_branch2a" 2046 | top: "res5b_branch2a" 2047 | name: "bn5b_branch2a" 2048 | type: "BatchNorm" 2049 | batch_norm_param { 2050 | use_global_stats: true 2051 | } 2052 | } 2053 | 2054 | layer { 2055 | bottom: "res5b_branch2a" 2056 | top: "res5b_branch2a" 2057 | name: "scale5b_branch2a" 2058 | type: "Scale" 2059 | scale_param { 2060 | bias_term: true 2061 | } 2062 | } 2063 | 2064 | layer { 2065 | bottom: "res5b_branch2a" 2066 | top: "res5b_branch2a" 2067 | name: "res5b_branch2a_relu" 2068 | type: "ReLU" 2069 | } 2070 | 2071 | layer { 2072 | bottom: "res5b_branch2a" 2073 | top: "res5b_branch2b" 2074 | name: "res5b_branch2b" 2075 | type: "Convolution" 2076 | convolution_param { 2077 | num_output: 512 2078 | kernel_size: 3 2079 | pad: 1 2080 | stride: 1 2081 | bias_term: false 2082 | } 2083 | } 2084 | 2085 | layer { 2086 | bottom: "res5b_branch2b" 2087 | top: "res5b_branch2b" 2088 | name: "bn5b_branch2b" 2089 | type: "BatchNorm" 2090 | batch_norm_param { 2091 | use_global_stats: true 2092 | } 2093 | } 2094 | 2095 | layer { 2096 | bottom: "res5b_branch2b" 2097 | top: "res5b_branch2b" 2098 | name: "scale5b_branch2b" 2099 | type: "Scale" 2100 | scale_param { 2101 | bias_term: true 2102 | } 2103 | } 2104 | 2105 | layer { 2106 | bottom: "res5b_branch2b" 2107 | top: "res5b_branch2b" 2108 | name: "res5b_branch2b_relu" 2109 | type: "ReLU" 2110 | } 2111 | 2112 | layer { 2113 | bottom: "res5b_branch2b" 2114 | top: "res5b_branch2c" 2115 | name: "res5b_branch2c" 2116 | type: "Convolution" 2117 | convolution_param { 2118 | num_output: 2048 2119 | kernel_size: 1 2120 | pad: 0 2121 | stride: 1 2122 | bias_term: false 2123 | } 2124 | } 2125 | 2126 | layer { 2127 | bottom: "res5b_branch2c" 2128 | top: 
"res5b_branch2c" 2129 | name: "bn5b_branch2c" 2130 | type: "BatchNorm" 2131 | batch_norm_param { 2132 | use_global_stats: true 2133 | } 2134 | } 2135 | 2136 | layer { 2137 | bottom: "res5b_branch2c" 2138 | top: "res5b_branch2c" 2139 | name: "scale5b_branch2c" 2140 | type: "Scale" 2141 | scale_param { 2142 | bias_term: true 2143 | } 2144 | } 2145 | 2146 | layer { 2147 | bottom: "res5a" 2148 | bottom: "res5b_branch2c" 2149 | top: "res5b" 2150 | name: "res5b" 2151 | type: "Eltwise" 2152 | } 2153 | 2154 | layer { 2155 | bottom: "res5b" 2156 | top: "res5b" 2157 | name: "res5b_relu" 2158 | type: "ReLU" 2159 | } 2160 | 2161 | layer { 2162 | bottom: "res5b" 2163 | top: "res5c_branch2a" 2164 | name: "res5c_branch2a" 2165 | type: "Convolution" 2166 | convolution_param { 2167 | num_output: 512 2168 | kernel_size: 1 2169 | pad: 0 2170 | stride: 1 2171 | bias_term: false 2172 | } 2173 | } 2174 | 2175 | layer { 2176 | bottom: "res5c_branch2a" 2177 | top: "res5c_branch2a" 2178 | name: "bn5c_branch2a" 2179 | type: "BatchNorm" 2180 | batch_norm_param { 2181 | use_global_stats: true 2182 | } 2183 | } 2184 | 2185 | layer { 2186 | bottom: "res5c_branch2a" 2187 | top: "res5c_branch2a" 2188 | name: "scale5c_branch2a" 2189 | type: "Scale" 2190 | scale_param { 2191 | bias_term: true 2192 | } 2193 | } 2194 | 2195 | layer { 2196 | bottom: "res5c_branch2a" 2197 | top: "res5c_branch2a" 2198 | name: "res5c_branch2a_relu" 2199 | type: "ReLU" 2200 | } 2201 | 2202 | layer { 2203 | bottom: "res5c_branch2a" 2204 | top: "res5c_branch2b" 2205 | name: "res5c_branch2b" 2206 | type: "Convolution" 2207 | convolution_param { 2208 | num_output: 512 2209 | kernel_size: 3 2210 | pad: 1 2211 | stride: 1 2212 | bias_term: false 2213 | } 2214 | } 2215 | 2216 | layer { 2217 | bottom: "res5c_branch2b" 2218 | top: "res5c_branch2b" 2219 | name: "bn5c_branch2b" 2220 | type: "BatchNorm" 2221 | batch_norm_param { 2222 | use_global_stats: true 2223 | } 2224 | } 2225 | 2226 | layer { 2227 | bottom: "res5c_branch2b" 2228 | top: "res5c_branch2b" 2229 | name: "scale5c_branch2b" 2230 | type: "Scale" 2231 | scale_param { 2232 | bias_term: true 2233 | } 2234 | } 2235 | 2236 | layer { 2237 | bottom: "res5c_branch2b" 2238 | top: "res5c_branch2b" 2239 | name: "res5c_branch2b_relu" 2240 | type: "ReLU" 2241 | } 2242 | 2243 | layer { 2244 | bottom: "res5c_branch2b" 2245 | top: "res5c_branch2c" 2246 | name: "res5c_branch2c" 2247 | type: "Convolution" 2248 | convolution_param { 2249 | num_output: 2048 2250 | kernel_size: 1 2251 | pad: 0 2252 | stride: 1 2253 | bias_term: false 2254 | } 2255 | } 2256 | 2257 | layer { 2258 | bottom: "res5c_branch2c" 2259 | top: "res5c_branch2c" 2260 | name: "bn5c_branch2c" 2261 | type: "BatchNorm" 2262 | batch_norm_param { 2263 | use_global_stats: true 2264 | } 2265 | } 2266 | 2267 | layer { 2268 | bottom: "res5c_branch2c" 2269 | top: "res5c_branch2c" 2270 | name: "scale5c_branch2c" 2271 | type: "Scale" 2272 | scale_param { 2273 | bias_term: true 2274 | } 2275 | } 2276 | 2277 | layer { 2278 | bottom: "res5b" 2279 | bottom: "res5c_branch2c" 2280 | top: "res5c" 2281 | name: "res5c" 2282 | type: "Eltwise" 2283 | } 2284 | 2285 | layer { 2286 | bottom: "res5c" 2287 | top: "res5c" 2288 | name: "res5c_relu" 2289 | type: "ReLU" 2290 | } 2291 | 2292 | layer { 2293 | bottom: "res5c" 2294 | top: "pool5" 2295 | name: "pool5" 2296 | type: "Pooling" 2297 | pooling_param { 2298 | kernel_size: 7 2299 | stride: 1 2300 | pool: AVE 2301 | } 2302 | } 2303 | 2304 | layer { 2305 | bottom: "pool5" 2306 | top: "fc1000" 2307 | name: "fc1000" 2308 | 
type: "InnerProduct" 2309 | inner_product_param { 2310 | num_output: 1000 2311 | } 2312 | } 2313 | 2314 | layer { 2315 | bottom: "fc1000" 2316 | top: "prob" 2317 | name: "prob" 2318 | type: "Softmax" 2319 | } 2320 | 2321 | -------------------------------------------------------------------------------- /data/ResNet_mean.binaryproto: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IreneLini94/tensorflow-resnet/af81a30eb29d95c8319136e3bf62f024f9ce4865/data/ResNet_mean.binaryproto -------------------------------------------------------------------------------- /data/cat.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IreneLini94/tensorflow-resnet/af81a30eb29d95c8319136e3bf62f024f9ce4865/data/cat.jpg -------------------------------------------------------------------------------- /data/tensorflow-resnet-pretrained-20160509.tar.gz.torrent: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IreneLini94/tensorflow-resnet/af81a30eb29d95c8319136e3bf62f024f9ce4865/data/tensorflow-resnet-pretrained-20160509.tar.gz.torrent -------------------------------------------------------------------------------- /forward.py: -------------------------------------------------------------------------------- 1 | from convert import print_prob, load_image, checkpoint_fn, meta_fn 2 | import tensorflow as tf 3 | 4 | layers = 50 5 | 6 | img = load_image("data/cat.jpg") 7 | 8 | sess = tf.Session() 9 | 10 | new_saver = tf.train.import_meta_graph(meta_fn(layers)) 11 | new_saver.restore(sess, checkpoint_fn(layers)) 12 | 13 | graph = tf.get_default_graph() 14 | prob_tensor = graph.get_tensor_by_name("prob:0") 15 | images = graph.get_tensor_by_name("images:0") 16 | for op in graph.get_operations(): 17 | print op.name 18 | 19 | #init = tf.initialize_all_variables() 20 | #sess.run(init) 21 | print "graph restored" 22 | 23 | batch = img.reshape((1, 224, 224, 3)) 24 | 25 | feed_dict = {images: batch} 26 | 27 | prob = sess.run(prob_tensor, feed_dict=feed_dict) 28 | 29 | print_prob(prob[0]) 30 | -------------------------------------------------------------------------------- /guess.py: -------------------------------------------------------------------------------- 1 | #encoding:utf-8 2 | from resnet import * 3 | import tensorflow as tf 4 | import sys 5 | import csv 6 | import os 7 | from resnet import inference 8 | from utils import ProgressBar, ImageCoder, make_multi_image_batch 9 | from image_processing import image_preprocessing 10 | import math 11 | import random 12 | MAX_BATCH_SZ = 32 13 | RESIZE_FINAL = 224 14 | FLAGS = tf.app.flags.FLAGS 15 | tf.app.flags.DEFINE_string('data_dir', '../val/4212_1025/test_imgs/', 16 | """test images Directory """) 17 | tf.app.flags.DEFINE_string('model_dir', '', 18 | "model saved directory") 19 | tf.app.flags.DEFINE_string('ckpt_file', 'model.ckpt-401', 20 | "checkpoint file saved path ") 21 | tf.app.flags.DEFINE_string('target','result_401.csv', 22 | 'CSV file containing the filename processed along with best guess and score') 23 | label_list = ['side','frontal'] 24 | def main(argv=None): 25 | with tf.Session() as sess: 26 | data_dir = FLAGS.data_dir 27 | files = [os.path.join(data_dir, item) for item in os.listdir(data_dir) ] 28 | # files = random.sample(files, 800) 29 | images = tf.placeholder(tf.float32, [None,RESIZE_FINAL,RESIZE_FINAL,3]) 30 | logits = inference(images, False, 31 | 
num_classes=2,
32 |                            num_blocks=[3, 4, 6, 3],  # defaults to 50-layer network
33 |                            use_bias=False,  # defaults to using batch norm
34 |                            bottleneck=True)
35 |         init = tf.global_variables_initializer()
36 |         resnet_variables = tf.global_variables()
37 |         saver = tf.train.Saver(resnet_variables)
38 |         saver.restore(sess, os.path.join(FLAGS.model_dir, FLAGS.ckpt_file))
39 | 
40 |         softmax_output = tf.nn.softmax(logits)
41 |         if FLAGS.target:
42 |             print('Creating output file %s' % FLAGS.target)
43 |             output = open(os.path.join(FLAGS.data_dir, FLAGS.target), 'w')
44 |             writer = csv.writer(output)
45 |             writer.writerow(('file', 'label', 'score'))
46 | 
47 |         num_batches = int(math.ceil(len(files) / float(MAX_BATCH_SZ)))  # ceil over a float so the final partial batch is included
48 |         pg = ProgressBar(num_batches)
49 |         # try:
50 |         for j in range(num_batches):
51 |             start_offset = j * MAX_BATCH_SZ
52 |             end_offset = min((j+1)*MAX_BATCH_SZ, len(files))
53 | 
54 |             batch_image_files = files[start_offset:end_offset]
55 |             images_ = []
56 |             for file in batch_image_files:
57 |                 print file
58 |                 image_buffer = tf.read_file(file)  # note: adds new ops to the graph on every batch
59 |                 bbox = []
60 |                 image = image_preprocessing(image_buffer, [], False)
61 |                 images_.append(image)
62 |             image_batch = tf.stack(images_)
63 |             batch_results = sess.run(softmax_output, feed_dict={images: image_batch.eval()})
64 |             batch_sz = batch_results.shape[0]
65 | 
66 |             for i in range(batch_sz):
67 |                 output_i = batch_results[i]
68 |                 best_i = np.argmax(output_i)
69 | 
70 |                 best_choice = (label_list[best_i], output_i[best_i])
71 |                 if writer is not None:
72 |                     f = batch_image_files[i]
73 |                     writer.writerow((f, best_choice[0], '%.2f' % best_choice[1]))
74 |             pg.update()
75 |         pg.done()
76 |         # except Exception as e:
77 |         #     print(e)
78 |         #     print('Failed to run all images')
79 | 
80 | if __name__ == "__main__":
81 |     tf.app.run()
82 | 
83 | 
84 | 
85 | 
86 | 
-------------------------------------------------------------------------------- /image_processing.py: --------------------------------------------------------------------------------
1 | # Copyright 2016 Google Inc. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #     http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Read and preprocess image data.
16 | 
17 | Image processing occurs on a single image at a time. Images are read and
18 | preprocessed in parallel across multiple threads. The resulting images
19 | are concatenated together to form a single batch for training or evaluation.
20 | 
21 | -- Provide processed image data for a network:
22 |   inputs: Construct batches of evaluation examples of images.
23 |   distorted_inputs: Construct batches of training examples of images.
24 |   batch_inputs: Construct batches of training or evaluation examples of images.
25 | 
26 | -- Data processing:
27 |   parse_example_proto: Parses an Example proto containing a training example
28 |     of an image.
29 | 
30 | -- Image decoding:
31 |   decode_jpeg: Decode a JPEG encoded string into a 3-D float32 Tensor.
32 | 
33 | -- Image preprocessing:
34 |   image_preprocessing: Decode and preprocess one image for evaluation or training
35 |   distort_image: Distort one image for training a network.
36 |   eval_image: Prepare one image for evaluation.
37 |   distort_color: Distort the color in one image for training.
38 | """
39 | from __future__ import absolute_import
40 | from __future__ import division
41 | from __future__ import print_function
42 | 
43 | import tensorflow as tf
44 | 
45 | FLAGS = tf.app.flags.FLAGS
46 | 
47 | tf.app.flags.DEFINE_integer('num_preprocess_threads', 4,
48 |                             """Number of preprocessing threads per tower. """
49 |                             """Please make this a multiple of 4.""")
50 | tf.app.flags.DEFINE_integer('num_readers', 4,
51 |                             """Number of parallel readers during train.""")
52 | 
53 | # Images are preprocessed asynchronously using multiple threads specified by
54 | # --num_preprocess_threads and the resulting processed images are stored in a
55 | # random shuffling queue. The shuffling queue dequeues --batch_size images
56 | # for processing on a given Inception tower. A larger shuffling queue guarantees
57 | # better mixing across examples within a batch and results in slightly higher
58 | # predictive performance in a trained model. Empirically,
59 | # --input_queue_memory_factor=16 works well. A value of 16 implies a queue size
60 | # of 1024*16 images. Assuming RGB 299x299 images, this implies a queue size of
61 | # ~17.6GB (16384 * 299*299*3*4 bytes). If the machine is memory limited, then
62 | # decrease this factor to decrease the CPU memory footprint, accordingly.
63 | tf.app.flags.DEFINE_integer(
64 |     'input_queue_memory_factor', 16,
65 |     """Size of the queue of preprocessed images. """
66 |     """Default is ideal but try smaller values, e.g. """
67 |     """4, 2 or 1, if host memory is constrained. See """
68 |     """comments in code for more details.""")
69 | 
70 | 
71 | def inputs(dataset, batch_size=None, num_preprocess_threads=None):
72 |     """Generate batches of ImageNet images for evaluation.
73 | 
74 |     Use this function as the inputs for evaluating a network.
75 | 
76 |     Note that some (minimal) image preprocessing occurs during evaluation
77 |     including central cropping and resizing of the image to fit the network.
78 | 
79 |     Args:
80 |       dataset: instance of Dataset class specifying the dataset.
81 |       batch_size: integer, number of examples in batch
82 |       num_preprocess_threads: integer, total number of preprocessing threads but
83 |         None defaults to FLAGS.num_preprocess_threads.
84 | 
85 |     Returns:
86 |       images: Images. 4D tensor of size [batch_size, FLAGS.image_size,
87 |         image_size, 3].
88 |       labels: 1-D integer Tensor of [FLAGS.batch_size].
89 |     """
90 |     if not batch_size:
91 |         batch_size = FLAGS.batch_size
92 | 
93 |     # Force all input processing onto CPU in order to reserve the GPU for
94 |     # the forward inference and back-propagation.
95 |     with tf.device('/cpu:0'):
96 |         images, labels = batch_inputs(
97 |             dataset,
98 |             batch_size,
99 |             train=False,
100 |             num_preprocess_threads=num_preprocess_threads,
101 |             num_readers=1)
102 | 
103 |     return images, labels
104 | 
105 | 
106 | def decode_jpeg(image_buffer, scope=None):
107 |     """Decode a JPEG string into one 3-D float image Tensor.
108 | 
109 |     Args:
110 |       image_buffer: scalar string Tensor.
111 |       scope: Optional scope for name_scope.
112 |     Returns:
113 |       3-D float Tensor with values ranging from [0, 1).
114 |     """
115 |     with tf.name_scope(scope, 'decode_jpeg'):
116 |         # Decode the string as an RGB JPEG.
117 |         # Note that the resulting image contains an unknown height and width
118 |         # that is set dynamically by decode_jpeg. In other words, the height
119 |         # and width of the image are unknown at compile-time.
120 |         image = tf.image.decode_jpeg(image_buffer, channels=3)
121 | 
122 |         # After this point, all image pixels reside in [0,1)
123 |         # until the very end, when they're rescaled to (-1, 1). The various
124 |         # adjust_* ops all require this range for dtype float.
125 |         image = tf.image.convert_image_dtype(image, dtype=tf.float32)
126 |         return image
127 | 
128 | 
129 | def distort_color(image, thread_id=0, scope=None):
130 |     """Distort the color of the image.
131 | 
132 |     Each color distortion is non-commutative and thus ordering of the color ops
133 |     matters. Ideally we would randomly permute the ordering of the color ops.
134 |     Rather than adding that level of complication, we select a distinct ordering
135 |     of color ops for each preprocessing thread.
136 | 
137 |     Args:
138 |       image: Tensor containing single image.
139 |       thread_id: preprocessing thread ID.
140 |       scope: Optional scope for name_scope.
141 |     Returns:
142 |       color-distorted image
143 |     """
144 |     with tf.name_scope(scope, 'distort_color'):
145 |         color_ordering = thread_id % 2
146 | 
147 |         if color_ordering == 0:
148 |             image = tf.image.random_brightness(image, max_delta=32. / 255.)
149 |             image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
150 |             image = tf.image.random_hue(image, max_delta=0.2)
151 |             image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
152 |         elif color_ordering == 1:
153 |             image = tf.image.random_brightness(image, max_delta=32. / 255.)
154 |             image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
155 |             image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
156 |             image = tf.image.random_hue(image, max_delta=0.2)
157 | 
158 |         # The random_* ops do not necessarily clamp.
159 |         image = tf.clip_by_value(image, 0.0, 1.0)
160 |         return image
161 | 
162 | 
163 | def distort_image(image, height, width, bbox, thread_id=0, scope=None):
164 |     """Distort one image for training a network.
165 | 
166 |     Distorting images provides a useful technique for augmenting the data
167 |     set during training in order to make the network invariant to aspects
168 |     of the image that do not affect the label.
169 | 
170 |     Args:
171 |       image: 3-D float Tensor of image
172 |       height: integer
173 |       width: integer
174 |       bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]
175 |         where each coordinate is [0, 1) and the coordinates are arranged
176 |         as [ymin, xmin, ymax, xmax].
177 |       thread_id: integer indicating the preprocessing thread.
178 |       scope: Optional scope for name_scope.
179 |     Returns:
180 |       3-D float Tensor of distorted image used for training.
181 |     """
182 |     with tf.name_scope(scope, 'distort_image'):
183 | 
184 |         # NOTE(ry) I unceremoniously removed all the bounding box code.
185 |         # Original here: https://github.com/tensorflow/models/blob/148a15fb043dacdd1595eb4c5267705fbd362c6a/inception/inception/image_processing.py
186 | 
187 |         distorted_image = image
188 | 
189 |         # This resizing operation may distort the images because the aspect
190 |         # ratio is not respected. We select a resize method in a round robin
191 |         # fashion based on the thread number.
192 |         # Note that ResizeMethod contains 4 enumerated resizing methods.
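        # (Annotation: in the TF releases this code targets, ResizeMethod
        # enumerates BILINEAR, NEAREST_NEIGHBOR, BICUBIC and AREA, so
        # `thread_id % 4` below cycles through all of them.)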
193 | resize_method = thread_id % 4 194 | distorted_image = tf.image.resize_images(distorted_image, size=(height,width), method=resize_method) 195 | # Restore the shape since the dynamic slice based upon the bbox_size loses 196 | # the third dimension. 197 | distorted_image.set_shape([height, width, 3]) 198 | if not thread_id: 199 | tf.summary.image('cropped_resized_image', 200 | tf.expand_dims(distorted_image, 0)) 201 | 202 | # Randomly flip the image horizontally. 203 | distorted_image = tf.image.random_flip_left_right(distorted_image) 204 | 205 | # Randomly distort the colors. 206 | distorted_image = distort_color(distorted_image, thread_id) 207 | 208 | if not thread_id: 209 | tf.summary.image('final_distorted_image', 210 | tf.expand_dims(distorted_image, 0)) 211 | return distorted_image 212 | 213 | 214 | def eval_image(image, height, width, scope=None): 215 | """Prepare one image for evaluation. 216 | 217 | Args: 218 | image: 3-D float Tensor 219 | height: integer 220 | width: integer 221 | scope: Optional scope for name_scope. 222 | Returns: 223 | 3-D float Tensor of prepared image. 224 | """ 225 | # with tf.name_scope([image, height, width], scope, 'eval_image'): 226 | with tf.name_scope(scope, 'eval_image'): 227 | # Crop the central region of the image with an area containing 87.5% of 228 | # the original image. 229 | image = tf.image.central_crop(image, central_fraction=0.875) 230 | 231 | # Resize the image to the original height and width. 232 | image = tf.expand_dims(image, 0) 233 | image = tf.image.resize_bilinear(image, [height, width], 234 | align_corners=False) 235 | image = tf.squeeze(image, [0]) 236 | return image 237 | 238 | 239 | def image_preprocessing(image_buffer, bbox, train, thread_id=0): 240 | """Decode and preprocess one image for evaluation or training. 241 | 242 | Args: 243 | image_buffer: JPEG encoded string Tensor 244 | bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] 245 | where each coordinate is [0, 1) and the coordinates are arranged as 246 | [ymin, xmin, ymax, xmax]. 247 | train: boolean 248 | thread_id: integer indicating preprocessing thread 249 | 250 | Returns: 251 | 3-D float Tensor containing an appropriately scaled image 252 | 253 | Raises: 254 | ValueError: if user does not provide bounding box 255 | """ 256 | if bbox is None: 257 | raise ValueError('Please supply a bounding box.') 258 | 259 | image = decode_jpeg(image_buffer) 260 | height = FLAGS.input_size 261 | width = FLAGS.input_size 262 | 263 | if train: 264 | image = distort_image(image, height, width, bbox, thread_id) 265 | else: 266 | image = eval_image(image, height, width) 267 | 268 | # Finally, rescale to [-1,1] instead of [0, 1) 269 | image = tf.subtract(image, 0.5) 270 | image = tf.multiply(image, 2.0) 271 | return image 272 | 273 | 274 | def parse_example_proto(example_serialized): 275 | """Parses an Example proto containing a training example of an image. 276 | 277 | The output of the build_image_data.py image preprocessing script is a dataset 278 | containing serialized Example protocol buffers. 
Each Example proto contains
279 |     the following fields:
280 | 
281 |       image/height: 462
282 |       image/width: 581
283 |       image/colorspace: 'RGB'
284 |       image/channels: 3
285 |       image/class/label: 615
286 |       image/class/synset: 'n03623198'
287 |       image/class/text: 'knee pad'
288 |       image/object/bbox/xmin: 0.1
289 |       image/object/bbox/xmax: 0.9
290 |       image/object/bbox/ymin: 0.2
291 |       image/object/bbox/ymax: 0.6
292 |       image/object/bbox/label: 615
293 |       image/format: 'JPEG'
294 |       image/filename: 'ILSVRC2012_val_00041207.JPEG'
295 |       image/encoded: <JPEG encoded string>
296 | 
297 |     Args:
298 |       example_serialized: scalar Tensor tf.string containing a serialized
299 |         Example protocol buffer.
300 | 
301 |     Returns:
302 |       filename: Tensor tf.string containing the filename
303 |       label: Tensor tf.int32 containing the label.
304 |       bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]
305 |         where each coordinate is [0, 1) and the coordinates are arranged as
306 |         [ymin, xmin, ymax, xmax].
307 |       text: Tensor tf.string containing the human-readable label.
308 |     """
309 |     # Dense features in Example proto.
310 |     feature_map = {
311 |         'image/filename': tf.FixedLenFeature(
312 |             [], dtype=tf.string, default_value=''),
313 |         'image/class/label': tf.FixedLenFeature(
314 |             [1], dtype=tf.int64, default_value=-1),
315 |         'image/class/text': tf.FixedLenFeature(
316 |             [], dtype=tf.string, default_value=''),
317 |     }
318 |     sparse_float32 = tf.VarLenFeature(dtype=tf.float32)
319 |     # Sparse features in Example proto.
320 |     feature_map.update({k: sparse_float32
321 |                         for k in
322 |                         ['image/object/bbox/xmin', 'image/object/bbox/ymin',
323 |                          'image/object/bbox/xmax', 'image/object/bbox/ymax']})
324 | 
325 |     features = tf.parse_single_example(example_serialized, feature_map)
326 |     label = tf.cast(features['image/class/label'], dtype=tf.int32)
327 | 
328 |     xmin = tf.expand_dims(features['image/object/bbox/xmin'].values, 0)
329 |     ymin = tf.expand_dims(features['image/object/bbox/ymin'].values, 0)
330 |     xmax = tf.expand_dims(features['image/object/bbox/xmax'].values, 0)
331 |     ymax = tf.expand_dims(features['image/object/bbox/ymax'].values, 0)
332 | 
333 |     # Note that we impose an ordering of (y, x) just to make life difficult.
334 |     bbox = tf.concat(0, [ymin, xmin, ymax, xmax])
335 | 
336 |     # Force the variable number of bounding boxes into the shape
337 |     # [1, num_boxes, coords].
338 |     bbox = tf.expand_dims(bbox, 0)
339 |     bbox = tf.transpose(bbox, [0, 2, 1])
340 | 
341 |     return features['image/filename'], label, bbox, features['image/class/text']
342 | 
343 | 
344 | def batch_inputs(dataset,
345 |                  batch_size,
346 |                  train,
347 |                  num_preprocess_threads=None,
348 |                  num_readers=1):
349 |     """Construct batches of training or evaluation examples from the image dataset.
350 | 
351 |     Args:
352 |       dataset: instance of Dataset class specifying the dataset.
353 |         See dataset.py for details.
354 |       batch_size: integer
355 |       train: boolean
356 |       num_preprocess_threads: integer, total number of preprocessing threads
357 |       num_readers: integer, number of parallel readers
358 | 
359 |     Returns:
360 |       images: 4-D float Tensor of a batch of images
361 |       labels: 1-D integer Tensor of [batch_size].
362 | 
363 |     Raises:
364 |       ValueError: if data is not found
365 |     """
366 |     with tf.name_scope('batch_processing'):
367 |         data_files = dataset.data_files()
368 |         if data_files is None:
369 |             raise ValueError('No data files found for this dataset')
370 | 
371 |         # Create filename_queue
372 |         if train:
373 |             filename_queue = tf.train.string_input_producer(
374 |                 data_files, shuffle=True, capacity=16)
375 |         else:
376 |             filename_queue = tf.train.string_input_producer(data_files,
377 |                                                             shuffle=False,
378 |                                                             capacity=1)
379 |         if num_preprocess_threads is None:
380 |             num_preprocess_threads = FLAGS.num_preprocess_threads
381 | 
382 |         if num_preprocess_threads % 4:
383 |             raise ValueError('Please make num_preprocess_threads a multiple '
384 |                              'of 4 (%d %% 4 != 0).' % num_preprocess_threads)
385 | 
386 |         if num_readers is None:
387 |             num_readers = FLAGS.num_readers
388 | 
389 |         if num_readers < 1:
390 |             raise ValueError('Please make num_readers at least 1')
391 | 
392 |         # Approximate number of examples per shard.
393 |         examples_per_shard = 1024
394 |         # Size the random shuffle queue to balance between good global
395 |         # mixing (more examples) and memory use (fewer examples).
396 |         # 1 image uses 299*299*3*4 bytes = 1MB
397 |         # The default input_queue_memory_factor is 16 implying a shuffling queue
398 |         # size: examples_per_shard * 16 * 1MB = 17.6GB
399 |         min_queue_examples = examples_per_shard * FLAGS.input_queue_memory_factor
400 |         if train:
401 |             examples_queue = tf.RandomShuffleQueue(
402 |                 capacity=min_queue_examples + 3 * batch_size,
403 |                 min_after_dequeue=min_queue_examples,
404 |                 dtypes=[tf.string])
405 |         else:
406 |             examples_queue = tf.FIFOQueue(
407 |                 capacity=examples_per_shard + 3 * batch_size,
408 |                 dtypes=[tf.string])
409 | 
410 |         reader = tf.TFRecordReader()
411 |         _, example_serialized = reader.read(filename_queue)
412 |         filename, label_index, bbox, label_text = parse_example_proto(example_serialized)
413 | 
414 |         fn = FLAGS.data_dir + '/' + label_text + '/' + filename
415 | 
416 |         examples_qr = tf.train.queue_runner.QueueRunner(examples_queue,
417 |                                                         [examples_queue.enqueue([fn])])
418 |         tf.train.queue_runner.add_queue_runner(examples_qr)
419 | 
420 |         images_and_labels = []
421 |         for thread_id in range(num_preprocess_threads):
422 |             # Parse a serialized Example proto to extract the image and metadata.
423 | 
424 |             whole_file_reader = tf.WholeFileReader()
425 |             _, image_buffer = whole_file_reader.read(examples_queue)
426 | 
427 |             image = image_preprocessing(image_buffer, bbox, train, thread_id)
428 |             images_and_labels.append([image, label_index])
429 | 
430 |         images, label_index_batch = tf.train.batch_join(
431 |             images_and_labels,
432 |             batch_size=batch_size,
433 |             capacity=2 * num_preprocess_threads * batch_size)
434 | 
435 |         # Reshape images into these desired dimensions.
436 |         height = FLAGS.image_size
437 |         width = FLAGS.image_size
438 |         depth = 3
439 | 
440 |         images = tf.cast(images, tf.float32)
441 |         images = tf.reshape(images, shape=[batch_size, height, width, depth])
442 | 
443 |         # Display the training images in the visualizer.
444 |         tf.image_summary('images', images)
445 | 
446 |         return images, tf.reshape(label_index_batch, [batch_size])
447 | 
-------------------------------------------------------------------------------- /resnet.py: --------------------------------------------------------------------------------
1 | import skimage.io  # bug. need to import this before tensorflow
2 | import skimage.transform  # bug.
need to import this before tensorflow 3 | import tensorflow as tf 4 | from tensorflow.python.ops import control_flow_ops 5 | from tensorflow.python.training import moving_averages 6 | 7 | from config import Config 8 | 9 | import datetime 10 | import numpy as np 11 | import os 12 | import time 13 | 14 | MOVING_AVERAGE_DECAY = 0.9997 15 | BN_DECAY = MOVING_AVERAGE_DECAY 16 | BN_EPSILON = 0.001 17 | CONV_WEIGHT_DECAY = 0.00004 18 | CONV_WEIGHT_STDDEV = 0.1 19 | FC_WEIGHT_DECAY = 0.00004 20 | FC_WEIGHT_STDDEV = 0.01 21 | RESNET_VARIABLES = 'resnet_variables' 22 | UPDATE_OPS_COLLECTION = 'resnet_update_ops' # must be grouped with training op 23 | IMAGENET_MEAN_BGR = [103.062623801, 115.902882574, 123.151630838, ] 24 | 25 | tf.app.flags.DEFINE_integer('input_size', 224, "input image size") 26 | 27 | 28 | activation = tf.nn.relu 29 | 30 | 31 | def inference(x, is_training, 32 | num_classes=1000, 33 | num_blocks=[3, 4, 6, 3], # defaults to 50-layer network 34 | use_bias=False, # defaults to using batch norm 35 | bottleneck=True): 36 | c = Config() 37 | c['bottleneck'] = bottleneck 38 | c['is_training'] = tf.convert_to_tensor(is_training, 39 | dtype='bool', 40 | name='is_training') 41 | c['ksize'] = 3 42 | c['stride'] = 1 43 | c['use_bias'] = use_bias 44 | c['fc_units_out'] = num_classes 45 | c['num_blocks'] = num_blocks 46 | c['stack_stride'] = 2 47 | 48 | with tf.variable_scope('scale1'): 49 | c['conv_filters_out'] = 64 50 | c['ksize'] = 7 51 | c['stride'] = 2 52 | x = conv(x, c) 53 | x = bn(x, c) 54 | x = activation(x) 55 | 56 | with tf.variable_scope('scale2'): 57 | x = _max_pool(x, ksize=3, stride=2) 58 | c['num_blocks'] = num_blocks[0] 59 | c['stack_stride'] = 1 60 | c['block_filters_internal'] = 64 61 | x = stack(x, c) 62 | 63 | with tf.variable_scope('scale3'): 64 | c['num_blocks'] = num_blocks[1] 65 | c['block_filters_internal'] = 128 66 | assert c['stack_stride'] == 2 67 | x = stack(x, c) 68 | 69 | with tf.variable_scope('scale4'): 70 | c['num_blocks'] = num_blocks[2] 71 | c['block_filters_internal'] = 256 72 | x = stack(x, c) 73 | 74 | with tf.variable_scope('scale5'): 75 | c['num_blocks'] = num_blocks[3] 76 | c['block_filters_internal'] = 512 77 | x = stack(x, c) 78 | 79 | # post-net 80 | x = tf.reduce_mean(x, reduction_indices=[1, 2], name="avg_pool") 81 | 82 | if num_classes != None: 83 | with tf.variable_scope('fc'): 84 | x = fc(x, c) 85 | 86 | return x 87 | 88 | 89 | # This is what they use for CIFAR-10 and 100. 90 | # See Section 4.2 in http://arxiv.org/abs/1512.03385 91 | def inference_small(x, 92 | is_training, 93 | num_blocks=3, # 6n+2 total weight layers will be used. 
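                    # (Annotation: each of the 3 scales stacks num_blocks
                    #  two-conv blocks, plus the first conv and the final fc:
                    #  3*n*2 + 2 = 6n+2 weight layers.)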
94 |                     use_bias=False,  # defaults to using batch norm
95 |                     num_classes=10):
96 |     c = Config()
97 |     c['is_training'] = tf.convert_to_tensor(is_training,
98 |                                             dtype='bool',
99 |                                             name='is_training')
100 |     c['use_bias'] = use_bias
101 |     c['fc_units_out'] = num_classes
102 |     c['num_blocks'] = num_blocks
103 |     c['num_classes'] = num_classes
104 |     return inference_small_config(x, c)
105 | 
106 | def inference_small_config(x, c):
107 |     c['bottleneck'] = False
108 |     c['ksize'] = 3
109 |     c['stride'] = 1
110 |     with tf.variable_scope('scale1'):
111 |         c['conv_filters_out'] = 16
112 |         c['block_filters_internal'] = 16
113 |         c['stack_stride'] = 1
114 |         x = conv(x, c)
115 |         x = bn(x, c)
116 |         x = activation(x)
117 |         x = stack(x, c)
118 | 
119 |     with tf.variable_scope('scale2'):
120 |         c['block_filters_internal'] = 32
121 |         c['stack_stride'] = 2
122 |         x = stack(x, c)
123 | 
124 |     with tf.variable_scope('scale3'):
125 |         c['block_filters_internal'] = 64
126 |         c['stack_stride'] = 2
127 |         x = stack(x, c)
128 | 
129 |     # post-net
130 |     x = tf.reduce_mean(x, reduction_indices=[1, 2], name="avg_pool")
131 | 
132 |     if c['num_classes'] != None:
133 |         with tf.variable_scope('fc'):
134 |             x = fc(x, c)
135 | 
136 |     return x
137 | 
138 | 
139 | def _imagenet_preprocess(rgb):
140 |     """Changes RGB [0,1] valued image to BGR [0,255] with mean subtracted."""
141 |     red, green, blue = tf.split(3, 3, rgb * 255.0)
142 |     bgr = tf.concat(3, [blue, green, red])
143 |     bgr -= IMAGENET_MEAN_BGR
144 |     return bgr
145 | 
146 | 
147 | def loss(logits, labels):
148 |     cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, labels)
149 |     cross_entropy_mean = tf.reduce_mean(cross_entropy)
150 | 
151 |     regularization_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
152 | 
153 |     loss_ = tf.add_n([cross_entropy_mean] + regularization_losses)
154 |     tf.scalar_summary('loss', loss_)
155 | 
156 |     return loss_
157 | 
158 | 
159 | def stack(x, c):
160 |     for n in range(c['num_blocks']):
161 |         s = c['stack_stride'] if n == 0 else 1
162 |         c['block_stride'] = s
163 |         with tf.variable_scope('block%d' % (n + 1)):
164 |             x = block(x, c)
165 |     return x
166 | 
167 | 
168 | def block(x, c):
169 |     filters_in = x.get_shape()[-1]
170 | 
171 |     # Note: block_filters_internal isn't always how many filters are output.
172 |     # That is only the case when bottleneck=False; when bottleneck is
173 |     # True, filters_internal*4 filters are output. filters_internal is how many filters
174 |     # the 3x3 convs output internally.
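    # Worked example from this repo's default config: scale2 of the 50-layer
    # network uses block_filters_internal=64 with bottleneck=True, so the block
    # computes 1x1/64 -> 3x3/64 -> 1x1/256 and filters_out = 4 * 64 = 256.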
175 |     m = 4 if c['bottleneck'] else 1
176 |     filters_out = m * c['block_filters_internal']
177 | 
178 |     shortcut = x  # branch 1
179 | 
180 |     c['conv_filters_out'] = c['block_filters_internal']
181 | 
182 |     if c['bottleneck']:
183 |         with tf.variable_scope('a'):
184 |             c['ksize'] = 1
185 |             c['stride'] = c['block_stride']
186 |             x = conv(x, c)
187 |             x = bn(x, c)
188 |             x = activation(x)
189 | 
190 |         with tf.variable_scope('b'):
191 |             x = conv(x, c)
192 |             x = bn(x, c)
193 |             x = activation(x)
194 | 
195 |         with tf.variable_scope('c'):
196 |             c['conv_filters_out'] = filters_out
197 |             c['ksize'] = 1
198 |             assert c['stride'] == 1
199 |             x = conv(x, c)
200 |             x = bn(x, c)
201 |     else:
202 |         with tf.variable_scope('A'):
203 |             c['stride'] = c['block_stride']
204 |             assert c['ksize'] == 3
205 |             x = conv(x, c)
206 |             x = bn(x, c)
207 |             x = activation(x)
208 | 
209 |         with tf.variable_scope('B'):
210 |             c['conv_filters_out'] = filters_out
211 |             assert c['ksize'] == 3
212 |             assert c['stride'] == 1
213 |             x = conv(x, c)
214 |             x = bn(x, c)
215 | 
216 |     with tf.variable_scope('shortcut'):
217 |         if filters_out != filters_in or c['block_stride'] != 1:
218 |             c['ksize'] = 1
219 |             c['stride'] = c['block_stride']
220 |             c['conv_filters_out'] = filters_out
221 |             shortcut = conv(shortcut, c)
222 |             shortcut = bn(shortcut, c)
223 | 
224 |     return activation(x + shortcut)
225 | 
226 | 
227 | def bn(x, c):
228 |     x_shape = x.get_shape()
229 |     params_shape = x_shape[-1:]
230 | 
231 |     if c['use_bias']:
232 |         bias = _get_variable('bias', params_shape,
233 |                              initializer=tf.zeros_initializer)
234 |         return x + bias
235 | 
236 | 
237 |     axis = list(range(len(x_shape) - 1))
238 | 
239 |     beta = _get_variable('beta',
240 |                          params_shape,
241 |                          initializer=tf.zeros_initializer)
242 |     gamma = _get_variable('gamma',
243 |                           params_shape,
244 |                           initializer=tf.ones_initializer)
245 | 
246 |     moving_mean = _get_variable('moving_mean',
247 |                                 params_shape,
248 |                                 initializer=tf.zeros_initializer,
249 |                                 trainable=False)
250 |     moving_variance = _get_variable('moving_variance',
251 |                                     params_shape,
252 |                                     initializer=tf.ones_initializer,
253 |                                     trainable=False)
254 | 
255 |     # These ops will only be performed when training.
256 |     mean, variance = tf.nn.moments(x, axis)
257 |     update_moving_mean = moving_averages.assign_moving_average(moving_mean,
258 |                                                                mean, BN_DECAY)
259 |     update_moving_variance = moving_averages.assign_moving_average(
260 |         moving_variance, variance, BN_DECAY)
261 |     tf.add_to_collection(UPDATE_OPS_COLLECTION, update_moving_mean)
262 |     tf.add_to_collection(UPDATE_OPS_COLLECTION, update_moving_variance)
263 | 
264 |     mean, variance = control_flow_ops.cond(
265 |         c['is_training'], lambda: (mean, variance),
266 |         lambda: (moving_mean, moving_variance))
267 | 
268 |     x = tf.nn.batch_normalization(x, mean, variance, beta, gamma, BN_EPSILON)
269 |     #x.set_shape(inputs.get_shape()) ??
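    # Annotation: the cond above selects batch statistics while training and
    # the frozen moving averages at inference; the assign_moving_average ops
    # added to UPDATE_OPS_COLLECTION only run when the caller groups them with
    # the training op, as resnet_train.py does via batchnorm_updates_op.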
270 | 
271 |     return x
272 | 
273 | 
274 | def fc(x, c):
275 |     num_units_in = x.get_shape()[1]
276 |     num_units_out = c['fc_units_out']
277 |     weights_initializer = tf.truncated_normal_initializer(
278 |         stddev=FC_WEIGHT_STDDEV)
279 | 
280 |     weights = _get_variable('weights',
281 |                             shape=[num_units_in, num_units_out],
282 |                             initializer=weights_initializer,
283 |                             weight_decay=FC_WEIGHT_DECAY)
284 |     biases = _get_variable('biases',
285 |                            shape=[num_units_out],
286 |                            initializer=tf.zeros_initializer)
287 |     x = tf.nn.xw_plus_b(x, weights, biases)
288 |     return x
289 | 
290 | 
291 | def _get_variable(name,
292 |                   shape,
293 |                   initializer,
294 |                   weight_decay=0.0,
295 |                   dtype='float',
296 |                   trainable=True):
297 |     """A little wrapper around tf.get_variable to do weight decay and add to
298 |     the resnet collection."""
299 |     if weight_decay > 0:
300 |         regularizer = tf.contrib.layers.l2_regularizer(weight_decay)
301 |     else:
302 |         regularizer = None
303 |     collections = [tf.GraphKeys.VARIABLES, RESNET_VARIABLES]
304 |     return tf.get_variable(name,
305 |                            shape=shape,
306 |                            initializer=initializer,
307 |                            dtype=dtype,
308 |                            regularizer=regularizer,
309 |                            collections=collections,
310 |                            trainable=trainable)
311 | 
312 | 
313 | def conv(x, c):
314 |     ksize = c['ksize']
315 |     stride = c['stride']
316 |     filters_out = c['conv_filters_out']
317 | 
318 |     filters_in = x.get_shape()[-1]
319 |     shape = [ksize, ksize, filters_in, filters_out]
320 |     initializer = tf.truncated_normal_initializer(stddev=CONV_WEIGHT_STDDEV)
321 |     weights = _get_variable('weights',
322 |                             shape=shape,
323 |                             dtype='float',
324 |                             initializer=initializer,
325 |                             weight_decay=CONV_WEIGHT_DECAY)
326 |     return tf.nn.conv2d(x, weights, [1, stride, stride, 1], padding='SAME')
327 | 
328 | 
329 | def _max_pool(x, ksize=3, stride=2):
330 |     return tf.nn.max_pool(x,
331 |                           ksize=[1, ksize, ksize, 1],
332 |                           strides=[1, stride, stride, 1],
333 |                           padding='SAME')
334 | 
-------------------------------------------------------------------------------- /resnet_train.py: --------------------------------------------------------------------------------
1 | #encoding:utf-8
2 | from resnet import *
3 | import tensorflow as tf
4 | import sys
5 | import os
6 | 
7 | MOMENTUM = 0.9
8 | 
9 | FLAGS = tf.app.flags.FLAGS
10 | tf.app.flags.DEFINE_string('train_dir', '',
11 |                            """Directory where to write event logs """
12 |                            """and checkpoint.""")
13 | tf.app.flags.DEFINE_string('model_dir', '',
14 |                            "model saved directory")
15 | tf.app.flags.DEFINE_string('ckpt_file', 'ResNet-L50.ckpt',
16 |                            "checkpoint file saved path ")
17 | tf.app.flags.DEFINE_float('learning_rate', 0.01, "learning rate.")
18 | tf.app.flags.DEFINE_integer('batch_size', 32, "batch size")
19 | tf.app.flags.DEFINE_integer('max_steps', 20000, "max steps")
20 | tf.app.flags.DEFINE_boolean('resume', True,
21 |                             'resume from latest saved state')
22 | tf.app.flags.DEFINE_boolean('minimal_summaries', True,
23 |                             'produce fewer summaries to save HD space')
24 | 
25 | 
26 | def top_k_error(predictions, labels, k):
27 |     batch_size = float(FLAGS.batch_size)  # tf.shape(predictions)[0]
28 |     in_top_k = tf.to_float(tf.nn.in_top_k(predictions, labels, k=k))
29 |     num_correct = tf.reduce_sum(in_top_k)
30 |     return (batch_size - num_correct) / batch_size
31 | 
32 | def eval(logits, images, labels):
33 |     pass
34 | def train(is_training, logits, images, labels):
35 |     run_dir = '%s/run-%d' % (FLAGS.train_dir, os.getpid())
36 |     global_step = tf.get_variable('global_step', [],
37 |                                   initializer=tf.constant_initializer(0),
38 |                                   trainable=False)
39 |     val_step = tf.get_variable('val_step', [],
40 |                                initializer=tf.constant_initializer(0),
41 |                                trainable=False)
42 | 
43 |     loss_ = loss(logits, labels)
44 |     predictions = tf.nn.softmax(logits)
45 | 
46 |     top1_error = top_k_error(predictions, labels, 1)
47 | 
48 | 
49 |     # loss_avg
50 |     ema = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
51 |     tf.add_to_collection(UPDATE_OPS_COLLECTION, ema.apply([loss_]))
52 |     tf.summary.scalar('loss_avg', ema.average(loss_))
53 | 
54 |     # validation stats
55 |     ema = tf.train.ExponentialMovingAverage(0.9, val_step)
56 |     val_op = tf.group(val_step.assign_add(1), ema.apply([top1_error]))
57 |     top1_error_avg = ema.average(top1_error)
58 |     tf.summary.scalar('val_top1_error_avg', top1_error_avg)
59 | 
60 |     tf.summary.scalar('learning_rate', FLAGS.learning_rate)
61 | 
62 |     opt = tf.train.MomentumOptimizer(FLAGS.learning_rate, MOMENTUM)
63 |     grads = opt.compute_gradients(loss_)
64 |     for grad, var in grads:
65 |         if grad is not None and not FLAGS.minimal_summaries:
66 |             tf.histogram_summary(var.op.name + '/gradients', grad)
67 |     apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
68 | 
69 |     if not FLAGS.minimal_summaries:
70 |         # Display the training images in the visualizer.
71 |         tf.image_summary('images', images)
72 | 
73 |         for var in tf.trainable_variables():
74 |             tf.histogram_summary(var.op.name, var)
75 | 
76 |     batchnorm_updates = tf.get_collection(UPDATE_OPS_COLLECTION)
77 |     batchnorm_updates_op = tf.group(*batchnorm_updates)
78 |     train_op = tf.group(apply_gradient_op, batchnorm_updates_op)
79 | 
80 |     resnet_variables = tf.trainable_variables()
81 |     saver1 = tf.train.Saver(resnet_variables)
82 |     # The following variables do not need to be restored from the pre-trained model:
83 |     # resnet_variables = filter(lambda x: "val_step" not in x.name, resnet_variables)
84 |     # resnet_variables = filter(lambda x: "Momentum" not in x.name, resnet_variables)
85 |     # resnet_variables = filter(lambda x: "biased" not in x.name, resnet_variables)
86 |     # resnet_variables = filter(lambda x: "local_step" not in x.name, resnet_variables)
87 |     # resnet_variables = filter(lambda x: "global_step" not in x.name, resnet_variables)
88 |     # resnet_variables = filter(lambda x: "ExponentialMovingAverage" not in x.name, resnet_variables)
89 |     resnet_variables = filter(lambda x: "fc" not in x.name, resnet_variables)
90 | 
91 |     saver2 = tf.train.Saver(resnet_variables)
92 | 
93 |     summary_op = tf.summary.merge_all()
94 | 
95 |     init = tf.global_variables_initializer()
96 | 
97 |     sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))
98 |     sess.run(init)
99 |     tf.train.start_queue_runners(sess=sess)
100 | 
101 |     summary_writer = tf.summary.FileWriter(FLAGS.train_dir, sess.graph)
102 | 
103 |     if FLAGS.resume:
104 |         # resnet_variables = tf.get_collection(tf.GraphKeys.VARIABLES, scope="scale1")
105 |         # resnet_variables.remove("val_step")
106 |         # restorer = tf.train.Saver(resnet_variables)
107 |         saver2.restore(sess, os.path.join(FLAGS.model_dir, FLAGS.ckpt_file))
108 |         """
109 |         latest = tf.train.latest_checkpoint(FLAGS.model_dir)
110 |         if not latest:
111 |             print "No checkpoint to continue from in", FLAGS.model_dir
112 |             sys.exit(1)
113 |         print "resume", latest
114 |         saver.restore(sess, latest)
115 |         """
116 |     for x in xrange(FLAGS.max_steps + 1):
117 |         start_time = time.time()
118 | 
119 |         step = sess.run(global_step)
120 |         i = [train_op, loss_]
121 | 
122 |         write_summary = step % 100 == 0 and step > 1  # write summaries every 100 steps
123 |         if write_summary:
124 |             i.append(summary_op)
125 | 
126 |         o = sess.run(i, {is_training: True})
127 | 
128 |         loss_value = o[1]
129 | 
130 |         duration =
time.time() - start_time 131 | 132 | assert not np.isnan(loss_value), 'Model diverged with loss = NaN' 133 | 134 | if step % 5 == 0: 135 | examples_per_sec = FLAGS.batch_size / float(duration) 136 | format_str = ('step %d, loss = %.2f (%.1f examples/sec; %.3f ' 137 | 'sec/batch)') 138 | print(format_str % (step, loss_value, examples_per_sec, duration)) 139 | 140 | if write_summary: 141 | summary_str = o[2] 142 | summary_writer.add_summary(summary_str, step) 143 | 144 | # Save the model checkpoint periodically. 145 | if step > 1 and step % 100 == 0: 146 | checkpoint_path = os.path.join(run_dir, 'model.ckpt') 147 | saver1.save(sess, checkpoint_path, global_step=global_step) 148 | 149 | # Run validation periodically 150 | if step > 1 and step % 100 == 0: 151 | _, top1_error_value = sess.run([val_op, top1_error], { is_training: False }) 152 | print('Validation top1 error %.2f' % top1_error_value) 153 | 154 | 155 | 156 | -------------------------------------------------------------------------------- /synset.py: -------------------------------------------------------------------------------- 1 | synset = [ 2 | "n01440764 tench, Tinca tinca", 3 | "n01443537 goldfish, Carassius auratus", 4 | "n01484850 great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias", 5 | "n01491361 tiger shark, Galeocerdo cuvieri", 6 | "n01494475 hammerhead, hammerhead shark", 7 | "n01496331 electric ray, crampfish, numbfish, torpedo", 8 | "n01498041 stingray", 9 | "n01514668 cock", 10 | "n01514859 hen", 11 | "n01518878 ostrich, Struthio camelus", 12 | "n01530575 brambling, Fringilla montifringilla", 13 | "n01531178 goldfinch, Carduelis carduelis", 14 | "n01532829 house finch, linnet, Carpodacus mexicanus", 15 | "n01534433 junco, snowbird", 16 | "n01537544 indigo bunting, indigo finch, indigo bird, Passerina cyanea", 17 | "n01558993 robin, American robin, Turdus migratorius", 18 | "n01560419 bulbul", 19 | "n01580077 jay", 20 | "n01582220 magpie", 21 | "n01592084 chickadee", 22 | "n01601694 water ouzel, dipper", 23 | "n01608432 kite", 24 | "n01614925 bald eagle, American eagle, Haliaeetus leucocephalus", 25 | "n01616318 vulture", 26 | "n01622779 great grey owl, great gray owl, Strix nebulosa", 27 | "n01629819 European fire salamander, Salamandra salamandra", 28 | "n01630670 common newt, Triturus vulgaris", 29 | "n01631663 eft", 30 | "n01632458 spotted salamander, Ambystoma maculatum", 31 | "n01632777 axolotl, mud puppy, Ambystoma mexicanum", 32 | "n01641577 bullfrog, Rana catesbeiana", 33 | "n01644373 tree frog, tree-frog", 34 | "n01644900 tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui", 35 | "n01664065 loggerhead, loggerhead turtle, Caretta caretta", 36 | "n01665541 leatherback turtle, leatherback, leathery turtle, Dermochelys coriacea", 37 | "n01667114 mud turtle", 38 | "n01667778 terrapin", 39 | "n01669191 box turtle, box tortoise", 40 | "n01675722 banded gecko", 41 | "n01677366 common iguana, iguana, Iguana iguana", 42 | "n01682714 American chameleon, anole, Anolis carolinensis", 43 | "n01685808 whiptail, whiptail lizard", 44 | "n01687978 agama", 45 | "n01688243 frilled lizard, Chlamydosaurus kingi", 46 | "n01689811 alligator lizard", 47 | "n01692333 Gila monster, Heloderma suspectum", 48 | "n01693334 green lizard, Lacerta viridis", 49 | "n01694178 African chameleon, Chamaeleo chamaeleon", 50 | "n01695060 Komodo dragon, Komodo lizard, dragon lizard, giant lizard, Varanus komodoensis", 51 | "n01697457 African crocodile, Nile crocodile, Crocodylus niloticus", 52 | "n01698640 
American alligator, Alligator mississipiensis", 53 | "n01704323 triceratops", 54 | "n01728572 thunder snake, worm snake, Carphophis amoenus", 55 | "n01728920 ringneck snake, ring-necked snake, ring snake", 56 | "n01729322 hognose snake, puff adder, sand viper", 57 | "n01729977 green snake, grass snake", 58 | "n01734418 king snake, kingsnake", 59 | "n01735189 garter snake, grass snake", 60 | "n01737021 water snake", 61 | "n01739381 vine snake", 62 | "n01740131 night snake, Hypsiglena torquata", 63 | "n01742172 boa constrictor, Constrictor constrictor", 64 | "n01744401 rock python, rock snake, Python sebae", 65 | "n01748264 Indian cobra, Naja naja", 66 | "n01749939 green mamba", 67 | "n01751748 sea snake", 68 | "n01753488 horned viper, cerastes, sand viper, horned asp, Cerastes cornutus", 69 | "n01755581 diamondback, diamondback rattlesnake, Crotalus adamanteus", 70 | "n01756291 sidewinder, horned rattlesnake, Crotalus cerastes", 71 | "n01768244 trilobite", 72 | "n01770081 harvestman, daddy longlegs, Phalangium opilio", 73 | "n01770393 scorpion", 74 | "n01773157 black and gold garden spider, Argiope aurantia", 75 | "n01773549 barn spider, Araneus cavaticus", 76 | "n01773797 garden spider, Aranea diademata", 77 | "n01774384 black widow, Latrodectus mactans", 78 | "n01774750 tarantula", 79 | "n01775062 wolf spider, hunting spider", 80 | "n01776313 tick", 81 | "n01784675 centipede", 82 | "n01795545 black grouse", 83 | "n01796340 ptarmigan", 84 | "n01797886 ruffed grouse, partridge, Bonasa umbellus", 85 | "n01798484 prairie chicken, prairie grouse, prairie fowl", 86 | "n01806143 peacock", 87 | "n01806567 quail", 88 | "n01807496 partridge", 89 | "n01817953 African grey, African gray, Psittacus erithacus", 90 | "n01818515 macaw", 91 | "n01819313 sulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita", 92 | "n01820546 lorikeet", 93 | "n01824575 coucal", 94 | "n01828970 bee eater", 95 | "n01829413 hornbill", 96 | "n01833805 hummingbird", 97 | "n01843065 jacamar", 98 | "n01843383 toucan", 99 | "n01847000 drake", 100 | "n01855032 red-breasted merganser, Mergus serrator", 101 | "n01855672 goose", 102 | "n01860187 black swan, Cygnus atratus", 103 | "n01871265 tusker", 104 | "n01872401 echidna, spiny anteater, anteater", 105 | "n01873310 platypus, duckbill, duckbilled platypus, duck-billed platypus, Ornithorhynchus anatinus", 106 | "n01877812 wallaby, brush kangaroo", 107 | "n01882714 koala, koala bear, kangaroo bear, native bear, Phascolarctos cinereus", 108 | "n01883070 wombat", 109 | "n01910747 jellyfish", 110 | "n01914609 sea anemone, anemone", 111 | "n01917289 brain coral", 112 | "n01924916 flatworm, platyhelminth", 113 | "n01930112 nematode, nematode worm, roundworm", 114 | "n01943899 conch", 115 | "n01944390 snail", 116 | "n01945685 slug", 117 | "n01950731 sea slug, nudibranch", 118 | "n01955084 chiton, coat-of-mail shell, sea cradle, polyplacophore", 119 | "n01968897 chambered nautilus, pearly nautilus, nautilus", 120 | "n01978287 Dungeness crab, Cancer magister", 121 | "n01978455 rock crab, Cancer irroratus", 122 | "n01980166 fiddler crab", 123 | "n01981276 king crab, Alaska crab, Alaskan king crab, Alaska king crab, Paralithodes camtschatica", 124 | "n01983481 American lobster, Northern lobster, Maine lobster, Homarus americanus", 125 | "n01984695 spiny lobster, langouste, rock lobster, crawfish, crayfish, sea crawfish", 126 | "n01985128 crayfish, crawfish, crawdad, crawdaddy", 127 | "n01986214 hermit crab", 128 | "n01990800 isopod", 129 | "n02002556 white stork, Ciconia ciconia", 130 | 
"n02002724 black stork, Ciconia nigra", 131 | "n02006656 spoonbill", 132 | "n02007558 flamingo", 133 | "n02009229 little blue heron, Egretta caerulea", 134 | "n02009912 American egret, great white heron, Egretta albus", 135 | "n02011460 bittern", 136 | "n02012849 crane", 137 | "n02013706 limpkin, Aramus pictus", 138 | "n02017213 European gallinule, Porphyrio porphyrio", 139 | "n02018207 American coot, marsh hen, mud hen, water hen, Fulica americana", 140 | "n02018795 bustard", 141 | "n02025239 ruddy turnstone, Arenaria interpres", 142 | "n02027492 red-backed sandpiper, dunlin, Erolia alpina", 143 | "n02028035 redshank, Tringa totanus", 144 | "n02033041 dowitcher", 145 | "n02037110 oystercatcher, oyster catcher", 146 | "n02051845 pelican", 147 | "n02056570 king penguin, Aptenodytes patagonica", 148 | "n02058221 albatross, mollymawk", 149 | "n02066245 grey whale, gray whale, devilfish, Eschrichtius gibbosus, Eschrichtius robustus", 150 | "n02071294 killer whale, killer, orca, grampus, sea wolf, Orcinus orca", 151 | "n02074367 dugong, Dugong dugon", 152 | "n02077923 sea lion", 153 | "n02085620 Chihuahua", 154 | "n02085782 Japanese spaniel", 155 | "n02085936 Maltese dog, Maltese terrier, Maltese", 156 | "n02086079 Pekinese, Pekingese, Peke", 157 | "n02086240 Shih-Tzu", 158 | "n02086646 Blenheim spaniel", 159 | "n02086910 papillon", 160 | "n02087046 toy terrier", 161 | "n02087394 Rhodesian ridgeback", 162 | "n02088094 Afghan hound, Afghan", 163 | "n02088238 basset, basset hound", 164 | "n02088364 beagle", 165 | "n02088466 bloodhound, sleuthhound", 166 | "n02088632 bluetick", 167 | "n02089078 black-and-tan coonhound", 168 | "n02089867 Walker hound, Walker foxhound", 169 | "n02089973 English foxhound", 170 | "n02090379 redbone", 171 | "n02090622 borzoi, Russian wolfhound", 172 | "n02090721 Irish wolfhound", 173 | "n02091032 Italian greyhound", 174 | "n02091134 whippet", 175 | "n02091244 Ibizan hound, Ibizan Podenco", 176 | "n02091467 Norwegian elkhound, elkhound", 177 | "n02091635 otterhound, otter hound", 178 | "n02091831 Saluki, gazelle hound", 179 | "n02092002 Scottish deerhound, deerhound", 180 | "n02092339 Weimaraner", 181 | "n02093256 Staffordshire bullterrier, Staffordshire bull terrier", 182 | "n02093428 American Staffordshire terrier, Staffordshire terrier, American pit bull terrier, pit bull terrier", 183 | "n02093647 Bedlington terrier", 184 | "n02093754 Border terrier", 185 | "n02093859 Kerry blue terrier", 186 | "n02093991 Irish terrier", 187 | "n02094114 Norfolk terrier", 188 | "n02094258 Norwich terrier", 189 | "n02094433 Yorkshire terrier", 190 | "n02095314 wire-haired fox terrier", 191 | "n02095570 Lakeland terrier", 192 | "n02095889 Sealyham terrier, Sealyham", 193 | "n02096051 Airedale, Airedale terrier", 194 | "n02096177 cairn, cairn terrier", 195 | "n02096294 Australian terrier", 196 | "n02096437 Dandie Dinmont, Dandie Dinmont terrier", 197 | "n02096585 Boston bull, Boston terrier", 198 | "n02097047 miniature schnauzer", 199 | "n02097130 giant schnauzer", 200 | "n02097209 standard schnauzer", 201 | "n02097298 Scotch terrier, Scottish terrier, Scottie", 202 | "n02097474 Tibetan terrier, chrysanthemum dog", 203 | "n02097658 silky terrier, Sydney silky", 204 | "n02098105 soft-coated wheaten terrier", 205 | "n02098286 West Highland white terrier", 206 | "n02098413 Lhasa, Lhasa apso", 207 | "n02099267 flat-coated retriever", 208 | "n02099429 curly-coated retriever", 209 | "n02099601 golden retriever", 210 | "n02099712 Labrador retriever", 211 | "n02099849 Chesapeake Bay 
retriever", 212 | "n02100236 German short-haired pointer", 213 | "n02100583 vizsla, Hungarian pointer", 214 | "n02100735 English setter", 215 | "n02100877 Irish setter, red setter", 216 | "n02101006 Gordon setter", 217 | "n02101388 Brittany spaniel", 218 | "n02101556 clumber, clumber spaniel", 219 | "n02102040 English springer, English springer spaniel", 220 | "n02102177 Welsh springer spaniel", 221 | "n02102318 cocker spaniel, English cocker spaniel, cocker", 222 | "n02102480 Sussex spaniel", 223 | "n02102973 Irish water spaniel", 224 | "n02104029 kuvasz", 225 | "n02104365 schipperke", 226 | "n02105056 groenendael", 227 | "n02105162 malinois", 228 | "n02105251 briard", 229 | "n02105412 kelpie", 230 | "n02105505 komondor", 231 | "n02105641 Old English sheepdog, bobtail", 232 | "n02105855 Shetland sheepdog, Shetland sheep dog, Shetland", 233 | "n02106030 collie", 234 | "n02106166 Border collie", 235 | "n02106382 Bouvier des Flandres, Bouviers des Flandres", 236 | "n02106550 Rottweiler", 237 | "n02106662 German shepherd, German shepherd dog, German police dog, alsatian", 238 | "n02107142 Doberman, Doberman pinscher", 239 | "n02107312 miniature pinscher", 240 | "n02107574 Greater Swiss Mountain dog", 241 | "n02107683 Bernese mountain dog", 242 | "n02107908 Appenzeller", 243 | "n02108000 EntleBucher", 244 | "n02108089 boxer", 245 | "n02108422 bull mastiff", 246 | "n02108551 Tibetan mastiff", 247 | "n02108915 French bulldog", 248 | "n02109047 Great Dane", 249 | "n02109525 Saint Bernard, St Bernard", 250 | "n02109961 Eskimo dog, husky", 251 | "n02110063 malamute, malemute, Alaskan malamute", 252 | "n02110185 Siberian husky", 253 | "n02110341 dalmatian, coach dog, carriage dog", 254 | "n02110627 affenpinscher, monkey pinscher, monkey dog", 255 | "n02110806 basenji", 256 | "n02110958 pug, pug-dog", 257 | "n02111129 Leonberg", 258 | "n02111277 Newfoundland, Newfoundland dog", 259 | "n02111500 Great Pyrenees", 260 | "n02111889 Samoyed, Samoyede", 261 | "n02112018 Pomeranian", 262 | "n02112137 chow, chow chow", 263 | "n02112350 keeshond", 264 | "n02112706 Brabancon griffon", 265 | "n02113023 Pembroke, Pembroke Welsh corgi", 266 | "n02113186 Cardigan, Cardigan Welsh corgi", 267 | "n02113624 toy poodle", 268 | "n02113712 miniature poodle", 269 | "n02113799 standard poodle", 270 | "n02113978 Mexican hairless", 271 | "n02114367 timber wolf, grey wolf, gray wolf, Canis lupus", 272 | "n02114548 white wolf, Arctic wolf, Canis lupus tundrarum", 273 | "n02114712 red wolf, maned wolf, Canis rufus, Canis niger", 274 | "n02114855 coyote, prairie wolf, brush wolf, Canis latrans", 275 | "n02115641 dingo, warrigal, warragal, Canis dingo", 276 | "n02115913 dhole, Cuon alpinus", 277 | "n02116738 African hunting dog, hyena dog, Cape hunting dog, Lycaon pictus", 278 | "n02117135 hyena, hyaena", 279 | "n02119022 red fox, Vulpes vulpes", 280 | "n02119789 kit fox, Vulpes macrotis", 281 | "n02120079 Arctic fox, white fox, Alopex lagopus", 282 | "n02120505 grey fox, gray fox, Urocyon cinereoargenteus", 283 | "n02123045 tabby, tabby cat", 284 | "n02123159 tiger cat", 285 | "n02123394 Persian cat", 286 | "n02123597 Siamese cat, Siamese", 287 | "n02124075 Egyptian cat", 288 | "n02125311 cougar, puma, catamount, mountain lion, painter, panther, Felis concolor", 289 | "n02127052 lynx, catamount", 290 | "n02128385 leopard, Panthera pardus", 291 | "n02128757 snow leopard, ounce, Panthera uncia", 292 | "n02128925 jaguar, panther, Panthera onca, Felis onca", 293 | "n02129165 lion, king of beasts, Panthera leo", 294 | "n02129604 
tiger, Panthera tigris", 295 | "n02130308 cheetah, chetah, Acinonyx jubatus", 296 | "n02132136 brown bear, bruin, Ursus arctos", 297 | "n02133161 American black bear, black bear, Ursus americanus, Euarctos americanus", 298 | "n02134084 ice bear, polar bear, Ursus Maritimus, Thalarctos maritimus", 299 | "n02134418 sloth bear, Melursus ursinus, Ursus ursinus", 300 | "n02137549 mongoose", 301 | "n02138441 meerkat, mierkat", 302 | "n02165105 tiger beetle", 303 | "n02165456 ladybug, ladybeetle, lady beetle, ladybird, ladybird beetle", 304 | "n02167151 ground beetle, carabid beetle", 305 | "n02168699 long-horned beetle, longicorn, longicorn beetle", 306 | "n02169497 leaf beetle, chrysomelid", 307 | "n02172182 dung beetle", 308 | "n02174001 rhinoceros beetle", 309 | "n02177972 weevil", 310 | "n02190166 fly", 311 | "n02206856 bee", 312 | "n02219486 ant, emmet, pismire", 313 | "n02226429 grasshopper, hopper", 314 | "n02229544 cricket", 315 | "n02231487 walking stick, walkingstick, stick insect", 316 | "n02233338 cockroach, roach", 317 | "n02236044 mantis, mantid", 318 | "n02256656 cicada, cicala", 319 | "n02259212 leafhopper", 320 | "n02264363 lacewing, lacewing fly", 321 | "n02268443 dragonfly, darning needle, devil's darning needle, sewing needle, snake feeder, snake doctor, mosquito hawk, skeeter hawk", 322 | "n02268853 damselfly", 323 | "n02276258 admiral", 324 | "n02277742 ringlet, ringlet butterfly", 325 | "n02279972 monarch, monarch butterfly, milkweed butterfly, Danaus plexippus", 326 | "n02280649 cabbage butterfly", 327 | "n02281406 sulphur butterfly, sulfur butterfly", 328 | "n02281787 lycaenid, lycaenid butterfly", 329 | "n02317335 starfish, sea star", 330 | "n02319095 sea urchin", 331 | "n02321529 sea cucumber, holothurian", 332 | "n02325366 wood rabbit, cottontail, cottontail rabbit", 333 | "n02326432 hare", 334 | "n02328150 Angora, Angora rabbit", 335 | "n02342885 hamster", 336 | "n02346627 porcupine, hedgehog", 337 | "n02356798 fox squirrel, eastern fox squirrel, Sciurus niger", 338 | "n02361337 marmot", 339 | "n02363005 beaver", 340 | "n02364673 guinea pig, Cavia cobaya", 341 | "n02389026 sorrel", 342 | "n02391049 zebra", 343 | "n02395406 hog, pig, grunter, squealer, Sus scrofa", 344 | "n02396427 wild boar, boar, Sus scrofa", 345 | "n02397096 warthog", 346 | "n02398521 hippopotamus, hippo, river horse, Hippopotamus amphibius", 347 | "n02403003 ox", 348 | "n02408429 water buffalo, water ox, Asiatic buffalo, Bubalus bubalis", 349 | "n02410509 bison", 350 | "n02412080 ram, tup", 351 | "n02415577 bighorn, bighorn sheep, cimarron, Rocky Mountain bighorn, Rocky Mountain sheep, Ovis canadensis", 352 | "n02417914 ibex, Capra ibex", 353 | "n02422106 hartebeest", 354 | "n02422699 impala, Aepyceros melampus", 355 | "n02423022 gazelle", 356 | "n02437312 Arabian camel, dromedary, Camelus dromedarius", 357 | "n02437616 llama", 358 | "n02441942 weasel", 359 | "n02442845 mink", 360 | "n02443114 polecat, fitch, foulmart, foumart, Mustela putorius", 361 | "n02443484 black-footed ferret, ferret, Mustela nigripes", 362 | "n02444819 otter", 363 | "n02445715 skunk, polecat, wood pussy", 364 | "n02447366 badger", 365 | "n02454379 armadillo", 366 | "n02457408 three-toed sloth, ai, Bradypus tridactylus", 367 | "n02480495 orangutan, orang, orangutang, Pongo pygmaeus", 368 | "n02480855 gorilla, Gorilla gorilla", 369 | "n02481823 chimpanzee, chimp, Pan troglodytes", 370 | "n02483362 gibbon, Hylobates lar", 371 | "n02483708 siamang, Hylobates syndactylus, Symphalangus syndactylus", 372 | "n02484975 guenon, 
guenon monkey", 373 | "n02486261 patas, hussar monkey, Erythrocebus patas", 374 | "n02486410 baboon", 375 | "n02487347 macaque", 376 | "n02488291 langur", 377 | "n02488702 colobus, colobus monkey", 378 | "n02489166 proboscis monkey, Nasalis larvatus", 379 | "n02490219 marmoset", 380 | "n02492035 capuchin, ringtail, Cebus capucinus", 381 | "n02492660 howler monkey, howler", 382 | "n02493509 titi, titi monkey", 383 | "n02493793 spider monkey, Ateles geoffroyi", 384 | "n02494079 squirrel monkey, Saimiri sciureus", 385 | "n02497673 Madagascar cat, ring-tailed lemur, Lemur catta", 386 | "n02500267 indri, indris, Indri indri, Indri brevicaudatus", 387 | "n02504013 Indian elephant, Elephas maximus", 388 | "n02504458 African elephant, Loxodonta africana", 389 | "n02509815 lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens", 390 | "n02510455 giant panda, panda, panda bear, coon bear, Ailuropoda melanoleuca", 391 | "n02514041 barracouta, snoek", 392 | "n02526121 eel", 393 | "n02536864 coho, cohoe, coho salmon, blue jack, silver salmon, Oncorhynchus kisutch", 394 | "n02606052 rock beauty, Holocanthus tricolor", 395 | "n02607072 anemone fish", 396 | "n02640242 sturgeon", 397 | "n02641379 gar, garfish, garpike, billfish, Lepisosteus osseus", 398 | "n02643566 lionfish", 399 | "n02655020 puffer, pufferfish, blowfish, globefish", 400 | "n02666196 abacus", 401 | "n02667093 abaya", 402 | "n02669723 academic gown, academic robe, judge's robe", 403 | "n02672831 accordion, piano accordion, squeeze box", 404 | "n02676566 acoustic guitar", 405 | "n02687172 aircraft carrier, carrier, flattop, attack aircraft carrier", 406 | "n02690373 airliner", 407 | "n02692877 airship, dirigible", 408 | "n02699494 altar", 409 | "n02701002 ambulance", 410 | "n02704792 amphibian, amphibious vehicle", 411 | "n02708093 analog clock", 412 | "n02727426 apiary, bee house", 413 | "n02730930 apron", 414 | "n02747177 ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, trash barrel, trash bin", 415 | "n02749479 assault rifle, assault gun", 416 | "n02769748 backpack, back pack, knapsack, packsack, rucksack, haversack", 417 | "n02776631 bakery, bakeshop, bakehouse", 418 | "n02777292 balance beam, beam", 419 | "n02782093 balloon", 420 | "n02783161 ballpoint, ballpoint pen, ballpen, Biro", 421 | "n02786058 Band Aid", 422 | "n02787622 banjo", 423 | "n02788148 bannister, banister, balustrade, balusters, handrail", 424 | "n02790996 barbell", 425 | "n02791124 barber chair", 426 | "n02791270 barbershop", 427 | "n02793495 barn", 428 | "n02794156 barometer", 429 | "n02795169 barrel, cask", 430 | "n02797295 barrow, garden cart, lawn cart, wheelbarrow", 431 | "n02799071 baseball", 432 | "n02802426 basketball", 433 | "n02804414 bassinet", 434 | "n02804610 bassoon", 435 | "n02807133 bathing cap, swimming cap", 436 | "n02808304 bath towel", 437 | "n02808440 bathtub, bathing tub, bath, tub", 438 | "n02814533 beach wagon, station wagon, wagon, estate car, beach waggon, station waggon, waggon", 439 | "n02814860 beacon, lighthouse, beacon light, pharos", 440 | "n02815834 beaker", 441 | "n02817516 bearskin, busby, shako", 442 | "n02823428 beer bottle", 443 | "n02823750 beer glass", 444 | "n02825657 bell cote, bell cot", 445 | "n02834397 bib", 446 | "n02835271 bicycle-built-for-two, tandem bicycle, tandem", 447 | "n02837789 bikini, two-piece", 448 | "n02840245 binder, ring-binder", 449 | "n02841315 binoculars, field glasses, opera glasses", 450 | "n02843684 birdhouse", 451 | "n02859443 boathouse", 452 | "n02860847 
bobsled, bobsleigh, bob", 453 | "n02865351 bolo tie, bolo, bola tie, bola", 454 | "n02869837 bonnet, poke bonnet", 455 | "n02870880 bookcase", 456 | "n02871525 bookshop, bookstore, bookstall", 457 | "n02877765 bottlecap", 458 | "n02879718 bow", 459 | "n02883205 bow tie, bow-tie, bowtie", 460 | "n02892201 brass, memorial tablet, plaque", 461 | "n02892767 brassiere, bra, bandeau", 462 | "n02894605 breakwater, groin, groyne, mole, bulwark, seawall, jetty", 463 | "n02895154 breastplate, aegis, egis", 464 | "n02906734 broom", 465 | "n02909870 bucket, pail", 466 | "n02910353 buckle", 467 | "n02916936 bulletproof vest", 468 | "n02917067 bullet train, bullet", 469 | "n02927161 butcher shop, meat market", 470 | "n02930766 cab, hack, taxi, taxicab", 471 | "n02939185 caldron, cauldron", 472 | "n02948072 candle, taper, wax light", 473 | "n02950826 cannon", 474 | "n02951358 canoe", 475 | "n02951585 can opener, tin opener", 476 | "n02963159 cardigan", 477 | "n02965783 car mirror", 478 | "n02966193 carousel, carrousel, merry-go-round, roundabout, whirligig", 479 | "n02966687 carpenter's kit, tool kit", 480 | "n02971356 carton", 481 | "n02974003 car wheel", 482 | "n02977058 cash machine, cash dispenser, automated teller machine, automatic teller machine, automated teller, automatic teller, ATM", 483 | "n02978881 cassette", 484 | "n02979186 cassette player", 485 | "n02980441 castle", 486 | "n02981792 catamaran", 487 | "n02988304 CD player", 488 | "n02992211 cello, violoncello", 489 | "n02992529 cellular telephone, cellular phone, cellphone, cell, mobile phone", 490 | "n02999410 chain", 491 | "n03000134 chainlink fence", 492 | "n03000247 chain mail, ring mail, mail, chain armor, chain armour, ring armor, ring armour", 493 | "n03000684 chain saw, chainsaw", 494 | "n03014705 chest", 495 | "n03016953 chiffonier, commode", 496 | "n03017168 chime, bell, gong", 497 | "n03018349 china cabinet, china closet", 498 | "n03026506 Christmas stocking", 499 | "n03028079 church, church building", 500 | "n03032252 cinema, movie theater, movie theatre, movie house, picture palace", 501 | "n03041632 cleaver, meat cleaver, chopper", 502 | "n03042490 cliff dwelling", 503 | "n03045698 cloak", 504 | "n03047690 clog, geta, patten, sabot", 505 | "n03062245 cocktail shaker", 506 | "n03063599 coffee mug", 507 | "n03063689 coffeepot", 508 | "n03065424 coil, spiral, volute, whorl, helix", 509 | "n03075370 combination lock", 510 | "n03085013 computer keyboard, keypad", 511 | "n03089624 confectionery, confectionary, candy store", 512 | "n03095699 container ship, containership, container vessel", 513 | "n03100240 convertible", 514 | "n03109150 corkscrew, bottle screw", 515 | "n03110669 cornet, horn, trumpet, trump", 516 | "n03124043 cowboy boot", 517 | "n03124170 cowboy hat, ten-gallon hat", 518 | "n03125729 cradle", 519 | "n03126707 crane", 520 | "n03127747 crash helmet", 521 | "n03127925 crate", 522 | "n03131574 crib, cot", 523 | "n03133878 Crock Pot", 524 | "n03134739 croquet ball", 525 | "n03141823 crutch", 526 | "n03146219 cuirass", 527 | "n03160309 dam, dike, dyke", 528 | "n03179701 desk", 529 | "n03180011 desktop computer", 530 | "n03187595 dial telephone, dial phone", 531 | "n03188531 diaper, nappy, napkin", 532 | "n03196217 digital clock", 533 | "n03197337 digital watch", 534 | "n03201208 dining table, board", 535 | "n03207743 dishrag, dishcloth", 536 | "n03207941 dishwasher, dish washer, dishwashing machine", 537 | "n03208938 disk brake, disc brake", 538 | "n03216828 dock, dockage, docking facility", 539 | "n03218198 dogsled, 
dog sled, dog sleigh", 540 | "n03220513 dome", 541 | "n03223299 doormat, welcome mat", 542 | "n03240683 drilling platform, offshore rig", 543 | "n03249569 drum, membranophone, tympan", 544 | "n03250847 drumstick", 545 | "n03255030 dumbbell", 546 | "n03259280 Dutch oven", 547 | "n03271574 electric fan, blower", 548 | "n03272010 electric guitar", 549 | "n03272562 electric locomotive", 550 | "n03290653 entertainment center", 551 | "n03291819 envelope", 552 | "n03297495 espresso maker", 553 | "n03314780 face powder", 554 | "n03325584 feather boa, boa", 555 | "n03337140 file, file cabinet, filing cabinet", 556 | "n03344393 fireboat", 557 | "n03345487 fire engine, fire truck", 558 | "n03347037 fire screen, fireguard", 559 | "n03355925 flagpole, flagstaff", 560 | "n03372029 flute, transverse flute", 561 | "n03376595 folding chair", 562 | "n03379051 football helmet", 563 | "n03384352 forklift", 564 | "n03388043 fountain", 565 | "n03388183 fountain pen", 566 | "n03388549 four-poster", 567 | "n03393912 freight car", 568 | "n03394916 French horn, horn", 569 | "n03400231 frying pan, frypan, skillet", 570 | "n03404251 fur coat", 571 | "n03417042 garbage truck, dustcart", 572 | "n03424325 gasmask, respirator, gas helmet", 573 | "n03425413 gas pump, gasoline pump, petrol pump, island dispenser", 574 | "n03443371 goblet", 575 | "n03444034 go-kart", 576 | "n03445777 golf ball", 577 | "n03445924 golfcart, golf cart", 578 | "n03447447 gondola", 579 | "n03447721 gong, tam-tam", 580 | "n03450230 gown", 581 | "n03452741 grand piano, grand", 582 | "n03457902 greenhouse, nursery, glasshouse", 583 | "n03459775 grille, radiator grille", 584 | "n03461385 grocery store, grocery, food market, market", 585 | "n03467068 guillotine", 586 | "n03476684 hair slide", 587 | "n03476991 hair spray", 588 | "n03478589 half track", 589 | "n03481172 hammer", 590 | "n03482405 hamper", 591 | "n03483316 hand blower, blow dryer, blow drier, hair dryer, hair drier", 592 | "n03485407 hand-held computer, hand-held microcomputer", 593 | "n03485794 handkerchief, hankie, hanky, hankey", 594 | "n03492542 hard disc, hard disk, fixed disk", 595 | "n03494278 harmonica, mouth organ, harp, mouth harp", 596 | "n03495258 harp", 597 | "n03496892 harvester, reaper", 598 | "n03498962 hatchet", 599 | "n03527444 holster", 600 | "n03529860 home theater, home theatre", 601 | "n03530642 honeycomb", 602 | "n03532672 hook, claw", 603 | "n03534580 hoopskirt, crinoline", 604 | "n03535780 horizontal bar, high bar", 605 | "n03538406 horse cart, horse-cart", 606 | "n03544143 hourglass", 607 | "n03584254 iPod", 608 | "n03584829 iron, smoothing iron", 609 | "n03590841 jack-o'-lantern", 610 | "n03594734 jean, blue jean, denim", 611 | "n03594945 jeep, landrover", 612 | "n03595614 jersey, T-shirt, tee shirt", 613 | "n03598930 jigsaw puzzle", 614 | "n03599486 jinrikisha, ricksha, rickshaw", 615 | "n03602883 joystick", 616 | "n03617480 kimono", 617 | "n03623198 knee pad", 618 | "n03627232 knot", 619 | "n03630383 lab coat, laboratory coat", 620 | "n03633091 ladle", 621 | "n03637318 lampshade, lamp shade", 622 | "n03642806 laptop, laptop computer", 623 | "n03649909 lawn mower, mower", 624 | "n03657121 lens cap, lens cover", 625 | "n03658185 letter opener, paper knife, paperknife", 626 | "n03661043 library", 627 | "n03662601 lifeboat", 628 | "n03666591 lighter, light, igniter, ignitor", 629 | "n03670208 limousine, limo", 630 | "n03673027 liner, ocean liner", 631 | "n03676483 lipstick, lip rouge", 632 | "n03680355 Loafer", 633 | "n03690938 lotion", 634 | "n03691459 
loudspeaker, speaker, speaker unit, loudspeaker system, speaker system", 635 | "n03692522 loupe, jeweler's loupe", 636 | "n03697007 lumbermill, sawmill", 637 | "n03706229 magnetic compass", 638 | "n03709823 mailbag, postbag", 639 | "n03710193 mailbox, letter box", 640 | "n03710637 maillot", 641 | "n03710721 maillot, tank suit", 642 | "n03717622 manhole cover", 643 | "n03720891 maraca", 644 | "n03721384 marimba, xylophone", 645 | "n03724870 mask", 646 | "n03729826 matchstick", 647 | "n03733131 maypole", 648 | "n03733281 maze, labyrinth", 649 | "n03733805 measuring cup", 650 | "n03742115 medicine chest, medicine cabinet", 651 | "n03743016 megalith, megalithic structure", 652 | "n03759954 microphone, mike", 653 | "n03761084 microwave, microwave oven", 654 | "n03763968 military uniform", 655 | "n03764736 milk can", 656 | "n03769881 minibus", 657 | "n03770439 miniskirt, mini", 658 | "n03770679 minivan", 659 | "n03773504 missile", 660 | "n03775071 mitten", 661 | "n03775546 mixing bowl", 662 | "n03776460 mobile home, manufactured home", 663 | "n03777568 Model T", 664 | "n03777754 modem", 665 | "n03781244 monastery", 666 | "n03782006 monitor", 667 | "n03785016 moped", 668 | "n03786901 mortar", 669 | "n03787032 mortarboard", 670 | "n03788195 mosque", 671 | "n03788365 mosquito net", 672 | "n03791053 motor scooter, scooter", 673 | "n03792782 mountain bike, all-terrain bike, off-roader", 674 | "n03792972 mountain tent", 675 | "n03793489 mouse, computer mouse", 676 | "n03794056 mousetrap", 677 | "n03796401 moving van", 678 | "n03803284 muzzle", 679 | "n03804744 nail", 680 | "n03814639 neck brace", 681 | "n03814906 necklace", 682 | "n03825788 nipple", 683 | "n03832673 notebook, notebook computer", 684 | "n03837869 obelisk", 685 | "n03838899 oboe, hautboy, hautbois", 686 | "n03840681 ocarina, sweet potato", 687 | "n03841143 odometer, hodometer, mileometer, milometer", 688 | "n03843555 oil filter", 689 | "n03854065 organ, pipe organ", 690 | "n03857828 oscilloscope, scope, cathode-ray oscilloscope, CRO", 691 | "n03866082 overskirt", 692 | "n03868242 oxcart", 693 | "n03868863 oxygen mask", 694 | "n03871628 packet", 695 | "n03873416 paddle, boat paddle", 696 | "n03874293 paddlewheel, paddle wheel", 697 | "n03874599 padlock", 698 | "n03876231 paintbrush", 699 | "n03877472 pajama, pyjama, pj's, jammies", 700 | "n03877845 palace", 701 | "n03884397 panpipe, pandean pipe, syrinx", 702 | "n03887697 paper towel", 703 | "n03888257 parachute, chute", 704 | "n03888605 parallel bars, bars", 705 | "n03891251 park bench", 706 | "n03891332 parking meter", 707 | "n03895866 passenger car, coach, carriage", 708 | "n03899768 patio, terrace", 709 | "n03902125 pay-phone, pay-station", 710 | "n03903868 pedestal, plinth, footstall", 711 | "n03908618 pencil box, pencil case", 712 | "n03908714 pencil sharpener", 713 | "n03916031 perfume, essence", 714 | "n03920288 Petri dish", 715 | "n03924679 photocopier", 716 | "n03929660 pick, plectrum, plectron", 717 | "n03929855 pickelhaube", 718 | "n03930313 picket fence, paling", 719 | "n03930630 pickup, pickup truck", 720 | "n03933933 pier", 721 | "n03935335 piggy bank, penny bank", 722 | "n03937543 pill bottle", 723 | "n03938244 pillow", 724 | "n03942813 ping-pong ball", 725 | "n03944341 pinwheel", 726 | "n03947888 pirate, pirate ship", 727 | "n03950228 pitcher, ewer", 728 | "n03954731 plane, carpenter's plane, woodworking plane", 729 | "n03956157 planetarium", 730 | "n03958227 plastic bag", 731 | "n03961711 plate rack", 732 | "n03967562 plow, plough", 733 | "n03970156 plunger, plumber's 
helper", 734 | "n03976467 Polaroid camera, Polaroid Land camera", 735 | "n03976657 pole", 736 | "n03977966 police van, police wagon, paddy wagon, patrol wagon, wagon, black Maria", 737 | "n03980874 poncho", 738 | "n03982430 pool table, billiard table, snooker table", 739 | "n03983396 pop bottle, soda bottle", 740 | "n03991062 pot, flowerpot", 741 | "n03992509 potter's wheel", 742 | "n03995372 power drill", 743 | "n03998194 prayer rug, prayer mat", 744 | "n04004767 printer", 745 | "n04005630 prison, prison house", 746 | "n04008634 projectile, missile", 747 | "n04009552 projector", 748 | "n04019541 puck, hockey puck", 749 | "n04023962 punching bag, punch bag, punching ball, punchball", 750 | "n04026417 purse", 751 | "n04033901 quill, quill pen", 752 | "n04033995 quilt, comforter, comfort, puff", 753 | "n04037443 racer, race car, racing car", 754 | "n04039381 racket, racquet", 755 | "n04040759 radiator", 756 | "n04041544 radio, wireless", 757 | "n04044716 radio telescope, radio reflector", 758 | "n04049303 rain barrel", 759 | "n04065272 recreational vehicle, RV, R.V.", 760 | "n04067472 reel", 761 | "n04069434 reflex camera", 762 | "n04070727 refrigerator, icebox", 763 | "n04074963 remote control, remote", 764 | "n04081281 restaurant, eating house, eating place, eatery", 765 | "n04086273 revolver, six-gun, six-shooter", 766 | "n04090263 rifle", 767 | "n04099969 rocking chair, rocker", 768 | "n04111531 rotisserie", 769 | "n04116512 rubber eraser, rubber, pencil eraser", 770 | "n04118538 rugby ball", 771 | "n04118776 rule, ruler", 772 | "n04120489 running shoe", 773 | "n04125021 safe", 774 | "n04127249 safety pin", 775 | "n04131690 saltshaker, salt shaker", 776 | "n04133789 sandal", 777 | "n04136333 sarong", 778 | "n04141076 sax, saxophone", 779 | "n04141327 scabbard", 780 | "n04141975 scale, weighing machine", 781 | "n04146614 school bus", 782 | "n04147183 schooner", 783 | "n04149813 scoreboard", 784 | "n04152593 screen, CRT screen", 785 | "n04153751 screw", 786 | "n04154565 screwdriver", 787 | "n04162706 seat belt, seatbelt", 788 | "n04179913 sewing machine", 789 | "n04192698 shield, buckler", 790 | "n04200800 shoe shop, shoe-shop, shoe store", 791 | "n04201297 shoji", 792 | "n04204238 shopping basket", 793 | "n04204347 shopping cart", 794 | "n04208210 shovel", 795 | "n04209133 shower cap", 796 | "n04209239 shower curtain", 797 | "n04228054 ski", 798 | "n04229816 ski mask", 799 | "n04235860 sleeping bag", 800 | "n04238763 slide rule, slipstick", 801 | "n04239074 sliding door", 802 | "n04243546 slot, one-armed bandit", 803 | "n04251144 snorkel", 804 | "n04252077 snowmobile", 805 | "n04252225 snowplow, snowplough", 806 | "n04254120 soap dispenser", 807 | "n04254680 soccer ball", 808 | "n04254777 sock", 809 | "n04258138 solar dish, solar collector, solar furnace", 810 | "n04259630 sombrero", 811 | "n04263257 soup bowl", 812 | "n04264628 space bar", 813 | "n04265275 space heater", 814 | "n04266014 space shuttle", 815 | "n04270147 spatula", 816 | "n04273569 speedboat", 817 | "n04275548 spider web, spider's web", 818 | "n04277352 spindle", 819 | "n04285008 sports car, sport car", 820 | "n04286575 spotlight, spot", 821 | "n04296562 stage", 822 | "n04310018 steam locomotive", 823 | "n04311004 steel arch bridge", 824 | "n04311174 steel drum", 825 | "n04317175 stethoscope", 826 | "n04325704 stole", 827 | "n04326547 stone wall", 828 | "n04328186 stopwatch, stop watch", 829 | "n04330267 stove", 830 | "n04332243 strainer", 831 | "n04335435 streetcar, tram, tramcar, trolley, trolley car", 832 | "n04336792 
stretcher", 833 | "n04344873 studio couch, day bed", 834 | "n04346328 stupa, tope", 835 | "n04347754 submarine, pigboat, sub, U-boat", 836 | "n04350905 suit, suit of clothes", 837 | "n04355338 sundial", 838 | "n04355933 sunglass", 839 | "n04356056 sunglasses, dark glasses, shades", 840 | "n04357314 sunscreen, sunblock, sun blocker", 841 | "n04366367 suspension bridge", 842 | "n04367480 swab, swob, mop", 843 | "n04370456 sweatshirt", 844 | "n04371430 swimming trunks, bathing trunks", 845 | "n04371774 swing", 846 | "n04372370 switch, electric switch, electrical switch", 847 | "n04376876 syringe", 848 | "n04380533 table lamp", 849 | "n04389033 tank, army tank, armored combat vehicle, armoured combat vehicle", 850 | "n04392985 tape player", 851 | "n04398044 teapot", 852 | "n04399382 teddy, teddy bear", 853 | "n04404412 television, television system", 854 | "n04409515 tennis ball", 855 | "n04417672 thatch, thatched roof", 856 | "n04418357 theater curtain, theatre curtain", 857 | "n04423845 thimble", 858 | "n04428191 thresher, thrasher, threshing machine", 859 | "n04429376 throne", 860 | "n04435653 tile roof", 861 | "n04442312 toaster", 862 | "n04443257 tobacco shop, tobacconist shop, tobacconist", 863 | "n04447861 toilet seat", 864 | "n04456115 torch", 865 | "n04458633 totem pole", 866 | "n04461696 tow truck, tow car, wrecker", 867 | "n04462240 toyshop", 868 | "n04465501 tractor", 869 | "n04467665 trailer truck, tractor trailer, trucking rig, rig, articulated lorry, semi", 870 | "n04476259 tray", 871 | "n04479046 trench coat", 872 | "n04482393 tricycle, trike, velocipede", 873 | "n04483307 trimaran", 874 | "n04485082 tripod", 875 | "n04486054 triumphal arch", 876 | "n04487081 trolleybus, trolley coach, trackless trolley", 877 | "n04487394 trombone", 878 | "n04493381 tub, vat", 879 | "n04501370 turnstile", 880 | "n04505470 typewriter keyboard", 881 | "n04507155 umbrella", 882 | "n04509417 unicycle, monocycle", 883 | "n04515003 upright, upright piano", 884 | "n04517823 vacuum, vacuum cleaner", 885 | "n04522168 vase", 886 | "n04523525 vault", 887 | "n04525038 velvet", 888 | "n04525305 vending machine", 889 | "n04532106 vestment", 890 | "n04532670 viaduct", 891 | "n04536866 violin, fiddle", 892 | "n04540053 volleyball", 893 | "n04542943 waffle iron", 894 | "n04548280 wall clock", 895 | "n04548362 wallet, billfold, notecase, pocketbook", 896 | "n04550184 wardrobe, closet, press", 897 | "n04552348 warplane, military plane", 898 | "n04553703 washbasin, handbasin, washbowl, lavabo, wash-hand basin", 899 | "n04554684 washer, automatic washer, washing machine", 900 | "n04557648 water bottle", 901 | "n04560804 water jug", 902 | "n04562935 water tower", 903 | "n04579145 whiskey jug", 904 | "n04579432 whistle", 905 | "n04584207 wig", 906 | "n04589890 window screen", 907 | "n04590129 window shade", 908 | "n04591157 Windsor tie", 909 | "n04591713 wine bottle", 910 | "n04592741 wing", 911 | "n04596742 wok", 912 | "n04597913 wooden spoon", 913 | "n04599235 wool, woolen, woollen", 914 | "n04604644 worm fence, snake fence, snake-rail fence, Virginia fence", 915 | "n04606251 wreck", 916 | "n04612504 yawl", 917 | "n04613696 yurt", 918 | "n06359193 web site, website, internet site, site", 919 | "n06596364 comic book", 920 | "n06785654 crossword puzzle, crossword", 921 | "n06794110 street sign", 922 | "n06874185 traffic light, traffic signal, stoplight", 923 | "n07248320 book jacket, dust cover, dust jacket, dust wrapper", 924 | "n07565083 menu", 925 | "n07579787 plate", 926 | "n07583066 guacamole", 927 | "n07584110 
consomme", 928 | "n07590611 hot pot, hotpot", 929 | "n07613480 trifle", 930 | "n07614500 ice cream, icecream", 931 | "n07615774 ice lolly, lolly, lollipop, popsicle", 932 | "n07684084 French loaf", 933 | "n07693725 bagel, beigel", 934 | "n07695742 pretzel", 935 | "n07697313 cheeseburger", 936 | "n07697537 hotdog, hot dog, red hot", 937 | "n07711569 mashed potato", 938 | "n07714571 head cabbage", 939 | "n07714990 broccoli", 940 | "n07715103 cauliflower", 941 | "n07716358 zucchini, courgette", 942 | "n07716906 spaghetti squash", 943 | "n07717410 acorn squash", 944 | "n07717556 butternut squash", 945 | "n07718472 cucumber, cuke", 946 | "n07718747 artichoke, globe artichoke", 947 | "n07720875 bell pepper", 948 | "n07730033 cardoon", 949 | "n07734744 mushroom", 950 | "n07742313 Granny Smith", 951 | "n07745940 strawberry", 952 | "n07747607 orange", 953 | "n07749582 lemon", 954 | "n07753113 fig", 955 | "n07753275 pineapple, ananas", 956 | "n07753592 banana", 957 | "n07754684 jackfruit, jak, jack", 958 | "n07760859 custard apple", 959 | "n07768694 pomegranate", 960 | "n07802026 hay", 961 | "n07831146 carbonara", 962 | "n07836838 chocolate sauce, chocolate syrup", 963 | "n07860988 dough", 964 | "n07871810 meat loaf, meatloaf", 965 | "n07873807 pizza, pizza pie", 966 | "n07875152 potpie", 967 | "n07880968 burrito", 968 | "n07892512 red wine", 969 | "n07920052 espresso", 970 | "n07930864 cup", 971 | "n07932039 eggnog", 972 | "n09193705 alp", 973 | "n09229709 bubble", 974 | "n09246464 cliff, drop, drop-off", 975 | "n09256479 coral reef", 976 | "n09288635 geyser", 977 | "n09332890 lakeside, lakeshore", 978 | "n09399592 promontory, headland, head, foreland", 979 | "n09421951 sandbar, sand bar", 980 | "n09428293 seashore, coast, seacoast, sea-coast", 981 | "n09468604 valley, vale", 982 | "n09472597 volcano", 983 | "n09835506 ballplayer, baseball player", 984 | "n10148035 groom, bridegroom", 985 | "n10565667 scuba diver", 986 | "n11879895 rapeseed", 987 | "n11939491 daisy", 988 | "n12057211 yellow lady's slipper, yellow lady-slipper, Cypripedium calceolus, Cypripedium parviflorum", 989 | "n12144580 corn", 990 | "n12267677 acorn", 991 | "n12620546 hip, rose hip, rosehip", 992 | "n12768682 buckeye, horse chestnut, conker", 993 | "n12985857 coral fungus", 994 | "n12998815 agaric", 995 | "n13037406 gyromitra", 996 | "n13040303 stinkhorn, carrion fungus", 997 | "n13044778 earthstar", 998 | "n13052670 hen-of-the-woods, hen of the woods, Polyporus frondosus, Grifola frondosa", 999 | "n13054560 bolete", 1000 | "n13133613 ear, spike, capitulum", 1001 | "n15075141 toilet tissue, toilet paper, bathroom tissue", 1002 | ] 1003 | 1004 | synset_map = {} 1005 | for i, l in enumerate(synset): 1006 | label, desc = l.split(' ', 1) 1007 | synset_map[label] = {"index": i, "desc": desc, } 1008 | -------------------------------------------------------------------------------- /train_cifar.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Routine for decoding the CIFAR-10 binary file format.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import os 22 | import sys 23 | import tarfile 24 | 25 | from six.moves import xrange # pylint: disable=redefined-builtin 26 | from six.moves import urllib 27 | 28 | from resnet_train import train 29 | from resnet import inference_small 30 | import tensorflow as tf 31 | 32 | DATA_URL = 'http://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz' 33 | 34 | FLAGS = tf.app.flags.FLAGS 35 | tf.app.flags.DEFINE_string('data_dir', '/tmp/cifar-data', 36 | 'where to store the dataset') 37 | tf.app.flags.DEFINE_boolean('use_bn', True, 'use batch normalization. otherwise use biases') 38 | 39 | # Process images of this size: the native CIFAR size of 32 x 32 (the original 40 | # TensorFlow tutorial cropped to 24 x 24). If one alters this number, then the 41 | # entire model architecture will change and any model would need to be retrained. 42 | IMAGE_SIZE = 32 43 | 44 | # Global constants describing the CIFAR-10 data set. 45 | NUM_CLASSES = 10 46 | NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = 50000 47 | NUM_EXAMPLES_PER_EPOCH_FOR_EVAL = 10000 48 | 49 | 50 | def read_cifar10(filename_queue): 51 | """Reads and parses examples from CIFAR10 data files. 52 | 53 | Recommendation: if you want N-way read parallelism, call this function 54 | N times. This will give you N independent Readers reading different 55 | files & positions within those files, which will give better mixing of 56 | examples. 57 | 58 | Args: 59 | filename_queue: A queue of strings with the filenames to read from. 60 | 61 | Returns: 62 | An object representing a single example, with the following fields: 63 | height: number of rows in the result (32) 64 | width: number of columns in the result (32) 65 | depth: number of color channels in the result (3) 66 | key: a scalar string Tensor describing the filename & record number 67 | for this example. 68 | label: an int32 Tensor with the label in the range 0..9. 69 | uint8image: a [height, width, depth] uint8 Tensor with the image data 70 | """ 71 | 72 | class CIFAR10Record(object): 73 | pass 74 | 75 | result = CIFAR10Record() 76 | 77 | # Dimensions of the images in the CIFAR-10 dataset. 78 | # See http://www.cs.toronto.edu/~kriz/cifar.html for a description of the 79 | # input format. 80 | label_bytes = 1 # 2 for CIFAR-100 81 | result.height = 32 82 | result.width = 32 83 | result.depth = 3 84 | image_bytes = result.height * result.width * result.depth 85 | # Every record consists of a label followed by the image, with a 86 | # fixed number of bytes for each. 87 | record_bytes = label_bytes + image_bytes 88 | 89 | # Read a record, getting filenames from the filename_queue. No 90 | # header or footer in the CIFAR-10 format, so we leave header_bytes 91 | # and footer_bytes at their default of 0. 92 | reader = tf.FixedLengthRecordReader(record_bytes=record_bytes) 93 | result.key, value = reader.read(filename_queue) 94 | 95 | # Convert from a string to a vector of uint8 that is record_bytes long. 96 | record_bytes = tf.decode_raw(value, tf.uint8) 97 | 98 | # The first bytes represent the label, which we convert from uint8->int32.
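# (Worked example: with label_bytes = 1 and image_bytes = 32*32*3 = 3072,
#  each record is 3073 bytes: byte 0 is the label in 0..9, followed by
#  1024 red, 1024 green, and 1024 blue bytes in row-major order.)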
99 | result.label = tf.cast( 100 | tf.slice(record_bytes, [0], [label_bytes]), tf.int32) 101 | 102 | # The remaining bytes after the label represent the image, which we reshape 103 | # from [depth * height * width] to [depth, height, width]. 104 | depth_major = tf.reshape( 105 | tf.slice(record_bytes, [label_bytes], [image_bytes]), 106 | [result.depth, result.height, result.width]) 107 | # Convert from [depth, height, width] to [height, width, depth]. 108 | result.uint8image = tf.transpose(depth_major, [1, 2, 0]) 109 | 110 | return result 111 | 112 | 113 | def _generate_image_and_label_batch(image, label, min_queue_examples, 114 | batch_size, shuffle): 115 | """Construct a queued batch of images and labels. 116 | 117 | Args: 118 | image: 3-D Tensor of [height, width, 3] of type float32. 119 | label: 1-D Tensor of type int32 120 | min_queue_examples: int32, minimum number of samples to retain 121 | in the queue that provides batches of examples. 122 | batch_size: Number of images per batch. 123 | shuffle: boolean indicating whether to use a shuffling queue. 124 | 125 | Returns: 126 | images: Images. 4D tensor of [batch_size, height, width, 3] size. 127 | labels: Labels. 1D tensor of [batch_size] size. 128 | """ 129 | # Create a queue that shuffles the examples, and then 130 | # read 'batch_size' images + labels from the example queue. 131 | num_preprocess_threads = 16 132 | if shuffle: 133 | images, label_batch = tf.train.shuffle_batch( 134 | [image, label], 135 | batch_size=batch_size, 136 | num_threads=num_preprocess_threads, 137 | capacity=min_queue_examples + 3 * batch_size, 138 | min_after_dequeue=min_queue_examples) 139 | else: 140 | images, label_batch = tf.train.batch( 141 | [image, label], 142 | batch_size=batch_size, 143 | num_threads=num_preprocess_threads, 144 | capacity=min_queue_examples + 3 * batch_size) 145 | 146 | return images, tf.reshape(label_batch, [batch_size]) 147 | 148 | 149 | def distorted_inputs(data_dir, batch_size): 150 | """Construct distorted input for CIFAR training using the Reader ops. 151 | 152 | Args: 153 | data_dir: Path to the CIFAR-10 data directory. 154 | batch_size: Number of images per batch. 155 | 156 | Returns: 157 | images: Images. 4D tensor of [batch_size, IMAGE_SIZE, IMAGE_SIZE, 3] size. 158 | labels: Labels. 1D tensor of [batch_size] size. 159 | """ 160 | filenames = [ 161 | os.path.join(data_dir, 'cifar-10-batches-bin', 'data_batch_%d.bin' % i) 162 | for i in xrange(1, 6) 163 | ] 164 | 165 | for f in filenames: 166 | if not tf.gfile.Exists(f): 167 | raise ValueError('Failed to find file: ' + f) 168 | 169 | # Create a queue that produces the filenames to read. 170 | filename_queue = tf.train.string_input_producer(filenames) 171 | 172 | # Read examples from files in the filename queue. 173 | read_input = read_cifar10(filename_queue) 174 | reshaped_image = tf.cast(read_input.uint8image, tf.float32) 175 | 176 | height = IMAGE_SIZE 177 | width = IMAGE_SIZE 178 | 179 | # Image processing for training the network. Note the many random 180 | # distortions applied to the image. 181 | 182 | # Randomly crop a [height, width] section of the image. 183 | distorted_image = tf.random_crop(reshaped_image, [height, width, 3]) 184 | 185 | # Randomly flip the image horizontally. 186 | distorted_image = tf.image.random_flip_left_right(distorted_image) 187 | 188 | # Because these operations are not commutative, consider randomizing 189 | # the order of their operation.
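# A hedged sketch of one way to randomize the order (Inception-style,
# alternating per preprocessing thread; illustrative only, since thread_id
# is not available in this function):
#   if thread_id % 2 == 0:
#       img = tf.image.random_brightness(img, max_delta=63)
#       img = tf.image.random_contrast(img, lower=0.2, upper=1.8)
#   else:
#       img = tf.image.random_contrast(img, lower=0.2, upper=1.8)
#       img = tf.image.random_brightness(img, max_delta=63)
# The code below applies a fixed order.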
190 | distorted_image = tf.image.random_brightness(distorted_image, max_delta=63) 191 | distorted_image = tf.image.random_contrast( 192 | distorted_image, lower=0.2, upper=1.8) 193 | 194 | # Subtract off the mean and divide by the variance of the pixels. 195 | float_image = tf.image.per_image_whitening(distorted_image) 196 | 197 | # Ensure that the random shuffling has good mixing properties. 198 | min_fraction_of_examples_in_queue = 0.4 199 | min_queue_examples = int(NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN * 200 | min_fraction_of_examples_in_queue) 201 | print('Filling queue with %d CIFAR images before starting to train. ' 202 | 'This will take a few minutes.' % min_queue_examples) 203 | 204 | # Generate a batch of images and labels by building up a queue of examples. 205 | return _generate_image_and_label_batch(float_image, 206 | read_input.label, 207 | min_queue_examples, 208 | batch_size, 209 | shuffle=True) 210 | 211 | 212 | def inputs(eval_data, data_dir, batch_size): 213 | """Construct input for CIFAR evaluation using the Reader ops. 214 | 215 | Args: 216 | eval_data: bool, indicating if one should use the train or eval data set. 217 | data_dir: Path to the CIFAR-10 data directory. 218 | batch_size: Number of images per batch. 219 | 220 | Returns: 221 | images: Images. 4D tensor of [batch_size, IMAGE_SIZE, IMAGE_SIZE, 3] size. 222 | labels: Labels. 1D tensor of [batch_size] size. 223 | """ 224 | if not eval_data: 225 | assert False, "hack. shouldn't go here" 226 | filenames = [os.path.join(data_dir, 'data_batch_%d.bin' % i) 227 | for i in xrange(1, 6)] 228 | num_examples_per_epoch = NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN 229 | else: 230 | filenames = [os.path.join(data_dir, 'cifar-10-batches-bin', 'test_batch.bin')] 231 | num_examples_per_epoch = NUM_EXAMPLES_PER_EPOCH_FOR_EVAL 232 | 233 | for f in filenames: 234 | if not tf.gfile.Exists(f): 235 | raise ValueError('Failed to find file: ' + f) 236 | 237 | # Create a queue that produces the filenames to read. 238 | filename_queue = tf.train.string_input_producer(filenames) 239 | 240 | # Read examples from files in the filename queue. 241 | read_input = read_cifar10(filename_queue) 242 | reshaped_image = tf.cast(read_input.uint8image, tf.float32) 243 | 244 | height = IMAGE_SIZE 245 | width = IMAGE_SIZE 246 | 247 | # Image processing for evaluation. 248 | # Crop the central [height, width] of the image. 249 | resized_image = tf.image.resize_image_with_crop_or_pad(reshaped_image, 250 | height, width) # args are (target_height, target_width) 251 | 252 | # Subtract off the mean and divide by the variance of the pixels. 253 | float_image = tf.image.per_image_whitening(resized_image) 254 | 255 | # Ensure that the random shuffling has good mixing properties. 256 | min_fraction_of_examples_in_queue = 0.4 257 | min_queue_examples = int(num_examples_per_epoch * 258 | min_fraction_of_examples_in_queue) 259 | 260 | # Generate a batch of images and labels by building up a queue of examples.
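# (shuffle=False below: evaluation batches come from a plain FIFO batching
#  queue, tf.train.batch, rather than the shuffling queue used for training.)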
261 | return _generate_image_and_label_batch(float_image, 262 | read_input.label, 263 | min_queue_examples, 264 | batch_size, 265 | shuffle=False) 266 | 267 | 268 | def maybe_download_and_extract(): 269 | """Download and extract the tarball from Alex's website.""" 270 | dest_directory = FLAGS.data_dir 271 | if not os.path.exists(dest_directory): 272 | os.makedirs(dest_directory) 273 | filename = DATA_URL.split('/')[-1] 274 | filepath = os.path.join(dest_directory, filename) 275 | if not os.path.exists(filepath): 276 | 277 | def _progress(count, block_size, total_size): 278 | sys.stdout.write('\r>> Downloading %s %.1f%%' % 279 | (filename, float(count * block_size) / 280 | float(total_size) * 100.0)) 281 | sys.stdout.flush() 282 | 283 | filepath, _ = urllib.request.urlretrieve(DATA_URL, filepath, _progress) 284 | print() 285 | statinfo = os.stat(filepath) 286 | print('Successfully downloaded', filename, statinfo.st_size, 'bytes.') 287 | tarfile.open(filepath, 'r:gz').extractall(dest_directory) 288 | 289 | 290 | def main(argv=None): # pylint: disable=unused-argument 291 | maybe_download_and_extract() 292 | 293 | images_train, labels_train = distorted_inputs(FLAGS.data_dir, FLAGS.batch_size) 294 | images_val, labels_val = inputs(True, FLAGS.data_dir, FLAGS.batch_size) 295 | 296 | is_training = tf.placeholder('bool', [], name='is_training') 297 | 298 | images, labels = tf.cond(is_training, 299 | lambda: (images_train, labels_train), 300 | lambda: (images_val, labels_val)) 301 | 302 | logits = inference_small(images, 303 | num_classes=10, 304 | is_training=is_training, 305 | use_bias=(not FLAGS.use_bn), 306 | num_blocks=3) 307 | train(is_training, logits, images, labels) 308 | 309 | 310 | if __name__ == '__main__': 311 | tf.app.run() 312 | -------------------------------------------------------------------------------- /train_imagenet.py: -------------------------------------------------------------------------------- 1 | import skimage.io # bug. need to import this before tensorflow 2 | import skimage.transform # bug. 
need to import this before tensorflow 3 | from resnet_train import train 4 | import tensorflow as tf 5 | import time 6 | import os 7 | import sys 8 | import re 9 | import numpy as np 10 | 11 | from synset import * 12 | from image_processing import image_preprocessing 13 | from resnet import inference # used by main() below 14 | FLAGS = tf.app.flags.FLAGS 15 | tf.app.flags.DEFINE_string('data_dir', '/home/ryan/data/ILSVRC2012/ILSVRC2012_img_train', 16 | 'imagenet dir') 17 | 18 | 19 | def file_list(data_dir): 20 | dir_txt = data_dir + ".txt" 21 | filenames = [] 22 | with open(dir_txt, 'r') as f: 23 | for line in f: 24 | if line[0] == '.': continue 25 | line = line.rstrip() 26 | fn = os.path.join(data_dir, line) 27 | filenames.append(fn) 28 | return filenames 29 | 30 | 31 | def load_data(data_dir): 32 | data = [] 33 | i = 0 34 | 35 | print "listing files in", data_dir 36 | start_time = time.time() 37 | files = file_list(data_dir) 38 | duration = time.time() - start_time 39 | print "took %f sec" % duration 40 | 41 | for img_fn in files: 42 | ext = os.path.splitext(img_fn)[1] 43 | if ext != '.JPEG': continue 44 | 45 | label_name = re.search(r'(n\d+)', img_fn).group(1) 46 | fn = img_fn # file_list() already joined data_dir onto each entry 47 | 48 | label_index = synset_map[label_name]["index"] 49 | 50 | data.append({ 51 | "filename": fn, 52 | "label_name": label_name, 53 | "label_index": label_index, 54 | "desc": synset[label_index], 55 | }) 56 | 57 | return data 58 | 59 | 60 | def distorted_inputs(): 61 | data = load_data(FLAGS.data_dir) 62 | 63 | filenames = [ d['filename'] for d in data ] 64 | label_indexes = [ d['label_index'] for d in data ] 65 | 66 | filename, label_index = tf.train.slice_input_producer([filenames, label_indexes], shuffle=True) 67 | 68 | num_preprocess_threads = 4 69 | images_and_labels = [] 70 | for thread_id in range(num_preprocess_threads): 71 | image_buffer = tf.read_file(filename) 72 | 73 | bbox = [] 74 | train_phase = True # local name avoids shadowing the imported train() 75 | image = image_preprocessing(image_buffer, bbox, train_phase, thread_id) 76 | images_and_labels.append([image, label_index]) 77 | 78 | images, label_index_batch = tf.train.batch_join( 79 | images_and_labels, 80 | batch_size=FLAGS.batch_size, 81 | capacity=2 * num_preprocess_threads * FLAGS.batch_size) 82 | 83 | height = FLAGS.input_size 84 | width = FLAGS.input_size 85 | depth = 3 86 | 87 | images = tf.cast(images, tf.float32) 88 | images = tf.reshape(images, shape=[FLAGS.batch_size, height, width, depth]) 89 | 90 | return images, tf.reshape(label_index_batch, [FLAGS.batch_size]) 91 | 92 | 93 | def main(_): 94 | images, labels = distorted_inputs() 95 | is_training = tf.placeholder('bool', [], name='is_training') 96 | logits = inference(images, 97 | num_classes=1000, 98 | is_training=is_training, 99 | bottleneck=False, 100 | num_blocks=[2, 2, 2, 2]) 101 | train(is_training, logits, images, labels) # resnet_train.train takes is_training first, as in train_cifar.py and train_yourown.py 102 | 103 | 104 | if __name__ == '__main__': 105 | tf.app.run() 106 | -------------------------------------------------------------------------------- /train_yourown.py: -------------------------------------------------------------------------------- 1 | from resnet_train import train 2 | import tensorflow as tf 3 | import time 4 | import os 5 | import sys 6 | import re 7 | import numpy as np 8 | 9 | from synset import * 10 | from image_processing import image_preprocessing 11 | from resnet import inference 12 | 13 | tf.app.flags.DEFINE_string('train_file', '', 'train file path') 14 | FLAGS = tf.app.flags.FLAGS 15 | 16 | def load_data(train_file): 17 | data = [] 18 | i = 0 19 | files = [] 20 | labels = [] 21 | start_time = time.time() 22 | with open(train_file, "r") as fr: 23 | for line in fr.readlines(): 24 | infos =
line.split("\t") 25 | data.append({ 26 | "filename": infos[0], 27 | "label_name": int(infos[1]), 28 | }) 29 | 30 | return data 31 | 32 | def load_data_tmp(data_dir): 33 | data = [] 34 | i = 0 35 | files = [] 36 | labels = [] 37 | print "listing files in", data_dir 38 | start_time = time.time() 39 | 40 | for rootpath, dirnames, filenames in os.walk(data_dir): 41 | for dir0 in dirnames: 42 | path = os.path.join(data_dir, dir0) 43 | files0 = os.listdir(path) 44 | files += [os.path.join(path, item) for item in files0] 45 | labels += [int(dir0)]*len(files0) 46 | 47 | duration = time.time() - start_time 48 | print "took %f sec" % duration 49 | 50 | for i in range(len(files)): 51 | data.append({ 52 | "filename": files[i], 53 | "label_name": labels[i], 54 | }) 55 | 56 | return data 57 | 58 | 59 | def distorted_inputs(): 60 | data = load_data(FLAGS.train_file) 61 | 62 | filenames = [ d['filename'] for d in data ] 63 | label_indexes = [ d['label_name'] for d in data ] 64 | 65 | filename, label_index = tf.train.slice_input_producer([filenames, label_indexes], shuffle=True) 66 | 67 | num_preprocess_threads = 4 68 | images_and_labels = [] 69 | for thread_id in range(num_preprocess_threads): 70 | print "filename:",filename 71 | image_buffer = tf.read_file(filename) 72 | 73 | bbox = [] 74 | train = True 75 | image = image_preprocessing(image_buffer, bbox, train, thread_id) 76 | images_and_labels.append([image, label_index]) 77 | 78 | images, label_index_batch = tf.train.batch_join( 79 | images_and_labels, 80 | batch_size=FLAGS.batch_size, 81 | capacity=2 * num_preprocess_threads * FLAGS.batch_size) 82 | 83 | height = FLAGS.input_size 84 | width = FLAGS.input_size 85 | depth = 3 86 | 87 | images = tf.cast(images, tf.float32) 88 | images = tf.reshape(images, shape=[FLAGS.batch_size, height, width, depth]) 89 | 90 | return images, tf.reshape(label_index_batch, [FLAGS.batch_size]) 91 | 92 | def inputs(): 93 | pass 94 | 95 | def main(_): 96 | images, labels = distorted_inputs() 97 | is_training = tf.placeholder('bool',[], name='is_training') 98 | logits = inference(images, 99 | num_classes=2, 100 | is_training=is_training, 101 | bottleneck=True, 102 | num_blocks=[3, 4, 6, 3]) 103 | train(is_training,logits, images, labels) 104 | 105 | 106 | if __name__ == '__main__': 107 | tf.app.run() 108 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import six.moves 6 | from datetime import datetime 7 | import sys 8 | import math 9 | import time 10 | #from data import inputs, standardize_image 11 | import numpy as np 12 | import tensorflow as tf 13 | # from detect import * 14 | import re 15 | 16 | RESIZE_AOI = 256 17 | RESIZE_FINAL = 227 18 | 19 | # Modifed from here 20 | # http://stackoverflow.com/questions/3160699/python-progress-bar#3160819 21 | class ProgressBar(object): 22 | DEFAULT = 'Progress: %(bar)s %(percent)3d%%' 23 | FULL = '%(bar)s %(current)d/%(total)d (%(percent)3d%%) %(remaining)d to go' 24 | 25 | def __init__(self, total, width=40, fmt=DEFAULT, symbol='='): 26 | assert len(symbol) == 1 27 | 28 | self.total = total 29 | self.width = width 30 | self.symbol = symbol 31 | self.fmt = re.sub(r'(?P%\(.+?\))d', 32 | r'\g%dd' % len(str(total)), fmt) 33 | 34 | self.current = 0 35 | 36 | def update(self, step=1): 37 | self.current += step 38 | percent = 
self.current / float(self.total) 39 | size = int(self.width * percent) 40 | remaining = self.total - self.current 41 | bar = '[' + self.symbol * size + ' ' * (self.width - size) + ']' 42 | 43 | args = { 44 | 'total': self.total, 45 | 'bar': bar, 46 | 'current': self.current, 47 | 'percent': percent * 100, 48 | 'remaining': remaining 49 | } 50 | six.print_('\r' + self.fmt % args, end='') 51 | 52 | def done(self): 53 | self.current = self.total 54 | self.update(step=0) 55 | print('') 56 | 57 | # Read image files 58 | class ImageCoder(object): 59 | 60 | def __init__(self): 61 | # Create a single Session to run all image coding calls. 62 | config = tf.ConfigProto(allow_soft_placement=True) 63 | self._sess = tf.Session(config=config) 64 | 65 | # Initializes function that converts PNG to JPEG data. 66 | self._png_data = tf.placeholder(dtype=tf.string) 67 | image = tf.image.decode_png(self._png_data, channels=3) 68 | self._png_to_jpeg = tf.image.encode_jpeg(image, format='rgb', quality=100) 69 | 70 | # Initializes function that decodes RGB JPEG data. 71 | self._decode_jpeg_data = tf.placeholder(dtype=tf.string) 72 | self._decode_jpeg = tf.image.decode_jpeg(self._decode_jpeg_data, channels=3) 73 | self.crop = tf.image.resize_images(self._decode_jpeg, (RESIZE_AOI, RESIZE_AOI)) 74 | 75 | def png_to_jpeg(self, image_data): 76 | return self._sess.run(self._png_to_jpeg, 77 | feed_dict={self._png_data: image_data}) 78 | 79 | def decode_jpeg(self, image_data): 80 | image = self._sess.run(self.crop, # decode + resize to RESIZE_AOI; self._decode_jpeg alone would return the raw decode 81 | feed_dict={self._decode_jpeg_data: image_data}) 82 | 83 | assert len(image.shape) == 3 84 | assert image.shape[2] == 3 85 | return image 86 | 87 | 88 | def _is_png(filename): 89 | """Determine if a file contains a PNG format image. 90 | Args: 91 | filename: string, path of the image file. 92 | Returns: 93 | boolean indicating if the image is a PNG. 94 | """ 95 | return '.png' in filename 96 | 97 | def make_multi_image_batch(filenames, coder): 98 | """Process a multi-image batch, one look per image. 99 | Args: 100 | filenames: list of paths 101 | coder: instance of ImageCoder to provide TensorFlow image coding utils. 102 | Returns: 103 | image_batch: 4-D float Tensor of resized, standardized images. 104 | """ 105 | 106 | images = [] 107 | 108 | for filename in filenames: 109 | with tf.gfile.FastGFile(filename, 'rb') as f: 110 | image_data = f.read() 111 | # Convert any PNGs to JPEGs for consistency. 112 | if _is_png(filename): 113 | print('Converting PNG to JPEG for %s' % filename) 114 | image_data = coder.png_to_jpeg(image_data) 115 | 116 | image = coder.decode_jpeg(image_data) 117 | 118 | crop = tf.image.resize_images(image, (RESIZE_FINAL, RESIZE_FINAL)) 119 | image = standardize_image(crop) 120 | images.append(image) 121 | image_batch = tf.stack(images) 122 | return image_batch 123 | 124 | def make_multi_crop_batch(filename, coder): 125 | """Process a single image file into a batch of standardized crops. 126 | Args: 127 | filename: string, path to an image file e.g., '/path/to/example.JPG'. 128 | coder: instance of ImageCoder to provide TensorFlow image coding utils. 129 | Returns: 130 | image_batch: 4-D float Tensor of standardized crops of the image. 131 | """ 132 | # Read the image file. 133 | with tf.gfile.FastGFile(filename, 'rb') as f: 134 | image_data = f.read() 135 | 136 | # Convert any PNGs to JPEGs for consistency.
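# (The ImageCoder graph only decodes JPEG data, so PNG inputs are first
#  re-encoded to JPEG through coder.png_to_jpeg() before decoding.)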
137 | if _is_png(filename): 138 | print('Converting PNG to JPEG for %s' % filename) 139 | image_data = coder.png_to_jpeg(image_data) 140 | 141 | image = coder.decode_jpeg(image_data) 142 | 143 | crops = [] 144 | print('Running multi-cropped image') 145 | h = image.shape[0] 146 | w = image.shape[1] 147 | hl = h - RESIZE_FINAL 148 | wl = w - RESIZE_FINAL 149 | 150 | crop = tf.image.resize_images(image, (RESIZE_FINAL, RESIZE_FINAL)) 151 | crops.append(standardize_image(crop)) 152 | crops.append(standardize_image(tf.image.flip_left_right(crop))) # standardize the flipped copy, as with the corner crops below 153 | 154 | corners = [ (0, 0), (0, wl), (hl, 0), (hl, wl), (int(hl/2), int(wl/2))] 155 | for corner in corners: 156 | ch, cw = corner 157 | cropped = tf.image.crop_to_bounding_box(image, ch, cw, RESIZE_FINAL, RESIZE_FINAL) 158 | crops.append(standardize_image(cropped)) 159 | flipped = tf.image.flip_left_right(cropped) 160 | crops.append(standardize_image(flipped)) 161 | 162 | image_batch = tf.stack(crops) 163 | return image_batch 164 | 165 | 166 | 167 | def face_detection_model(model_type, model_path): 168 | model_type_lc = model_type.lower() 169 | if model_type_lc == 'yolo_tiny': 170 | from yolodetect import PersonDetectorYOLOTiny 171 | return PersonDetectorYOLOTiny(model_path) 172 | elif model_type_lc == 'yolo_face': 173 | from yolodetect import FaceDetectorYOLO 174 | return FaceDetectorYOLO(model_path) 175 | elif model_type_lc == 'dlib': 176 | from dlibdetect import FaceDetectorDlib 177 | return FaceDetectorDlib(model_path) 178 | return ObjectDetectorCascadeOpenCV(model_path) # presumably provided by the commented-out "from detect import *" above 179 | --------------------------------------------------------------------------------
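Note on utils.py: make_multi_image_batch() and make_multi_crop_batch() call standardize_image(), but its import ("#from data import inputs, standardize_image") is commented out near the top of the file, so the module is not runnable as shipped. A minimal stand-in sketch, assuming the helper is meant to apply the same per-image whitening that train_cifar.py uses (the function body here is an assumption, not code from this repository):

import tensorflow as tf

def standardize_image(image):
    # Assumed behavior: rescale a single [height, width, 3] image to zero mean
    # and unit variance, matching tf.image.per_image_whitening in train_cifar.py.
    return tf.image.per_image_whitening(image)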