├── README.md
├── image_to_TF.py
├── split_data.py
├── train.py
├── one_output_result.py
├── calculate_score.py
└── resnet_v1.py
/README.md:
--------------------------------------------------------------------------------
# XueLang-AI

Xuelang Manufacturing AI Challenge — visual-computing-assisted fabric quality inspection.

Code for the preliminary and final rounds of the Alibaba Tianchi Xuelang Manufacturing AI Challenge
(preliminary round: 104th of 2403 teams; final round: 58th of 100). See my
[blog post](https://blog.csdn.net/wtrnash/article/details/82530531) for a detailed write-up.

Both rounds used similar models and largely the same pipeline, differing only in how the dataset is
preprocessed and in the fabric defect categories, so the code here is mainly the final-round code.
It covers splitting the data, building TFRecord files, training the model, computing the evaluation
score, and generating the result file.
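The scripts are meant to be run roughly in this order (input and output paths are hard-coded at the
bottom of each script):

1. `split_data.py` — split the labeled images into training and test sets (roughly 9:1)
2. `image_to_TF.py` — convert both sets into TFRecord files
3. `train.py` — train a ResNet-v1 classifier on the training TFRecord
4. `calculate_score.py` — compute AUC, mAP and the weighted competition score on the test set
5. `one_output_result.py` — run a trained checkpoint over the official test images and write the submission CSV
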
--------------------------------------------------------------------------------
/image_to_TF.py:
--------------------------------------------------------------------------------
import tensorflow as tf
import os
from PIL import Image


def image_to_tf(source_path, target_path, classes, name):
    # writer for the TFRecord file holding the whole image set
    with tf.python_io.TFRecordWriter(target_path + "\\" + name) as writer:
        for index, class_name in enumerate(classes):
            class_path = source_path + "\\" + class_name + "\\"  # directory of one class
            if not os.path.exists(class_path):  # stop if a class directory is missing
                return
            for image_name in os.listdir(class_path):
                if image_name.endswith('jpg'):
                    image_path = class_path + image_name  # path of one image
                    image = Image.open(image_path)  # open the image
                    image = image.resize((400, 400), Image.ANTIALIAS)  # resize every image to 400 x 400
                    image_byte = image.tobytes()  # convert the image to raw bytes
                    example = tf.train.Example(features=tf.train.Features(feature={
                        "label": tf.train.Feature(int64_list=tf.train.Int64List(value=[index])),
                        'image_byte': tf.train.Feature(bytes_list=tf.train.BytesList(value=[image_byte]))
                    }))  # wrap the image and its label in an Example
                    writer.write(example.SerializeToString())  # serialize to a string and write


cloth_classes = ['正常', '扎洞', '毛斑', '擦洞', '毛洞', '织稀', '吊经', '缺经', '跳花', '油污渍', '其他']  # image classes
image_to_tf("./data/分割/训练集", "./data/转换", cloth_classes, "400train.TFRecord")
image_to_tf("./data/分割/测试集", "./data/转换", cloth_classes, "400test.TFRecord")
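

# A minimal sanity-check sketch (not part of the original pipeline), using the same
# TF 1.x API as the writer above; record_path must point at a file written by image_to_tf().
def check_tfrecord(record_path):
    for record in tf.python_io.tf_record_iterator(record_path):
        example = tf.train.Example()
        example.ParseFromString(record)
        label = example.features.feature['label'].int64_list.value[0]
        raw = example.features.feature['image_byte'].bytes_list.value[0]
        assert len(raw) == 400 * 400 * 3  # every image was resized to 400 x 400 RGB
        print("label:", label, "bytes:", len(raw))
        break  # inspect only the first example

# check_tfrecord("./data/转换/400train.TFRecord")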
--------------------------------------------------------------------------------
/split_data.py:
--------------------------------------------------------------------------------
import os
import shutil
import random


# Split the images under the source path by class into a training set and a
# test set under the target path.
def split_data(source_path, target_path, classes):
    """
    :param source_path: directory containing the image set to split
    :param target_path: directory that will hold the resulting training and test sets
    :param classes: all image classes
    """
    train_target_path = target_path + "\\训练集"
    # create the training-set directory if it does not exist
    if not os.path.exists(train_target_path):
        os.mkdir(train_target_path)
    train_target_path = train_target_path + "\\"
    test_target_path = target_path + "\\测试集"
    # create the test-set directory if it does not exist
    if not os.path.exists(test_target_path):
        os.mkdir(test_target_path)
    test_target_path = test_target_path + "\\"

    # create one sub-directory per class under both the training and test sets
    for name in classes:
        train_class_path = train_target_path + name
        test_class_path = test_target_path + name
        if not os.path.exists(train_class_path):
            os.mkdir(train_class_path)
        if not os.path.exists(test_class_path):
            os.mkdir(test_class_path)

    source_path = source_path + "\\"
    for name in classes:
        print(name)  # progress: current class
        class_path = source_path + name
        if not os.path.exists(class_path):
            return
        for image_name in os.listdir(class_path):
            if image_name.endswith('jpg'):
                image_path = class_path + "\\" + image_name
                # Draw a random integer in 1..10 to split train/test at a 9:1 ratio:
                # a 1 sends the image to the test set, anything else to the training set.
                random_number = random.randint(1, 10)
                if random_number == 1:
                    shutil.copyfile(image_path, test_target_path + name + "\\" + image_name)
                else:
                    shutil.copyfile(image_path, train_target_path + name + "\\" + image_name)
    return


cloth_classes = ['正常', '扎洞', '毛斑', '擦洞', '毛洞', '织稀', '吊经', '缺经', '跳花', '油污渍', '其他']  # image classes
split_data("./data/瑕疵数据集", "./data/分割", cloth_classes)
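
# A small verification sketch (not part of the original pipeline): print the
# per-class image counts of the split, which should come out roughly 9:1.
def report_split(target_path, classes):
    for name in classes:
        train_count = len(os.listdir(target_path + "\\训练集\\" + name))
        test_count = len(os.listdir(target_path + "\\测试集\\" + name))
        print(name, "train:", train_count, "test:", test_count)

# report_split("./data/分割", cloth_classes)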
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
import tensorflow as tf
import resnet_v1
import tensorflow.contrib.slim as slim

classes = ['正常', '扎洞', '毛斑', '擦洞', '毛洞', '织稀', '吊经', '缺经', '跳花', '油污渍', '其他']  # image classes


def read_and_decode(filename):
    # build an input queue from the file name
    with tf.name_scope("input"):
        filename_queue = tf.train.string_input_producer([filename])
        reader = tf.TFRecordReader()
        _, serialized_example = reader.read(filename_queue)  # returns the file name and the serialized record
        features = tf.parse_single_example(serialized_example,
                                           features={
                                               'label': tf.FixedLenFeature([], tf.int64),
                                               'image_byte': tf.FixedLenFeature([], tf.string),
                                           })
        image = tf.decode_raw(features['image_byte'], tf.uint8)
        image = tf.reshape(image, [400, 400, 3])
        image = tf.cast(image, tf.float32)
        image = tf.image.per_image_standardization(image)  # standardize the image, which helps training converge
        label = tf.cast(features['label'], tf.int32)

    return image, label


def main(_):
    train_image, train_label = read_and_decode("./data/转换/400train.TFRecord")

    train_image_batch, train_label_batch = tf.train.shuffle_batch([train_image, train_label], batch_size=20,
                                                                  capacity=4000, min_after_dequeue=2000,
                                                                  allow_smaller_final_batch=True)

    x = tf.placeholder(tf.float32, [None, 400, 400, 3], name="x")
    y_ = tf.placeholder(tf.int64, [None], name="y_")
    tf.summary.image("input_image", x, 10)

    net, _ = resnet_v1.resnet_v1_152(x)
    net = tf.squeeze(net, axis=[1, 2])  # drop the two 1x1 spatial dimensions left by global pooling
    logits = slim.fully_connected(net, num_outputs=len(classes),
                                  activation_fn=None, scope='predict')

    # cross-entropy loss over the batch
    with tf.name_scope('training'):
        labels = tf.one_hot(y_, len(classes))
        probabilities = tf.nn.softmax(logits)
        probabilities = tf.clip_by_value(probabilities, 1e-10, 1.0)
        loss = -tf.reduce_sum(labels * tf.log(probabilities))  # summed over the batch
        # optimize the loss
        train_step = tf.train.AdamOptimizer(0.0001).minimize(loss)
        tf.summary.scalar('loss', loss)

    with tf.name_scope('accuracy'):
        correct_prediction = tf.equal(y_, tf.argmax(probabilities, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        tf.summary.scalar('accuracy', accuracy)

    # initialize the session and start training
    with tf.Session() as sess:
        saver = tf.train.Saver()
        merged = tf.summary.merge_all()
        # view the summaries with: tensorboard --logdir=./train_model_demo
        train_writer = tf.summary.FileWriter('./train_model_demo', sess.graph)
        sess.run(tf.global_variables_initializer())
        # saver.restore(sess, './checkpoint/resnet.ckpt')
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        # training loop
        for i in range(15000):
            img, lbl = sess.run([train_image_batch, train_label_batch])
            sess.run(train_step, feed_dict={x: img, y_: lbl})
            if i % 10 == 0:
                summary, l, acc = sess.run([merged, loss, accuracy], feed_dict={x: img, y_: lbl})
                print("After %d training step(s), loss is %g, accuracy is %g" % (i, l, acc))
                train_writer.add_summary(summary, i)
            if i % 3000 == 0 and i != 0:  # periodic checkpoint
                saver.save(sess, './checkpoint/resnet_' + str(i) + '.ckpt')

        saver.save(sess, './checkpoint/resnet.ckpt')

        coord.request_stop()
        coord.join(threads)


if __name__ == '__main__':
    tf.app.run()
--------------------------------------------------------------------------------
/one_output_result.py:
--------------------------------------------------------------------------------
import tensorflow as tf
import os
import pandas as pd
import numpy as np
import resnet_v1
import tensorflow.contrib.slim as slim

np.set_printoptions(suppress=True)
defect_classes = ['norm', 'defect_1', 'defect_2', 'defect_3', 'defect_4', 'defect_5', 'defect_6',
                  'defect_7', 'defect_8', 'defect_9', 'defect_10']


def read_and_decode(filename, size):
    # build an input queue from the file name
    with tf.name_scope("input"):
        filename_queue = tf.train.string_input_producer([filename])
        reader = tf.TFRecordReader()
        _, serialized_example = reader.read(filename_queue)  # returns the file name and the serialized record
        features = tf.parse_single_example(serialized_example,
                                           features={
                                               'file_name': tf.FixedLenFeature([], tf.string),
                                               'image_byte': tf.FixedLenFeature([], tf.string)
                                           })
        image = tf.decode_raw(features['image_byte'], tf.uint8)
        image = tf.reshape(image, [size, size, 3])
        image = tf.cast(image, tf.float32)
        image = tf.image.per_image_standardization(image)  # standardize the image
        file_name = features['file_name']

    return image, file_name


# recursively count the total number of images under a path
def count_image_number(path):
    count = 0
    for file_name in os.listdir(path):
        file_path = path + file_name
        # recurse into sub-directories
        if os.path.isdir(file_path):
            count = count + count_image_number(file_path + '\\')
        else:
            count = count + 1
    return count


# format a probability as a plain float (no scientific notation),
# clipped to the range [0.00001, 0.99999]
def as_num(num):
    float_num = float('{:.5f}'.format(num))
    if float_num > 0.99999:
        return 0.99999
    if float_num < 0.00001:
        return 0.00001
    return float_num


path = './data/xuelang_round2_test_b_201808031/'
# count the number of test images
count = count_image_number(path)
batch_size = 20

# placeholder for the input images
x = tf.placeholder(tf.float32, [None, 400, 400, 3], name="x")

# build the network
# logits = model.model(x, is_training=False, dropout_pro=0.5, num=len(defect_classes), weight_decay=0.0)
net, _ = resnet_v1.resnet_v1_50(x, is_training=False)
net = tf.squeeze(net, axis=[1, 2])  # drop the two 1x1 spatial dimensions left by global pooling
logits = slim.fully_connected(net, num_outputs=len(defect_classes),
                              activation_fn=None, scope='predict')
# logits, _ = inception_v3.inception_v3(x, len(defect_classes), is_training=False)

probabilities = tf.nn.softmax(logits)
probabilities = tf.clip_by_value(probabilities, 1e-10, 1.0)

# initialize the session and restore the trained checkpoint
sess = tf.Session()
saver = tf.train.Saver()
saver.restore(sess, './checkpoint/resnet50_0.558/resnet_3000.ckpt')
tf_path = "./classification_tf/400classification.TFRecord"

image, file_name = read_and_decode(tf_path, 400)

image_batch, file_name_batch = tf.train.batch(
    [image, file_name], batch_size=batch_size, capacity=3000, allow_smaller_final_batch=True)
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)

defect_list = []
i = 0
# run all full batches first
while i + batch_size <= count:
    i = i + batch_size

    image_value, file_name_value = sess.run([image_batch, file_name_batch])
    value = sess.run(probabilities, feed_dict={x: image_value})  # run the network on the batch
    for j in range(batch_size):
        for index, _ in enumerate(defect_classes):
            defect_list.append([file_name_value[j].decode() + "|" + defect_classes[index], as_num(value[j][index])])


# handle the remaining images in the final, smaller batch
image_value, file_name_value = sess.run([image_batch, file_name_batch])
value = sess.run(probabilities, feed_dict={x: image_value})  # run the network on the batch

for j in range(count - i):
    for index, _ in enumerate(defect_classes):
        defect_list.append([file_name_value[j].decode() + "|" + defect_classes[index], as_num(value[j][index])])

coord.request_stop()
coord.join(threads)


result_df = pd.DataFrame(defect_list)  # convert to a DataFrame
result_df = result_df.drop_duplicates()  # drop duplicate rows
result_df.rename(columns={0: 'filename|defect', 1: 'probability'}, inplace=True)
result_df.to_csv("resnet152_submit.csv", encoding='utf-8', index=None)
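# For reference (the file name below is illustrative, not from the dataset), the
# generated CSV has one row per image/class pair, probabilities clipped by as_num():
#
#   filename|defect,probability
#   some_image.jpg|norm,0.91234
#   some_image.jpg|defect_1,0.00321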
--------------------------------------------------------------------------------
/calculate_score.py:
--------------------------------------------------------------------------------
import resnet_v1
import tensorflow as tf
import tensorflow.contrib.slim as slim

classes = ['正常', '扎洞', '毛斑', '擦洞', '毛洞', '织稀', '吊经', '缺经', '跳花', '油污渍', '其他']  # image classes


def read_and_decode(filename):
    # build an input queue from the file name
    with tf.name_scope("input"):
        filename_queue = tf.train.string_input_producer([filename])
        reader = tf.TFRecordReader()
        _, serialized_example = reader.read(filename_queue)  # returns the file name and the serialized record
        features = tf.parse_single_example(serialized_example,
                                           features={
                                               'label': tf.FixedLenFeature([], tf.int64),
                                               'image_byte': tf.FixedLenFeature([], tf.string),
                                           })
        image = tf.decode_raw(features['image_byte'], tf.uint8)
        image = tf.reshape(image, [400, 400, 3])
        image = tf.cast(image, tf.float32)
        image = tf.image.per_image_standardization(image)  # standardize the image
        label = tf.cast(features['label'], tf.int32)

    return image, label


def main(_):
    test_image, test_label = read_and_decode("./data/转换/400test.TFRecord")

    test_img_batch, test_lbl_batch = tf.train.batch([test_image, test_label], batch_size=1, capacity=2000)

    x = tf.placeholder(tf.float32, [None, 400, 400, 3], name="x")
    y_ = tf.placeholder(tf.int64, [None], name="y_")
    tf.summary.image("input_image", x, 10)

    net, _ = resnet_v1.resnet_v1_152(x, is_training=False)
    net = tf.squeeze(net, axis=[1, 2])  # drop the two 1x1 spatial dimensions left by global pooling
    logits = slim.fully_connected(net, num_outputs=len(classes),
                                  activation_fn=None, scope='predict')

    # cross-entropy loss (only logged here; no optimizer is run during evaluation)
    with tf.name_scope('training'):
        labels = tf.one_hot(y_, len(classes))
        probabilities = tf.nn.softmax(logits)
        probabilities = tf.clip_by_value(probabilities, 1e-10, 1.0)
        loss = -tf.reduce_sum(labels * tf.log(probabilities))  # summed over the batch
        tf.summary.scalar('loss', loss)

    with tf.name_scope('accuracy'):
        correct_prediction = tf.equal(y_, tf.argmax(probabilities, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        tf.summary.scalar('accuracy', accuracy)

    # initialize the session and run the evaluation
    with tf.Session() as sess:
        saver = tf.train.Saver()
        saver.restore(sess, './checkpoint/resnet_15000.ckpt')
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        map_label_list = []
        map_prediction_list = []

        auc_label_list = []
        auc_prediction_list = []
        # 342 is the number of test samples
        for i in range(342):
            test_img, test_lbl = sess.run([test_img_batch, test_lbl_batch])
            _, prediction = sess.run([accuracy, probabilities], feed_dict={x: test_img, y_: test_lbl})
            map_label_list.append(test_lbl[0])
            map_prediction_list.append(list(prediction[0]))

            # for AUC the task is treated as binary: class 0 ('正常') is the positive class
            if test_lbl[0] == 0:
                auc_label_list.append(True)
            else:
                auc_label_list.append(False)

            auc_prediction_list.append(prediction[0][0])

        prediction_tensor = tf.convert_to_tensor(auc_prediction_list)
        label_tensor = tf.convert_to_tensor(auc_label_list)
        map_prediction_tensor = tf.convert_to_tensor(map_prediction_list, dtype=tf.float32)
        map_label_tensor = tf.convert_to_tensor(map_label_list, dtype=tf.int64)
        auc_value, auc_op = tf.metrics.auc(label_tensor, prediction_tensor, num_thresholds=400)
        map_value, map_op = tf.metrics.average_precision_at_k(map_label_tensor, map_prediction_tensor, 1)
        # the metric ops create their own variables, which must be initialized;
        # the predictions were already collected above, so re-initializing is harmless
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        sess.run([auc_op, map_op])
        auc, mAP = sess.run([auc_value, map_value])

        print("AUC: " + str(auc))
        print("mAP: " + str(mAP))
        # the competition score weights AUC and mAP 7:3,
        # e.g. AUC = 0.90 and mAP = 0.50 give 0.7 * 0.90 + 0.3 * 0.50 = 0.78
        score = 0.7 * auc + 0.3 * mAP
        print("score: " + str(score))
        coord.request_stop()
        coord.join(threads)


if __name__ == '__main__':
    tf.app.run()
--------------------------------------------------------------------------------
/resnet_v1.py:
--------------------------------------------------------------------------------
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
15 | """Contains definitions for the original form of Residual Networks.
16 |
17 | The 'v1' residual networks (ResNets) implemented in this module were proposed
18 | by:
19 | [1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
20 | Deep Residual Learning for Image Recognition. arXiv:1512.03385
21 |
22 | Other variants were introduced in:
23 | [2] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
24 | Identity Mappings in Deep Residual Networks. arXiv: 1603.05027
25 |
26 | The networks defined in this module utilize the bottleneck building block of
27 | [1] with projection shortcuts only for increasing depths. They employ batch
28 | normalization *after* every weight layer. This is the architecture used by
29 | MSRA in the Imagenet and MSCOCO 2016 competition models ResNet-101 and
30 | ResNet-152. See [2; Fig. 1a] for a comparison between the current 'v1'
31 | architecture and the alternative 'v2' architecture of [2] which uses batch
32 | normalization *before* every weight layer in the so-called full pre-activation
33 | units.
34 |
35 | Typical use:
36 |
37 | from tensorflow.contrib.slim.python.slim.nets import
38 | resnet_v1
39 |
40 | ResNet-101 for image classification into 1000 classes:
41 |
42 | # inputs has shape [batch, 224, 224, 3]
43 | with slim.arg_scope(resnet_v1.resnet_arg_scope()):
44 | net, end_points = resnet_v1.resnet_v1_101(inputs, 1000, is_training=False)
45 |
46 | ResNet-101 for semantic segmentation into 21 classes:
47 |
48 | # inputs has shape [batch, 513, 513, 3]
49 | with slim.arg_scope(resnet_v1.resnet_arg_scope()):
50 | net, end_points = resnet_v1.resnet_v1_101(inputs,
51 | 21,
52 | is_training=False,
53 | global_pool=False,
54 | output_stride=16)
55 | """

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from tensorflow.contrib import layers
from tensorflow.contrib.framework.python.ops import add_arg_scope
from tensorflow.contrib.framework.python.ops import arg_scope
from tensorflow.contrib.layers.python.layers import layers as layers_lib
from tensorflow.contrib.layers.python.layers import utils
from tensorflow.contrib.slim.python.slim.nets import resnet_utils
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import variable_scope

resnet_arg_scope = resnet_utils.resnet_arg_scope


@add_arg_scope
def bottleneck(inputs,
               depth,
               depth_bottleneck,
               stride,
               rate=1,
               outputs_collections=None,
               scope=None):
  """Bottleneck residual unit variant with BN after convolutions.

  This is the original residual unit proposed in [1]. See Fig. 1(a) of [2] for
  its definition. Note that we use here the bottleneck variant which has an
  extra bottleneck layer.

  When putting together two consecutive ResNet blocks that use this unit, one
  should use stride = 2 in the last unit of the first block.

  Args:
    inputs: A tensor of size [batch, height, width, channels].
    depth: The depth of the ResNet unit output.
    depth_bottleneck: The depth of the bottleneck layers.
    stride: The ResNet unit's stride. Determines the amount of downsampling of
      the units output compared to its input.
    rate: An integer, rate for atrous convolution.
    outputs_collections: Collection to add the ResNet unit output.
    scope: Optional variable_scope.

  Returns:
    The ResNet unit's output.
  """
  with variable_scope.variable_scope(scope, 'bottleneck_v1', [inputs]) as sc:
    depth_in = utils.last_dimension(inputs.get_shape(), min_rank=4)
    if depth == depth_in:
      shortcut = resnet_utils.subsample(inputs, stride, 'shortcut')
    else:
      shortcut = layers.conv2d(
          inputs,
          depth, [1, 1],
          stride=stride,
          activation_fn=None,
          scope='shortcut')

    residual = layers.conv2d(
        inputs, depth_bottleneck, [1, 1], stride=1, scope='conv1')
    residual = resnet_utils.conv2d_same(
        residual, depth_bottleneck, 3, stride, rate=rate, scope='conv2')
    residual = layers.conv2d(
        residual, depth, [1, 1], stride=1, activation_fn=None, scope='conv3')

    output = nn_ops.relu(shortcut + residual)

    return utils.collect_named_outputs(outputs_collections, sc.name, output)


def resnet_v1(inputs,
              blocks,
              num_classes=None,
              is_training=True,
              global_pool=True,
              output_stride=None,
              include_root_block=True,
              reuse=None,
              scope=None):
  """Generator for v1 ResNet models.

  This function generates a family of ResNet v1 models. See the resnet_v1_*()
  methods for specific model instantiations, obtained by selecting different
  block instantiations that produce ResNets of various depths.

  Training for image classification on Imagenet is usually done with [224, 224]
  inputs, resulting in [7, 7] feature maps at the output of the last ResNet
  block for the ResNets defined in [1] that have nominal stride equal to 32.
  However, for dense prediction tasks we advise that one uses inputs with
  spatial dimensions that are multiples of 32 plus 1, e.g., [321, 321]. In
  this case the feature maps at the ResNet output will have spatial shape
  [(height - 1) / output_stride + 1, (width - 1) / output_stride + 1]
  and corners exactly aligned with the input image corners, which greatly
  facilitates alignment of the features to the image. Using as input [225, 225]
  images results in [8, 8] feature maps at the output of the last ResNet block.

  For dense prediction tasks, the ResNet needs to run in fully-convolutional
  (FCN) mode and global_pool needs to be set to False. The ResNets in [1, 2] all
  have nominal stride equal to 32 and a good choice in FCN mode is to use
  output_stride=16 in order to increase the density of the computed features at
  small computational and memory overhead, cf. http://arxiv.org/abs/1606.00915.

  Args:
    inputs: A tensor of size [batch, height_in, width_in, channels].
    blocks: A list of length equal to the number of ResNet blocks. Each element
      is a resnet_utils.Block object describing the units in the block.
    num_classes: Number of predicted classes for classification tasks. If None
      we return the features before the logit layer.
    is_training: whether batch_norm layers are in training mode.
    global_pool: If True, we perform global average pooling before computing the
      logits. Set to True for image classification, False for dense prediction.
    output_stride: If None, then the output will be computed at the nominal
      network stride. If output_stride is not None, it specifies the requested
      ratio of input to output spatial resolution.
    include_root_block: If True, include the initial convolution followed by
      max-pooling, if False excludes it.
    reuse: whether or not the network and its variables should be reused. To be
      able to reuse 'scope' must be given.
    scope: Optional variable_scope.

  Returns:
    net: A rank-4 tensor of size [batch, height_out, width_out, channels_out].
      If global_pool is False, then height_out and width_out are reduced by a
      factor of output_stride compared to the respective height_in and width_in,
      else both height_out and width_out equal one. If num_classes is None, then
      net is the output of the last ResNet block, potentially after global
      average pooling. If num_classes is not None, net contains the pre-softmax
      activations.
    end_points: A dictionary from components of the network to the corresponding
      activation.

  Raises:
    ValueError: If the target output_stride is not valid.
  """
  with variable_scope.variable_scope(
      scope, 'resnet_v1', [inputs], reuse=reuse) as sc:
    end_points_collection = sc.original_name_scope + '_end_points'
    with arg_scope(
        [layers.conv2d, bottleneck, resnet_utils.stack_blocks_dense],
        outputs_collections=end_points_collection):
      with arg_scope([layers.batch_norm], is_training=is_training):
        net = inputs
        if include_root_block:
          if output_stride is not None:
            if output_stride % 4 != 0:
              raise ValueError('The output_stride needs to be a multiple of 4.')
            output_stride /= 4
          net = resnet_utils.conv2d_same(net, 64, 7, stride=2, scope='conv1')
          net = layers_lib.max_pool2d(net, [3, 3], stride=2, scope='pool1')
        net = resnet_utils.stack_blocks_dense(net, blocks, output_stride)
        if global_pool:
          # Global average pooling.
          net = math_ops.reduce_mean(net, [1, 2], name='pool5', keepdims=True)
        if num_classes is not None:
          net = layers.conv2d(
              net,
              num_classes, [1, 1],
              activation_fn=None,
              normalizer_fn=None,
              scope='logits')
        # Convert end_points_collection into a dictionary of end_points.
        end_points = utils.convert_collection_to_dict(end_points_collection)
        if num_classes is not None:
          end_points['predictions'] = layers_lib.softmax(
              net, scope='predictions')
        return net, end_points

resnet_v1.default_image_size = 400


def resnet_v1_block(scope, base_depth, num_units, stride):
  """Helper function for creating a resnet_v1 bottleneck block.

  Args:
    scope: The scope of the block.
    base_depth: The depth of the bottleneck layer for each unit.
    num_units: The number of units in the block.
    stride: The stride of the block, implemented as a stride in the last unit.
      All other units have stride=1.

  Returns:
    A resnet_v1 bottleneck block.
  """
  return resnet_utils.Block(scope, bottleneck, [{
      'depth': base_depth * 4,
      'depth_bottleneck': base_depth,
      'stride': 1
  }] * (num_units - 1) + [{
      'depth': base_depth * 4,
      'depth_bottleneck': base_depth,
      'stride': stride
  }])


def resnet_v1_50(inputs,
                 num_classes=None,
                 is_training=True,
                 global_pool=True,
                 output_stride=None,
                 reuse=None,
                 scope='resnet_v1_50'):
  """ResNet-50 model of [1]. See resnet_v1() for arg and return description."""
  blocks = [
      resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
      resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
      resnet_v1_block('block3', base_depth=256, num_units=6, stride=2),
      resnet_v1_block('block4', base_depth=512, num_units=3, stride=1),
  ]
  return resnet_v1(
      inputs,
      blocks,
      num_classes,
      is_training,
      global_pool,
      output_stride,
      include_root_block=True,
      reuse=reuse,
      scope=scope)


def resnet_v1_101(inputs,
                  num_classes=None,
                  is_training=True,
                  global_pool=True,
                  output_stride=None,
                  reuse=None,
                  scope='resnet_v1_101'):
  """ResNet-101 model of [1]. See resnet_v1() for arg and return description."""
  blocks = [
      resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
      resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
      resnet_v1_block('block3', base_depth=256, num_units=23, stride=2),
      resnet_v1_block('block4', base_depth=512, num_units=3, stride=1),
  ]
  return resnet_v1(
      inputs,
      blocks,
      num_classes,
      is_training,
      global_pool,
      output_stride,
      include_root_block=True,
      reuse=reuse,
      scope=scope)


def resnet_v1_152(inputs,
                  num_classes=None,
                  is_training=True,
                  global_pool=True,
                  output_stride=None,
                  reuse=None,
                  scope='resnet_v1_152'):
  """ResNet-152 model of [1]. See resnet_v1() for arg and return description."""
  blocks = [
      resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
      resnet_v1_block('block2', base_depth=128, num_units=8, stride=2),
      resnet_v1_block('block3', base_depth=256, num_units=36, stride=2),
      resnet_v1_block('block4', base_depth=512, num_units=3, stride=1),
  ]
  return resnet_v1(
      inputs,
      blocks,
      num_classes,
      is_training,
      global_pool,
      output_stride,
      include_root_block=True,
      reuse=reuse,
      scope=scope)


def resnet_v1_200(inputs,
                  num_classes=None,
                  is_training=True,
                  global_pool=True,
                  output_stride=None,
                  reuse=None,
                  scope='resnet_v1_200'):
  """ResNet-200 model of [2]. See resnet_v1() for arg and return description."""
  blocks = [
      resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
      resnet_v1_block('block2', base_depth=128, num_units=24, stride=2),
      resnet_v1_block('block3', base_depth=256, num_units=36, stride=2),
      resnet_v1_block('block4', base_depth=512, num_units=3, stride=1),
  ]
  return resnet_v1(
      inputs,
      blocks,
      num_classes,
      is_training,
      global_pool,
      output_stride,
      include_root_block=True,
      reuse=reuse,
      scope=scope)
--------------------------------------------------------------------------------