├── README.md
├── image_to_TF.py
├── split_data.py
├── train.py
├── one_output_result.py
├── calculate_score.py
└── resnet_v1.py
/README.md:
--------------------------------------------------------------------------------
1 | # XueLang-AI
2 | Xuelang Manufacturing AI Challenge: visual computing for aided quality inspection
3 | Code for the preliminary and final rounds of the Alibaba Tianchi Xuelang Manufacturing AI Challenge; ranked 104/2403 in the preliminary round and 58/100 in the final.
See my [blog post](https://blog.csdn.net/wtrnash/article/details/82530531) for a detailed write-up.
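
The scripts are intended to be run in roughly this order (the data paths are the defaults hard-coded in each file):

1. `split_data.py`: split the raw per-class images into training and test sets at a 9:1 ratio.
2. `image_to_TF.py`: resize both splits to 400x400 and write them to TFRecord files.
3. `train.py`: train the ResNet-v1-152 classifier and save checkpoints.
4. `calculate_score.py`: compute AUC, mAP, and the weighted competition score on the test split.
5. `one_output_result.py`: run inference on the competition test images and write the submission CSV.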
4 | The preliminary and final rounds used similar models and largely the same pipeline, differing only in how the dataset is processed and in the set of fabric defect classes, so the code here is mainly the final-round code.
5 | It covers splitting the data, building TFRecords, training the model, computing accuracy, and generating the result file.
6 | 
--------------------------------------------------------------------------------
/image_to_TF.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import os
3 | from PIL import Image
4 | 
5 | 
6 | def image_to_tf(source_path, target_path, classes, name):
7 |     with tf.python_io.TFRecordWriter(target_path + "\\" + name) as writer:  # writer for the output TFRecord file
8 |         for index, class_name in enumerate(classes):
9 |             class_path = source_path + "\\" + class_name + "\\"  # directory of one class in the dataset
10 |             if not os.path.exists(class_path):
11 |                 return
12 |             for image_name in os.listdir(class_path):
13 |                 if image_name.endswith('jpg'):
14 |                     image_path = class_path + image_name  # path of one image
15 |                     image = Image.open(image_path)  # open the image
16 |                     image = image.resize((400, 400), Image.ANTIALIAS)  # resize every image to 400 x 400
17 |                     image_byte = image.tobytes()  # convert the image to raw bytes
18 |                     example = tf.train.Example(features=tf.train.Features(feature={
19 |                         "label": tf.train.Feature(int64_list=tf.train.Int64List(value=[index])),
20 |                         'image_byte': tf.train.Feature(bytes_list=tf.train.BytesList(value=[image_byte]))
21 |                     }))  # wrap the image and its label in an Example
22 |                     writer.write(example.SerializeToString())  # serialize to a string and write it out
23 | 
24 |     return
25 | 
26 | 
27 | cloth_classes = ['正常', '扎洞', '毛斑', '擦洞', '毛洞', '织稀', '吊经', '缺经', '跳花', '油污渍', '其他']  # image classes (folder names)
28 | image_to_tf("./data/分割/训练集", "./data/转换", cloth_classes, "400train.TFRecord")
29 | image_to_tf("./data/分割/测试集", "./data/转换", cloth_classes, "400test.TFRecord")
--------------------------------------------------------------------------------
/split_data.py:
--------------------------------------------------------------------------------
1 | import os
2 | import shutil
3 | import random
4 | 
5 | 
6 | # Split the images under the source path into per-class training and test sets under the target path
7 | def split_data(source_path, target_path, classes):
8 |     """
9 |     :param source_path: path of the image set to be split
10 |     :param target_path: path that will hold the resulting training and test sets
11 |     :param classes: all image classes
12 |     """
13 |     train_target_path = target_path + "\\训练集"
14 |     # create the target training-set directory if it does not exist
15 |     if not os.path.exists(train_target_path):
16 |         os.mkdir(train_target_path)
17 |     train_target_path = train_target_path + "\\"
18 |     test_target_path = target_path + "\\测试集"
19 |     # create the target test-set directory if it does not exist
20 |     if not os.path.exists(test_target_path):
21 |         os.mkdir(test_target_path)
22 |     test_target_path = test_target_path + "\\"
23 | 
24 |     # create a folder for each class under the training and test directories
25 |     for name in classes:
26 |         train_class_path = train_target_path + name
27 |         test_class_path = test_target_path + name
28 |         if not os.path.exists(train_class_path):
29 |             os.mkdir(train_class_path)
30 |         if not os.path.exists(test_class_path):
31 |             os.mkdir(test_class_path)
32 | 
33 |     source_path = source_path + "\\"
34 |     for name in classes:
35 |         print(name)
36 |         class_path = source_path + name
37 |         if not os.path.exists(class_path):
38 |             return
39 |         for image_name in os.listdir(class_path):
40 |             if image_name.endswith('jpg'):
41 |                 image_path = class_path + "\\" + image_name
42 |                 # draw a random integer from 1 to 10 to split train/test at a 9:1 ratio:
43 |                 # if it is 1, the image goes to the test set, otherwise to the training set
44 |                 random_number = random.randint(1, 10)
45 |                 if random_number == 1:
46 |                     shutil.copyfile(image_path, test_target_path + name + "\\" + image_name)
47 |                 else:
48 |                     shutil.copyfile(image_path, train_target_path + name + "\\" + image_name)
49 |     return
50 | 
51 | 
52 | cloth_classes = ['正常', '扎洞', '毛斑', '擦洞', '毛洞', '织稀', '吊经', '缺经', '跳花', '油污渍', '其他']  # image classes (folder names)
53 | split_data("./data/瑕疵数据集", "./data/分割", cloth_classes)
54 | 
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import resnet_v1
3 | import tensorflow.contrib.slim as slim
4 | 
5 | classes = ['正常', '扎洞', '毛斑', '擦洞', '毛洞', '织稀', '吊经', '缺经', '跳花', '油污渍', '其他']  # image classes
6 | 
7 | 
8 | def read_and_decode(filename):
9 |     # build a filename queue from the file name
10 |     with tf.name_scope("input"):
11 |         filename_queue = tf.train.string_input_producer([filename])
12 |         reader = tf.TFRecordReader()
13 |         _, serialized_example = reader.read(filename_queue)  # returns the file name and the serialized example
14 |         features = tf.parse_single_example(serialized_example,
15 |                                            features={
16 |                                                'label': tf.FixedLenFeature([], tf.int64),
17 |                                                'image_byte': tf.FixedLenFeature([], tf.string),
18 |                                            })
19 |         image = tf.decode_raw(features['image_byte'], tf.uint8)
20 |         image = tf.reshape(image, [400, 400, 3])
21 |         image = tf.cast(image, tf.float32)
22 |         image = tf.image.per_image_standardization(image)  # standardize the image, which helps speed up training
23 |         label = tf.cast(features['label'], tf.int32)
24 | 
25 |     return image, label
26 | 
27 | 
28 | def main(_):
29 |     train_image, train_label = read_and_decode("./data/转换/400train.TFRecord")
30 | 
31 |     train_image_batch, train_label_batch = tf.train.shuffle_batch([train_image, train_label], batch_size=20,
32 |                                                                   capacity=4000, min_after_dequeue=2000,
33 |                                                                   allow_smaller_final_batch=True)
34 | 
35 |     x = tf.placeholder(tf.float32, [None, 400, 400, 3], name="x")
36 |     y_ = tf.placeholder(tf.int64, [None], name="y_")
37 |     tf.summary.image("input_image", x, 10)
38 | 
39 |     net, _ = resnet_v1.resnet_v1_152(x)
40 |     net = tf.squeeze(net, axis=[1, 2])  # drop the two spatial (height/width) dimensions
41 |     logits = slim.fully_connected(net, num_outputs=len(classes),
42 |                                   activation_fn=None, scope='predict')
43 | 
44 |     # compute the cross-entropy loss, averaged over the batch
45 |     with tf.name_scope('training'):
46 |         labels = tf.one_hot(y_, len(classes))
47 |         logits = tf.nn.softmax(logits)
48 |         logits = tf.clip_by_value(logits, 1e-10, 1.0)
49 |         loss = -tf.reduce_mean(tf.reduce_sum(labels * tf.log(logits), axis=1))  # sum over classes, mean over the batch
50 |         # optimize the loss
51 |         train_step = tf.train.AdamOptimizer(0.0001).minimize(loss)
52 |         tf.summary.scalar('loss', loss)
53 | 
54 |     with tf.name_scope('accuracy'):
55 |         correct_prediction = tf.equal(y_, tf.argmax(logits, 1))
56 |         accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
57 |         tf.summary.scalar('accuracy', accuracy)
58 | 
59 |     # initialize the session and start the training process
60 |     with tf.Session() as sess:
61 |         saver = tf.train.Saver()
62 |         merged = tf.summary.merge_all()
63 |         train_writer = tf.summary.FileWriter('./train_model_demo', sess.graph)
64 |         sess.run(tf.global_variables_initializer())
65 |         # saver.restore(sess, './checkpoint/resnet.ckpt')
66 |         coord = tf.train.Coordinator()
67 |         threads = tf.train.start_queue_runners(sess=sess, coord=coord)
68 |         # the training loop
69 | 
70 |         for i in range(15000):
71 |             img, lbl = sess.run([train_image_batch, train_label_batch])
72 |             sess.run(train_step, feed_dict={x: img, y_: lbl})
73 |             if i % 10 == 0:
74 |                 summary, l, acc = sess.run([merged, loss, accuracy], feed_dict={x: img, y_: lbl})
75 |                 print("After %d training step(s), loss is %g, accuracy is %g" % (i, l, acc))
76 |                 train_writer.add_summary(summary, i)
77 |             if i % 3000 == 0 and i != 0:  # checkpoint every 3000 steps
78 |                 saver.save(sess, './checkpoint/resnet_' + str(i) + '.ckpt')
79 | 
80 |         saver.save(sess, './checkpoint/resnet.ckpt')
81 | 
82 |         coord.request_stop()
83 |         coord.join(threads)
84 | 
85 | 
86 | if __name__ == '__main__':
87 |     tf.app.run()
88 | 
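
The TFRecord schema shared by image_to_TF.py and train.py is an int64 'label' plus the raw uint8 bytes of a 400 x 400 RGB image under 'image_byte'. A minimal sanity check that decodes one record outside the queue pipeline (a sketch, assuming the default output path above and numpy available):

import numpy as np
import tensorflow as tf

# Decode the first record and confirm it matches the schema used in training.
for record in tf.python_io.tf_record_iterator("./data/转换/400train.TFRecord"):
    example = tf.train.Example.FromString(record)
    label = example.features.feature['label'].int64_list.value[0]
    raw = example.features.feature['image_byte'].bytes_list.value[0]
    image = np.frombuffer(raw, dtype=np.uint8).reshape(400, 400, 3)
    print("label:", label, "image shape:", image.shape)
    break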
--------------------------------------------------------------------------------
/one_output_result.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import os
3 | import pandas as pd
4 | import numpy as np
5 | import resnet_v1
6 | import tensorflow.contrib.slim as slim
7 | np.set_printoptions(suppress=True)
8 | defect_classes = ['norm', 'defect_1', 'defect_2', 'defect_3', 'defect_4', 'defect_5', 'defect_6',
9 |                   'defect_7', 'defect_8', 'defect_9', 'defect_10']
10 | 
11 | 
12 | def read_and_decode(filename, size):
13 |     # build a filename queue from the file name
14 |     with tf.name_scope("input"):
15 |         filename_queue = tf.train.string_input_producer([filename])
16 |         reader = tf.TFRecordReader()
17 |         _, serialized_example = reader.read(filename_queue)  # returns the file name and the serialized example
18 |         features = tf.parse_single_example(serialized_example,
19 |                                            features={
20 |                                                'file_name': tf.FixedLenFeature([], tf.string),
21 |                                                'image_byte': tf.FixedLenFeature([], tf.string)
22 |                                            })
23 |         image = tf.decode_raw(features['image_byte'], tf.uint8)
24 |         image = tf.reshape(image, [size, size, 3])
25 |         image = tf.cast(image, tf.float32)
26 |         image = tf.image.per_image_standardization(image)  # standardize the image, matching the preprocessing used in training
27 |         file_name = features['file_name']
28 | 
29 |     return image, file_name
30 | 
31 | 
32 | # recursively count the total number of images under a path
33 | def count_image_number(path):
34 |     count = 0
35 |     for file_name in os.listdir(path):
36 |         file_path = path + file_name
37 |         # recurse into subdirectories
38 |         if os.path.isdir(file_path):
39 |             count = count + count_image_number(file_path + '\\')
40 |         else:
41 |             count = count + 1
42 |     return count
43 | 
44 | 
45 | # format the probability as a plain (non-scientific-notation) float and clamp it to [0.00001, 0.99999]
46 | def as_num(num):
47 |     float_num = '{:.5f}'.format(num)
48 |     if float(float_num) > 0.99999:
49 |         float_num = 0.99999
50 |     if float(float_num) < 0.00001:
51 |         float_num = 0.00001
52 |     return float(float_num)
53 | 
54 | 
55 | path = './data/xuelang_round2_test_b_201808031/'
56 | # count the number of test images
57 | count = count_image_number(path)
58 | batch_size = 20
59 | 
60 | # declare the input placeholder
61 | x = tf.placeholder(tf.float32, [None, 400, 400, 3], name="x")
62 | 
63 | # build the network and get its outputs
64 | # logits = model.model(x, is_training=False, dropout_pro=0.5, num=len(defect_classes), weight_decay=0.0)
65 | net, _ = resnet_v1.resnet_v1_50(x, is_training=False)
66 | net = tf.squeeze(net, axis=[1, 2])  # drop the two spatial (height/width) dimensions
67 | logits = slim.fully_connected(net, num_outputs=len(defect_classes),
68 |                               activation_fn=None, scope='predict')
69 | # logits, _ = inception_v3.inception_v3(x, len(defect_classes), is_training=False)
70 | 
71 | logits = tf.nn.softmax(logits)
72 | logits = tf.clip_by_value(logits, 1e-10, 1)
73 | 
74 | # initialize the session and restore the trained weights
75 | sess = tf.Session()
76 | saver = tf.train.Saver()
77 | saver.restore(sess, './checkpoint/resnet50_0.558/resnet_3000.ckpt')
78 | tf_path = "./classification_tf/400classification.TFRecord"
79 | 
80 | image, file_name = read_and_decode(tf_path, 400)
81 | 
82 | image_batch, file_name_batch = tf.train.batch(
83 |     [image, file_name], batch_size=batch_size, capacity=3000, allow_smaller_final_batch=True)
84 | coord = tf.train.Coordinator()
85 | threads = tf.train.start_queue_runners(sess=sess, coord=coord)
86 | 
87 | defect_list = []
88 | i = 0
89 | while i + batch_size <= count:
90 |     i = i + batch_size
91 | 
92 |     image_value, file_name_value = sess.run([image_batch, file_name_batch])
93 |     value = sess.run(logits, feed_dict={x: image_value})  # feed the batch through the network to get probabilities
94 |     for j in range(batch_size):
95 |         for index, _ in enumerate(defect_classes):
96 |             defect_list.append([file_name_value[j].decode() + "|" + defect_classes[index], as_num(value[j][index])])
"|" + defect_classes[index], as_num(value[j][index])]) 97 | 98 | 99 | # 处理最后一批数据 100 | image_value, file_name_value = sess.run([image_batch, file_name_batch]) 101 | value = sess.run(logits, feed_dict={x: image_value}) # 传入网络得到结果 102 | 103 | for j in range(count - i): 104 | for index, _ in enumerate(defect_classes): 105 | defect_list.append([file_name_value[j].decode() + "|" + defect_classes[index], as_num(value[j][index])]) 106 | 107 | coord.request_stop() 108 | coord.join(threads) 109 | 110 | 111 | result_df = pd.DataFrame(defect_list) # 转为DataFrame 112 | result_df = result_df.drop_duplicates() # 去重 113 | result_df.rename(columns={0: 'filename|defect', 1: 'probability'}, inplace=True) 114 | result_df.to_csv("resnet152_submit.csv", encoding='utf-8', index=None) 115 | -------------------------------------------------------------------------------- /calculate_score.py: -------------------------------------------------------------------------------- 1 | import resnet_v1 2 | import tensorflow as tf 3 | import tensorflow.contrib.slim as slim 4 | 5 | classes = ['正常', '扎洞', '毛斑', '擦洞', '毛洞', '织稀', '吊经', '缺经', '跳花', '油污渍', '其他'] # 要分类图像类别 6 | 7 | 8 | def read_and_decode(filename): 9 | # 根据文件名生成一个队列 10 | with tf.name_scope("input"): 11 | filename_queue = tf.train.string_input_producer([filename]) 12 | reader = tf.TFRecordReader() 13 | _, serialized_example = reader.read(filename_queue) # 返回文件名和文件 14 | features = tf.parse_single_example(serialized_example, 15 | features={ 16 | 'label': tf.FixedLenFeature([], tf.int64), 17 | 'image_byte': tf.FixedLenFeature([], tf.string), 18 | }) 19 | image = tf.decode_raw(features['image_byte'], tf.uint8) 20 | image = tf.reshape(image, [400, 400, 3]) 21 | image = tf.cast(image, tf.float32) 22 | image = tf.image.per_image_standardization(image) # 将图像标准化,有利于加速训练 23 | label = tf.cast(features['label'], tf.int32) 24 | 25 | return image, label 26 | 27 | 28 | def main(_): 29 | test_image, test_label = read_and_decode("./data/转换/400test.TFRecord") 30 | 31 | test_img_batch, test_lbl_batch = tf.train.batch([test_image, test_label], batch_size=1, capacity=2000) 32 | 33 | x = tf.placeholder(tf.float32, [None, 400, 400, 3], name="x") 34 | y_ = tf.placeholder(tf.int64, [None], name="y_") 35 | tf.summary.image("input_image", x, 10) 36 | 37 | net, _ = resnet_v1.resnet_v1_152(x, is_training=False) 38 | net = tf.squeeze(net, axis=[1, 2]) # 去除第一、第二个维度 39 | logits = slim.fully_connected(net, num_outputs=len(classes), 40 | activation_fn=None, scope='predict') 41 | 42 | # 计算交叉熵及其平均值 43 | with tf.name_scope('training'): 44 | labels = tf.one_hot(y_, len(classes)) 45 | logits = tf.nn.softmax(logits) 46 | logits = tf.clip_by_value(logits, 1e-10, 1.0) 47 | cross_entropy = -tf.reduce_mean(tf.reduce_sum(labels * tf.log(logits))) 48 | # 损失函数的计算 49 | loss = cross_entropy 50 | # 优化损失函数 51 | tf.summary.scalar('loss', loss) 52 | 53 | with tf.name_scope('accuracy'): 54 | correct_prediction = tf.equal(y_, tf.argmax(logits, 1)) 55 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 56 | tf.summary.scalar('accuracy', accuracy) 57 | 58 | # 初始化回话并开始训练过程。 59 | with tf.Session() as sess: 60 | saver = tf.train.Saver() 61 | saver.restore(sess, './checkpoint/resnet_15000.ckpt') 62 | coord = tf.train.Coordinator() 63 | threads = tf.train.start_queue_runners(sess=sess, coord=coord) 64 | 65 | map_label_list = [] 66 | map_prediction_list = [] 67 | 68 | auc_label_list = [] 69 | auc_prediction_list = [] 70 | # 342是样本数 71 | for i in range(342): 72 | test_img, test_lbl = sess.run([test_img_batch, 
73 |             temp, prediction = sess.run([accuracy, logits], feed_dict={x: test_img, y_: test_lbl})
74 |             map_label_list.append(test_lbl[0])
75 |             map_prediction_list.append(list(prediction[0]))
76 | 
77 |             if test_lbl[0] == 0:
78 |                 auc_label_list.append(True)
79 |             else:
80 |                 auc_label_list.append(False)
81 | 
82 |             auc_prediction_list.append(prediction[0][0])
83 | 
84 |         prediction_tensor = tf.convert_to_tensor(auc_prediction_list)
85 |         label_tensor = tf.convert_to_tensor(auc_label_list)
86 |         map_prediction_tensor = tf.convert_to_tensor(map_prediction_list, dtype=tf.float32)
87 |         map_label_tensor = tf.convert_to_tensor(map_label_list, dtype=tf.int64)
88 |         auc_value, auc_op = tf.metrics.auc(label_tensor, prediction_tensor, num_thresholds=400)
89 |         map_value, map_op = tf.metrics.average_precision_at_k(map_label_tensor, map_prediction_tensor, 1)
90 |         sess.run(tf.global_variables_initializer())  # note: this resets the restored weights, but all predictions were already collected above
91 |         sess.run(tf.local_variables_initializer())  # the metric ops above use local variables
92 |         sess.run([auc_op, map_op])
93 |         auc, mAP = sess.run([auc_value, map_value])
94 | 
95 |         print("AUC:" + str(auc))
96 |         print("mAP:" + str(mAP))
97 |         score = 0.7 * auc + 0.3 * mAP  # competition score: weighted combination of AUC and mAP
98 |         print("score:" + str(score))
99 |         coord.request_stop()
100 |         coord.join(threads)
101 | 
102 | 
103 | if __name__ == '__main__':
104 |     tf.app.run()
105 | 
--------------------------------------------------------------------------------
/resnet_v1.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Contains definitions for the original form of Residual Networks.
16 | 
17 | The 'v1' residual networks (ResNets) implemented in this module were proposed
18 | by:
19 | [1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
20 |     Deep Residual Learning for Image Recognition. arXiv:1512.03385
21 | 
22 | Other variants were introduced in:
23 | [2] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
24 |     Identity Mappings in Deep Residual Networks. arXiv: 1603.05027
25 | 
26 | The networks defined in this module utilize the bottleneck building block of
27 | [1] with projection shortcuts only for increasing depths. They employ batch
28 | normalization *after* every weight layer. This is the architecture used by
29 | MSRA in the Imagenet and MSCOCO 2016 competition models ResNet-101 and
30 | ResNet-152. See [2; Fig. 1a] for a comparison between the current 'v1'
31 | architecture and the alternative 'v2' architecture of [2] which uses batch
32 | normalization *before* every weight layer in the so-called full pre-activation
33 | units.
34 | 
35 | Typical use:
36 | 
37 |    from tensorflow.contrib.slim.python.slim.nets import
38 |    resnet_v1
39 | 
40 | ResNet-101 for image classification into 1000 classes:
41 | 
42 |    # inputs has shape [batch, 224, 224, 3]
43 |    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
44 |       net, end_points = resnet_v1.resnet_v1_101(inputs, 1000, is_training=False)
45 | 
46 | ResNet-101 for semantic segmentation into 21 classes:
47 | 
48 |    # inputs has shape [batch, 513, 513, 3]
49 |    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
50 |       net, end_points = resnet_v1.resnet_v1_101(inputs,
51 |                                                 21,
52 |                                                 is_training=False,
53 |                                                 global_pool=False,
54 |                                                 output_stride=16)
55 | """
56 | 
57 | from __future__ import absolute_import
58 | from __future__ import division
59 | from __future__ import print_function
60 | 
61 | from tensorflow.contrib import layers
62 | from tensorflow.contrib.framework.python.ops import add_arg_scope
63 | from tensorflow.contrib.framework.python.ops import arg_scope
64 | from tensorflow.contrib.layers.python.layers import layers as layers_lib
65 | from tensorflow.contrib.layers.python.layers import utils
66 | from tensorflow.contrib.slim.python.slim.nets import resnet_utils
67 | from tensorflow.python.ops import math_ops
68 | from tensorflow.python.ops import nn_ops
69 | from tensorflow.python.ops import variable_scope
70 | 
71 | resnet_arg_scope = resnet_utils.resnet_arg_scope
72 | 
73 | 
74 | @add_arg_scope
75 | def bottleneck(inputs,
76 |                depth,
77 |                depth_bottleneck,
78 |                stride,
79 |                rate=1,
80 |                outputs_collections=None,
81 |                scope=None):
82 |   """Bottleneck residual unit variant with BN after convolutions.
83 | 
84 |   This is the original residual unit proposed in [1]. See Fig. 1(a) of [2] for
85 |   its definition. Note that we use here the bottleneck variant which has an
86 |   extra bottleneck layer.
87 | 
88 |   When putting together two consecutive ResNet blocks that use this unit, one
89 |   should use stride = 2 in the last unit of the first block.
90 | 
91 |   Args:
92 |     inputs: A tensor of size [batch, height, width, channels].
93 |     depth: The depth of the ResNet unit output.
94 |     depth_bottleneck: The depth of the bottleneck layers.
95 |     stride: The ResNet unit's stride. Determines the amount of downsampling of
96 |       the units output compared to its input.
97 |     rate: An integer, rate for atrous convolution.
98 |     outputs_collections: Collection to add the ResNet unit output.
99 |     scope: Optional variable_scope.
100 | 
101 |   Returns:
102 |     The ResNet unit's output.
103 | """ 104 | with variable_scope.variable_scope(scope, 'bottleneck_v1', [inputs]) as sc: 105 | depth_in = utils.last_dimension(inputs.get_shape(), min_rank=4) 106 | if depth == depth_in: 107 | shortcut = resnet_utils.subsample(inputs, stride, 'shortcut') 108 | else: 109 | shortcut = layers.conv2d( 110 | inputs, 111 | depth, [1, 1], 112 | stride=stride, 113 | activation_fn=None, 114 | scope='shortcut') 115 | 116 | residual = layers.conv2d( 117 | inputs, depth_bottleneck, [1, 1], stride=1, scope='conv1') 118 | residual = resnet_utils.conv2d_same( 119 | residual, depth_bottleneck, 3, stride, rate=rate, scope='conv2') 120 | residual = layers.conv2d( 121 | residual, depth, [1, 1], stride=1, activation_fn=None, scope='conv3') 122 | 123 | output = nn_ops.relu(shortcut + residual) 124 | 125 | return utils.collect_named_outputs(outputs_collections, sc.name, output) 126 | 127 | 128 | def resnet_v1(inputs, 129 | blocks, 130 | num_classes=None, 131 | is_training=True, 132 | global_pool=True, 133 | output_stride=None, 134 | include_root_block=True, 135 | reuse=None, 136 | scope=None): 137 | """Generator for v1 ResNet models. 138 | 139 | This function generates a family of ResNet v1 models. See the resnet_v1_*() 140 | methods for specific model instantiations, obtained by selecting different 141 | block instantiations that produce ResNets of various depths. 142 | 143 | Training for image classification on Imagenet is usually done with [224, 224] 144 | inputs, resulting in [7, 7] feature maps at the output of the last ResNet 145 | block for the ResNets defined in [1] that have nominal stride equal to 32. 146 | However, for dense prediction tasks we advise that one uses inputs with 147 | spatial dimensions that are multiples of 32 plus 1, e.g., [321, 321]. In 148 | this case the feature maps at the ResNet output will have spatial shape 149 | [(height - 1) / output_stride + 1, (width - 1) / output_stride + 1] 150 | and corners exactly aligned with the input image corners, which greatly 151 | facilitates alignment of the features to the image. Using as input [225, 225] 152 | images results in [8, 8] feature maps at the output of the last ResNet block. 153 | 154 | For dense prediction tasks, the ResNet needs to run in fully-convolutional 155 | (FCN) mode and global_pool needs to be set to False. The ResNets in [1, 2] all 156 | have nominal stride equal to 32 and a good choice in FCN mode is to use 157 | output_stride=16 in order to increase the density of the computed features at 158 | small computational and memory overhead, cf. http://arxiv.org/abs/1606.00915. 159 | 160 | Args: 161 | inputs: A tensor of size [batch, height_in, width_in, channels]. 162 | blocks: A list of length equal to the number of ResNet blocks. Each element 163 | is a resnet_utils.Block object describing the units in the block. 164 | num_classes: Number of predicted classes for classification tasks. If None 165 | we return the features before the logit layer. 166 | is_training: whether batch_norm layers are in training mode. 167 | global_pool: If True, we perform global average pooling before computing the 168 | logits. Set to True for image classification, False for dense prediction. 169 | output_stride: If None, then the output will be computed at the nominal 170 | network stride. If output_stride is not None, it specifies the requested 171 | ratio of input to output spatial resolution. 172 | include_root_block: If True, include the initial convolution followed by 173 | max-pooling, if False excludes it. 
174 |     reuse: whether or not the network and its variables should be reused. To be
175 |       able to reuse 'scope' must be given.
176 |     scope: Optional variable_scope.
177 | 
178 |   Returns:
179 |     net: A rank-4 tensor of size [batch, height_out, width_out, channels_out].
180 |       If global_pool is False, then height_out and width_out are reduced by a
181 |       factor of output_stride compared to the respective height_in and width_in,
182 |       else both height_out and width_out equal one. If num_classes is None, then
183 |       net is the output of the last ResNet block, potentially after global
184 |       average pooling. If num_classes is not None, net contains the pre-softmax
185 |       activations.
186 |     end_points: A dictionary from components of the network to the corresponding
187 |       activation.
188 | 
189 |   Raises:
190 |     ValueError: If the target output_stride is not valid.
191 |   """
192 |   with variable_scope.variable_scope(
193 |       scope, 'resnet_v1', [inputs], reuse=reuse) as sc:
194 |     end_points_collection = sc.original_name_scope + '_end_points'
195 |     with arg_scope(
196 |         [layers.conv2d, bottleneck, resnet_utils.stack_blocks_dense],
197 |         outputs_collections=end_points_collection):
198 |       with arg_scope([layers.batch_norm], is_training=is_training):
199 |         net = inputs
200 |         if include_root_block:
201 |           if output_stride is not None:
202 |             if output_stride % 4 != 0:
203 |               raise ValueError('The output_stride needs to be a multiple of 4.')
204 |             output_stride /= 4
205 |           net = resnet_utils.conv2d_same(net, 64, 7, stride=2, scope='conv1')
206 |           net = layers_lib.max_pool2d(net, [3, 3], stride=2, scope='pool1')
207 |         net = resnet_utils.stack_blocks_dense(net, blocks, output_stride)
208 |         if global_pool:
209 |           # Global average pooling.
210 |           net = math_ops.reduce_mean(net, [1, 2], name='pool5', keepdims=True)
211 |         if num_classes is not None:
212 |           net = layers.conv2d(
213 |               net,
214 |               num_classes, [1, 1],
215 |               activation_fn=None,
216 |               normalizer_fn=None,
217 |               scope='logits')
218 |         # Convert end_points_collection into a dictionary of end_points.
219 |         end_points = utils.convert_collection_to_dict(end_points_collection)
220 |         if num_classes is not None:
221 |           end_points['predictions'] = layers_lib.softmax(
222 |               net, scope='predictions')
223 |         return net, end_points
224 | 
225 | resnet_v1.default_image_size = 400
226 | 
227 | 
228 | def resnet_v1_block(scope, base_depth, num_units, stride):
229 |   """Helper function for creating a resnet_v1 bottleneck block.
230 | 
231 |   Args:
232 |     scope: The scope of the block.
233 |     base_depth: The depth of the bottleneck layer for each unit.
234 |     num_units: The number of units in the block.
235 |     stride: The stride of the block, implemented as a stride in the last unit.
236 |       All other units have stride=1.
237 | 
238 |   Returns:
239 |     A resnet_v1 bottleneck block.
240 |   """
241 |   return resnet_utils.Block(scope, bottleneck, [{
242 |       'depth': base_depth * 4,
243 |       'depth_bottleneck': base_depth,
244 |       'stride': 1
245 |   }] * (num_units - 1) + [{
246 |       'depth': base_depth * 4,
247 |       'depth_bottleneck': base_depth,
248 |       'stride': stride
249 |   }])
250 | 
251 | 
252 | def resnet_v1_50(inputs,
253 |                  num_classes=None,
254 |                  is_training=True,
255 |                  global_pool=True,
256 |                  output_stride=None,
257 |                  reuse=None,
258 |                  scope='resnet_v1_50'):
259 |   """ResNet-50 model of [1]. See resnet_v1() for arg and return description."""
260 |   blocks = [
261 |       resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
262 |       resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
263 |       resnet_v1_block('block3', base_depth=256, num_units=6, stride=2),
264 |       resnet_v1_block('block4', base_depth=512, num_units=3, stride=1),
265 |   ]
266 |   return resnet_v1(
267 |       inputs,
268 |       blocks,
269 |       num_classes,
270 |       is_training,
271 |       global_pool,
272 |       output_stride,
273 |       include_root_block=True,
274 |       reuse=reuse,
275 |       scope=scope)
276 | 
277 | 
278 | def resnet_v1_101(inputs,
279 |                   num_classes=None,
280 |                   is_training=True,
281 |                   global_pool=True,
282 |                   output_stride=None,
283 |                   reuse=None,
284 |                   scope='resnet_v1_101'):
285 |   """ResNet-101 model of [1]. See resnet_v1() for arg and return description."""
286 |   blocks = [
287 |       resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
288 |       resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
289 |       resnet_v1_block('block3', base_depth=256, num_units=23, stride=2),
290 |       resnet_v1_block('block4', base_depth=512, num_units=3, stride=1),
291 |   ]
292 |   return resnet_v1(
293 |       inputs,
294 |       blocks,
295 |       num_classes,
296 |       is_training,
297 |       global_pool,
298 |       output_stride,
299 |       include_root_block=True,
300 |       reuse=reuse,
301 |       scope=scope)
302 | 
303 | 
304 | def resnet_v1_152(inputs,
305 |                   num_classes=None,
306 |                   is_training=True,
307 |                   global_pool=True,
308 |                   output_stride=None,
309 |                   reuse=None,
310 |                   scope='resnet_v1_152'):
311 |   """ResNet-152 model of [1]. See resnet_v1() for arg and return description."""
312 |   blocks = [
313 |       resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
314 |       resnet_v1_block('block2', base_depth=128, num_units=8, stride=2),
315 |       resnet_v1_block('block3', base_depth=256, num_units=36, stride=2),
316 |       resnet_v1_block('block4', base_depth=512, num_units=3, stride=1),
317 |   ]
318 |   return resnet_v1(
319 |       inputs,
320 |       blocks,
321 |       num_classes,
322 |       is_training,
323 |       global_pool,
324 |       output_stride,
325 |       include_root_block=True,
326 |       reuse=reuse,
327 |       scope=scope)
328 | 
329 | 
330 | def resnet_v1_200(inputs,
331 |                   num_classes=None,
332 |                   is_training=True,
333 |                   global_pool=True,
334 |                   output_stride=None,
335 |                   reuse=None,
336 |                   scope='resnet_v1_200'):
337 |   """ResNet-200 model of [2]. See resnet_v1() for arg and return description."""
338 |   blocks = [
339 |       resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
340 |       resnet_v1_block('block2', base_depth=128, num_units=24, stride=2),
341 |       resnet_v1_block('block3', base_depth=256, num_units=36, stride=2),
342 |       resnet_v1_block('block4', base_depth=512, num_units=3, stride=1),
343 |   ]
344 |   return resnet_v1(
345 |       inputs,
346 |       blocks,
347 |       num_classes,
348 |       is_training,
349 |       global_pool,
350 |       output_stride,
351 |       include_root_block=True,
352 |       reuse=reuse,
353 |       scope=scope)
354 | 
--------------------------------------------------------------------------------
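
The scripts in this repo call these constructors with num_classes=None and attach their own classification head. A condensed sketch of that pattern (mirroring train.py and one_output_result.py; 11 is the number of defect classes used in this competition):

import tensorflow as tf
import tensorflow.contrib.slim as slim
import resnet_v1

inputs = tf.placeholder(tf.float32, [None, 400, 400, 3])
# With num_classes=None and global_pool=True the backbone returns pooled
# features of shape [batch, 1, 1, 2048].
net, _ = resnet_v1.resnet_v1_50(inputs, is_training=False)
net = tf.squeeze(net, axis=[1, 2])  # -> [batch, 2048]
logits = slim.fully_connected(net, num_outputs=11,
                              activation_fn=None, scope='predict')
probs = tf.nn.softmax(logits)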