├── README.md ├── input_data.py ├── model.py └── training.py /README.md: -------------------------------------------------------------------------------- 1 | # cats_vs_dogs 2 | 猫狗大战——图像识别 3 | 4 | 数据下载地址: 5 | [点击下载](https://www.kaggle.com/c/dogs-vs-cats-redux-kernels-edition/data) 6 | 7 | 8 | -------------------------------------------------------------------------------- /input_data.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import os 4 | 5 | train_dir = '/homw/wxr/hyx/cat_vs_dog/data/train/' 6 | def get_files(file_dir): 7 | ''' 8 | args: 9 | file_dir : file directory 10 | return: 11 | list of images and labels 12 | ''' 13 | cats = [] 14 | label_cats = [] 15 | dogs = [] 16 | label_dogs = [] 17 | for file in os.listdir(file_dir): 18 | name = file.split('.') 19 | if name[0] == 'cat': 20 | cats.append(file_dir+file) 21 | label_cats.append(0) 22 | else: 23 | dogs.append(file_dir+file) 24 | label_dogs.append(1) 25 | 26 | print('There are %d cats\n There are %d dogs' %(len(cats),len(dogs))) 27 | 28 | #hstack => axis = 1,eg: cats = [[123234]] dogs = [[321432]] 29 | # => [[123234321432]] 30 | # label_cats = [1,2,3] label_dogs = [3,2,1] 31 | # => [1,2,3,3,2,1] 32 | image_list = np.hstack((cats,dogs)) 33 | label_list = np.hstack((label_cats, label_dogs)) 34 | 35 | #[ 36 | # [aaa,bbb,ccc], 37 | # [0,0,1] 38 | # ] 39 | 40 | temp = np.array([image_list, label_list]) 41 | temp = temp.transpose() 42 | #[ 43 | #[aaa,0], 44 | #[bbb,0], 45 | #[ccc,1], 46 | #] 47 | np.random.shuffle(temp) 48 | 49 | image_list = list(temp[:,0]) 50 | label_list = list(temp[:,1]) 51 | label_list = [int(i) for i in label_list] 52 | 53 | return image_list, label_list 54 | 55 | 56 | def get_batch(image, label, image_W, image_H, batch_size, capacity): 57 | ''' 58 | args: 59 | images:list type 60 | label:list type 61 | image_W:image width 62 | image_H:image height 63 | batch_size:batch_size 64 | capacity:the maximum elements in queue 65 | return: 66 | image_batch: 4D tensor [batch_size , width , height , 3], dtype = tf.float32 67 | label_batch: 1D tensor [batch_size], dtype = tf.int32 68 | ''' 69 | image = tf.cast(image, tf.string) 70 | label = tf.cast(label, tf.int32) 71 | 72 | #make an input queue 73 | #[ 74 | #[aaa,bbb,ccc], 75 | #[0,1,0] 76 | #] 77 | print "image,label shape : ",image.get_shape(),label.get_shape() 78 | input_queue = tf.train.slice_input_producer([image ,label]) 79 | print "input_queue shape :",input_queue.get_shape() 80 | label = input_queue[1] 81 | image_contents = tf.read_file(input_queue[0]) 82 | image = tf.image.decode_jpeg(image_contents, channels =3) 83 | 84 | image = tf.image.per_image_standardization(image) 85 | 86 | image_batch, label_batch = tf.train.batch([image, label], 87 | batch_size = batch_size, 88 | num_threads = 64, 89 | capacity = capacity) 90 | 91 | label_batch = tf.reshape(label_batch, [batch_size]) 92 | image_batch = tf.cast(image_batch, tf.float32) 93 | 94 | return image_batch, label_batch 95 | 96 | -------------------------------------------------------------------------------- /model.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | def inference(images, batch_size, n_classes): 3 | ''' 4 | build model 5 | args: 6 | images: image batch, 4D tensor, tf.float32, [batch_size, width, height, channels] 7 | return: 8 | output tensor with the computed logits, float, [batch_size, n_classes] 9 | ''' 10 | with tf.variable_scope('conv1') as scope: 11 | weights = tf.get_variable('weights', 12 | shape = [3,3,3,16], 13 | dtype = tf.float32, 14 | initializer = tf.truncated_normal_initializer(stddev=0.1, dtype=tf.float32)) 15 | biases = tf.get_variables('biases', 16 | shape = [16], 17 | dtype = tf.float32, 18 | initializer = tf.constant_initializer(0.1)) 19 | conv = tf.nn.conv2d(images, weights, strides=[1,1,1,1], padding='SAME') 20 | pre_activation = tf.nn.bias_add(conv, biases) 21 | conv1 = tf.nn.relu(pre_activation, name = scope.name) 22 | 23 | with tf.variable_scope('pooling1_lrn') as scope: 24 | pool1 = tf.nn.max_pool(conv1, ksize = [1,3,3,1], strides = [1,2,2,1], 25 | padding = 'SAME', name = 'pooling1') 26 | norm1 = tf.nn.lrn(pool1, depth_radius=4, bias = 1.0, alpha=0.001/9.0, 27 | beta=0.75,name='norm1') 28 | with tf.variable_scope('conv2') as scope: 29 | weights = tf.get_variable('weights', 30 | shape=[3,3,16,16], 31 | dtype=tf.float32, 32 | initializer = tf.truncated_normal_initializer(stddev=0.1,dtype=tf.float32)) 33 | biases = tf.get_variable('biases', 34 | shape=[16], 35 | dtype=tf.float32, 36 | initializer=tf.constant_initializer(0.1)) 37 | conv = tf.nn.conv2d(norm1, weights, strides=[1,1,1,1], padding = 'SAME') 38 | pre_activation = tf.nn.bias_add(conv, biases) 39 | conv2 = tf.nn.relu(pre_activation, name='conv2') 40 | with tf.variable_scope('pooling2_lrn') as scope: 41 | norm2 = tf.nn.lrn(conv2, depth_radius=4, bias=1.0, alpha = 0.001/9.0, 42 | beta=0.75,name='norm2') 43 | pool2 = tf.nn.max_pool(norm2,ksize=[1,3,3,1],strides=[1,1,1,1], 44 | padding='SAME',name='pooling2') 45 | with tf.variable_scope('local3') as scope: 46 | reshape = tf.reshape(pool2,shape=[batch_size, -1]) 47 | dim = reshape.get_shape()[1].value 48 | weights = tf.get_variable('weights', 49 | shape=[dim,128], 50 | dtype=tf.float32, 51 | initializer=tf.truncated_normal_initalizer(stddev=0.005, dtype=tf.float32)) 52 | biases = tf.get_variable('biases', 53 | shape=[128], 54 | dtype=tf.float32, 55 | initializer=tf.constant_initializer(0.1)) 56 | local3 = tf.nn.relu(tf.matmul(reshape, weights)+biases, name=scope.name) 57 | with tf.variable_scope('local4') as scope: 58 | weights = tf.get_variable('weights', 59 | shape=[128.128], 60 | dtype=tf.float32, 61 | initializer=tf.truncated_normal_initializer(stddev=0.005, dtype=tf.float32)) 62 | biases = tf.get_variable('biases', 63 | shape=[128], 64 | dtype = tf.float32, 65 | initializer = tf.constant_initializer(0.1)) 66 | local4 = tf.nn.relu(tf.matmul(local3,weights)+biases, name='local4') 67 | with tf.variable_scope('softmax_layer') as scope: 68 | weights = tf.get_variable('softmax_linear', 69 | shape=[128,n_classes], 70 | dtype=tf.float32, 71 | initializer=tf.truncated_normal_initializer(stddev=0.005,dtype=tf.lofat32)) 72 | biases = tf.get_variable('biases', 73 | shape=[n_classes], 74 | dtype=tf.float32, 75 | initializer=tf.constant_initializer(0.1)) 76 | softmax_liner=tf.add(tf.matmul(local4, weights),biases,name='softmax_linear') 77 | return softmax_layer 78 | 79 | 80 | def losses(logits,labels): 81 | ''' 82 | compute loss from logits and labels 83 | args: 84 | logits: logit tensor, float , [batch_size, n_classes] 85 | labels: label tensor, tf.int32, [batch_size] 86 | returns: 87 | loss tensor of float type 88 | ''' 89 | 90 | with tf.variable_scope('loss') as scope: 91 | cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits( 92 | logits = logits, labels = labels,name='xentropy_re_example') 93 | loss = tf.reduce_mean(cross_entropy, name='loss') 94 | tf.summary.scalar(scope.name+'/loss',loss) 95 | return loss 96 | 97 | 98 | def training(loss, learning_rate): 99 | ''' 100 | training ops, the op returned by this function is what must be passed to 'sess.run()' 101 | call to cause the model to train. 102 | args: 103 | loss : loss tensor, from losses() 104 | returns: 105 | train_op: the op for training 106 | ''' 107 | with tf.name_scope('optimizer'): 108 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) 109 | global_step = tf.Variable(0,name='global_step',trainable=False) 110 | train_op = optimizer.minimize(loss, global_step = global_step) 111 | return train_op 112 | 113 | 114 | def evalutaion(logits, lables): 115 | ''' 116 | evaluate the quality of the logits at prediction the lable. 117 | args: 118 | logits: logits tensor, float, [batch_size, num_classes] 119 | lables: lables tensor,tf.int32, [batch_size], with values in the 120 | range [0,num_classes] 121 | returns: 122 | a scalar int32 tensor with the number of examples(out of batch_size) 123 | that were predicted correctly. 124 | ''' 125 | with tf.variable_scope('accuracy') as scope: 126 | correct = tf.nn.in_top_k(logits, labels,1) 127 | correct = tf.cast(correct, tf.float16) 128 | accuracy = tf.reduce_mean(correct) 129 | tf.summary.scalar(scope.name+'/accuracy',accuracy) 130 | return accuracy 131 | 132 | -------------------------------------------------------------------------------- /training.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import tensorflow as tf 4 | import input_data 5 | import model 6 | 7 | #%% 8 | 9 | N_CLASSES = 2 10 | IMG_W = 208 # resize the image, if the input image is too large, training will be very slow. 11 | IMG_H = 208 12 | BATCH_SIZE = 16 13 | CAPACITY = 2000 14 | MAX_STEP = 10000 # with current parameters, it is suggested to use MAX_STEP>10k 15 | learning_rate = 0.0001 # with current parameters, it is suggested to use learning rate<0.0001 16 | 17 | 18 | #%% 19 | def run_training(): 20 | 21 | # you need to change the directories to yours. 22 | train_dir = '/home/wxr/hyx/cat_vs_dog/data/train/' 23 | logs_train_dir = '/home/wxr/hyx/cat_vs_dog/logs/train/' 24 | 25 | train, train_label = input_data.get_files(train_dir) 26 | 27 | train_batch, train_label_batch = input_data.get_batch(train, 28 | train_label, 29 | IMG_W, 30 | IMG_H, 31 | BATCH_SIZE, 32 | CAPACITY) 33 | #model.inference 34 | #return the model => softmax layer 35 | train_logits = model.inference(train_batch, BATCH_SIZE, N_CLASSES) 36 | #model.losses 37 | #return loss 38 | train_loss = model.losses(train_logits, train_label_batch) 39 | train_op = model.trainning(train_loss, learning_rate) 40 | train__acc = model.evaluation(train_logits, train_label_batch) 41 | 42 | summary_op = tf.summary.merge_all() 43 | sess = tf.Session() 44 | train_writer = tf.summary.FileWriter(logs_train_dir, sess.graph) 45 | saver = tf.train.Saver() 46 | 47 | sess.run(tf.global_variables_initializer()) 48 | coord = tf.train.Coordinator() 49 | threads = tf.train.start_queue_runners(sess=sess, coord=coord) 50 | 51 | try: 52 | for step in np.arange(MAX_STEP): 53 | if coord.should_stop(): 54 | break 55 | _, tra_loss, tra_acc = sess.run([train_op, train_loss, train__acc]) 56 | 57 | if step % 50 == 0: 58 | print('Step %d, train loss = %.2f, train accuracy = %.2f%%' %(step, tra_loss, tra_acc)) 59 | summary_str = sess.run(summary_op) 60 | train_writer.add_summary(summary_str, step) 61 | 62 | if step % 2000 == 0 or (step + 1) == MAX_STEP: 63 | checkpoint_path = os.path.join(logs_train_dir, 'model.ckpt') 64 | saver.save(sess, checkpoint_path, global_step=step) 65 | 66 | except tf.errors.OutOfRangeError: 67 | print('Done training -- epoch limit reached') 68 | finally: 69 | coord.request_stop() 70 | 71 | coord.join(threads) 72 | sess.close() 73 | --------------------------------------------------------------------------------