├── BCNN
│   ├── README.txt
│   ├── bcnn.py
│   └── create_h5_dataset.py
├── DIP Project - Final Report.pdf
├── DIP Project - Slides.pptx
├── Interpret_CUB_200_2011.docx
├── README.md
├── Transfer_Learning
│   ├── Figure_CUB200.png
│   ├── README.txt
│   ├── Transfer_Model
│   ├── cub_util.py
│   └── transfer.py
├── 大作业布置.pdf
└── 细分类讲解.pdf

/BCNN/README.txt:
--------------------------------------------------------------------------------
1 | File description:
2 | bcnn/create_h5_dataset.py attaches labels to the images and saves them as h5py files for the training script to read.
3 | bcnn/bcnn.py reads the labels and images, trains the BCNN on the training set, uses the first 32 images of the test set for validation, and finally measures accuracy on the test set.
4 | Usage:
5 | Put the images folder from the original dataset into bcnn/, remove the images belonging to the test set, and rename the folder to train.
6 | Put the images folder from the original dataset into bcnn/, remove the images belonging to the training set, and rename the folder to test.
7 | Enter the bcnn folder and run python create_h5_dataset.py.
8 | Download vgg16_weights.npz, place it in bcnn/, and run python bcnn.py in that folder to train the final fully connected layer.
9 | Uncomment the commented statements in load_weights, set isFineTune=True in main, and run python bcnn.py again to train the convolutional and fully connected layers jointly; this run also reports the final test accuracy.
10 | 
--------------------------------------------------------------------------------
/BCNN/bcnn.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import tensorflow as tf
3 | import numpy as np
4 | import tflearn
5 | from tflearn.data_preprocessing import ImagePreprocessing
6 | from tflearn.data_augmentation import ImageAugmentation
7 | import os
8 | from tflearn.data_utils import shuffle
9 | from skimage import transform
10 | 
11 | import pickle
12 | from tflearn.data_utils import image_preloader
13 | import h5py
14 | import math
15 | import logging
16 | import random
17 | import time
18 | import scipy.misc
19 | 
20 | 
21 | 
22 | 
23 | def random_flip_right_to_left(image_batch):
24 |     result = []
25 |     for n in range(image_batch.shape[0]):
26 |         if bool(random.getrandbits(1)):
27 |             result.append(image_batch[n][:,::-1,:])
28 |         elif bool(random.getrandbits(1)):
29 |             result.append(image_batch[n][::-1,:,:])
30 |         else:
31 |             result.append(image_batch[n])
32 |     return result
33 | 
34 | def random_crop(image_batch):
35 |     result = []
36 |     for n in range(image_batch.shape[0]):
37 |         start_x = random.randint(0,19)
38 |         start_y = random.randint(0,19)
39 |         result.append(scipy.misc.imresize(image_batch[n][start_y:start_y+224,start_x:start_x+224,:],(224,224,3)))
40 |     return np.array(result)
41 | 
42 | class vgg16:
43 |     def __init__(self, imgs, weights=None, sess=None, trainable=True, drop_prob=None):
44 |         self.imgs = imgs
45 |         self.last_layer_parameters = []
46 |         self.parameters = []
47 |         self.drop_prob = drop_prob  # must be set before fc_layers(), which uses it for dropout
48 |         self.convlayers(trainable)
49 |         self.fc_layers()
50 |         self.weight_file = weights
51 |         #self.load_weights(weights, sess)
52 | 
53 | 
54 |     def convlayers(self,trainable):
55 | 
56 |         # zero-mean input
57 |         with tf.name_scope('preprocess') as scope:
58 |             mean = tf.constant([123.68, 116.779, 103.939], dtype=tf.float32, shape=[1, 1, 1, 3], name='img_mean')
59 |             images = self.imgs-mean
60 |             print('Adding Data Augmentation')
61 | 
62 | 
63 |         # conv1_1
64 |         with tf.name_scope('conv1_1') as scope:
65 |             kernel = tf.Variable(tf.truncated_normal([3, 3, 3, 64], dtype=tf.float32,
66 |                                                      stddev=1e-1), trainable=trainable, name='weights')
67 |             conv = tf.nn.conv2d(images, kernel, [1, 1, 1, 1], padding='SAME')
68 |             biases = tf.Variable(tf.constant(0.0, shape=[64], dtype=tf.float32),
69 |                                  trainable=trainable, name='biases')
70 |             out = tf.nn.bias_add(conv, biases)
71 |             self.conv1_1 = tf.nn.relu(out, name=scope)
72 |             self.parameters += [kernel, biases]
73 | 
74 |         # conv1_2
75 |         with tf.name_scope('conv1_2') as scope:
76 |             kernel = tf.Variable(tf.truncated_normal([3, 3, 64, 64], dtype=tf.float32,
77 | 
stddev=1e-1), trainable=trainable, name='weights') 78 | conv = tf.nn.conv2d(self.conv1_1, kernel, [1, 1, 1, 1], padding='SAME') 79 | biases = tf.Variable(tf.constant(0.0, shape=[64], dtype=tf.float32), 80 | trainable=trainable, name='biases') 81 | out = tf.nn.bias_add(conv, biases) 82 | self.conv1_2 = tf.nn.relu(out, name=scope) 83 | self.parameters += [kernel, biases] 84 | 85 | # pool1 86 | self.pool1 = tf.nn.max_pool(self.conv1_2, 87 | ksize=[1, 2, 2, 1], 88 | strides=[1, 2, 2, 1], 89 | padding='SAME', 90 | name='pool1') 91 | 92 | # conv2_1 93 | with tf.name_scope('conv2_1') as scope: 94 | kernel = tf.Variable(tf.truncated_normal([3, 3, 64, 128], dtype=tf.float32, 95 | stddev=1e-1), trainable=trainable, name='weights') 96 | conv = tf.nn.conv2d(self.pool1, kernel, [1, 1, 1, 1], padding='SAME') 97 | biases = tf.Variable(tf.constant(0.0, shape=[128], dtype=tf.float32), 98 | trainable=trainable, name='biases') 99 | out = tf.nn.bias_add(conv, biases) 100 | self.conv2_1 = tf.nn.relu(out, name=scope) 101 | self.parameters += [kernel, biases] 102 | 103 | # conv2_2 104 | with tf.name_scope('conv2_2') as scope: 105 | kernel = tf.Variable(tf.truncated_normal([3, 3, 128, 128], dtype=tf.float32, 106 | stddev=1e-1), trainable=trainable, name='weights') 107 | conv = tf.nn.conv2d(self.conv2_1, kernel, [1, 1, 1, 1], padding='SAME') 108 | biases = tf.Variable(tf.constant(0.0, shape=[128], dtype=tf.float32), trainable=trainable, name='biases') 109 | out = tf.nn.bias_add(conv, biases) 110 | self.conv2_2 = tf.nn.relu(out, name=scope) 111 | self.parameters += [kernel, biases] 112 | 113 | # pool2 114 | self.pool2 = tf.nn.max_pool(self.conv2_2, 115 | ksize=[1, 2, 2, 1], 116 | strides=[1, 2, 2, 1], 117 | padding='SAME', 118 | name='pool2') 119 | 120 | # conv3_1 121 | with tf.name_scope('conv3_1') as scope: 122 | kernel = tf.Variable(tf.truncated_normal([3, 3, 128, 256], dtype=tf.float32, 123 | stddev=1e-1), trainable=trainable, name='weights') 124 | conv = tf.nn.conv2d(self.pool2, kernel, [1, 1, 1, 1], padding='SAME') 125 | biases = tf.Variable(tf.constant(0.0, shape=[256], dtype=tf.float32), 126 | trainable=trainable, name='biases') 127 | out = tf.nn.bias_add(conv, biases) 128 | self.conv3_1 = tf.nn.relu(out, name=scope) 129 | self.parameters += [kernel, biases] 130 | 131 | # conv3_2 132 | with tf.name_scope('conv3_2') as scope: 133 | kernel = tf.Variable(tf.truncated_normal([3, 3, 256, 256], dtype=tf.float32, 134 | stddev=1e-1), trainable=trainable, name='weights') 135 | conv = tf.nn.conv2d(self.conv3_1, kernel, [1, 1, 1, 1], padding='SAME') 136 | biases = tf.Variable(tf.constant(0.0, shape=[256], dtype=tf.float32), 137 | trainable=trainable, name='biases') 138 | out = tf.nn.bias_add(conv, biases) 139 | self.conv3_2 = tf.nn.relu(out, name=scope) 140 | self.parameters += [kernel, biases] 141 | 142 | # conv3_3 143 | with tf.name_scope('conv3_3') as scope: 144 | kernel = tf.Variable(tf.truncated_normal([3, 3, 256, 256], dtype=tf.float32, 145 | stddev=1e-1), trainable=trainable, name='weights') 146 | conv = tf.nn.conv2d(self.conv3_2, kernel, [1, 1, 1, 1], padding='SAME') 147 | biases = tf.Variable(tf.constant(0.0, shape=[256], dtype=tf.float32), 148 | trainable=trainable, name='biases') 149 | out = tf.nn.bias_add(conv, biases) 150 | self.conv3_3 = tf.nn.relu(out, name=scope) 151 | self.parameters += [kernel, biases] 152 | 153 | # pool3 154 | self.pool3 = tf.nn.max_pool(self.conv3_3, 155 | ksize=[1, 2, 2, 1], 156 | strides=[1, 2, 2, 1], 157 | padding='SAME', 158 | name='pool3') 159 | 160 | # conv4_1 161 | with 
tf.name_scope('conv4_1') as scope: 162 | kernel = tf.Variable(tf.truncated_normal([3, 3, 256, 512], dtype=tf.float32, 163 | stddev=1e-1), trainable=trainable, name='weights') 164 | conv = tf.nn.conv2d(self.pool3, kernel, [1, 1, 1, 1], padding='SAME') 165 | biases = tf.Variable(tf.constant(0.0, shape=[512], dtype=tf.float32), 166 | trainable=trainable, name='biases') 167 | out = tf.nn.bias_add(conv, biases) 168 | self.conv4_1 = tf.nn.relu(out, name=scope) 169 | self.parameters += [kernel, biases] 170 | 171 | # conv4_2 172 | with tf.name_scope('conv4_2') as scope: 173 | kernel = tf.Variable(tf.truncated_normal([3, 3, 512, 512], dtype=tf.float32, 174 | stddev=1e-1), trainable=trainable, name='weights') 175 | conv = tf.nn.conv2d(self.conv4_1, kernel, [1, 1, 1, 1], padding='SAME') 176 | biases = tf.Variable(tf.constant(0.0, shape=[512], dtype=tf.float32), 177 | trainable=trainable, name='biases') 178 | out = tf.nn.bias_add(conv, biases) 179 | self.conv4_2 = tf.nn.relu(out, name=scope) 180 | self.parameters += [kernel, biases] 181 | 182 | # conv4_3 183 | with tf.name_scope('conv4_3') as scope: 184 | kernel = tf.Variable(tf.truncated_normal([3, 3, 512, 512], dtype=tf.float32, 185 | stddev=1e-1), trainable=trainable, name='weights') 186 | conv = tf.nn.conv2d(self.conv4_2, kernel, [1, 1, 1, 1], padding='SAME') 187 | biases = tf.Variable(tf.constant(0.0, shape=[512], dtype=tf.float32), 188 | trainable=trainable, name='biases') 189 | out = tf.nn.bias_add(conv, biases) 190 | self.conv4_3 = tf.nn.relu(out, name=scope) 191 | self.parameters += [kernel, biases] 192 | 193 | # pool4 194 | self.pool4 = tf.nn.max_pool(self.conv4_3, 195 | ksize=[1, 2, 2, 1], 196 | strides=[1, 2, 2, 1], 197 | padding='SAME', 198 | name='pool4') 199 | 200 | # conv5_1 201 | with tf.name_scope('conv5_1') as scope: 202 | kernel = tf.Variable(tf.truncated_normal([3, 3, 512, 512], dtype=tf.float32, 203 | stddev=1e-1), trainable=trainable, name='weights') 204 | conv = tf.nn.conv2d(self.pool4, kernel, [1, 1, 1, 1], padding='SAME') 205 | biases = tf.Variable(tf.constant(0.0, shape=[512], dtype=tf.float32), 206 | trainable=trainable, name='biases') 207 | out = tf.nn.bias_add(conv, biases) 208 | self.conv5_1 = tf.nn.relu(out, name=scope) 209 | self.parameters += [kernel, biases] 210 | 211 | # conv5_2 212 | with tf.name_scope('conv5_2') as scope: 213 | kernel = tf.Variable(tf.truncated_normal([3, 3, 512, 512], dtype=tf.float32, 214 | stddev=1e-1), trainable=trainable, name='weights') 215 | conv = tf.nn.conv2d(self.conv5_1, kernel, [1, 1, 1, 1], padding='SAME') 216 | biases = tf.Variable(tf.constant(0.0, shape=[512], dtype=tf.float32), 217 | trainable=trainable, name='biases') 218 | out = tf.nn.bias_add(conv, biases) 219 | self.conv5_2 = tf.nn.relu(out, name=scope) 220 | self.parameters += [kernel, biases] 221 | 222 | # conv5_3 223 | with tf.name_scope('conv5_3') as scope: 224 | kernel = tf.Variable(tf.truncated_normal([3, 3, 512, 512], dtype=tf.float32, 225 | stddev=1e-1), trainable=trainable, name='weights') 226 | conv = tf.nn.conv2d(self.conv5_2, kernel, [1, 1, 1, 1], padding='SAME') 227 | biases = tf.Variable(tf.constant(0.0, shape=[512], dtype=tf.float32), 228 | trainable=trainable, name='biases') 229 | out = tf.nn.bias_add(conv, biases) 230 | self.conv5_3 = tf.nn.relu(out, name=scope) 231 | self.parameters += [kernel, biases] 232 | 233 | self.InnerPro = tf.einsum('ijkm,ijkn->imn',self.conv5_3,self.conv5_3) 234 | self.InnerPro = tf.reshape(self.InnerPro,[-1,512*512]) 235 | self.InnerPro = tf.divide(self.InnerPro,14.0*14.0) 236 | 
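# Bilinear (BCNN) pooling: the einsum above sums, over all 14x14 spatial positions
# of conv5_3, the outer products of the 512-d channel descriptors, giving one
# 512x512 bilinear feature per image; dividing by 14.0*14.0 averages over the
# 196 positions. The two lines below apply the usual BCNN normalization:
# signed square root followed by L2 normalization.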
self.ySsqrt = tf.multiply(tf.sign(self.InnerPro),tf.sqrt(tf.abs(self.InnerPro)+1e-12)) 237 | self.zL2 = tf.nn.l2_normalize(self.ySsqrt, dim=1) 238 | 239 | 240 | 241 | 242 | def fc_layers(self): 243 | 244 | with tf.name_scope('fc') as scope: 245 | 246 | fcw = tf.get_variable('weights', [512*512, 200], 247 | initializer=tf.contrib.layers.xavier_initializer(), trainable=True) 248 | fcb = tf.Variable(tf.constant(1.0, shape=[200], dtype=tf.float32), 249 | name='biases', trainable=True) 250 | self.fcl = tf.nn.bias_add(tf.matmul(self.zL2, 251 | tf.contrib.layers.dropout(fcw,self.drop_prob)), 252 | tf.contrib.layers.dropout(fcb,self.drop_prob)) 253 | self.last_layer_parameters += [fcw, fcb] 254 | self.parameters += [fcw, fcb] 255 | 256 | def load_weights(self, sess): 257 | #saver=tf.train.Saver(self.parameters) 258 | #save_path="save" 259 | #saver.restore(sess,save_path) 260 | weights = np.load(self.weight_file) 261 | #return 262 | keys = sorted(weights.keys()) 263 | for i, k in enumerate(keys): 264 | removed_layer_variables = ['fc6_W','fc6_b','fc7_W','fc7_b','fc8_W','fc8_b'] 265 | if not k in removed_layer_variables: 266 | print(k) 267 | print("",i, k, np.shape(weights[k])) 268 | sess.run(self.parameters[i].assign(weights[k])) 269 | 270 | if __name__ == '__main__': 271 | 272 | train_data = h5py.File('new_train.h5', 'r') 273 | val_data = h5py.File('new_val.h5', 'r') 274 | 275 | 276 | print('Input data read complete') 277 | 278 | X_train, Y_train = train_data['X'], train_data['Y'] 279 | X_val, Y_val = val_data['X'], val_data['Y'] 280 | valsize = len(X_val) 281 | valsize = int(valsize*0.1) 282 | X_val, Y_val = shuffle(X_val, Y_val) 283 | X_val, Y_val = X_val[0:32], Y_val[0:32] 284 | print("Data shapes -- (train, val)", X_train.shape, X_val.shape) 285 | X_train, Y_train = shuffle(X_train, Y_train) 286 | 287 | 288 | sess = tf.Session() 289 | imgs = tf.placeholder(tf.float32, [None, 224, 224, 3]) 290 | target = tf.placeholder("float", [None, 200]) 291 | drop_prob = tf.placeholder("float") 292 | isFineTune=False 293 | vgg = vgg16(imgs, 'vgg16_weights.npz', sess, isFineTune, drop_prob) 294 | 295 | 296 | 297 | print('VGG network created') 298 | 299 | 300 | loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=vgg.fcl, labels=target)) 301 | learning_rate_wft = tf.placeholder(tf.float32, shape=[]) 302 | learning_rate_woft = tf.placeholder(tf.float32, shape=[]) 303 | 304 | if isFineTune: 305 | optimizer = tf.train.MomentumOptimizer(learning_rate=0.001, momentum=0.9).minimize(loss) 306 | else: optimizer = tf.train.MomentumOptimizer(learning_rate=0.9, momentum=0.9).minimize(loss) 307 | ###optimizer = tf.train.AdamOptimizer(learning_rate=0.01).minimize(loss) 308 | correct_prediction = tf.equal(tf.argmax(vgg.fcl,1), tf.argmax(target,1)) 309 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 310 | 311 | num_correct_preds = tf.reduce_sum(tf.cast(correct_prediction, tf.float32)) 312 | 313 | sess.run(tf.global_variables_initializer()) 314 | 315 | vgg.load_weights(sess) 316 | 317 | batch_size = 32 318 | 319 | 320 | print('Starting training') 321 | 322 | lr = 1.0 323 | finetune_step = -1 324 | base_lr = 1.0 325 | break_training_epoch = 15 326 | best_accuracy = 0.0 327 | for epoch in range(100): 328 | avg_cost = 0. 
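# Each epoch: reshuffle the training set; for every batch apply the random_crop
# and random_flip_right_to_left augmentation defined above and run one optimizer
# step with the dropout placeholder drop_prob fed as 0.8. After each epoch the
# 32-image validation split is evaluated and the parameters are checkpointed
# (saver.save to "save") whenever validation accuracy improves.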
329 | total_batch = int(len(X_train)/batch_size) 330 | X_train, Y_train = shuffle(X_train, Y_train) 331 | 332 | 333 | 334 | 335 | for i in range(total_batch): 336 | batch_xs, batch_ys = X_train[i*batch_size:i*batch_size+batch_size], Y_train[i*batch_size:i*batch_size+batch_size] 337 | 338 | batch_xs = random_crop(batch_xs) 339 | batch_xs = random_flip_right_to_left(batch_xs) 340 | 341 | start = time.time() 342 | [cp,cost,opt] = sess.run([correct_prediction,loss,optimizer], feed_dict={imgs: batch_xs, target: batch_ys, drop_prob:0.8}) 343 | if i%20==0: 344 | print('Last layer training, time to run optimizer for batch size:', batch_size,'is --> ',time.time()-start,'seconds',"loss:",cost,"correct_prediction",cp) 345 | 346 | if i % 100 == 0: 347 | #print ('Learning rate: ', (str(lr))) 348 | if epoch <= finetune_step: 349 | print("Training last layer of BCNN_DD") 350 | 351 | print("Epoch:", '%03d' % (epoch+1), "Step:", '%03d' % i,"Loss:", str(cost)) 352 | print("Training Accuracy:", sess.run([correct_prediction,accuracy],feed_dict={imgs: batch_xs, target: batch_ys, drop_prob:0.8})) 353 | 354 | val_batch_size = 32 355 | total_val_count = len(X_val) 356 | correct_val_count = 0 357 | val_loss = 0.0 358 | total_val_batch = int(total_val_count/val_batch_size) 359 | for i in range(total_val_batch): 360 | batch_val_x, batch_val_y = X_val[i*val_batch_size:i*val_batch_size+val_batch_size], Y_val[i*val_batch_size:i*val_batch_size+val_batch_size] 361 | val_loss += sess.run(loss, feed_dict={imgs: batch_val_x, target: batch_val_y, drop_prob:1.0}) 362 | 363 | pred = sess.run(num_correct_preds, feed_dict = {imgs: batch_val_x, target: batch_val_y, drop_prob:1.0}) 364 | correct_val_count+=pred 365 | acc = 100.0*correct_val_count/(1.0*total_val_count) 366 | if acc>best_accuracy: 367 | best_accuracy = acc 368 | saver=tf.train.Saver(vgg.parameters) 369 | save_path="save" 370 | saver.save(sess,save_path) 371 | print("##############################") 372 | print("Validation Loss:", val_loss) 373 | print("correct_val_count,", correct_val_count, "total_val_count", total_val_count) 374 | print("Validation Data Accuracy:", acc) 375 | print("##############################") 376 | 377 | 378 | 379 | test_data = h5py.File('new_test.h5', 'r') 380 | X_test, Y_test = test_data['X'], test_data['Y'] 381 | total_test_count = len(X_test) 382 | correct_test_count = 0 383 | test_batch_size = 10 384 | total_test_batch = int(total_test_count/test_batch_size) 385 | for i in range(total_test_batch): 386 | batch_test_x, batch_test_y = X_test[i*test_batch_size:i*test_batch_size+test_batch_size], Y_test[i*test_batch_size:i*test_batch_size+test_batch_size] 387 | 388 | pred = sess.run(num_correct_preds, feed_dict = {imgs: batch_test_x, target: batch_test_y, drop_prob:1.0}) 389 | correct_test_count+=pred 390 | 391 | print("##############################") 392 | print("correct_test_count,",correct_test_count," total_test_count", total_test_count) 393 | print("accuracy :", 100.0*correct_test_count/(1.0*total_test_count)) 394 | print("##############################") 395 | 396 | 397 | 398 | -------------------------------------------------------------------------------- /BCNN/create_h5_dataset.py: -------------------------------------------------------------------------------- 1 | from tflearn.data_utils import build_hdf5_image_dataset 2 | import h5py 3 | 4 | trainset = "train" 5 | testset = "test" 6 | build_hdf5_image_dataset(testset, image_shape=(224, 224), 7 | mode='folder', output_path='new_test.h5', 8 | categorical_labels=True, 
normalize=False)
9 | 
10 | build_hdf5_image_dataset(testset, image_shape=(224, 224),
11 |                          mode='folder', output_path='new_val.h5',
12 |                          categorical_labels=True, normalize=False)
13 | 
14 | print('Done creating new_test.h5')
15 | build_hdf5_image_dataset(trainset, image_shape=(224, 224),
16 |                          mode='folder', output_path='new_train.h5',
17 |                          categorical_labels=True, normalize=False)
18 | print ('Done creating new_train.h5')
19 | 
20 | 
--------------------------------------------------------------------------------
/DIP Project - Final Report.pdf:
--------------------------------------------------------------------------------
 https://raw.githubusercontent.com/tectal/Digital-Image-Processing-Project/3e4dd5997c1e9425ba5f13c22119e109ca27d2b1/DIP Project - Final Report.pdf
--------------------------------------------------------------------------------
/DIP Project - Slides.pptx:
--------------------------------------------------------------------------------
 https://raw.githubusercontent.com/tectal/Digital-Image-Processing-Project/3e4dd5997c1e9425ba5f13c22119e109ca27d2b1/DIP Project - Slides.pptx
--------------------------------------------------------------------------------
/Interpret_CUB_200_2011.docx:
--------------------------------------------------------------------------------
 https://raw.githubusercontent.com/tectal/Digital-Image-Processing-Project/3e4dd5997c1e9425ba5f13c22119e109ca27d2b1/Interpret_CUB_200_2011.docx
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Digital-Image-Processing-Project
2 | Course project for Digital Image Processing: fine-grained image classification on CUB-200-2011, Peking University
3 | 
--------------------------------------------------------------------------------
/Transfer_Learning/Figure_CUB200.png:
--------------------------------------------------------------------------------
 https://raw.githubusercontent.com/tectal/Digital-Image-Processing-Project/3e4dd5997c1e9425ba5f13c22119e109ca27d2b1/Transfer_Learning/Figure_CUB200.png
--------------------------------------------------------------------------------
/Transfer_Learning/README.txt:
--------------------------------------------------------------------------------
 https://raw.githubusercontent.com/tectal/Digital-Image-Processing-Project/3e4dd5997c1e9425ba5f13c22119e109ca27d2b1/Transfer_Learning/README.txt
--------------------------------------------------------------------------------
/Transfer_Learning/Transfer_Model:
--------------------------------------------------------------------------------
 https://raw.githubusercontent.com/tectal/Digital-Image-Processing-Project/3e4dd5997c1e9425ba5f13c22119e109ca27d2b1/Transfer_Learning/Transfer_Model
--------------------------------------------------------------------------------
/Transfer_Learning/cub_util.py:
--------------------------------------------------------------------------------
1 | #coding:utf-8
2 | # -*- coding: utf-8 -*-
3 | # @Time : 2018/5/21
4 | # @Author : yangguofeng
5 | # @File : cub_util.py
6 | # @Software: Sublime Text 3
7 | 
8 | import os
9 | import numpy as np
10 | from scipy.misc import imread, imresize
11 | 
12 | class CUB200(object):
13 |     def __init__(self, path, image_size=(224, 224)):
14 |         self._path = path
15 |         self._size = image_size
16 | 
17 |     def _classes(self):
18 |         return os.listdir(self._path)
19 | 
20 |     def _load_image(self, category, im_name):
21 |         return imresize(imread(os.path.join(self._path, category, im_name), mode="RGB"), self._size)
22 | 
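    # load_dataset walks every class folder under self._path, loads each image
    # resized to self._size, and returns (images as a float array, string labels
    # taken from the folder names). Note that np.random.choice samples with
    # replacement by default, so num_per_class may pick the same image more than once.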
23 |     def load_dataset(self, num_per_class=None):
24 |         classes = self._classes()
25 |         all_images = []
26 |         all_labels = []
27 |         for c in classes:
28 |             class_images = os.listdir(os.path.join(self._path, c))
29 |             if num_per_class is not None:
30 |                 class_images = np.random.choice(class_images, num_per_class)
31 |             for image_name in class_images:
32 |                 all_images.append(self._load_image(c, image_name))
33 |                 all_labels.append(c)
34 |         return np.array(all_images).astype(float), np.array(all_labels)
35 | 
36 | if __name__ == "__main__":
37 |     import matplotlib.pyplot as plt
38 |     DATA_DIR = os.path.expanduser(os.path.join("DATA_DIR", "CUB_200_2011"))
39 |     CUB_DIR = os.path.join(DATA_DIR, "CUB_200_2011", "images")
40 |     X, lbl = CUB200(CUB_DIR).load_dataset()
41 |     n = X.shape[0]
42 |     rnd_birds = np.vstack([np.hstack([X[np.random.choice(n)] for i in range(20)])
43 |                            for j in range(10)])
44 |     plt.figure(figsize=(6, 6))
45 |     plt.imshow(rnd_birds / 255)
46 |     plt.gca().get_xaxis().set_visible(False)
47 |     plt.gca().get_yaxis().set_visible(False)
48 |     plt.title("CUB_200_2011 200 Birds", fontsize=30)
49 |     plt.show()
--------------------------------------------------------------------------------
/Transfer_Learning/transfer.py:
--------------------------------------------------------------------------------
#coding:utf-8
# -*- coding: utf-8 -*-
# @Time : 2018/5/21
# @Author : yangguofeng
# @File : transfer.py
# @Software: Sublime Text 3

import os
import pandas as pd
import numpy as np
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import LabelEncoder
from sklearn.externals import joblib
from mlxtend.classifier import StackingClassifier
from mlxtend.feature_selection import ColumnSelector
import matplotlib
import matplotlib.pyplot as plt
import tensorflow as tf
import keras
from keras.applications import VGG19, InceptionV3, Xception, ResNet50
from keras.applications.imagenet_utils import preprocess_input as preprocess_type1
from keras.applications.inception_v3 import preprocess_input as preprocess_type2
from cub_util import CUB200

NUM_CLASSES = 200
DATA_DIR = os.path.expanduser(os.path.join("/home/guofeng/yangguofeng/Transfer_Learning", "CUB_200_2011"))
CUB_DIR = os.path.join(DATA_DIR, "CUB_200_2011", "images")
FEATURES_DIR = os.path.join(DATA_DIR, "CUB_200_2011", "features")
assert os.path.exists(CUB_DIR)

# Load Data
def load_features_compute_once(model, im_size, preprocess, save_path):
    if os.path.exists(save_path):
        data = pd.read_csv(save_path, compression='gzip', header=0, index_col=0)
        X = data.values
        y = data.index.values
    else:
        X, y = CUB200(CUB_DIR, image_size=im_size).load_dataset()
        X = model(include_top=False, weights="imagenet", pooling='avg').predict(preprocess(X))
        pd.DataFrame(X, index=y).to_csv(save_path, compression='gzip', header=True, index=True)
    return X, y

# Build ResNet50 Model
X_resnet, y = load_features_compute_once(ResNet50, (244, 244), preprocess_type1, os.path.join(FEATURES_DIR, "CUB200_resnet"))
X_resnet.shape
clf = LinearSVC()

# Evaluation ResNet50 Model
results = cross_val_score(clf, X_resnet, y, cv=3, n_jobs=-1)
print(results)
print("Overall accuracy: {:.3}".format(np.mean(results) * 100.))
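# The stacking model below treats four ImageNet-pretrained networks (VGG19,
# ResNet50, InceptionV3, Xception) as fixed feature extractors with global
# average pooling, concatenates their features into X_all, and records the
# column boundaries in inx. Each base LogisticRegression is wrapped in a
# pipeline with a ColumnSelector so it only sees its own network's columns;
# the StackingClassifier averages the base models' predicted probabilities
# (use_probas=True, average_probas=True) and feeds them to a LinearSVC
# meta-classifier. Accuracy is estimated with 3-fold cross-validation.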
"CUB200_VGG19")) X_incept, _ = load_features_compute_once(InceptionV3, (299, 299), preprocess_type2, os.path.join(FEATURES_DIR, "CUB200_inception")) X_xcept, _ = load_features_compute_once(Xception, (299, 299), preprocess_type2, os.path.join(FEATURES_DIR, "CUB200_xception")) X_all = np.hstack([X_vgg, X_resnet, X_incept, X_xcept]) inx = np.cumsum([0] + [X_vgg.shape[1], X_resnet.shape[1], X_incept.shape[1], X_xcept.shape[1]]) y = LabelEncoder().fit_transform(y) base_classifier = LogisticRegression meta_classifier = LinearSVC pipes = [make_pipeline(ColumnSelector(cols=list(range(inx[i], inx[i+1]))), base_classifier()) for i in range(4)] stacking_classifier = StackingClassifier(classifiers=pipes, meta_classifier=meta_classifier(), use_probas=True, average_probas=True, verbose=1) # Evaluation Stacking Model results = cross_val_score(stacking_classifier, X_all, y, cv=3, n_jobs=-1) print(results) print("Overall accuracy: {:.3}".format(np.mean(results) * 100.)) # Model Save #joblib.dump(stacking_classifier,"Transfer_Model") # Load Model #Model = joblib.load("Transfer_Model") # Predict #results= Model.predit(test_X) #print(results) #test_X为特征集 -------------------------------------------------------------------------------- /大作业布置.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tectal/Digital-Image-Processing-Project/3e4dd5997c1e9425ba5f13c22119e109ca27d2b1/大作业布置.pdf -------------------------------------------------------------------------------- /细分类讲解.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tectal/Digital-Image-Processing-Project/3e4dd5997c1e9425ba5f13c22119e109ca27d2b1/细分类讲解.pdf --------------------------------------------------------------------------------