├── paper.pdf
├── README.md
├── mixup_generator.py
├── baseline_resnet.py
├── WarmUp_LR.py
├── cosine_epoch.py
├── mixup.py
└── cosine_batch.py


/paper.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sherdencooper/tricks-in-deeplearning/HEAD/paper.pdf


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # tricks-in-deeplearning
 2 | 
 3 | This repo is part work of https://github.com/SJTU-DL-lab/Bag_of_Tricks_CNN and the pytorch implementation achieves about 96.6% acc.
 4 | 
 5 | Using different tricks to improve performance of resetnet by Keras
 6 | 
 7 | Paper：https://arxiv.org/abs/1812.01187
 8 | 
 9 | Resnet model and other CNN models implemented by keras can be found from: https://github.com/BIGBALLON/cifar-10-cnn
10 | 
11 | Train baseline Resnet32 model （done)  accuracy: 91.64%
12 | 
13 | Adding warmup LR (done) accracy:92.32%(+0.68%)
14 | 
15 | Adding cosine decay（done) accuracy:93.01%（+0.69%）
16 | 
17 | Adding cosine decay based on batch (done). But it does not improve for accuracy:92.93%
18 | 
19 | Adding mixup(done) accuracy:94.10%(+1.09%)
20 | 
21 | I tried label smoothing but it does not improve. According to https://www.researchgate.net/publication/327004087_Empirical_study_on_label_smoothing_in_neural_networks, label smoothing is not suitable for cifar 10.
22 | 
23 | Using smaller batch size :accuracy 94.38%(+0.28%)
24 | 
25 | Using resnet110: accuracy 95.21%(+0.83%)
26 | 


--------------------------------------------------------------------------------
/mixup_generator.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | 
 4 | class MixupGenerator():
 5 |     def __init__(self, X_train, y_train, batch_size=32, alpha=0.2, shuffle=True, datagen=None):
 6 |         self.X_train = X_train
 7 |         self.y_train = y_train
 8 |         self.batch_size = batch_size
 9 |         self.alpha = alpha
10 |         self.shuffle = shuffle
11 |         self.sample_num = len(X_train)
12 |         self.datagen = datagen
13 | 
14 |     def __call__(self):
15 |         while True:
16 |             indexes = self.__get_exploration_order()
17 |             itr_num = int(len(indexes) // (self.batch_size * 2))
18 | 
19 |             for i in range(itr_num):
20 |                 batch_ids = indexes[i * self.batch_size * 2:(i + 1) * self.batch_size * 2]
21 |                 X, y = self.__data_generation(batch_ids)
22 | 
23 |                 yield X, y
24 | 
25 |     def __get_exploration_order(self):
26 |         indexes = np.arange(self.sample_num)
27 | 
28 |         if self.shuffle:
29 |             np.random.shuffle(indexes)
30 | 
31 |         return indexes
32 | 
33 |     def __data_generation(self, batch_ids):
34 |         _, h, w, c = self.X_train.shape
35 |         l = np.random.beta(self.alpha, self.alpha, self.batch_size)
36 |         X_l = l.reshape(self.batch_size, 1, 1, 1)
37 |         y_l = l.reshape(self.batch_size, 1)
38 | 
39 |         X1 = self.X_train[batch_ids[:self.batch_size]]
40 |         X2 = self.X_train[batch_ids[self.batch_size:]]
41 |         X = X1 * X_l + X2 * (1 - X_l)
42 | 
43 |         if self.datagen:
44 |             for i in range(self.batch_size):
45 |                 X[i] = self.datagen.random_transform(X[i])
46 |                 X[i] = self.datagen.standardize(X[i])
47 | 
48 |         if isinstance(self.y_train, list):
49 |             y = []
50 | 
51 |             for y_train_ in self.y_train:
52 |                 y1 = y_train_[batch_ids[:self.batch_size]]
53 |                 y2 = y_train_[batch_ids[self.batch_size:]]
54 |                 y.append(y1 * y_l + y2 * (1 - y_l))
55 |         else:
56 |             y1 = self.y_train[batch_ids[:self.batch_size]]
57 |             y2 = self.y_train[batch_ids[self.batch_size:]]
58 |             y = y1 * y_l + y2 * (1 - y_l)
59 | 
60 |         return X, y
61 | 


--------------------------------------------------------------------------------
/baseline_resnet.py:
--------------------------------------------------------------------------------
  1 | import keras
  2 | import numpy as np
  3 | from keras.layers import Dense
  4 | from keras.layers import Input
  5 | from keras.layers import Conv2D
  6 | from keras.layers import add
  7 | from keras.layers import Activation
  8 | from keras.layers import GlobalAveragePooling2D
  9 | from keras.models import Model
 10 | from keras.datasets import cifar10
 11 | from keras.callbacks import LearningRateScheduler, ModelCheckpoint
 12 | from keras import optimizers
 13 | from keras.preprocessing.image import ImageDataGenerator
 14 | from keras import regularizers
 15 | from keras.layers import BatchNormalization
 16 | import tensorflow as tf
 17 | from keras.backend.tensorflow_backend import set_session
 18 | 
 19 | '''
 20 | Tensorflow backend default
 21 | If you want to use th backend:
 22 | from keras import backend as K
 23 | '''
 24 | 
 25 | config = tf.ConfigProto()         #using GPU
 26 | config.gpu_options.allow_growth = True
 27 | sess = tf.Session(config=config)
 28 | 
 29 | STACK_NUM = 5
 30 | NUM_CLASSES = 10
 31 | WIDTH = 32
 32 | HEIGHT = 32
 33 | BATCH_SIZE = 256       #decrease the number if you run out your GPU memory
 34 | EPOCHES = 200
 35 | ITERATIONS = 50000//BATCH_SIZE +1
 36 | WEIGHT_DECAY = 1e-4           #according to the paper
 37 | FILE_PATH = "/cluster/home/it_stu25/dllab/model/best_model.h5"
 38 | 
 39 | def residual_block(x, filters, increase=False):
 40 |     stride = (1, 1)
 41 |     if increase:
 42 |         stride = (2, 2)
 43 | 
 44 |     o1 = Activation('relu')(BatchNormalization(momentum=0.9, epsilon=1e-5)(x))
 45 |     conv_1 = Conv2D(filters, kernel_size=(3, 3), strides=stride, padding='same',
 46 |                     kernel_initializer="he_normal",
 47 |                     kernel_regularizer=regularizers.l2(WEIGHT_DECAY))(o1)
 48 |     o2 = Activation('relu')(BatchNormalization(momentum=0.9, epsilon=1e-5)(conv_1))
 49 |     conv_2 = Conv2D(filters, kernel_size=(3, 3), strides=(1, 1), padding='same',
 50 |                     kernel_initializer="he_normal",
 51 |                     kernel_regularizer=regularizers.l2(WEIGHT_DECAY))(o2)
 52 |     if increase:
 53 |         projection = Conv2D(filters, kernel_size=(1, 1), strides=(2, 2), padding='same',
 54 |                             kernel_initializer="he_normal",
 55 |                             kernel_regularizer=regularizers.l2(WEIGHT_DECAY))(o1)
 56 |         block = add([conv_2, projection])
 57 |     else:
 58 |         block = add([conv_2, x])
 59 |     return block
 60 | 
 61 | 
 62 | def residual_network(input_tensor, stack_num = 5):
 63 |     x = Conv2D(filters=16, kernel_size=(3, 3), strides=(1, 1), padding='same',
 64 |                kernel_initializer="he_normal",
 65 |                kernel_regularizer=regularizers.l2(WEIGHT_DECAY))(input_tensor)
 66 | 
 67 |     for _ in range(stack_num):
 68 |         x = residual_block(x, 16, False)
 69 | 
 70 |     # input: 32x32x16 output: 16x16x32
 71 |     x = residual_block(x, 32, True)
 72 |     for _ in range(1, stack_num):
 73 |         x = residual_block(x, 32, False)
 74 | 
 75 |     # input: 16x16x32 output: 8x8x64
 76 |     x = residual_block(x, 64, True)
 77 |     for _ in range(1, stack_num):
 78 |         x = residual_block(x, 64, False)
 79 | 
 80 |     x = BatchNormalization(momentum=0.9, epsilon=1e-5)(x)
 81 |     x = Activation('relu')(x)
 82 |     x = GlobalAveragePooling2D()(x)
 83 | 
 84 |     # input: 64 output: 10
 85 |     x = Dense(10, activation='softmax', kernel_initializer="he_normal",
 86 |               kernel_regularizer=regularizers.l2(WEIGHT_DECAY))(x)
 87 |     return x
 88 | 
 89 | def color_preprocessing(x_train, x_test):
 90 |     x_train = x_train.astype('float32')
 91 |     x_test = x_test.astype('float32')
 92 |     mean = [125.307, 122.95, 113.865]
 93 |     std = [62.9932, 62.0887, 66.7048]
 94 |     for i in range(3):
 95 |         x_train[:, :, :, i] = (x_train[:, :, :, i] - mean[i]) / std[i]
 96 |         x_test[:, :, :, i] = (x_test[:, :, :, i] - mean[i]) / std[i]
 97 |     return x_train, x_test
 98 | 
 99 | def scheduler(epoch):
100 |     if epoch < 81:
101 |         return 0.1
102 |     if epoch < 122:
103 |         return 0.01
104 |     return 0.001
105 | 
106 | 
107 | if __name__ == '__main__':
108 | 
109 |     (x_train, y_train),(x_test,y_test) = cifar10.load_data()
110 |     y_train = keras.utils.to_categorical(y_train, 10)
111 |     y_test = keras.utils.to_categorical(y_test, 10)
112 | 
113 |     x_train, x_test = color_preprocessing(x_train, x_test)
114 |     img_input = Input(shape=(HEIGHT,WIDTH,3))     #channel_last
115 |     output = residual_network(img_input, STACK_NUM)
116 |     resnet = Model(img_input, output)
117 |     #other optimizers may achieve better performance
118 |     sgd = optimizers.SGD(lr=0.1, momentum=0.9, nesterov=True)     #according the paper
119 |     resnet.compile(loss='categorical_crossentropy',optimizer=sgd, metrics=['accuracy'])
120 |     checkpoint = ModelCheckpoint(FILE_PATH, monitor='val_acc', verbose=1, save_best_only=True,mode='max')
121 |     cbks = [LearningRateScheduler(schedule=scheduler),checkpoint]
122 |     datagen = ImageDataGenerator(horizontal_flip=True,
123 |                                  width_shift_range=0.125,
124 |                                  height_shift_range=0.125,
125 |                                  fill_mode='constant', cval=0.)
126 |     datagen.fit(x_train)
127 | 
128 |     resnet.fit_generator(datagen.flow(x_train,y_train,batch_size=BATCH_SIZE),
129 |                          steps_per_epoch=ITERATIONS,shuffle=1,
130 |                          epochs=EPOCHES,
131 |                          callbacks=cbks,
132 |                          validation_data=(x_test,y_test))
133 | 
134 | 
135 | 
136 | 
137 | 
138 | 


--------------------------------------------------------------------------------
/WarmUp_LR.py:
--------------------------------------------------------------------------------
  1 | import keras
  2 | import numpy as np
  3 | from keras.layers import Dense
  4 | from keras.layers import Input
  5 | from keras.layers import Conv2D
  6 | from keras.layers import add
  7 | from keras.layers import Activation
  8 | from keras.layers import GlobalAveragePooling2D
  9 | from keras.models import Model
 10 | from keras.datasets import cifar10
 11 | from keras.callbacks import LearningRateScheduler, ModelCheckpoint
 12 | from keras import optimizers
 13 | from keras.preprocessing.image import ImageDataGenerator
 14 | from keras import regularizers
 15 | from keras.layers import BatchNormalization
 16 | import tensorflow as tf
 17 | from keras.backend.tensorflow_backend import set_session
 18 | 
 19 | '''
 20 | Tensorflow backend default
 21 | If you want to use th backend:
 22 | from keras import backend as K
 23 | '''
 24 | 
 25 | config = tf.ConfigProto()         #using GPU
 26 | config.gpu_options.allow_growth = True
 27 | sess = tf.Session(config=config)
 28 | 
 29 | STACK_NUM = 5
 30 | BASE_LR = 0.1
 31 | NUM_CLASSES = 10
 32 | WIDTH = 32
 33 | HEIGHT = 32
 34 | BATCH_SIZE = 256       #decrease the number if you run out your GPU memory
 35 | EPOCHES = 200
 36 | ITERATIONS = 50000//BATCH_SIZE +1
 37 | WEIGHT_DECAY = 1e-4           #according to the paper
 38 | FILE_PATH = "/cluster/home/it_stu25/dllab/model/best_model.h5"
 39 | 
 40 | def residual_block(x, filters, increase=False):
 41 |     stride = (1, 1)
 42 |     if increase:
 43 |         stride = (2, 2)
 44 | 
 45 |     o1 = Activation('relu')(BatchNormalization(momentum=0.9, epsilon=1e-5)(x))
 46 |     conv_1 = Conv2D(filters, kernel_size=(3, 3), strides=stride, padding='same',
 47 |                     kernel_initializer="he_normal",
 48 |                     kernel_regularizer=regularizers.l2(WEIGHT_DECAY))(o1)
 49 |     o2 = Activation('relu')(BatchNormalization(momentum=0.9, epsilon=1e-5)(conv_1))
 50 |     conv_2 = Conv2D(filters, kernel_size=(3, 3), strides=(1, 1), padding='same',
 51 |                     kernel_initializer="he_normal",
 52 |                     kernel_regularizer=regularizers.l2(WEIGHT_DECAY))(o2)
 53 |     if increase:
 54 |         projection = Conv2D(filters, kernel_size=(1, 1), strides=(2, 2), padding='same',
 55 |                             kernel_initializer="he_normal",
 56 |                             kernel_regularizer=regularizers.l2(WEIGHT_DECAY))(o1)
 57 |         block = add([conv_2, projection])
 58 |     else:
 59 |         block = add([conv_2, x])
 60 |     return block
 61 | 
 62 | 
 63 | def residual_network(input_tensor, stack_num = 5):
 64 |     x = Conv2D(filters=16, kernel_size=(3, 3), strides=(1, 1), padding='same',
 65 |                kernel_initializer="he_normal",
 66 |                kernel_regularizer=regularizers.l2(WEIGHT_DECAY))(input_tensor)
 67 | 
 68 |     for _ in range(stack_num):
 69 |         x = residual_block(x, 16, False)
 70 | 
 71 |     # input: 32x32x16 output: 16x16x32
 72 |     x = residual_block(x, 32, True)
 73 |     for _ in range(1, stack_num):
 74 |         x = residual_block(x, 32, False)
 75 | 
 76 |     # input: 16x16x32 output: 8x8x64
 77 |     x = residual_block(x, 64, True)
 78 |     for _ in range(1, stack_num):
 79 |         x = residual_block(x, 64, False)
 80 | 
 81 |     x = BatchNormalization(momentum=0.9, epsilon=1e-5)(x)
 82 |     x = Activation('relu')(x)
 83 |     x = GlobalAveragePooling2D()(x)
 84 | 
 85 |     # input: 64 output: 10
 86 |     x = Dense(10, activation='softmax', kernel_initializer="he_normal",
 87 |               kernel_regularizer=regularizers.l2(WEIGHT_DECAY))(x)
 88 |     return x
 89 | 
 90 | def color_preprocessing(x_train, x_test):
 91 |     x_train = x_train.astype('float32')
 92 |     x_test = x_test.astype('float32')
 93 |     mean = [125.307, 122.95, 113.865]
 94 |     std = [62.9932, 62.0887, 66.7048]
 95 |     for i in range(3):
 96 |         x_train[:, :, :, i] = (x_train[:, :, :, i] - mean[i]) / std[i]
 97 |         x_test[:, :, :, i] = (x_test[:, :, :, i] - mean[i]) / std[i]
 98 |     return x_train, x_test
 99 | 
100 | def scheduler(epoch):
101 |     if epoch<5:
102 |         return BASE_LR*(epoch+1)/5
103 |     if epoch < 81:
104 |         return 0.1
105 |     if epoch < 122:
106 |         return 0.01
107 |     return 0.001
108 | 
109 | 
110 | if __name__ == '__main__':
111 | 
112 |     (x_train, y_train),(x_test,y_test) = cifar10.load_data()
113 |     y_train = keras.utils.to_categorical(y_train, 10)
114 |     y_test = keras.utils.to_categorical(y_test, 10)
115 | 
116 |     x_train, x_test = color_preprocessing(x_train, x_test)
117 |     img_input = Input(shape=(HEIGHT,WIDTH,3))     #channel_last
118 |     output = residual_network(img_input, STACK_NUM)
119 |     resnet = Model(img_input, output)
120 |     #other optimizers may achieve better performance
121 |     sgd = optimizers.SGD(lr=0.1, momentum=0.9, nesterov=True)     #according the paper
122 |     resnet.compile(loss='categorical_crossentropy',optimizer=sgd, metrics=['accuracy'])
123 |     checkpoint = ModelCheckpoint(FILE_PATH, monitor='val_acc', verbose=1, save_best_only=True,mode='max')
124 |     cbks = [LearningRateScheduler(schedule=scheduler),checkpoint]
125 |     datagen = ImageDataGenerator(horizontal_flip=True,
126 |                                  width_shift_range=0.125,
127 |                                  height_shift_range=0.125,
128 |                                  fill_mode='constant', cval=0.)
129 |     datagen.fit(x_train)
130 | 
131 |     resnet.fit_generator(datagen.flow(x_train,y_train,batch_size=BATCH_SIZE),
132 |                          steps_per_epoch=ITERATIONS,shuffle=1,
133 |                          epochs=EPOCHES,
134 |                          callbacks=cbks,
135 |                          validation_data=(x_test,y_test))
136 | 
137 |     
138 | 
139 | 
140 | 


--------------------------------------------------------------------------------
/cosine_epoch.py:
--------------------------------------------------------------------------------
  1 | import keras
  2 | import numpy as np
  3 | from keras.layers import Dense
  4 | import math
  5 | from keras.layers import Input
  6 | from keras.layers import Conv2D
  7 | from keras.layers import add
  8 | from keras.layers import Activation
  9 | from keras.layers import GlobalAveragePooling2D
 10 | from keras.models import Model
 11 | from keras.datasets import cifar10
 12 | from keras.callbacks import LearningRateScheduler, ModelCheckpoint,TensorBoard
 13 | from keras import optimizers
 14 | from keras.preprocessing.image import ImageDataGenerator
 15 | from keras import regularizers
 16 | from keras.layers import BatchNormalization
 17 | import tensorflow as tf
 18 | from keras.backend.tensorflow_backend import set_session
 19 | 
 20 | '''
 21 | Tensorflow backend default
 22 | If you want to use th backend:
 23 | from keras import backend as K
 24 | '''
 25 | 
 26 | config = tf.ConfigProto()         #using GPU
 27 | config.gpu_options.allow_growth = True
 28 | sess = tf.Session(config=config)
 29 | 
 30 | STACK_NUM = 5
 31 | BASE_LR = 0.1
 32 | NUM_CLASSES = 10
 33 | WIDTH = 32
 34 | HEIGHT = 32
 35 | BATCH_SIZE = 256       #decrease the number if you run out your GPU memory
 36 | EPOCHES = 500
 37 | ITERATIONS = 50000//BATCH_SIZE +1
 38 | WEIGHT_DECAY = 1e-4           #according to the paper
 39 | FILE_PATH = "/cluster/home/it_stu25/dllab/model/best_model.h5"
 40 | TENSORBOARD_PATH = '/cluster/home/it_stu25/dllab/log/'
 41 | 
 42 | def residual_block(x, filters, increase=False):
 43 |     stride = (1, 1)
 44 |     if increase:
 45 |         stride = (2, 2)
 46 | 
 47 |     o1 = Activation('relu')(BatchNormalization(momentum=0.9, epsilon=1e-5)(x))
 48 |     conv_1 = Conv2D(filters, kernel_size=(3, 3), strides=stride, padding='same',
 49 |                     kernel_initializer="he_normal",
 50 |                     kernel_regularizer=regularizers.l2(WEIGHT_DECAY))(o1)
 51 |     o2 = Activation('relu')(BatchNormalization(momentum=0.9, epsilon=1e-5)(conv_1))
 52 |     conv_2 = Conv2D(filters, kernel_size=(3, 3), strides=(1, 1), padding='same',
 53 |                     kernel_initializer="he_normal",
 54 |                     kernel_regularizer=regularizers.l2(WEIGHT_DECAY))(o2)
 55 |     if increase:
 56 |         projection = Conv2D(filters, kernel_size=(1, 1), strides=(2, 2), padding='same',
 57 |                             kernel_initializer="he_normal",
 58 |                             kernel_regularizer=regularizers.l2(WEIGHT_DECAY))(o1)
 59 |         block = add([conv_2, projection])
 60 |     else:
 61 |         block = add([conv_2, x])
 62 |     return block
 63 | 
 64 | 
 65 | def residual_network(input_tensor, stack_num = 5):
 66 |     x = Conv2D(filters=16, kernel_size=(3, 3), strides=(1, 1), padding='same',
 67 |                kernel_initializer="he_normal",
 68 |                kernel_regularizer=regularizers.l2(WEIGHT_DECAY))(input_tensor)
 69 | 
 70 |     for _ in range(stack_num):
 71 |         x = residual_block(x, 16, False)
 72 | 
 73 |     # input: 32x32x16 output: 16x16x32
 74 |     x = residual_block(x, 32, True)
 75 |     for _ in range(1, stack_num):
 76 |         x = residual_block(x, 32, False)
 77 | 
 78 |     # input: 16x16x32 output: 8x8x64
 79 |     x = residual_block(x, 64, True)
 80 |     for _ in range(1, stack_num):
 81 |         x = residual_block(x, 64, False)
 82 | 
 83 |     x = BatchNormalization(momentum=0.9, epsilon=1e-5)(x)
 84 |     x = Activation('relu')(x)
 85 |     x = GlobalAveragePooling2D()(x)
 86 | 
 87 |     # input: 64 output: 10
 88 |     x = Dense(10, activation='softmax', kernel_initializer="he_normal",
 89 |               kernel_regularizer=regularizers.l2(WEIGHT_DECAY))(x)
 90 |     return x
 91 | 
 92 | def color_preprocessing(x_train, x_test):
 93 |     x_train = x_train.astype('float32')
 94 |     x_test = x_test.astype('float32')
 95 |     mean = [125.307, 122.95, 113.865]
 96 |     std = [62.9932, 62.0887, 66.7048]
 97 |     for i in range(3):
 98 |         x_train[:, :, :, i] = (x_train[:, :, :, i] - mean[i]) / std[i]
 99 |         x_test[:, :, :, i] = (x_test[:, :, :, i] - mean[i]) / std[i]
100 |     return x_train, x_test
101 | 
102 | def scheduler(epoch):
103 |     if epoch<5:
104 |         return BASE_LR*(epoch+1)/5
105 |     return 0.5 * BASE_LR * (1 + math.cos(math.pi * (epoch-5)/(EPOCHES-5)))
106 | 
107 | 
108 | if __name__ == '__main__':
109 | 
110 |     (x_train, y_train),(x_test,y_test) = cifar10.load_data()
111 |     y_train = keras.utils.to_categorical(y_train, 10)
112 |     y_test = keras.utils.to_categorical(y_test, 10)
113 | 
114 |     x_train, x_test = color_preprocessing(x_train, x_test)
115 |     img_input = Input(shape=(HEIGHT,WIDTH,3))     #channel_last
116 |     output = residual_network(img_input, STACK_NUM)
117 |     resnet = Model(img_input, output)
118 |     #other optimizers may achieve better performance
119 |     sgd = optimizers.SGD(lr=0.1, momentum=0.9, nesterov=True)     #according the paper
120 |     resnet.compile(loss='categorical_crossentropy',optimizer=sgd, metrics=['accuracy'])
121 |     checkpoint = ModelCheckpoint(FILE_PATH, monitor='val_acc', verbose=1, save_best_only=True,mode='max')
122 |     cbks = [LearningRateScheduler(schedule=scheduler),checkpoint,TensorBoard(log_dir=TENSORBOARD_PATH)]
123 |     datagen = ImageDataGenerator(horizontal_flip=True,
124 |                                  width_shift_range=0.125,
125 |                                  height_shift_range=0.125,
126 |                                  fill_mode='constant', cval=0.)
127 |     datagen.fit(x_train)
128 | 
129 |     resnet.fit_generator(datagen.flow(x_train,y_train,batch_size=BATCH_SIZE),
130 |                          steps_per_epoch=ITERATIONS,shuffle=1,
131 |                          epochs=EPOCHES,
132 |                          callbacks=cbks,
133 |                          validation_data=(x_test,y_test))
134 | 
135 |     
136 | 
137 | 
138 | 


--------------------------------------------------------------------------------
/mixup.py:
--------------------------------------------------------------------------------
  1 | import keras
  2 | import numpy as np
  3 | from keras.layers import Dense
  4 | import math
  5 | from keras.layers import Input
  6 | from keras.layers import Conv2D
  7 | from keras.layers import add
  8 | from keras.layers import Activation
  9 | from keras.layers import GlobalAveragePooling2D
 10 | from keras.models import Model
 11 | from keras.datasets import cifar10
 12 | from keras.callbacks import LearningRateScheduler, ModelCheckpoint,TensorBoard
 13 | from keras import optimizers
 14 | from keras.preprocessing.image import ImageDataGenerator
 15 | from keras import regularizers
 16 | from keras.layers import BatchNormalization
 17 | import tensorflow as tf
 18 | from keras.backend.tensorflow_backend import set_session
 19 | from mixup_generator import MixupGenerator
 20 | 
 21 | '''
 22 | Tensorflow backend default
 23 | If you want to use th backend:
 24 | from keras import backend as K
 25 | '''
 26 | 
 27 | config = tf.ConfigProto()         #using GPU
 28 | config.gpu_options.allow_growth = True
 29 | sess = tf.Session(config=config)
 30 | 
 31 | STACK_NUM = 5
 32 | BASE_LR = 0.1
 33 | NUM_CLASSES = 10
 34 | WIDTH = 32
 35 | HEIGHT = 32
 36 | BATCH_SIZE = 128       #decrease the number if you run out your GPU memory
 37 | EPOCHES = 500
 38 | ITERATIONS = 50000//BATCH_SIZE +1
 39 | WEIGHT_DECAY = 1e-4           #according to the paper
 40 | FILE_PATH = "/cluster/home/it_stu27/dllab/model/best_model.h5"
 41 | TENSORBOARD_PATH = '/cluster/home/it_stu27/dllab/log/'
 42 | 
 43 | def residual_block(x, filters, increase=False):
 44 |     stride = (1, 1)
 45 |     if increase:
 46 |         stride = (2, 2)
 47 | 
 48 |     o1 = Activation('relu')(BatchNormalization(momentum=0.9, epsilon=1e-5)(x))
 49 |     conv_1 = Conv2D(filters, kernel_size=(3, 3), strides=stride, padding='same',
 50 |                     kernel_initializer="he_normal",
 51 |                     kernel_regularizer=regularizers.l2(WEIGHT_DECAY))(o1)
 52 |     o2 = Activation('relu')(BatchNormalization(momentum=0.9, epsilon=1e-5)(conv_1))
 53 |     conv_2 = Conv2D(filters, kernel_size=(3, 3), strides=(1, 1), padding='same',
 54 |                     kernel_initializer="he_normal",
 55 |                     kernel_regularizer=regularizers.l2(WEIGHT_DECAY))(o2)
 56 |     if increase:
 57 |         projection = Conv2D(filters, kernel_size=(1, 1), strides=(2, 2), padding='same',
 58 |                             kernel_initializer="he_normal",
 59 |                             kernel_regularizer=regularizers.l2(WEIGHT_DECAY))(o1)
 60 |         block = add([conv_2, projection])
 61 |     else:
 62 |         block = add([conv_2, x])
 63 |     return block
 64 | 
 65 | 
 66 | def residual_network(input_tensor, stack_num = 5):
 67 |     x = Conv2D(filters=16, kernel_size=(3, 3), strides=(1, 1), padding='same',
 68 |                kernel_initializer="he_normal",
 69 |                kernel_regularizer=regularizers.l2(WEIGHT_DECAY))(input_tensor)
 70 | 
 71 |     for _ in range(stack_num):
 72 |         x = residual_block(x, 16, False)
 73 | 
 74 |     # input: 32x32x16 output: 16x16x32
 75 |     x = residual_block(x, 32, True)
 76 |     for _ in range(1, stack_num):
 77 |         x = residual_block(x, 32, False)
 78 | 
 79 |     # input: 16x16x32 output: 8x8x64
 80 |     x = residual_block(x, 64, True)
 81 |     for _ in range(1, stack_num):
 82 |         x = residual_block(x, 64, False)
 83 | 
 84 |     x = BatchNormalization(momentum=0.9, epsilon=1e-5)(x)
 85 |     x = Activation('relu')(x)
 86 |     x = GlobalAveragePooling2D()(x)
 87 | 
 88 |     # input: 64 output: 10
 89 |     x = Dense(10, activation='softmax', kernel_initializer="he_normal",
 90 |               kernel_regularizer=regularizers.l2(WEIGHT_DECAY))(x)
 91 |     return x
 92 | 
 93 | def color_preprocessing(x_train, x_test):
 94 |     x_train = x_train.astype('float32')
 95 |     x_test = x_test.astype('float32')
 96 |     mean = [125.307, 122.95, 113.865]
 97 |     std = [62.9932, 62.0887, 66.7048]
 98 |     for i in range(3):
 99 |         x_train[:, :, :, i] = (x_train[:, :, :, i] - mean[i]) / std[i]
100 |         x_test[:, :, :, i] = (x_test[:, :, :, i] - mean[i]) / std[i]
101 |     return x_train, x_test
102 | 
103 | def scheduler(epoch):
104 |     if epoch<5:
105 |         return BASE_LR*(epoch+1)/5
106 |     return 0.5 * BASE_LR * (1 + math.cos(math.pi * (epoch-5)/(EPOCHES-5)))
107 | 
108 | 
109 | if __name__ == '__main__':
110 | 
111 |     (x_train, y_train),(x_test,y_test) = cifar10.load_data()
112 |     y_train = keras.utils.to_categorical(y_train, 10)
113 |     y_test = keras.utils.to_categorical(y_test, 10)
114 | 
115 |     x_train, x_test = color_preprocessing(x_train, x_test)
116 |     img_input = Input(shape=(HEIGHT,WIDTH,3))     #channel_last
117 |     output = residual_network(img_input, STACK_NUM)
118 |     resnet = Model(img_input, output)
119 |     #other optimizers may achieve better performance
120 |     sgd = optimizers.SGD(lr=0.1, momentum=0.9, nesterov=True)     #according the paper
121 |     resnet.compile(loss='categorical_crossentropy',optimizer=sgd, metrics=['accuracy'])
122 |     checkpoint = ModelCheckpoint(FILE_PATH, monitor='val_acc', verbose=1, save_best_only=True,mode='max')
123 |     cbks = [LearningRateScheduler(schedule=scheduler),checkpoint,TensorBoard(log_dir=TENSORBOARD_PATH)]
124 |     datagen = ImageDataGenerator(horizontal_flip=True,
125 |                                  width_shift_range=0.125,
126 |                                  height_shift_range=0.125,
127 |                                  fill_mode='constant', cval=0.)
128 |     
129 |     #datagen.fit(x_train)
130 |     training_generator = MixupGenerator(x_train, y_train, batch_size=BATCH_SIZE, alpha=0.2, datagen=datagen)()
131 |     resnet.fit_generator(generator=training_generator,
132 |                          steps_per_epoch=ITERATIONS,shuffle=1,
133 |                          epochs=EPOCHES,
134 |                          callbacks=cbks,
135 |                          validation_data=(x_test,y_test))
136 | 
137 |     
138 | 
139 | 
140 | 


--------------------------------------------------------------------------------
/cosine_batch.py:
--------------------------------------------------------------------------------
  1 | import keras
  2 | import numpy as np
  3 | import math
  4 | from keras.layers import Dense
  5 | from keras.layers import Input
  6 | from keras.layers import Conv2D
  7 | from keras.layers import add
  8 | from keras.layers import Activation
  9 | from keras.layers import GlobalAveragePooling2D
 10 | from keras.models import Model
 11 | from keras.datasets import cifar10
 12 | from keras.callbacks import Callback
 13 | from keras.callbacks import LearningRateScheduler, ModelCheckpoint
 14 | from keras import optimizers
 15 | from keras.preprocessing.image import ImageDataGenerator
 16 | from keras import regularizers
 17 | from keras.layers import BatchNormalization
 18 | import tensorflow as tf
 19 | from keras import backend as K
 20 | from keras.backend.tensorflow_backend import set_session
 21 | 
 22 | '''
 23 | Tensorflow backend default
 24 | '''
 25 | 
 26 | config = tf.ConfigProto()         #using GPU
 27 | config.gpu_options.allow_growth = True
 28 | sess = tf.Session(config=config)
 29 | 
 30 | STACK_NUM = 5
 31 | NUM_CLASSES = 10
 32 | WIDTH = 32
 33 | HEIGHT = 32
 34 | BATCH_SIZE = 256       #decrease the number if you run out your GPU memory
 35 | EPOCHES = 500
 36 | ITERATIONS = 50000//BATCH_SIZE +1
 37 | WEIGHT_DECAY = 1e-4           #according to the paper
 38 | FILE_PATH = '/cluster/home/it_stu25/dllab/model/best_model.h5'
 39 | BASE_LR = 0.1
 40 | 
 41 | class cosine(Callback):
 42 |     def __init__(self,
 43 |         base_lr,
 44 |         steps_per_epoch,
 45 |         warm_up_epoches
 46 |         ):
 47 |         super(cosine, self).__init__()
 48 |         self.base_lr = base_lr
 49 |         self.steps_per_epoch = steps_per_epoch
 50 |         self.warm_up_epoches = warm_up_epoches
 51 |         self.WarmUp = 0
 52 |         self.step_n = 0
 53 |     
 54 |     def on_epoch_begin(self, epoch, logs=None):
 55 |         if(epoch<self.warm_up_epoches):
 56 |             self.WarmUp=1
 57 |             self.step_n = epoch*(self.steps_per_epoch)
 58 |         else:
 59 |             self.WarmUp = 0
 60 |             self.step_n = (epoch-self.warm_up_epoches)*(self.steps_per_epoch)
 61 |         
 62 | 
 63 |     def on_batch_begin(self, batch, logs=None):
 64 |         if (self.WarmUp):
 65 |             lr = (batch+self.step_n)*(self.base_lr)/(self.steps_per_epoch*self.warm_up_epoches)
 66 |             K.set_value(self.model.optimizer.lr, lr)
 67 |         else:
 68 |             lr = 0.5 * self.base_lr * ( 1 + math.cos( math.pi * (batch + self.step_n)/(self.steps_per_epoch*(EPOCHES - self.warm_up_epoches))))
 69 |             K.set_value(self.model.optimizer.lr, lr)
 70 | 
 71 | def residual_block(x, filters, increase=False):
 72 |     stride = (1, 1)
 73 |     if increase:
 74 |         stride = (2, 2)
 75 | 
 76 |     o1 = Activation('relu')(BatchNormalization(momentum=0.9, epsilon=1e-5)(x))
 77 |     conv_1 = Conv2D(filters, kernel_size=(3, 3), strides=stride, padding='same',
 78 |                     kernel_initializer="he_normal",
 79 |                     kernel_regularizer=regularizers.l2(WEIGHT_DECAY))(o1)
 80 |     o2 = Activation('relu')(BatchNormalization(momentum=0.9, epsilon=1e-5)(conv_1))
 81 |     conv_2 = Conv2D(filters, kernel_size=(3, 3), strides=(1, 1), padding='same',
 82 |                     kernel_initializer="he_normal",
 83 |                     kernel_regularizer=regularizers.l2(WEIGHT_DECAY))(o2)
 84 |     if increase:
 85 |         projection = Conv2D(filters, kernel_size=(1, 1), strides=(2, 2), padding='same',
 86 |                             kernel_initializer="he_normal",
 87 |                             kernel_regularizer=regularizers.l2(WEIGHT_DECAY))(o1)
 88 |         block = add([conv_2, projection])
 89 |     else:
 90 |         block = add([conv_2, x])
 91 |     return block
 92 | 
 93 | 
 94 | def residual_network(input_tensor, stack_num = 5):
 95 |     x = Conv2D(filters=16, kernel_size=(3, 3), strides=(1, 1), padding='same',
 96 |                kernel_initializer="he_normal",
 97 |                kernel_regularizer=regularizers.l2(WEIGHT_DECAY))(input_tensor)
 98 | 
 99 |     for _ in range(stack_num):
100 |         x = residual_block(x, 16, False)
101 | 
102 |     # input: 32x32x16 output: 16x16x32
103 |     x = residual_block(x, 32, True)
104 |     for _ in range(1, stack_num):
105 |         x = residual_block(x, 32, False)
106 | 
107 |     # input: 16x16x32 output: 8x8x64
108 |     x = residual_block(x, 64, True)
109 |     for _ in range(1, stack_num):
110 |         x = residual_block(x, 64, False)
111 | 
112 |     x = BatchNormalization(momentum=0.9, epsilon=1e-5)(x)
113 |     x = Activation('relu')(x)
114 |     x = GlobalAveragePooling2D()(x)
115 | 
116 |     # input: 64 output: 10
117 |     x = Dense(10, activation='softmax', kernel_initializer="he_normal",
118 |               kernel_regularizer=regularizers.l2(WEIGHT_DECAY))(x)
119 |     return x
120 | 
121 | def color_preprocessing(x_train, x_test):
122 |     x_train = x_train.astype('float32')
123 |     x_test = x_test.astype('float32')
124 |     mean = [125.307, 122.95, 113.865]
125 |     std = [62.9932, 62.0887, 66.7048]
126 |     for i in range(3):
127 |         x_train[:, :, :, i] = (x_train[:, :, :, i] - mean[i]) / std[i]
128 |         x_test[:, :, :, i] = (x_test[:, :, :, i] - mean[i]) / std[i]
129 |     return x_train, x_test
130 | 
131 | 
132 | if __name__ == '__main__':
133 | 
134 |     (x_train, y_train),(x_test,y_test) = cifar10.load_data()
135 |     y_train = keras.utils.to_categorical(y_train, 10)
136 |     y_test = keras.utils.to_categorical(y_test, 10)
137 | 
138 |     x_train, x_test = color_preprocessing(x_train, x_test)
139 |     img_input = Input(shape=(HEIGHT,WIDTH,3))     #channel_last
140 |     output = residual_network(img_input, STACK_NUM)
141 |     resnet = Model(img_input, output)
142 |     #other optimizers may achieve better performance
143 |     sgd = optimizers.SGD(lr=0.1, momentum=0.9, nesterov=True)     #according the paper
144 |     resnet.compile(loss='categorical_crossentropy',optimizer=sgd, metrics=['accuracy'])
145 |     checkpoint = ModelCheckpoint(FILE_PATH, monitor='val_acc', verbose=1, save_best_only=True,mode='max')
146 |     cbks = [cosine(base_lr = BASE_LR, steps_per_epoch = ITERATIONS,warm_up_epoches = 5),checkpoint]
147 |     datagen = ImageDataGenerator(horizontal_flip=True,
148 |                                  width_shift_range=0.125,
149 |                                  height_shift_range=0.125,
150 |                                  fill_mode='constant', cval=0.)
151 |     datagen.fit(x_train)
152 | 
153 |     resnet.fit_generator(datagen.flow(x_train,y_train,batch_size=BATCH_SIZE),
154 |                          steps_per_epoch=ITERATIONS,
155 |                          epochs=EPOCHES,
156 |                          callbacks=cbks,
157 |                          validation_data=(x_test,y_test))
158 | 
159 | 
160 |     resnet.save("resnet_baseline.h5")
161 | 
162 | 
163 | 
164 | 
165 | 
166 | 


--------------------------------------------------------------------------------