├── paper.pdf ├── README.md ├── mixup_generator.py ├── baseline_resnet.py ├── WarmUp_LR.py ├── cosine_epoch.py ├── mixup.py └── cosine_batch.py /paper.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sherdencooper/tricks-in-deeplearning/HEAD/paper.pdf -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # tricks-in-deeplearning 2 | 3 | This repo is part work of https://github.com/SJTU-DL-lab/Bag_of_Tricks_CNN and the pytorch implementation achieves about 96.6% acc. 4 | 5 | Using different tricks to improve performance of resetnet by Keras 6 | 7 | Paper:https://arxiv.org/abs/1812.01187 8 | 9 | Resnet model and other CNN models implemented by keras can be found from: https://github.com/BIGBALLON/cifar-10-cnn 10 | 11 | Train baseline Resnet32 model (done) accuracy: 91.64% 12 | 13 | Adding warmup LR (done) accracy:92.32%(+0.68%) 14 | 15 | Adding cosine decay(done) accuracy:93.01%(+0.69%) 16 | 17 | Adding cosine decay based on batch (done). But it does not improve for accuracy:92.93% 18 | 19 | Adding mixup(done) accuracy:94.10%(+1.09%) 20 | 21 | I tried label smoothing but it does not improve. According to https://www.researchgate.net/publication/327004087_Empirical_study_on_label_smoothing_in_neural_networks, label smoothing is not suitable for cifar 10. 22 | 23 | Using smaller batch size :accuracy 94.38%(+0.28%) 24 | 25 | Using resnet110: accuracy 95.21%(+0.83%) 26 | -------------------------------------------------------------------------------- /mixup_generator.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class MixupGenerator(): 5 | def __init__(self, X_train, y_train, batch_size=32, alpha=0.2, shuffle=True, datagen=None): 6 | self.X_train = X_train 7 | self.y_train = y_train 8 | self.batch_size = batch_size 9 | self.alpha = alpha 10 | self.shuffle = shuffle 11 | self.sample_num = len(X_train) 12 | self.datagen = datagen 13 | 14 | def __call__(self): 15 | while True: 16 | indexes = self.__get_exploration_order() 17 | itr_num = int(len(indexes) // (self.batch_size * 2)) 18 | 19 | for i in range(itr_num): 20 | batch_ids = indexes[i * self.batch_size * 2:(i + 1) * self.batch_size * 2] 21 | X, y = self.__data_generation(batch_ids) 22 | 23 | yield X, y 24 | 25 | def __get_exploration_order(self): 26 | indexes = np.arange(self.sample_num) 27 | 28 | if self.shuffle: 29 | np.random.shuffle(indexes) 30 | 31 | return indexes 32 | 33 | def __data_generation(self, batch_ids): 34 | _, h, w, c = self.X_train.shape 35 | l = np.random.beta(self.alpha, self.alpha, self.batch_size) 36 | X_l = l.reshape(self.batch_size, 1, 1, 1) 37 | y_l = l.reshape(self.batch_size, 1) 38 | 39 | X1 = self.X_train[batch_ids[:self.batch_size]] 40 | X2 = self.X_train[batch_ids[self.batch_size:]] 41 | X = X1 * X_l + X2 * (1 - X_l) 42 | 43 | if self.datagen: 44 | for i in range(self.batch_size): 45 | X[i] = self.datagen.random_transform(X[i]) 46 | X[i] = self.datagen.standardize(X[i]) 47 | 48 | if isinstance(self.y_train, list): 49 | y = [] 50 | 51 | for y_train_ in self.y_train: 52 | y1 = y_train_[batch_ids[:self.batch_size]] 53 | y2 = y_train_[batch_ids[self.batch_size:]] 54 | y.append(y1 * y_l + y2 * (1 - y_l)) 55 | else: 56 | y1 = self.y_train[batch_ids[:self.batch_size]] 57 | y2 = self.y_train[batch_ids[self.batch_size:]] 58 | y = y1 * y_l + y2 * (1 - y_l) 59 | 60 | return X, y 61 | -------------------------------------------------------------------------------- /baseline_resnet.py: -------------------------------------------------------------------------------- 1 | import keras 2 | import numpy as np 3 | from keras.layers import Dense 4 | from keras.layers import Input 5 | from keras.layers import Conv2D 6 | from keras.layers import add 7 | from keras.layers import Activation 8 | from keras.layers import GlobalAveragePooling2D 9 | from keras.models import Model 10 | from keras.datasets import cifar10 11 | from keras.callbacks import LearningRateScheduler, ModelCheckpoint 12 | from keras import optimizers 13 | from keras.preprocessing.image import ImageDataGenerator 14 | from keras import regularizers 15 | from keras.layers import BatchNormalization 16 | import tensorflow as tf 17 | from keras.backend.tensorflow_backend import set_session 18 | 19 | ''' 20 | Tensorflow backend default 21 | If you want to use th backend: 22 | from keras import backend as K 23 | ''' 24 | 25 | config = tf.ConfigProto() #using GPU 26 | config.gpu_options.allow_growth = True 27 | sess = tf.Session(config=config) 28 | 29 | STACK_NUM = 5 30 | NUM_CLASSES = 10 31 | WIDTH = 32 32 | HEIGHT = 32 33 | BATCH_SIZE = 256 #decrease the number if you run out your GPU memory 34 | EPOCHES = 200 35 | ITERATIONS = 50000//BATCH_SIZE +1 36 | WEIGHT_DECAY = 1e-4 #according to the paper 37 | FILE_PATH = "/cluster/home/it_stu25/dllab/model/best_model.h5" 38 | 39 | def residual_block(x, filters, increase=False): 40 | stride = (1, 1) 41 | if increase: 42 | stride = (2, 2) 43 | 44 | o1 = Activation('relu')(BatchNormalization(momentum=0.9, epsilon=1e-5)(x)) 45 | conv_1 = Conv2D(filters, kernel_size=(3, 3), strides=stride, padding='same', 46 | kernel_initializer="he_normal", 47 | kernel_regularizer=regularizers.l2(WEIGHT_DECAY))(o1) 48 | o2 = Activation('relu')(BatchNormalization(momentum=0.9, epsilon=1e-5)(conv_1)) 49 | conv_2 = Conv2D(filters, kernel_size=(3, 3), strides=(1, 1), padding='same', 50 | kernel_initializer="he_normal", 51 | kernel_regularizer=regularizers.l2(WEIGHT_DECAY))(o2) 52 | if increase: 53 | projection = Conv2D(filters, kernel_size=(1, 1), strides=(2, 2), padding='same', 54 | kernel_initializer="he_normal", 55 | kernel_regularizer=regularizers.l2(WEIGHT_DECAY))(o1) 56 | block = add([conv_2, projection]) 57 | else: 58 | block = add([conv_2, x]) 59 | return block 60 | 61 | 62 | def residual_network(input_tensor, stack_num = 5): 63 | x = Conv2D(filters=16, kernel_size=(3, 3), strides=(1, 1), padding='same', 64 | kernel_initializer="he_normal", 65 | kernel_regularizer=regularizers.l2(WEIGHT_DECAY))(input_tensor) 66 | 67 | for _ in range(stack_num): 68 | x = residual_block(x, 16, False) 69 | 70 | # input: 32x32x16 output: 16x16x32 71 | x = residual_block(x, 32, True) 72 | for _ in range(1, stack_num): 73 | x = residual_block(x, 32, False) 74 | 75 | # input: 16x16x32 output: 8x8x64 76 | x = residual_block(x, 64, True) 77 | for _ in range(1, stack_num): 78 | x = residual_block(x, 64, False) 79 | 80 | x = BatchNormalization(momentum=0.9, epsilon=1e-5)(x) 81 | x = Activation('relu')(x) 82 | x = GlobalAveragePooling2D()(x) 83 | 84 | # input: 64 output: 10 85 | x = Dense(10, activation='softmax', kernel_initializer="he_normal", 86 | kernel_regularizer=regularizers.l2(WEIGHT_DECAY))(x) 87 | return x 88 | 89 | def color_preprocessing(x_train, x_test): 90 | x_train = x_train.astype('float32') 91 | x_test = x_test.astype('float32') 92 | mean = [125.307, 122.95, 113.865] 93 | std = [62.9932, 62.0887, 66.7048] 94 | for i in range(3): 95 | x_train[:, :, :, i] = (x_train[:, :, :, i] - mean[i]) / std[i] 96 | x_test[:, :, :, i] = (x_test[:, :, :, i] - mean[i]) / std[i] 97 | return x_train, x_test 98 | 99 | def scheduler(epoch): 100 | if epoch < 81: 101 | return 0.1 102 | if epoch < 122: 103 | return 0.01 104 | return 0.001 105 | 106 | 107 | if __name__ == '__main__': 108 | 109 | (x_train, y_train),(x_test,y_test) = cifar10.load_data() 110 | y_train = keras.utils.to_categorical(y_train, 10) 111 | y_test = keras.utils.to_categorical(y_test, 10) 112 | 113 | x_train, x_test = color_preprocessing(x_train, x_test) 114 | img_input = Input(shape=(HEIGHT,WIDTH,3)) #channel_last 115 | output = residual_network(img_input, STACK_NUM) 116 | resnet = Model(img_input, output) 117 | #other optimizers may achieve better performance 118 | sgd = optimizers.SGD(lr=0.1, momentum=0.9, nesterov=True) #according the paper 119 | resnet.compile(loss='categorical_crossentropy',optimizer=sgd, metrics=['accuracy']) 120 | checkpoint = ModelCheckpoint(FILE_PATH, monitor='val_acc', verbose=1, save_best_only=True,mode='max') 121 | cbks = [LearningRateScheduler(schedule=scheduler),checkpoint] 122 | datagen = ImageDataGenerator(horizontal_flip=True, 123 | width_shift_range=0.125, 124 | height_shift_range=0.125, 125 | fill_mode='constant', cval=0.) 126 | datagen.fit(x_train) 127 | 128 | resnet.fit_generator(datagen.flow(x_train,y_train,batch_size=BATCH_SIZE), 129 | steps_per_epoch=ITERATIONS,shuffle=1, 130 | epochs=EPOCHES, 131 | callbacks=cbks, 132 | validation_data=(x_test,y_test)) 133 | 134 | 135 | 136 | 137 | 138 | -------------------------------------------------------------------------------- /WarmUp_LR.py: -------------------------------------------------------------------------------- 1 | import keras 2 | import numpy as np 3 | from keras.layers import Dense 4 | from keras.layers import Input 5 | from keras.layers import Conv2D 6 | from keras.layers import add 7 | from keras.layers import Activation 8 | from keras.layers import GlobalAveragePooling2D 9 | from keras.models import Model 10 | from keras.datasets import cifar10 11 | from keras.callbacks import LearningRateScheduler, ModelCheckpoint 12 | from keras import optimizers 13 | from keras.preprocessing.image import ImageDataGenerator 14 | from keras import regularizers 15 | from keras.layers import BatchNormalization 16 | import tensorflow as tf 17 | from keras.backend.tensorflow_backend import set_session 18 | 19 | ''' 20 | Tensorflow backend default 21 | If you want to use th backend: 22 | from keras import backend as K 23 | ''' 24 | 25 | config = tf.ConfigProto() #using GPU 26 | config.gpu_options.allow_growth = True 27 | sess = tf.Session(config=config) 28 | 29 | STACK_NUM = 5 30 | BASE_LR = 0.1 31 | NUM_CLASSES = 10 32 | WIDTH = 32 33 | HEIGHT = 32 34 | BATCH_SIZE = 256 #decrease the number if you run out your GPU memory 35 | EPOCHES = 200 36 | ITERATIONS = 50000//BATCH_SIZE +1 37 | WEIGHT_DECAY = 1e-4 #according to the paper 38 | FILE_PATH = "/cluster/home/it_stu25/dllab/model/best_model.h5" 39 | 40 | def residual_block(x, filters, increase=False): 41 | stride = (1, 1) 42 | if increase: 43 | stride = (2, 2) 44 | 45 | o1 = Activation('relu')(BatchNormalization(momentum=0.9, epsilon=1e-5)(x)) 46 | conv_1 = Conv2D(filters, kernel_size=(3, 3), strides=stride, padding='same', 47 | kernel_initializer="he_normal", 48 | kernel_regularizer=regularizers.l2(WEIGHT_DECAY))(o1) 49 | o2 = Activation('relu')(BatchNormalization(momentum=0.9, epsilon=1e-5)(conv_1)) 50 | conv_2 = Conv2D(filters, kernel_size=(3, 3), strides=(1, 1), padding='same', 51 | kernel_initializer="he_normal", 52 | kernel_regularizer=regularizers.l2(WEIGHT_DECAY))(o2) 53 | if increase: 54 | projection = Conv2D(filters, kernel_size=(1, 1), strides=(2, 2), padding='same', 55 | kernel_initializer="he_normal", 56 | kernel_regularizer=regularizers.l2(WEIGHT_DECAY))(o1) 57 | block = add([conv_2, projection]) 58 | else: 59 | block = add([conv_2, x]) 60 | return block 61 | 62 | 63 | def residual_network(input_tensor, stack_num = 5): 64 | x = Conv2D(filters=16, kernel_size=(3, 3), strides=(1, 1), padding='same', 65 | kernel_initializer="he_normal", 66 | kernel_regularizer=regularizers.l2(WEIGHT_DECAY))(input_tensor) 67 | 68 | for _ in range(stack_num): 69 | x = residual_block(x, 16, False) 70 | 71 | # input: 32x32x16 output: 16x16x32 72 | x = residual_block(x, 32, True) 73 | for _ in range(1, stack_num): 74 | x = residual_block(x, 32, False) 75 | 76 | # input: 16x16x32 output: 8x8x64 77 | x = residual_block(x, 64, True) 78 | for _ in range(1, stack_num): 79 | x = residual_block(x, 64, False) 80 | 81 | x = BatchNormalization(momentum=0.9, epsilon=1e-5)(x) 82 | x = Activation('relu')(x) 83 | x = GlobalAveragePooling2D()(x) 84 | 85 | # input: 64 output: 10 86 | x = Dense(10, activation='softmax', kernel_initializer="he_normal", 87 | kernel_regularizer=regularizers.l2(WEIGHT_DECAY))(x) 88 | return x 89 | 90 | def color_preprocessing(x_train, x_test): 91 | x_train = x_train.astype('float32') 92 | x_test = x_test.astype('float32') 93 | mean = [125.307, 122.95, 113.865] 94 | std = [62.9932, 62.0887, 66.7048] 95 | for i in range(3): 96 | x_train[:, :, :, i] = (x_train[:, :, :, i] - mean[i]) / std[i] 97 | x_test[:, :, :, i] = (x_test[:, :, :, i] - mean[i]) / std[i] 98 | return x_train, x_test 99 | 100 | def scheduler(epoch): 101 | if epoch<5: 102 | return BASE_LR*(epoch+1)/5 103 | if epoch < 81: 104 | return 0.1 105 | if epoch < 122: 106 | return 0.01 107 | return 0.001 108 | 109 | 110 | if __name__ == '__main__': 111 | 112 | (x_train, y_train),(x_test,y_test) = cifar10.load_data() 113 | y_train = keras.utils.to_categorical(y_train, 10) 114 | y_test = keras.utils.to_categorical(y_test, 10) 115 | 116 | x_train, x_test = color_preprocessing(x_train, x_test) 117 | img_input = Input(shape=(HEIGHT,WIDTH,3)) #channel_last 118 | output = residual_network(img_input, STACK_NUM) 119 | resnet = Model(img_input, output) 120 | #other optimizers may achieve better performance 121 | sgd = optimizers.SGD(lr=0.1, momentum=0.9, nesterov=True) #according the paper 122 | resnet.compile(loss='categorical_crossentropy',optimizer=sgd, metrics=['accuracy']) 123 | checkpoint = ModelCheckpoint(FILE_PATH, monitor='val_acc', verbose=1, save_best_only=True,mode='max') 124 | cbks = [LearningRateScheduler(schedule=scheduler),checkpoint] 125 | datagen = ImageDataGenerator(horizontal_flip=True, 126 | width_shift_range=0.125, 127 | height_shift_range=0.125, 128 | fill_mode='constant', cval=0.) 129 | datagen.fit(x_train) 130 | 131 | resnet.fit_generator(datagen.flow(x_train,y_train,batch_size=BATCH_SIZE), 132 | steps_per_epoch=ITERATIONS,shuffle=1, 133 | epochs=EPOCHES, 134 | callbacks=cbks, 135 | validation_data=(x_test,y_test)) 136 | 137 | 138 | 139 | 140 | -------------------------------------------------------------------------------- /cosine_epoch.py: -------------------------------------------------------------------------------- 1 | import keras 2 | import numpy as np 3 | from keras.layers import Dense 4 | import math 5 | from keras.layers import Input 6 | from keras.layers import Conv2D 7 | from keras.layers import add 8 | from keras.layers import Activation 9 | from keras.layers import GlobalAveragePooling2D 10 | from keras.models import Model 11 | from keras.datasets import cifar10 12 | from keras.callbacks import LearningRateScheduler, ModelCheckpoint,TensorBoard 13 | from keras import optimizers 14 | from keras.preprocessing.image import ImageDataGenerator 15 | from keras import regularizers 16 | from keras.layers import BatchNormalization 17 | import tensorflow as tf 18 | from keras.backend.tensorflow_backend import set_session 19 | 20 | ''' 21 | Tensorflow backend default 22 | If you want to use th backend: 23 | from keras import backend as K 24 | ''' 25 | 26 | config = tf.ConfigProto() #using GPU 27 | config.gpu_options.allow_growth = True 28 | sess = tf.Session(config=config) 29 | 30 | STACK_NUM = 5 31 | BASE_LR = 0.1 32 | NUM_CLASSES = 10 33 | WIDTH = 32 34 | HEIGHT = 32 35 | BATCH_SIZE = 256 #decrease the number if you run out your GPU memory 36 | EPOCHES = 500 37 | ITERATIONS = 50000//BATCH_SIZE +1 38 | WEIGHT_DECAY = 1e-4 #according to the paper 39 | FILE_PATH = "/cluster/home/it_stu25/dllab/model/best_model.h5" 40 | TENSORBOARD_PATH = '/cluster/home/it_stu25/dllab/log/' 41 | 42 | def residual_block(x, filters, increase=False): 43 | stride = (1, 1) 44 | if increase: 45 | stride = (2, 2) 46 | 47 | o1 = Activation('relu')(BatchNormalization(momentum=0.9, epsilon=1e-5)(x)) 48 | conv_1 = Conv2D(filters, kernel_size=(3, 3), strides=stride, padding='same', 49 | kernel_initializer="he_normal", 50 | kernel_regularizer=regularizers.l2(WEIGHT_DECAY))(o1) 51 | o2 = Activation('relu')(BatchNormalization(momentum=0.9, epsilon=1e-5)(conv_1)) 52 | conv_2 = Conv2D(filters, kernel_size=(3, 3), strides=(1, 1), padding='same', 53 | kernel_initializer="he_normal", 54 | kernel_regularizer=regularizers.l2(WEIGHT_DECAY))(o2) 55 | if increase: 56 | projection = Conv2D(filters, kernel_size=(1, 1), strides=(2, 2), padding='same', 57 | kernel_initializer="he_normal", 58 | kernel_regularizer=regularizers.l2(WEIGHT_DECAY))(o1) 59 | block = add([conv_2, projection]) 60 | else: 61 | block = add([conv_2, x]) 62 | return block 63 | 64 | 65 | def residual_network(input_tensor, stack_num = 5): 66 | x = Conv2D(filters=16, kernel_size=(3, 3), strides=(1, 1), padding='same', 67 | kernel_initializer="he_normal", 68 | kernel_regularizer=regularizers.l2(WEIGHT_DECAY))(input_tensor) 69 | 70 | for _ in range(stack_num): 71 | x = residual_block(x, 16, False) 72 | 73 | # input: 32x32x16 output: 16x16x32 74 | x = residual_block(x, 32, True) 75 | for _ in range(1, stack_num): 76 | x = residual_block(x, 32, False) 77 | 78 | # input: 16x16x32 output: 8x8x64 79 | x = residual_block(x, 64, True) 80 | for _ in range(1, stack_num): 81 | x = residual_block(x, 64, False) 82 | 83 | x = BatchNormalization(momentum=0.9, epsilon=1e-5)(x) 84 | x = Activation('relu')(x) 85 | x = GlobalAveragePooling2D()(x) 86 | 87 | # input: 64 output: 10 88 | x = Dense(10, activation='softmax', kernel_initializer="he_normal", 89 | kernel_regularizer=regularizers.l2(WEIGHT_DECAY))(x) 90 | return x 91 | 92 | def color_preprocessing(x_train, x_test): 93 | x_train = x_train.astype('float32') 94 | x_test = x_test.astype('float32') 95 | mean = [125.307, 122.95, 113.865] 96 | std = [62.9932, 62.0887, 66.7048] 97 | for i in range(3): 98 | x_train[:, :, :, i] = (x_train[:, :, :, i] - mean[i]) / std[i] 99 | x_test[:, :, :, i] = (x_test[:, :, :, i] - mean[i]) / std[i] 100 | return x_train, x_test 101 | 102 | def scheduler(epoch): 103 | if epoch<5: 104 | return BASE_LR*(epoch+1)/5 105 | return 0.5 * BASE_LR * (1 + math.cos(math.pi * (epoch-5)/(EPOCHES-5))) 106 | 107 | 108 | if __name__ == '__main__': 109 | 110 | (x_train, y_train),(x_test,y_test) = cifar10.load_data() 111 | y_train = keras.utils.to_categorical(y_train, 10) 112 | y_test = keras.utils.to_categorical(y_test, 10) 113 | 114 | x_train, x_test = color_preprocessing(x_train, x_test) 115 | img_input = Input(shape=(HEIGHT,WIDTH,3)) #channel_last 116 | output = residual_network(img_input, STACK_NUM) 117 | resnet = Model(img_input, output) 118 | #other optimizers may achieve better performance 119 | sgd = optimizers.SGD(lr=0.1, momentum=0.9, nesterov=True) #according the paper 120 | resnet.compile(loss='categorical_crossentropy',optimizer=sgd, metrics=['accuracy']) 121 | checkpoint = ModelCheckpoint(FILE_PATH, monitor='val_acc', verbose=1, save_best_only=True,mode='max') 122 | cbks = [LearningRateScheduler(schedule=scheduler),checkpoint,TensorBoard(log_dir=TENSORBOARD_PATH)] 123 | datagen = ImageDataGenerator(horizontal_flip=True, 124 | width_shift_range=0.125, 125 | height_shift_range=0.125, 126 | fill_mode='constant', cval=0.) 127 | datagen.fit(x_train) 128 | 129 | resnet.fit_generator(datagen.flow(x_train,y_train,batch_size=BATCH_SIZE), 130 | steps_per_epoch=ITERATIONS,shuffle=1, 131 | epochs=EPOCHES, 132 | callbacks=cbks, 133 | validation_data=(x_test,y_test)) 134 | 135 | 136 | 137 | 138 | -------------------------------------------------------------------------------- /mixup.py: -------------------------------------------------------------------------------- 1 | import keras 2 | import numpy as np 3 | from keras.layers import Dense 4 | import math 5 | from keras.layers import Input 6 | from keras.layers import Conv2D 7 | from keras.layers import add 8 | from keras.layers import Activation 9 | from keras.layers import GlobalAveragePooling2D 10 | from keras.models import Model 11 | from keras.datasets import cifar10 12 | from keras.callbacks import LearningRateScheduler, ModelCheckpoint,TensorBoard 13 | from keras import optimizers 14 | from keras.preprocessing.image import ImageDataGenerator 15 | from keras import regularizers 16 | from keras.layers import BatchNormalization 17 | import tensorflow as tf 18 | from keras.backend.tensorflow_backend import set_session 19 | from mixup_generator import MixupGenerator 20 | 21 | ''' 22 | Tensorflow backend default 23 | If you want to use th backend: 24 | from keras import backend as K 25 | ''' 26 | 27 | config = tf.ConfigProto() #using GPU 28 | config.gpu_options.allow_growth = True 29 | sess = tf.Session(config=config) 30 | 31 | STACK_NUM = 5 32 | BASE_LR = 0.1 33 | NUM_CLASSES = 10 34 | WIDTH = 32 35 | HEIGHT = 32 36 | BATCH_SIZE = 128 #decrease the number if you run out your GPU memory 37 | EPOCHES = 500 38 | ITERATIONS = 50000//BATCH_SIZE +1 39 | WEIGHT_DECAY = 1e-4 #according to the paper 40 | FILE_PATH = "/cluster/home/it_stu27/dllab/model/best_model.h5" 41 | TENSORBOARD_PATH = '/cluster/home/it_stu27/dllab/log/' 42 | 43 | def residual_block(x, filters, increase=False): 44 | stride = (1, 1) 45 | if increase: 46 | stride = (2, 2) 47 | 48 | o1 = Activation('relu')(BatchNormalization(momentum=0.9, epsilon=1e-5)(x)) 49 | conv_1 = Conv2D(filters, kernel_size=(3, 3), strides=stride, padding='same', 50 | kernel_initializer="he_normal", 51 | kernel_regularizer=regularizers.l2(WEIGHT_DECAY))(o1) 52 | o2 = Activation('relu')(BatchNormalization(momentum=0.9, epsilon=1e-5)(conv_1)) 53 | conv_2 = Conv2D(filters, kernel_size=(3, 3), strides=(1, 1), padding='same', 54 | kernel_initializer="he_normal", 55 | kernel_regularizer=regularizers.l2(WEIGHT_DECAY))(o2) 56 | if increase: 57 | projection = Conv2D(filters, kernel_size=(1, 1), strides=(2, 2), padding='same', 58 | kernel_initializer="he_normal", 59 | kernel_regularizer=regularizers.l2(WEIGHT_DECAY))(o1) 60 | block = add([conv_2, projection]) 61 | else: 62 | block = add([conv_2, x]) 63 | return block 64 | 65 | 66 | def residual_network(input_tensor, stack_num = 5): 67 | x = Conv2D(filters=16, kernel_size=(3, 3), strides=(1, 1), padding='same', 68 | kernel_initializer="he_normal", 69 | kernel_regularizer=regularizers.l2(WEIGHT_DECAY))(input_tensor) 70 | 71 | for _ in range(stack_num): 72 | x = residual_block(x, 16, False) 73 | 74 | # input: 32x32x16 output: 16x16x32 75 | x = residual_block(x, 32, True) 76 | for _ in range(1, stack_num): 77 | x = residual_block(x, 32, False) 78 | 79 | # input: 16x16x32 output: 8x8x64 80 | x = residual_block(x, 64, True) 81 | for _ in range(1, stack_num): 82 | x = residual_block(x, 64, False) 83 | 84 | x = BatchNormalization(momentum=0.9, epsilon=1e-5)(x) 85 | x = Activation('relu')(x) 86 | x = GlobalAveragePooling2D()(x) 87 | 88 | # input: 64 output: 10 89 | x = Dense(10, activation='softmax', kernel_initializer="he_normal", 90 | kernel_regularizer=regularizers.l2(WEIGHT_DECAY))(x) 91 | return x 92 | 93 | def color_preprocessing(x_train, x_test): 94 | x_train = x_train.astype('float32') 95 | x_test = x_test.astype('float32') 96 | mean = [125.307, 122.95, 113.865] 97 | std = [62.9932, 62.0887, 66.7048] 98 | for i in range(3): 99 | x_train[:, :, :, i] = (x_train[:, :, :, i] - mean[i]) / std[i] 100 | x_test[:, :, :, i] = (x_test[:, :, :, i] - mean[i]) / std[i] 101 | return x_train, x_test 102 | 103 | def scheduler(epoch): 104 | if epoch<5: 105 | return BASE_LR*(epoch+1)/5 106 | return 0.5 * BASE_LR * (1 + math.cos(math.pi * (epoch-5)/(EPOCHES-5))) 107 | 108 | 109 | if __name__ == '__main__': 110 | 111 | (x_train, y_train),(x_test,y_test) = cifar10.load_data() 112 | y_train = keras.utils.to_categorical(y_train, 10) 113 | y_test = keras.utils.to_categorical(y_test, 10) 114 | 115 | x_train, x_test = color_preprocessing(x_train, x_test) 116 | img_input = Input(shape=(HEIGHT,WIDTH,3)) #channel_last 117 | output = residual_network(img_input, STACK_NUM) 118 | resnet = Model(img_input, output) 119 | #other optimizers may achieve better performance 120 | sgd = optimizers.SGD(lr=0.1, momentum=0.9, nesterov=True) #according the paper 121 | resnet.compile(loss='categorical_crossentropy',optimizer=sgd, metrics=['accuracy']) 122 | checkpoint = ModelCheckpoint(FILE_PATH, monitor='val_acc', verbose=1, save_best_only=True,mode='max') 123 | cbks = [LearningRateScheduler(schedule=scheduler),checkpoint,TensorBoard(log_dir=TENSORBOARD_PATH)] 124 | datagen = ImageDataGenerator(horizontal_flip=True, 125 | width_shift_range=0.125, 126 | height_shift_range=0.125, 127 | fill_mode='constant', cval=0.) 128 | 129 | #datagen.fit(x_train) 130 | training_generator = MixupGenerator(x_train, y_train, batch_size=BATCH_SIZE, alpha=0.2, datagen=datagen)() 131 | resnet.fit_generator(generator=training_generator, 132 | steps_per_epoch=ITERATIONS,shuffle=1, 133 | epochs=EPOCHES, 134 | callbacks=cbks, 135 | validation_data=(x_test,y_test)) 136 | 137 | 138 | 139 | 140 | -------------------------------------------------------------------------------- /cosine_batch.py: -------------------------------------------------------------------------------- 1 | import keras 2 | import numpy as np 3 | import math 4 | from keras.layers import Dense 5 | from keras.layers import Input 6 | from keras.layers import Conv2D 7 | from keras.layers import add 8 | from keras.layers import Activation 9 | from keras.layers import GlobalAveragePooling2D 10 | from keras.models import Model 11 | from keras.datasets import cifar10 12 | from keras.callbacks import Callback 13 | from keras.callbacks import LearningRateScheduler, ModelCheckpoint 14 | from keras import optimizers 15 | from keras.preprocessing.image import ImageDataGenerator 16 | from keras import regularizers 17 | from keras.layers import BatchNormalization 18 | import tensorflow as tf 19 | from keras import backend as K 20 | from keras.backend.tensorflow_backend import set_session 21 | 22 | ''' 23 | Tensorflow backend default 24 | ''' 25 | 26 | config = tf.ConfigProto() #using GPU 27 | config.gpu_options.allow_growth = True 28 | sess = tf.Session(config=config) 29 | 30 | STACK_NUM = 5 31 | NUM_CLASSES = 10 32 | WIDTH = 32 33 | HEIGHT = 32 34 | BATCH_SIZE = 256 #decrease the number if you run out your GPU memory 35 | EPOCHES = 500 36 | ITERATIONS = 50000//BATCH_SIZE +1 37 | WEIGHT_DECAY = 1e-4 #according to the paper 38 | FILE_PATH = '/cluster/home/it_stu25/dllab/model/best_model.h5' 39 | BASE_LR = 0.1 40 | 41 | class cosine(Callback): 42 | def __init__(self, 43 | base_lr, 44 | steps_per_epoch, 45 | warm_up_epoches 46 | ): 47 | super(cosine, self).__init__() 48 | self.base_lr = base_lr 49 | self.steps_per_epoch = steps_per_epoch 50 | self.warm_up_epoches = warm_up_epoches 51 | self.WarmUp = 0 52 | self.step_n = 0 53 | 54 | def on_epoch_begin(self, epoch, logs=None): 55 | if(epoch