└── init_deepnn_fig3.py

/init_deepnn_fig3.py:
--------------------------------------------------------------------------------
# Code to replicate Figure 3 in "On Weight Initialization in Deep Neural Networks"
# by Siddharth Krishna Kumar. The code uses the Python packages keras, numpy and
# matplotlib. It makes no effort to optimize the runs; the point is merely to show
# that the initialization scheme proposed in the paper starts converging much
# faster than the Xavier initialization.
import keras
from keras.models import Sequential
from keras.initializers import VarianceScaling
from keras.datasets import cifar10
from keras.layers import Dense, Flatten, Conv2D, MaxPooling2D
from keras import metrics
import numpy as np
import matplotlib.pyplot as plt

# Set defaults for the batch size and the optimizer; CIFAR-10 has 10 classes.
batch_size = 32
num_classes = 10
epochs = 10
opt = keras.optimizers.RMSprop(lr=0.0001, decay=1e-6)

(x_train, y_train), (x_test, y_test) = cifar10.load_data()

print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# Scale pixel values to [0, 1].
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255

# Convert class vectors to binary class matrices.
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

def custom_initializer(factor):
    '''An initializer taking a custom scaling factor. Based on the analysis in
    the paper, the scaling factor for a neural network with sigmoid activations
    should be approximately 3.6.

    INPUT: scaling factor to be used in the weight initialization
    OUTPUT: an initializer to be fed to Keras
    '''
    return VarianceScaling(scale=factor,
                           mode='fan_in',
                           distribution='normal',
                           seed=None)

def model_for_testing(activation_fn='sigmoid', initializer='glorot_normal'):
    '''Neural network with the architecture described in Figure 2 of the paper.
    INPUTS: valid Keras activation function and initializer
    OUTPUT: Keras model
    '''
    model = Sequential()
    model.add(Conv2D(32, (3, 3), kernel_initializer=initializer, padding='same',
                     activation=activation_fn, input_shape=x_train.shape[1:]))
    model.add(Conv2D(32, (3, 3), kernel_initializer=initializer, padding='same',
                     activation=activation_fn))
    model.add(Conv2D(32, (3, 3), kernel_initializer=initializer, padding='same',
                     activation=activation_fn))
    model.add(MaxPooling2D((2, 2), strides=(2, 2)))
    model.add(Conv2D(64, (3, 3), kernel_initializer=initializer, padding='same',
                     activation=activation_fn))
    model.add(Conv2D(64, (3, 3), kernel_initializer=initializer, padding='same',
                     activation=activation_fn))
    model.add(Conv2D(64, (3, 3), kernel_initializer=initializer, padding='same',
                     activation=activation_fn))
    model.add(MaxPooling2D((2, 2), strides=(2, 2)))
    model.add(Conv2D(128, (3, 3), kernel_initializer=initializer, padding='same',
                     activation=activation_fn))
    model.add(Conv2D(128, (3, 3), kernel_initializer=initializer, padding='same',
                     activation=activation_fn))
    model.add(Conv2D(128, (3, 3), kernel_initializer=initializer, padding='same',
                     activation=activation_fn))
    model.add(MaxPooling2D((2, 2), strides=(2, 2)))
    model.add(Flatten())
    model.add(Dense(num_classes, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer=opt,
                  metrics=[metrics.top_k_categorical_accuracy, 'accuracy'])
    return model

# Run the model with the Xavier initialization.
glorot_model = model_for_testing(activation_fn='sigmoid', initializer='glorot_normal')
sigmoid_results_glorot = glorot_model.fit(x_train, y_train,
                                          batch_size=batch_size,
                                          epochs=epochs,
                                          validation_data=(x_test, y_test),
                                          shuffle=True)

# Run the model with the initializer proposed in the paper.
sid_initializer_exact = custom_initializer(3.6)
sid_model = model_for_testing(activation_fn='sigmoid', initializer=sid_initializer_exact)
sigmoid_results_sid = sid_model.fit(x_train, y_train,
                                    batch_size=batch_size,
                                    epochs=epochs,
                                    validation_data=(x_test, y_test),
                                    shuffle=True)

# Plot the top-5 accuracy per epoch for both initializations.
X = np.arange(1, epochs + 1)
plt.figure(figsize=(10, 6), dpi=80)
plt.xlabel('Epoch Number', fontsize=18)
plt.ylabel('Top 5 Categorical Accuracy', fontsize=18)
plt.tick_params(labelsize=20)
plt.plot(X, sigmoid_results_sid.history['top_k_categorical_accuracy'], color="red", marker="o",
         markersize=10, label="Recommended Initialization")
plt.plot(X, sigmoid_results_glorot.history['top_k_categorical_accuracy'], color="blue", marker="o",
         markersize=10, label="Xavier Initialization")
plt.legend(loc='upper left')
plt.show()
--------------------------------------------------------------------------------
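
For reference, a minimal standalone sketch (not part of the original script) of what the two initializations mean for the first conv layer's weights. It assumes Keras's documented conventions: VarianceScaling with mode='fan_in' and distribution='normal' draws from a truncated normal with stddev = sqrt(scale / fan_in), while glorot_normal uses stddev = sqrt(2 / (fan_in + fan_out)).

# Sketch: weight standard deviations implied by the two initializers for the
# first Conv2D layer above (3x3 kernel, 3 input channels, 32 filters).
# Assumes Keras's conventions: VarianceScaling(mode='fan_in',
# distribution='normal') -> stddev = sqrt(scale / fan_in);
# glorot_normal -> stddev = sqrt(2 / (fan_in + fan_out)).
import numpy as np

fan_in = 3 * 3 * 3    # kernel height * kernel width * input channels
fan_out = 3 * 3 * 32  # kernel height * kernel width * output filters

recommended_std = np.sqrt(3.6 / fan_in)         # paper's factor of ~3.6
glorot_std = np.sqrt(2.0 / (fan_in + fan_out))  # Xavier/Glorot

print('recommended init stddev: %.3f' % recommended_std)  # ~0.365
print('glorot init stddev:      %.3f' % glorot_std)       # ~0.080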