"""
VGG-16 Implementation on Cats&Dogs Dataset

Builds the VGG-16 architecture layer by layer with Keras, trains it on an
image-folder dataset through ``ImageDataGenerator``, plots the training
curves and finally classifies one sample image with the best checkpoint.
"""

import keras
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from keras.backend.tensorflow_backend import set_session
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.layers import Conv2D, Dense, Flatten, MaxPool2D
from keras.models import Sequential, load_model
from keras.optimizers import Adam
from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator

# Spatial size every image is resized to, and the matching network input shape.
IMAGE_SIZE = (224, 224)
INPUT_SHAPE = IMAGE_SIZE + (3,)

# VGG-16 convolutional stack: (number of 3x3 conv layers, filters per layer)
# for each block. Every block is followed by one 2x2 max-pooling layer.
VGG16_BLOCKS = [(2, 64), (2, 128), (3, 256), (3, 512), (3, 512)]

CHECKPOINT_PATH = "vgg16_1.h5"

config = tf.ConfigProto()
config.gpu_options.allow_growth = True  # dynamically grow the memory used on the GPU
config.log_device_placement = True  # to log device placement (on which device the operation ran)
sess = tf.Session(config=config)
set_session(sess)

# Get the data: one sub-folder per class under each directory.
trdata = ImageDataGenerator()
traindata = trdata.flow_from_directory(directory="../Datasets/Cats&Dogs/train",
                                       target_size=IMAGE_SIZE)
tsdata = ImageDataGenerator()
testdata = tsdata.flow_from_directory(directory="../Datasets/Cats&Dogs/validation",
                                      target_size=IMAGE_SIZE)

# Generate the model
model = Sequential()
is_first_layer = True
for conv_count, filters in VGG16_BLOCKS:
    for _ in range(conv_count):
        # Only the very first layer of a Sequential model declares the input shape.
        extra_kwargs = {"input_shape": INPUT_SHAPE} if is_first_layer else {}
        model.add(Conv2D(filters=filters, kernel_size=(3, 3), padding='same',
                         activation='relu', **extra_kwargs))
        is_first_layer = False
    model.add(MaxPool2D(pool_size=(2, 2), strides=(2, 2)))

# Classifier head: flatten, two fully connected layers, softmax over 2 classes.
model.add(Flatten())
model.add(Dense(units=4096, activation='relu'))
model.add(Dense(units=4096, activation='relu'))
model.add(Dense(units=2, activation='softmax'))

# Add Optimizer and check accuracy metrics.
# The metric is requested as 'acc' (not 'accuracy') on purpose: Keras >= 2.3
# logs a metric under the exact name passed here, and the callbacks and plots
# below all read the 'acc' / 'val_acc' keys. With 'accuracy' the monitors
# would never find their metric, no checkpoint would be written, and
# hist.history["acc"] would raise KeyError.
optimizer = Adam(learning_rate=0.001)
model.compile(optimizer=optimizer, loss=keras.losses.categorical_crossentropy,
              metrics=['acc'])
# Check model summary (summary() prints by itself and returns None).
model.summary()

# Keep only the model with the best validation accuracy seen so far.
checkpoint = ModelCheckpoint(CHECKPOINT_PATH, monitor='val_acc', verbose=1, save_best_only=True,
                             save_weights_only=False, mode='auto', period=1)
# Stop once validation accuracy has not improved for 20 consecutive epochs.
earlystop = EarlyStopping(monitor='val_acc', min_delta=0, patience=20, verbose=1, mode='auto')
hist = model.fit_generator(steps_per_epoch=100, generator=traindata, validation_data=testdata,
                           validation_steps=10, epochs=100,
                           callbacks=[checkpoint, earlystop])

plt.plot(hist.history["acc"])
plt.plot(hist.history['val_acc'])
plt.plot(hist.history['loss'])
plt.plot(hist.history['val_loss'])
plt.title("model accuracy")
plt.ylabel("Accuracy")
plt.xlabel("Epoch")
plt.legend(["Accuracy", "Validation Accuracy", "loss", "Validation Loss"])
plt.show(block=True)

# Try on test data
img = image.load_img("../Datasets/Cats&Dogs/test1/39.jpg", target_size=IMAGE_SIZE)
img = np.asarray(img)
plt.imshow(img)
plt.show()  # without this the image is never rendered when run as a script
img = np.expand_dims(img, axis=0)  # add the batch dimension expected by predict()
saved_model = load_model(CHECKPOINT_PATH)
output = saved_model.predict(img)
# NOTE(review): assumes class index 0 is the cat folder and index 1 the dog
# folder (flow_from_directory orders class sub-folders alphanumerically) --
# confirm against traindata.class_indices.
if output[0][0] > output[0][1]:
    print("cat")
else:
    print('dog')
16 | 17 | ### Packages Needed 18 | 19 | ```python 20 | import numpy as np 21 | import keras 22 | import tensorflow as tf 23 | import matplotlib.pyplot as plt 24 | from keras.layers import Conv2D, Dense, Flatten, MaxPool2D 25 | from keras.models import Sequential, load_model 26 | from keras.optimizers import Adam 27 | from keras.preprocessing import image 28 | from keras.preprocessing.image import ImageDataGenerator 29 | from keras.callbacks import ModelCheckpoint, EarlyStopping 30 | from keras.backend.tensorflow_backend import set_session 31 | ``` 32 | We will be using the Sequential model, which means that all the layers of the model will be arranged in sequence. Here we 33 | have imported ImageDataGenerator from _keras.preprocessing_. The objective of ImageDataGenerator is to import data with 34 | labels easily into the model. It is a very useful class as it has many functions to rescale, rotate, zoom, flip etc. The 35 | most useful thing about this class is that it does not affect the data stored on the disk. This class alters the data on 36 | the go while passing it to the model. 37 | 38 | ### Image Data Generator 39 | 40 | Let us create an object of _ImageDataGenerator_ for both training and testing data, passing the folder which has the train 41 | data to the object _trdata_ and similarly passing the folder which has the test data to the object _tsdata_. 42 | 43 | ```python 44 | trdata = ImageDataGenerator() 45 | traindata = trdata.flow_from_directory(directory="../Datasets/Cats&Dogs/train",target_size=(224,224)) 46 | tsdata = ImageDataGenerator() 47 | testdata = tsdata.flow_from_directory(directory="../Datasets/Cats&Dogs/validation", target_size=(224,224)) 48 | ``` 49 | 50 | The ImageDataGenerator will automatically label all the data inside the cat folder as cat, and likewise for the dog folder. In 51 | this way the data is easily made ready to be passed to the neural network. 
52 | 53 | ### Model Structure 54 | ```python 55 | # Generate the model 56 | model = Sequential() 57 | # Layer 1: Convolutional 58 | model.add(Conv2D(input_shape=(224, 224, 3), filters=64, kernel_size=(3, 3), 59 | padding='same', activation='relu')) 60 | # Layer 2: Convolutional 61 | model.add(Conv2D(filters=64, kernel_size=(3,3), padding='same', activation='relu')) 62 | # Layer 3: MaxPooling 63 | model.add(MaxPool2D(pool_size=(2, 2), strides=(2, 2))) 64 | 65 | # Layer 4: Convolutional 66 | model.add(Conv2D(filters=128, kernel_size=(3,3), padding='same', activation='relu')) 67 | # Layer 5: Convolutional 68 | model.add(Conv2D(filters=128, kernel_size=(3,3), padding='same', activation='relu')) 69 | # Layer 6: MaxPooling 70 | model.add(MaxPool2D(pool_size=(2, 2), strides=(2, 2))) 71 | 72 | # Layer 7: Convolutional 73 | model.add(Conv2D(filters=256, kernel_size=(3,3), padding='same', activation='relu')) 74 | # Layer 8: Convolutional 75 | model.add(Conv2D(filters=256, kernel_size=(3,3), padding='same', activation='relu')) 76 | # Layer 9: Convolutional 77 | model.add(Conv2D(filters=256, kernel_size=(3,3), padding='same', activation='relu')) 78 | # Layer 10: MaxPooling 79 | model.add(MaxPool2D(pool_size=(2, 2), strides=(2, 2))) 80 | 81 | # Layer 11: Convolutional 82 | model.add(Conv2D(filters=512, kernel_size=(3,3), padding='same', activation='relu')) 83 | # Layer 12: Convolutional 84 | model.add(Conv2D(filters=512, kernel_size=(3,3), padding='same', activation='relu')) 85 | # Layer 13: Convolutional 86 | model.add(Conv2D(filters=512, kernel_size=(3,3), padding='same', activation='relu')) 87 | # Layer 14: MaxPooling 88 | model.add(MaxPool2D(pool_size=(2, 2), strides=(2, 2))) 89 | 90 | # Layer 15: Convolutional 91 | model.add(Conv2D(filters=512, kernel_size=(3,3), padding='same', activation='relu')) 92 | # Layer 16: Convolutional 93 | model.add(Conv2D(filters=512, kernel_size=(3,3), padding='same', activation='relu')) 94 | # Layer 17: Convolutional 95 | 
model.add(Conv2D(filters=512, kernel_size=(3,3), padding='same', activation='relu')) 96 | # Layer 18: MaxPooling 97 | model.add(MaxPool2D(pool_size=(2, 2), strides=(2, 2))) 98 | 99 | # Layer 19: Flatten 100 | model.add(Flatten()) 101 | # Layer 20: Fully Connected Layer 102 | model.add(Dense(units=4096, activation='relu')) 103 | # Layer 21: Fully Connected Layer 104 | model.add(Dense(units=4096, activation='relu')) 105 | # Layer 22: Softmax Layer 106 | model.add(Dense(units=2, activation='softmax')) 107 | ``` 108 | 109 | Here we have started with initialising the model by specifying that the model is a sequential model. 110 | After initialising the model we can then add: 111 | 1. 2 x convolution layer of 64 channels of 3x3 kernel and same padding 112 | 2. 1 x maxpool layer of 2x2 pool size and stride 2x2 113 | 3. 2 x convolution layer of 128 channels of 3x3 kernel and same padding 114 | 4. 1 x maxpool layer of 2x2 pool size and stride 2x2 115 | 5. 3 x convolution layer of 256 channels of 3x3 kernel and same padding 116 | 6. 1 x maxpool layer of 2x2 pool size and stride 2x2 117 | 7. 3 x convolution layer of 512 channels of 3x3 kernel and same padding 118 | 8. 1 x maxpool layer of 2x2 pool size and stride 2x2 119 | 9. 3 x convolution layer of 512 channels of 3x3 kernel and same padding 120 | 10. 1 x maxpool layer of 2x2 pool size and stride 2x2 121 | 122 | We have also added ReLU activation to each of these convolution layers so that negative values are not passed to the next layer (they are clamped to zero). 123 | 124 | After creating all the convolution layers we pass the data to the dense layers: 125 | 126 | 11. 1 x Dense layer of 4096 units 127 | 12. 1 x Dense layer of 4096 units 128 | 13. 1 x Dense Softmax layer of 2 units 129 | 130 | #### Adam Optimizer 131 | Let us use the Adam optimiser to minimise the loss while training our model. Adam adapts the step size of each parameter, which often helps 132 | training move past plateaus and poor local minima, although it does not guarantee reaching the global minimum. 
We will also 133 | specify the learning rate of the optimiser, here in this case it is set at 0.001. If our training is bouncing a lot on 134 | epochs then we need to decrease the learning rate so that we can reach global minima. 135 | 136 | ```python 137 | # Add Optimizer 138 | optimizer = Adam(learning_rate=0.001) 139 | model.compile(optimizer=optimizer, loss=keras.losses.categorical_crossentropy, 140 | metrics=['accuracy']) 141 | # Check model summary 142 | print(model.summary()) 143 | ``` 144 | 145 | #### Model Summary 146 | 147 | ```sh 148 | Model: "sequential_1" 149 | _________________________________________________________________ 150 | Layer (type) Output Shape Param # 151 | ================================================================= 152 | conv2d_1 (Conv2D) (None, 224, 224, 64) 1792 153 | _________________________________________________________________ 154 | conv2d_2 (Conv2D) (None, 224, 224, 64) 36928 155 | _________________________________________________________________ 156 | max_pooling2d_1 (MaxPooling2 (None, 112, 112, 64) 0 157 | _________________________________________________________________ 158 | conv2d_3 (Conv2D) (None, 112, 112, 128) 73856 159 | _________________________________________________________________ 160 | conv2d_4 (Conv2D) (None, 112, 112, 128) 147584 161 | _________________________________________________________________ 162 | max_pooling2d_2 (MaxPooling2 (None, 56, 56, 128) 0 163 | _________________________________________________________________ 164 | conv2d_5 (Conv2D) (None, 56, 56, 256) 295168 165 | _________________________________________________________________ 166 | conv2d_6 (Conv2D) (None, 56, 56, 256) 590080 167 | _________________________________________________________________ 168 | conv2d_7 (Conv2D) (None, 56, 56, 256) 590080 169 | _________________________________________________________________ 170 | max_pooling2d_3 (MaxPooling2 (None, 28, 28, 256) 0 171 | 
_________________________________________________________________ 172 | conv2d_8 (Conv2D) (None, 28, 28, 512) 1180160 173 | _________________________________________________________________ 174 | conv2d_9 (Conv2D) (None, 28, 28, 512) 2359808 175 | _________________________________________________________________ 176 | conv2d_10 (Conv2D) (None, 28, 28, 512) 2359808 177 | _________________________________________________________________ 178 | max_pooling2d_4 (MaxPooling2 (None, 14, 14, 512) 0 179 | _________________________________________________________________ 180 | conv2d_11 (Conv2D) (None, 14, 14, 512) 2359808 181 | _________________________________________________________________ 182 | conv2d_12 (Conv2D) (None, 14, 14, 512) 2359808 183 | _________________________________________________________________ 184 | conv2d_13 (Conv2D) (None, 14, 14, 512) 2359808 185 | _________________________________________________________________ 186 | max_pooling2d_5 (MaxPooling2 (None, 7, 7, 512) 0 187 | _________________________________________________________________ 188 | flatten_1 (Flatten) (None, 25088) 0 189 | _________________________________________________________________ 190 | dense_1 (Dense) (None, 4096) 102764544 191 | _________________________________________________________________ 192 | dense_2 (Dense) (None, 4096) 16781312 193 | _________________________________________________________________ 194 | dense_3 (Dense) (None, 2) 8194 195 | ================================================================= 196 | Total params: 134,268,738 197 | Trainable params: 134,268,738 198 | Non-trainable params: 0 199 | _________________________________________________________________ 200 | ``` 201 | 202 | ### Model Implementation 203 | 204 | #### Model Checkpoint Saving 205 | 206 | ModelCheckpoint helps us to save the model by monitoring a specific parameter of the model. In this case we have monitoring 207 | validation accuracy by passing _val_acc_ to **ModelCheckpoint**. 
The model will only be saved to disk if the validation 208 | accuracy of the model in the current epoch is greater than the best validation accuracy seen in any previous epoch. 209 | 210 | ```python 211 | checkpoint = ModelCheckpoint("vgg16_1.h5", monitor='val_acc', 212 | verbose=1, save_best_only=True, 213 | save_weights_only=False, mode='auto', period=1) 214 | ``` 215 | 216 | #### Early Stopping 217 | 218 | EarlyStopping helps us to stop the training of the model early if there is no increase in the parameter which we have set 219 | to monitor in **EarlyStopping**. In this case we are monitoring validation accuracy by passing _val_acc_ to **EarlyStopping**. 220 | We have set patience to 20, which means that the model will stop training if it does not see any rise in validation accuracy 221 | in 20 epochs. 222 | 223 | ```python 224 | earlystop = EarlyStopping(monitor='val_acc', min_delta=0, patience=20, verbose=1, mode='auto') 225 | ``` 226 | 227 | #### Fit Generator 228 | We are using _model.fit_generator_ as we have **ImageDataGenerator** to pass data to the model. We will pass train and test 229 | data to _fit_generator_. In _fit_generator_, _steps_per_epoch_ sets how many batches of training data are drawn from the generator in each epoch 230 | and _validation_steps_ does the same for test data. The batch size itself is set by the _batch_size_ argument of _flow_from_directory_ (32 by default); we can tweak these values based on our system specifications. 231 | 232 | ```python 233 | hist = model.fit_generator(steps_per_epoch=100, generator=traindata, validation_data=testdata, 234 | validation_steps=10, epochs=100, 235 | callbacks=[checkpoint, earlystop]) 236 | ``` 237 | 238 | ### Plot Visualisation 239 | We will visualise training/validation accuracy and loss using matplotlib. 240 | ![plot](https://user-images.githubusercontent.com/35737777/69684979-a6fc5800-10b2-11ea-874f-878037c95e74.png) 241 | 242 | ### Test the model 243 | 244 | To do predictions on the trained model we need to load the best saved model and pre-process the image and pass the image 245 | to the model for output. 
246 | 247 | ```python 248 | img = image.load_img("../Datasets/Cats&Dogs/test1/39.jpg",target_size=(224,224)) 249 | img = np.asarray(img) 250 | plt.imshow(img) 251 | img = np.expand_dims(img, axis=0) 252 | saved_model = load_model("vgg16_1.h5") 253 | output = saved_model.predict(img) 254 | if output[0][0] > output[0][1]: 255 | print("cat") 256 | else: 257 | print('dog') 258 | ``` 259 | 260 | ![dog](https://user-images.githubusercontent.com/35737777/69684980-a6fc5800-10b2-11ea-896a-4f6f9c269e7b.png) 261 | --------------------------------------------------------------------------------