├── .gitignore ├── CIFAR10_Image_Classifier ├── Cifar10_ImageClassifier.ipynb ├── Readme.md └── cifar10_img.png ├── ConvolutionalNeuralNetworkVisualizer ├── ConvolutionalNeuralNetwork_Visualizer.ipynb ├── Readme.md └── cool_cat.jpg ├── FaceDetectionOpenCV ├── Readme.md ├── deploy.prototxt.txt ├── face_detection_ssd_parallel.py └── res10_300x300_ssd_iter_140000.caffemodel ├── MultiThreadedVideoProcessing ├── Readme.md ├── video_processing_parallel.py └── video_processing_simple.py ├── NeuralStyleTransfer ├── Neural_Style_Transfer_Tensorflow.ipynb ├── README.md ├── content.jpg ├── neural_style_transfer.png └── style_mosaic.jpg ├── README.md ├── StreetViewHouseNumbers_Classifier ├── Colab_ImageClassifier_SVHN.ipynb ├── Readme.md └── SVHN_Overview_Image.png ├── TransferLearningResnet ├── Readme.md ├── Transfer_Learning_ResNet_ImageClassifier.ipynb └── cifar10_img.png └── VideoActivityRecognition3DResnet ├── Readme.md ├── img_activity_recognition.jpg └── video_activity_recognition_3DResnets.ipynb /.gitignore: -------------------------------------------------------------------------------- 1 | CIFAR10_Image_Classifier.zip 2 | ConvolutionalNeuralNetworkVisualizer.zip 3 | NeuralStyleTransfer.zip 4 | StreetViewHouseNumbers_Classifier.zip 5 | TransferLearningResnet.zip -------------------------------------------------------------------------------- /CIFAR10_Image_Classifier/Cifar10_ImageClassifier.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "Cifar10_ImageClassifier_v2.ipynb", 7 | "provenance": [], 8 | "collapsed_sections": [], 9 | "toc_visible": true, 10 | "machine_shape": "hm", 11 | "authorship_tag": "ABX9TyPCgLrK1cIJtC5kaIsuHVGK", 12 | "include_colab_link": true 13 | }, 14 | "kernelspec": { 15 | "display_name": "Python 3", 16 | "name": "python3" 17 | }, 18 | "accelerator": "GPU" 19 | }, 20 | "cells": [ 21 | { 22 | "cell_type": 
"markdown", 23 | "metadata": { 24 | "id": "view-in-github", 25 | "colab_type": "text" 26 | }, 27 | "source": [ 28 | "\"Open" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "metadata": { 34 | "id": "MjzPfXdR50ff" 35 | }, 36 | "source": [ 37 | "# importing required libraries\n", 38 | "import numpy as np\n", 39 | "import matplotlib.pyplot as plt \n", 40 | "from tensorflow.keras.datasets import cifar10\n", 41 | "from tensorflow.keras.models import Sequential\n", 42 | "from tensorflow.keras.layers import Conv2D, Dense, Flatten, MaxPooling2D, BatchNormalization, Dropout" 43 | ], 44 | "execution_count": 1, 45 | "outputs": [] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "metadata": { 50 | "colab": { 51 | "base_uri": "https://localhost:8080/" 52 | }, 53 | "id": "xwJrg32a7itw", 54 | "outputId": "b9c76392-242f-435b-d63e-c2c906f8698a" 55 | }, 56 | "source": [ 57 | "# Loading the cifar-10 dataset \n", 58 | "# cifar10 is a dataset of 50,000 32x32 color training images and 10,000 test images, labeled over 10 categories. 
\n", 59 | "(x_train, y_train), (x_test, y_test) = cifar10.load_data()\n", 60 | "print(x_train.shape, y_train.shape)\n", 61 | "print(x_test.shape, y_test.shape)\n", 62 | "print(np.min(x_train), np.max(x_train)) # to check if scaling to the range 0-1 is needed\n", 63 | "print(np.min(y_train), np.max(y_train))" 64 | ], 65 | "execution_count": 2, 66 | "outputs": [ 67 | { 68 | "output_type": "stream", 69 | "text": [ 70 | "Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz\n", 71 | "170500096/170498071 [==============================] - 4s 0us/step\n", 72 | "(50000, 32, 32, 3) (50000, 1)\n", 73 | "(10000, 32, 32, 3) (10000, 1)\n", 74 | "0 255\n", 75 | "0 9\n" 76 | ], 77 | "name": "stdout" 78 | } 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "metadata": { 84 | "id": "CL2-Mc0TE_he" 85 | }, 86 | "source": [ 87 | "# scaling x_train and x_test values to the range 0-1\n", 88 | "x_train_scaled = x_train/255.\n", 89 | "x_test_scaled = x_test/255.\n", 90 | "# y_train, y_test values are already labelled as integers from 0 to 9 , so no preprocessing required" 91 | ], 92 | "execution_count": 3, 93 | "outputs": [] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "metadata": { 98 | "colab": { 99 | "base_uri": "https://localhost:8080/" 100 | }, 101 | "id": "b2A2O8cjFhbE", 102 | "outputId": "56207d39-de18-44be-be46-75676649f9ec" 103 | }, 104 | "source": [ 105 | "# Defining the CNN architecture using keras Sequential API (a minified version of VGGNet)\n", 106 | "model = Sequential()\n", 107 | "model.add(Conv2D(32, kernel_size=(3,3), padding=\"same\", activation=\"relu\", input_shape=(32,32,3)))\n", 108 | "model.add(BatchNormalization())\n", 109 | "model.add(Conv2D(32, kernel_size=(3,3), padding=\"same\", activation=\"relu\"))\n", 110 | "model.add(BatchNormalization())\n", 111 | "model.add(MaxPooling2D(pool_size=(2,2), strides=(2,2)))\n", 112 | "model.add(Dropout(0.25))\n", 113 | "model.add(Conv2D(64, kernel_size=(3,3), padding=\"same\", 
activation=\"relu\"))\n", 114 | "model.add(BatchNormalization())\n", 115 | "model.add(Conv2D(64, kernel_size=(3,3), padding=\"same\", activation=\"relu\"))\n", 116 | "model.add(BatchNormalization())\n", 117 | "model.add(MaxPooling2D(pool_size=(2,2), strides=(2,2)))\n", 118 | "model.add(Dropout(0.25))\n", 119 | "model.add(Flatten())\n", 120 | "model.add(Dense(512, activation=\"relu\"))\n", 121 | "model.add(BatchNormalization())\n", 122 | "model.add(Dropout(0.5))\n", 123 | "model.add(Dense(10, activation=\"softmax\"))\n", 124 | "\n", 125 | "# printing model summary\n", 126 | "model.summary()\n", 127 | "\n", 128 | "# Compiling the model \n", 129 | "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"adam\", metrics=[\"accuracy\"])" 130 | ], 131 | "execution_count": 4, 132 | "outputs": [ 133 | { 134 | "output_type": "stream", 135 | "text": [ 136 | "Model: \"sequential\"\n", 137 | "_________________________________________________________________\n", 138 | "Layer (type) Output Shape Param # \n", 139 | "=================================================================\n", 140 | "conv2d (Conv2D) (None, 32, 32, 32) 896 \n", 141 | "_________________________________________________________________\n", 142 | "batch_normalization (BatchNo (None, 32, 32, 32) 128 \n", 143 | "_________________________________________________________________\n", 144 | "conv2d_1 (Conv2D) (None, 32, 32, 32) 9248 \n", 145 | "_________________________________________________________________\n", 146 | "batch_normalization_1 (Batch (None, 32, 32, 32) 128 \n", 147 | "_________________________________________________________________\n", 148 | "max_pooling2d (MaxPooling2D) (None, 16, 16, 32) 0 \n", 149 | "_________________________________________________________________\n", 150 | "dropout (Dropout) (None, 16, 16, 32) 0 \n", 151 | "_________________________________________________________________\n", 152 | "conv2d_2 (Conv2D) (None, 16, 16, 64) 18496 \n", 153 | 
"_________________________________________________________________\n", 154 | "batch_normalization_2 (Batch (None, 16, 16, 64) 256 \n", 155 | "_________________________________________________________________\n", 156 | "conv2d_3 (Conv2D) (None, 16, 16, 64) 36928 \n", 157 | "_________________________________________________________________\n", 158 | "batch_normalization_3 (Batch (None, 16, 16, 64) 256 \n", 159 | "_________________________________________________________________\n", 160 | "max_pooling2d_1 (MaxPooling2 (None, 8, 8, 64) 0 \n", 161 | "_________________________________________________________________\n", 162 | "dropout_1 (Dropout) (None, 8, 8, 64) 0 \n", 163 | "_________________________________________________________________\n", 164 | "flatten (Flatten) (None, 4096) 0 \n", 165 | "_________________________________________________________________\n", 166 | "dense (Dense) (None, 512) 2097664 \n", 167 | "_________________________________________________________________\n", 168 | "batch_normalization_4 (Batch (None, 512) 2048 \n", 169 | "_________________________________________________________________\n", 170 | "dropout_2 (Dropout) (None, 512) 0 \n", 171 | "_________________________________________________________________\n", 172 | "dense_1 (Dense) (None, 10) 5130 \n", 173 | "=================================================================\n", 174 | "Total params: 2,171,178\n", 175 | "Trainable params: 2,169,770\n", 176 | "Non-trainable params: 1,408\n", 177 | "_________________________________________________________________\n" 178 | ], 179 | "name": "stdout" 180 | } 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "metadata": { 186 | "colab": { 187 | "base_uri": "https://localhost:8080/" 188 | }, 189 | "id": "T1JX59wSF-Lv", 190 | "outputId": "a73c23f1-8bd2-43c6-9df9-726c890a66d7" 191 | }, 192 | "source": [ 193 | "# Training the model \n", 194 | "history = model.fit(x_train_scaled, y_train, batch_size=64, epochs=40, validation_data=(x_test_scaled, 
y_test), verbose=2)" 195 | ], 196 | "execution_count": 6, 197 | "outputs": [ 198 | { 199 | "output_type": "stream", 200 | "text": [ 201 | "Epoch 1/40\n", 202 | "782/782 - 20s - loss: 1.4748 - accuracy: 0.5088 - val_loss: 1.0989 - val_accuracy: 0.6155\n", 203 | "Epoch 2/40\n", 204 | "782/782 - 4s - loss: 0.9532 - accuracy: 0.6642 - val_loss: 3.6966 - val_accuracy: 0.2994\n", 205 | "Epoch 3/40\n", 206 | "782/782 - 4s - loss: 0.8142 - accuracy: 0.7133 - val_loss: 1.0699 - val_accuracy: 0.6588\n", 207 | "Epoch 4/40\n", 208 | "782/782 - 4s - loss: 0.7352 - accuracy: 0.7435 - val_loss: 0.6923 - val_accuracy: 0.7580\n", 209 | "Epoch 5/40\n", 210 | "782/782 - 4s - loss: 0.6651 - accuracy: 0.7642 - val_loss: 0.6709 - val_accuracy: 0.7713\n", 211 | "Epoch 6/40\n", 212 | "782/782 - 4s - loss: 0.6095 - accuracy: 0.7856 - val_loss: 0.7959 - val_accuracy: 0.7364\n", 213 | "Epoch 7/40\n", 214 | "782/782 - 4s - loss: 0.5581 - accuracy: 0.8040 - val_loss: 0.5927 - val_accuracy: 0.7970\n", 215 | "Epoch 8/40\n", 216 | "782/782 - 4s - loss: 0.5189 - accuracy: 0.8168 - val_loss: 0.6359 - val_accuracy: 0.7847\n", 217 | "Epoch 9/40\n", 218 | "782/782 - 4s - loss: 0.4788 - accuracy: 0.8311 - val_loss: 0.5534 - val_accuracy: 0.8137\n", 219 | "Epoch 10/40\n", 220 | "782/782 - 4s - loss: 0.4419 - accuracy: 0.8447 - val_loss: 0.6788 - val_accuracy: 0.7834\n", 221 | "Epoch 11/40\n", 222 | "782/782 - 4s - loss: 0.4076 - accuracy: 0.8566 - val_loss: 0.6475 - val_accuracy: 0.7919\n", 223 | "Epoch 12/40\n", 224 | "782/782 - 4s - loss: 0.3912 - accuracy: 0.8626 - val_loss: 0.6400 - val_accuracy: 0.7905\n", 225 | "Epoch 13/40\n", 226 | "782/782 - 4s - loss: 0.3676 - accuracy: 0.8695 - val_loss: 0.5595 - val_accuracy: 0.8173\n", 227 | "Epoch 14/40\n", 228 | "782/782 - 4s - loss: 0.3337 - accuracy: 0.8804 - val_loss: 0.6950 - val_accuracy: 0.7885\n", 229 | "Epoch 15/40\n", 230 | "782/782 - 4s - loss: 0.3138 - accuracy: 0.8882 - val_loss: 0.6289 - val_accuracy: 0.8036\n", 231 | "Epoch 16/40\n", 232 | 
"782/782 - 4s - loss: 0.3163 - accuracy: 0.8884 - val_loss: 0.5864 - val_accuracy: 0.8213\n", 233 | "Epoch 17/40\n", 234 | "782/782 - 4s - loss: 0.2875 - accuracy: 0.8976 - val_loss: 0.5803 - val_accuracy: 0.8184\n", 235 | "Epoch 18/40\n", 236 | "782/782 - 4s - loss: 0.2745 - accuracy: 0.9024 - val_loss: 0.6515 - val_accuracy: 0.8099\n", 237 | "Epoch 19/40\n", 238 | "782/782 - 4s - loss: 0.2605 - accuracy: 0.9081 - val_loss: 0.6302 - val_accuracy: 0.8155\n", 239 | "Epoch 20/40\n", 240 | "782/782 - 4s - loss: 0.2476 - accuracy: 0.9112 - val_loss: 0.5855 - val_accuracy: 0.8273\n", 241 | "Epoch 21/40\n", 242 | "782/782 - 4s - loss: 0.2432 - accuracy: 0.9142 - val_loss: 0.6192 - val_accuracy: 0.8196\n", 243 | "Epoch 22/40\n", 244 | "782/782 - 4s - loss: 0.2368 - accuracy: 0.9165 - val_loss: 0.6565 - val_accuracy: 0.8243\n", 245 | "Epoch 23/40\n", 246 | "782/782 - 4s - loss: 0.2182 - accuracy: 0.9230 - val_loss: 0.5994 - val_accuracy: 0.8281\n", 247 | "Epoch 24/40\n", 248 | "782/782 - 4s - loss: 0.2184 - accuracy: 0.9231 - val_loss: 0.6279 - val_accuracy: 0.8204\n", 249 | "Epoch 25/40\n", 250 | "782/782 - 4s - loss: 0.2067 - accuracy: 0.9278 - val_loss: 0.5764 - val_accuracy: 0.8358\n", 251 | "Epoch 26/40\n", 252 | "782/782 - 4s - loss: 0.1998 - accuracy: 0.9291 - val_loss: 0.5831 - val_accuracy: 0.8359\n", 253 | "Epoch 27/40\n", 254 | "782/782 - 4s - loss: 0.1929 - accuracy: 0.9316 - val_loss: 0.6074 - val_accuracy: 0.8339\n", 255 | "Epoch 28/40\n", 256 | "782/782 - 4s - loss: 0.1918 - accuracy: 0.9311 - val_loss: 0.6144 - val_accuracy: 0.8290\n", 257 | "Epoch 29/40\n", 258 | "782/782 - 4s - loss: 0.1897 - accuracy: 0.9332 - val_loss: 0.5774 - val_accuracy: 0.8377\n", 259 | "Epoch 30/40\n", 260 | "782/782 - 4s - loss: 0.1908 - accuracy: 0.9328 - val_loss: 0.6057 - val_accuracy: 0.8367\n", 261 | "Epoch 31/40\n", 262 | "782/782 - 4s - loss: 0.1724 - accuracy: 0.9394 - val_loss: 0.6022 - val_accuracy: 0.8353\n", 263 | "Epoch 32/40\n", 264 | "782/782 - 4s - loss: 0.1679 - 
accuracy: 0.9412 - val_loss: 0.6457 - val_accuracy: 0.8297\n", 265 | "Epoch 33/40\n", 266 | "782/782 - 4s - loss: 0.1659 - accuracy: 0.9401 - val_loss: 0.7429 - val_accuracy: 0.8173\n", 267 | "Epoch 34/40\n", 268 | "782/782 - 4s - loss: 0.1665 - accuracy: 0.9422 - val_loss: 0.6386 - val_accuracy: 0.8347\n", 269 | "Epoch 35/40\n", 270 | "782/782 - 4s - loss: 0.1641 - accuracy: 0.9411 - val_loss: 0.6328 - val_accuracy: 0.8298\n", 271 | "Epoch 36/40\n", 272 | "782/782 - 4s - loss: 0.1527 - accuracy: 0.9462 - val_loss: 0.6865 - val_accuracy: 0.8236\n", 273 | "Epoch 37/40\n", 274 | "782/782 - 4s - loss: 0.1524 - accuracy: 0.9456 - val_loss: 0.6909 - val_accuracy: 0.8270\n", 275 | "Epoch 38/40\n", 276 | "782/782 - 4s - loss: 0.1472 - accuracy: 0.9484 - val_loss: 0.7240 - val_accuracy: 0.8218\n", 277 | "Epoch 39/40\n", 278 | "782/782 - 4s - loss: 0.1456 - accuracy: 0.9484 - val_loss: 0.6283 - val_accuracy: 0.8368\n", 279 | "Epoch 40/40\n", 280 | "782/782 - 4s - loss: 0.1409 - accuracy: 0.9503 - val_loss: 0.6493 - val_accuracy: 0.8256\n" 281 | ], 282 | "name": "stdout" 283 | } 284 | ] 285 | }, 286 | { 287 | "cell_type": "code", 288 | "metadata": { 289 | "id": "tChTuxzcGUcA", 290 | "colab": { 291 | "base_uri": "https://localhost:8080/", 292 | "height": 299 293 | }, 294 | "outputId": "3f1a63ee-0d41-433c-a70d-1bce7aef8ded" 295 | }, 296 | "source": [ 297 | "# plotting loss and accuracy values \n", 298 | "num_epochs=40\n", 299 | "plt.style.use('ggplot')\n", 300 | "plt.figure()\n", 301 | "plt.plot(np.arange(num_epochs), history.history[\"loss\"], label=\"train_loss\")\n", 302 | "plt.plot(np.arange(num_epochs), history.history[\"val_loss\"], label=\"val_loss\")\n", 303 | "plt.plot(np.arange(num_epochs), history.history[\"accuracy\"], label=\"train_acc\")\n", 304 | "plt.plot(np.arange(num_epochs), history.history[\"val_accuracy\"], label=\"val_acc\")\n", 305 | "plt.title(\"Accuracy and Loss Curves\")\n", 306 | "plt.xlabel(\"Epoch #\")\n", 307 | "plt.ylabel(\"Accuracy/Loss\")\n", 
308 | "plt.legend()\n", 309 | "plt.show()\n" 310 | ], 311 | "execution_count": 7, 312 | "outputs": [ 313 | { 314 | "output_type": "display_data", 315 | "data": { 316 | "image/png": "\n", 317 | "text/plain": [ 318 | "
" 319 | ] 320 | }, 321 | "metadata": { 322 | "tags": [] 323 | } 324 | } 325 | ] 326 | } 327 | ] 328 | } -------------------------------------------------------------------------------- /CIFAR10_Image_Classifier/Readme.md: -------------------------------------------------------------------------------- 1 | # This project is an Image Classifier for the CIFAR-10 dataset containing 10 different image classes - airplanes, automobiles, birds, cats, deer, dogs, frogs, horses, ships, and trucks. 2 | 3 | ![Example images from cifar-10 dataset](cifar10_img.png) 4 | [*Image Source*](https://www.cs.toronto.edu/~kriz/cifar.html) 5 | 6 | ## Project folder includes 7 | 1. Readme.md (this file) 8 | 2. Google colab notebook for building, training and testing a Convolutional Neural Network on the CIFAR-10 dataset. 9 | 3. Misc 10 | - CIFAR-10 overview image 11 | 12 | ## Programming language and Libraries used 13 | 1. Python programming language 14 | 2. Deep learning libraries tensorflow 2.x and Keras API 15 | 3. Python libraries including numpy, etc 16 | 17 | ## Notes 18 | 1. Purpose of this project is to demonstrate how to build, train and evaluate a simple CNN model for classifying images in the cifar-10 dataset 19 | -------------------------------------------------------------------------------- /CIFAR10_Image_Classifier/cifar10_img.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasugupta9/DeepLearningProjects/f6396195d04cf91b59d1b78bddac27ba0a607119/CIFAR10_Image_Classifier/cifar10_img.png -------------------------------------------------------------------------------- /ConvolutionalNeuralNetworkVisualizer/Readme.md: -------------------------------------------------------------------------------- 1 | # This project is a Convolutional Neural Network Visualizer for visualizing outputs from inner convolutional layers within a CNN model. 
2 | 3 | ![Example Cool Cat Image](cool_cat.jpg) 4 | [*Image Source*](https://i.pinimg.com/originals/73/fd/d4/73fdd4752a176af8f388b31a67e93d87.jpg) 5 | 6 | ## Project folder includes 7 | 1. Readme.md (this file) 8 | 2. Google Colab notebook for visualizing the CNN layer outputs on the Cool Cat image 9 | 3. Misc 10 | - Cool Cat Image - 11 | In Zambia’s South Luangwa National Park, a photographer had been watching a pride of lions while they slept off a feast from a buffalo kill. When this female walked away, he anticipated that she might be going for a drink and so he positioned his vehicle on the opposite side of the waterhole. The cool cat picture is one of the highly commended images from the 2018 Wildlife Photographer of the Year competition. 12 | 13 | ## Programming language and Libraries used 14 | 1. Python programming language 15 | 2. Deep learning libraries TensorFlow 2.x and Keras API 16 | 3. Python libraries including NumPy, etc. 17 | 18 | ## Notes 19 | 1. The purpose of this project is to demonstrate how to use the Keras Functional API to visualize outputs from inner layers within a CNN. 20 | -------------------------------------------------------------------------------- /ConvolutionalNeuralNetworkVisualizer/cool_cat.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasugupta9/DeepLearningProjects/f6396195d04cf91b59d1b78bddac27ba0a607119/ConvolutionalNeuralNetworkVisualizer/cool_cat.jpg -------------------------------------------------------------------------------- /FaceDetectionOpenCV/Readme.md: -------------------------------------------------------------------------------- 1 | # Real-Time Face Detection Using OpenCV 2 | 1. The face_detection_ssd_parallel.py script performs real-time face detection on the input stream from a webcam connected to a laptop or desktop 3 | 2. A pre-trained deep learning model for face detection from OpenCV is used Link 4 | 3. 
Imutils library is used for reading frames from webcam in a multi-threaded approach for achieving higher FPS Link 5 | 4. Model architecture is a Single Shot Detector (SSD) framework with a ResNet backbone 6 | 5. Model files are in caffe format 7 | * deploy.prototxt.txt - defines model architecture 8 | * res10_300x300_ssd_iter_140000.caffemodel - contains trained model weights 9 | 6. Model files can also be directly downloaded from OpenCV repository. Useful links: Link1 Link2 Link3 10 | 11 | ## Programming language and libraries used 12 | 1. Python programming language 13 | 2. OpenCV library 14 | 3. Imutils library (install using -> pip install imutils) 15 | 4. Other Python libraries including Numpy library 16 | 17 | -------------------------------------------------------------------------------- /FaceDetectionOpenCV/deploy.prototxt.txt: -------------------------------------------------------------------------------- 1 | input: "data" 2 | input_shape { 3 | dim: 1 4 | dim: 3 5 | dim: 300 6 | dim: 300 7 | } 8 | 9 | layer { 10 | name: "data_bn" 11 | type: "BatchNorm" 12 | bottom: "data" 13 | top: "data_bn" 14 | param { 15 | lr_mult: 0.0 16 | } 17 | param { 18 | lr_mult: 0.0 19 | } 20 | param { 21 | lr_mult: 0.0 22 | } 23 | } 24 | layer { 25 | name: "data_scale" 26 | type: "Scale" 27 | bottom: "data_bn" 28 | top: "data_bn" 29 | param { 30 | lr_mult: 1.0 31 | decay_mult: 1.0 32 | } 33 | param { 34 | lr_mult: 2.0 35 | decay_mult: 1.0 36 | } 37 | scale_param { 38 | bias_term: true 39 | } 40 | } 41 | layer { 42 | name: "conv1_h" 43 | type: "Convolution" 44 | bottom: "data_bn" 45 | top: "conv1_h" 46 | param { 47 | lr_mult: 1.0 48 | decay_mult: 1.0 49 | } 50 | param { 51 | lr_mult: 2.0 52 | decay_mult: 1.0 53 | } 54 | convolution_param { 55 | num_output: 32 56 | pad: 3 57 | kernel_size: 7 58 | stride: 2 59 | weight_filler { 60 | type: "msra" 61 | variance_norm: FAN_OUT 62 | } 63 | bias_filler { 64 | type: "constant" 65 | value: 0.0 66 | } 67 | } 68 | } 69 | layer { 70 | name: 
"conv1_bn_h" 71 | type: "BatchNorm" 72 | bottom: "conv1_h" 73 | top: "conv1_h" 74 | param { 75 | lr_mult: 0.0 76 | } 77 | param { 78 | lr_mult: 0.0 79 | } 80 | param { 81 | lr_mult: 0.0 82 | } 83 | } 84 | layer { 85 | name: "conv1_scale_h" 86 | type: "Scale" 87 | bottom: "conv1_h" 88 | top: "conv1_h" 89 | param { 90 | lr_mult: 1.0 91 | decay_mult: 1.0 92 | } 93 | param { 94 | lr_mult: 2.0 95 | decay_mult: 1.0 96 | } 97 | scale_param { 98 | bias_term: true 99 | } 100 | } 101 | layer { 102 | name: "conv1_relu" 103 | type: "ReLU" 104 | bottom: "conv1_h" 105 | top: "conv1_h" 106 | } 107 | layer { 108 | name: "conv1_pool" 109 | type: "Pooling" 110 | bottom: "conv1_h" 111 | top: "conv1_pool" 112 | pooling_param { 113 | kernel_size: 3 114 | stride: 2 115 | } 116 | } 117 | layer { 118 | name: "layer_64_1_conv1_h" 119 | type: "Convolution" 120 | bottom: "conv1_pool" 121 | top: "layer_64_1_conv1_h" 122 | param { 123 | lr_mult: 1.0 124 | decay_mult: 1.0 125 | } 126 | convolution_param { 127 | num_output: 32 128 | bias_term: false 129 | pad: 1 130 | kernel_size: 3 131 | stride: 1 132 | weight_filler { 133 | type: "msra" 134 | } 135 | bias_filler { 136 | type: "constant" 137 | value: 0.0 138 | } 139 | } 140 | } 141 | layer { 142 | name: "layer_64_1_bn2_h" 143 | type: "BatchNorm" 144 | bottom: "layer_64_1_conv1_h" 145 | top: "layer_64_1_conv1_h" 146 | param { 147 | lr_mult: 0.0 148 | } 149 | param { 150 | lr_mult: 0.0 151 | } 152 | param { 153 | lr_mult: 0.0 154 | } 155 | } 156 | layer { 157 | name: "layer_64_1_scale2_h" 158 | type: "Scale" 159 | bottom: "layer_64_1_conv1_h" 160 | top: "layer_64_1_conv1_h" 161 | param { 162 | lr_mult: 1.0 163 | decay_mult: 1.0 164 | } 165 | param { 166 | lr_mult: 2.0 167 | decay_mult: 1.0 168 | } 169 | scale_param { 170 | bias_term: true 171 | } 172 | } 173 | layer { 174 | name: "layer_64_1_relu2" 175 | type: "ReLU" 176 | bottom: "layer_64_1_conv1_h" 177 | top: "layer_64_1_conv1_h" 178 | } 179 | layer { 180 | name: "layer_64_1_conv2_h" 181 | 
type: "Convolution" 182 | bottom: "layer_64_1_conv1_h" 183 | top: "layer_64_1_conv2_h" 184 | param { 185 | lr_mult: 1.0 186 | decay_mult: 1.0 187 | } 188 | convolution_param { 189 | num_output: 32 190 | bias_term: false 191 | pad: 1 192 | kernel_size: 3 193 | stride: 1 194 | weight_filler { 195 | type: "msra" 196 | } 197 | bias_filler { 198 | type: "constant" 199 | value: 0.0 200 | } 201 | } 202 | } 203 | layer { 204 | name: "layer_64_1_sum" 205 | type: "Eltwise" 206 | bottom: "layer_64_1_conv2_h" 207 | bottom: "conv1_pool" 208 | top: "layer_64_1_sum" 209 | } 210 | layer { 211 | name: "layer_128_1_bn1_h" 212 | type: "BatchNorm" 213 | bottom: "layer_64_1_sum" 214 | top: "layer_128_1_bn1_h" 215 | param { 216 | lr_mult: 0.0 217 | } 218 | param { 219 | lr_mult: 0.0 220 | } 221 | param { 222 | lr_mult: 0.0 223 | } 224 | } 225 | layer { 226 | name: "layer_128_1_scale1_h" 227 | type: "Scale" 228 | bottom: "layer_128_1_bn1_h" 229 | top: "layer_128_1_bn1_h" 230 | param { 231 | lr_mult: 1.0 232 | decay_mult: 1.0 233 | } 234 | param { 235 | lr_mult: 2.0 236 | decay_mult: 1.0 237 | } 238 | scale_param { 239 | bias_term: true 240 | } 241 | } 242 | layer { 243 | name: "layer_128_1_relu1" 244 | type: "ReLU" 245 | bottom: "layer_128_1_bn1_h" 246 | top: "layer_128_1_bn1_h" 247 | } 248 | layer { 249 | name: "layer_128_1_conv1_h" 250 | type: "Convolution" 251 | bottom: "layer_128_1_bn1_h" 252 | top: "layer_128_1_conv1_h" 253 | param { 254 | lr_mult: 1.0 255 | decay_mult: 1.0 256 | } 257 | convolution_param { 258 | num_output: 128 259 | bias_term: false 260 | pad: 1 261 | kernel_size: 3 262 | stride: 2 263 | weight_filler { 264 | type: "msra" 265 | } 266 | bias_filler { 267 | type: "constant" 268 | value: 0.0 269 | } 270 | } 271 | } 272 | layer { 273 | name: "layer_128_1_bn2" 274 | type: "BatchNorm" 275 | bottom: "layer_128_1_conv1_h" 276 | top: "layer_128_1_conv1_h" 277 | param { 278 | lr_mult: 0.0 279 | } 280 | param { 281 | lr_mult: 0.0 282 | } 283 | param { 284 | lr_mult: 0.0 285 
| } 286 | } 287 | layer { 288 | name: "layer_128_1_scale2" 289 | type: "Scale" 290 | bottom: "layer_128_1_conv1_h" 291 | top: "layer_128_1_conv1_h" 292 | param { 293 | lr_mult: 1.0 294 | decay_mult: 1.0 295 | } 296 | param { 297 | lr_mult: 2.0 298 | decay_mult: 1.0 299 | } 300 | scale_param { 301 | bias_term: true 302 | } 303 | } 304 | layer { 305 | name: "layer_128_1_relu2" 306 | type: "ReLU" 307 | bottom: "layer_128_1_conv1_h" 308 | top: "layer_128_1_conv1_h" 309 | } 310 | layer { 311 | name: "layer_128_1_conv2" 312 | type: "Convolution" 313 | bottom: "layer_128_1_conv1_h" 314 | top: "layer_128_1_conv2" 315 | param { 316 | lr_mult: 1.0 317 | decay_mult: 1.0 318 | } 319 | convolution_param { 320 | num_output: 128 321 | bias_term: false 322 | pad: 1 323 | kernel_size: 3 324 | stride: 1 325 | weight_filler { 326 | type: "msra" 327 | } 328 | bias_filler { 329 | type: "constant" 330 | value: 0.0 331 | } 332 | } 333 | } 334 | layer { 335 | name: "layer_128_1_conv_expand_h" 336 | type: "Convolution" 337 | bottom: "layer_128_1_bn1_h" 338 | top: "layer_128_1_conv_expand_h" 339 | param { 340 | lr_mult: 1.0 341 | decay_mult: 1.0 342 | } 343 | convolution_param { 344 | num_output: 128 345 | bias_term: false 346 | pad: 0 347 | kernel_size: 1 348 | stride: 2 349 | weight_filler { 350 | type: "msra" 351 | } 352 | bias_filler { 353 | type: "constant" 354 | value: 0.0 355 | } 356 | } 357 | } 358 | layer { 359 | name: "layer_128_1_sum" 360 | type: "Eltwise" 361 | bottom: "layer_128_1_conv2" 362 | bottom: "layer_128_1_conv_expand_h" 363 | top: "layer_128_1_sum" 364 | } 365 | layer { 366 | name: "layer_256_1_bn1" 367 | type: "BatchNorm" 368 | bottom: "layer_128_1_sum" 369 | top: "layer_256_1_bn1" 370 | param { 371 | lr_mult: 0.0 372 | } 373 | param { 374 | lr_mult: 0.0 375 | } 376 | param { 377 | lr_mult: 0.0 378 | } 379 | } 380 | layer { 381 | name: "layer_256_1_scale1" 382 | type: "Scale" 383 | bottom: "layer_256_1_bn1" 384 | top: "layer_256_1_bn1" 385 | param { 386 | lr_mult: 1.0 
387 | decay_mult: 1.0 388 | } 389 | param { 390 | lr_mult: 2.0 391 | decay_mult: 1.0 392 | } 393 | scale_param { 394 | bias_term: true 395 | } 396 | } 397 | layer { 398 | name: "layer_256_1_relu1" 399 | type: "ReLU" 400 | bottom: "layer_256_1_bn1" 401 | top: "layer_256_1_bn1" 402 | } 403 | layer { 404 | name: "layer_256_1_conv1" 405 | type: "Convolution" 406 | bottom: "layer_256_1_bn1" 407 | top: "layer_256_1_conv1" 408 | param { 409 | lr_mult: 1.0 410 | decay_mult: 1.0 411 | } 412 | convolution_param { 413 | num_output: 256 414 | bias_term: false 415 | pad: 1 416 | kernel_size: 3 417 | stride: 2 418 | weight_filler { 419 | type: "msra" 420 | } 421 | bias_filler { 422 | type: "constant" 423 | value: 0.0 424 | } 425 | } 426 | } 427 | layer { 428 | name: "layer_256_1_bn2" 429 | type: "BatchNorm" 430 | bottom: "layer_256_1_conv1" 431 | top: "layer_256_1_conv1" 432 | param { 433 | lr_mult: 0.0 434 | } 435 | param { 436 | lr_mult: 0.0 437 | } 438 | param { 439 | lr_mult: 0.0 440 | } 441 | } 442 | layer { 443 | name: "layer_256_1_scale2" 444 | type: "Scale" 445 | bottom: "layer_256_1_conv1" 446 | top: "layer_256_1_conv1" 447 | param { 448 | lr_mult: 1.0 449 | decay_mult: 1.0 450 | } 451 | param { 452 | lr_mult: 2.0 453 | decay_mult: 1.0 454 | } 455 | scale_param { 456 | bias_term: true 457 | } 458 | } 459 | layer { 460 | name: "layer_256_1_relu2" 461 | type: "ReLU" 462 | bottom: "layer_256_1_conv1" 463 | top: "layer_256_1_conv1" 464 | } 465 | layer { 466 | name: "layer_256_1_conv2" 467 | type: "Convolution" 468 | bottom: "layer_256_1_conv1" 469 | top: "layer_256_1_conv2" 470 | param { 471 | lr_mult: 1.0 472 | decay_mult: 1.0 473 | } 474 | convolution_param { 475 | num_output: 256 476 | bias_term: false 477 | pad: 1 478 | kernel_size: 3 479 | stride: 1 480 | weight_filler { 481 | type: "msra" 482 | } 483 | bias_filler { 484 | type: "constant" 485 | value: 0.0 486 | } 487 | } 488 | } 489 | layer { 490 | name: "layer_256_1_conv_expand" 491 | type: "Convolution" 492 | 
bottom: "layer_256_1_bn1" 493 | top: "layer_256_1_conv_expand" 494 | param { 495 | lr_mult: 1.0 496 | decay_mult: 1.0 497 | } 498 | convolution_param { 499 | num_output: 256 500 | bias_term: false 501 | pad: 0 502 | kernel_size: 1 503 | stride: 2 504 | weight_filler { 505 | type: "msra" 506 | } 507 | bias_filler { 508 | type: "constant" 509 | value: 0.0 510 | } 511 | } 512 | } 513 | layer { 514 | name: "layer_256_1_sum" 515 | type: "Eltwise" 516 | bottom: "layer_256_1_conv2" 517 | bottom: "layer_256_1_conv_expand" 518 | top: "layer_256_1_sum" 519 | } 520 | layer { 521 | name: "layer_512_1_bn1" 522 | type: "BatchNorm" 523 | bottom: "layer_256_1_sum" 524 | top: "layer_512_1_bn1" 525 | param { 526 | lr_mult: 0.0 527 | } 528 | param { 529 | lr_mult: 0.0 530 | } 531 | param { 532 | lr_mult: 0.0 533 | } 534 | } 535 | layer { 536 | name: "layer_512_1_scale1" 537 | type: "Scale" 538 | bottom: "layer_512_1_bn1" 539 | top: "layer_512_1_bn1" 540 | param { 541 | lr_mult: 1.0 542 | decay_mult: 1.0 543 | } 544 | param { 545 | lr_mult: 2.0 546 | decay_mult: 1.0 547 | } 548 | scale_param { 549 | bias_term: true 550 | } 551 | } 552 | layer { 553 | name: "layer_512_1_relu1" 554 | type: "ReLU" 555 | bottom: "layer_512_1_bn1" 556 | top: "layer_512_1_bn1" 557 | } 558 | layer { 559 | name: "layer_512_1_conv1_h" 560 | type: "Convolution" 561 | bottom: "layer_512_1_bn1" 562 | top: "layer_512_1_conv1_h" 563 | param { 564 | lr_mult: 1.0 565 | decay_mult: 1.0 566 | } 567 | convolution_param { 568 | num_output: 128 569 | bias_term: false 570 | pad: 1 571 | kernel_size: 3 572 | stride: 1 # 2 573 | weight_filler { 574 | type: "msra" 575 | } 576 | bias_filler { 577 | type: "constant" 578 | value: 0.0 579 | } 580 | } 581 | } 582 | layer { 583 | name: "layer_512_1_bn2_h" 584 | type: "BatchNorm" 585 | bottom: "layer_512_1_conv1_h" 586 | top: "layer_512_1_conv1_h" 587 | param { 588 | lr_mult: 0.0 589 | } 590 | param { 591 | lr_mult: 0.0 592 | } 593 | param { 594 | lr_mult: 0.0 595 | } 596 | } 597 | 
layer { 598 | name: "layer_512_1_scale2_h" 599 | type: "Scale" 600 | bottom: "layer_512_1_conv1_h" 601 | top: "layer_512_1_conv1_h" 602 | param { 603 | lr_mult: 1.0 604 | decay_mult: 1.0 605 | } 606 | param { 607 | lr_mult: 2.0 608 | decay_mult: 1.0 609 | } 610 | scale_param { 611 | bias_term: true 612 | } 613 | } 614 | layer { 615 | name: "layer_512_1_relu2" 616 | type: "ReLU" 617 | bottom: "layer_512_1_conv1_h" 618 | top: "layer_512_1_conv1_h" 619 | } 620 | layer { 621 | name: "layer_512_1_conv2_h" 622 | type: "Convolution" 623 | bottom: "layer_512_1_conv1_h" 624 | top: "layer_512_1_conv2_h" 625 | param { 626 | lr_mult: 1.0 627 | decay_mult: 1.0 628 | } 629 | convolution_param { 630 | num_output: 256 631 | bias_term: false 632 | pad: 2 # 1 633 | kernel_size: 3 634 | stride: 1 635 | dilation: 2 636 | weight_filler { 637 | type: "msra" 638 | } 639 | bias_filler { 640 | type: "constant" 641 | value: 0.0 642 | } 643 | } 644 | } 645 | layer { 646 | name: "layer_512_1_conv_expand_h" 647 | type: "Convolution" 648 | bottom: "layer_512_1_bn1" 649 | top: "layer_512_1_conv_expand_h" 650 | param { 651 | lr_mult: 1.0 652 | decay_mult: 1.0 653 | } 654 | convolution_param { 655 | num_output: 256 656 | bias_term: false 657 | pad: 0 658 | kernel_size: 1 659 | stride: 1 # 2 660 | weight_filler { 661 | type: "msra" 662 | } 663 | bias_filler { 664 | type: "constant" 665 | value: 0.0 666 | } 667 | } 668 | } 669 | layer { 670 | name: "layer_512_1_sum" 671 | type: "Eltwise" 672 | bottom: "layer_512_1_conv2_h" 673 | bottom: "layer_512_1_conv_expand_h" 674 | top: "layer_512_1_sum" 675 | } 676 | layer { 677 | name: "last_bn_h" 678 | type: "BatchNorm" 679 | bottom: "layer_512_1_sum" 680 | top: "layer_512_1_sum" 681 | param { 682 | lr_mult: 0.0 683 | } 684 | param { 685 | lr_mult: 0.0 686 | } 687 | param { 688 | lr_mult: 0.0 689 | } 690 | } 691 | layer { 692 | name: "last_scale_h" 693 | type: "Scale" 694 | bottom: "layer_512_1_sum" 695 | top: "layer_512_1_sum" 696 | param { 697 | lr_mult: 
1.0 698 | decay_mult: 1.0 699 | } 700 | param { 701 | lr_mult: 2.0 702 | decay_mult: 1.0 703 | } 704 | scale_param { 705 | bias_term: true 706 | } 707 | } 708 | layer { 709 | name: "last_relu" 710 | type: "ReLU" 711 | bottom: "layer_512_1_sum" 712 | top: "fc7" 713 | } 714 | 715 | layer { 716 | name: "conv6_1_h" 717 | type: "Convolution" 718 | bottom: "fc7" 719 | top: "conv6_1_h" 720 | param { 721 | lr_mult: 1 722 | decay_mult: 1 723 | } 724 | param { 725 | lr_mult: 2 726 | decay_mult: 0 727 | } 728 | convolution_param { 729 | num_output: 128 730 | pad: 0 731 | kernel_size: 1 732 | stride: 1 733 | weight_filler { 734 | type: "xavier" 735 | } 736 | bias_filler { 737 | type: "constant" 738 | value: 0 739 | } 740 | } 741 | } 742 | layer { 743 | name: "conv6_1_relu" 744 | type: "ReLU" 745 | bottom: "conv6_1_h" 746 | top: "conv6_1_h" 747 | } 748 | layer { 749 | name: "conv6_2_h" 750 | type: "Convolution" 751 | bottom: "conv6_1_h" 752 | top: "conv6_2_h" 753 | param { 754 | lr_mult: 1 755 | decay_mult: 1 756 | } 757 | param { 758 | lr_mult: 2 759 | decay_mult: 0 760 | } 761 | convolution_param { 762 | num_output: 256 763 | pad: 1 764 | kernel_size: 3 765 | stride: 2 766 | weight_filler { 767 | type: "xavier" 768 | } 769 | bias_filler { 770 | type: "constant" 771 | value: 0 772 | } 773 | } 774 | } 775 | layer { 776 | name: "conv6_2_relu" 777 | type: "ReLU" 778 | bottom: "conv6_2_h" 779 | top: "conv6_2_h" 780 | } 781 | layer { 782 | name: "conv7_1_h" 783 | type: "Convolution" 784 | bottom: "conv6_2_h" 785 | top: "conv7_1_h" 786 | param { 787 | lr_mult: 1 788 | decay_mult: 1 789 | } 790 | param { 791 | lr_mult: 2 792 | decay_mult: 0 793 | } 794 | convolution_param { 795 | num_output: 64 796 | pad: 0 797 | kernel_size: 1 798 | stride: 1 799 | weight_filler { 800 | type: "xavier" 801 | } 802 | bias_filler { 803 | type: "constant" 804 | value: 0 805 | } 806 | } 807 | } 808 | layer { 809 | name: "conv7_1_relu" 810 | type: "ReLU" 811 | bottom: "conv7_1_h" 812 | top: "conv7_1_h" 
813 | } 814 | layer { 815 | name: "conv7_2_h" 816 | type: "Convolution" 817 | bottom: "conv7_1_h" 818 | top: "conv7_2_h" 819 | param { 820 | lr_mult: 1 821 | decay_mult: 1 822 | } 823 | param { 824 | lr_mult: 2 825 | decay_mult: 0 826 | } 827 | convolution_param { 828 | num_output: 128 829 | pad: 1 830 | kernel_size: 3 831 | stride: 2 832 | weight_filler { 833 | type: "xavier" 834 | } 835 | bias_filler { 836 | type: "constant" 837 | value: 0 838 | } 839 | } 840 | } 841 | layer { 842 | name: "conv7_2_relu" 843 | type: "ReLU" 844 | bottom: "conv7_2_h" 845 | top: "conv7_2_h" 846 | } 847 | layer { 848 | name: "conv8_1_h" 849 | type: "Convolution" 850 | bottom: "conv7_2_h" 851 | top: "conv8_1_h" 852 | param { 853 | lr_mult: 1 854 | decay_mult: 1 855 | } 856 | param { 857 | lr_mult: 2 858 | decay_mult: 0 859 | } 860 | convolution_param { 861 | num_output: 64 862 | pad: 0 863 | kernel_size: 1 864 | stride: 1 865 | weight_filler { 866 | type: "xavier" 867 | } 868 | bias_filler { 869 | type: "constant" 870 | value: 0 871 | } 872 | } 873 | } 874 | layer { 875 | name: "conv8_1_relu" 876 | type: "ReLU" 877 | bottom: "conv8_1_h" 878 | top: "conv8_1_h" 879 | } 880 | layer { 881 | name: "conv8_2_h" 882 | type: "Convolution" 883 | bottom: "conv8_1_h" 884 | top: "conv8_2_h" 885 | param { 886 | lr_mult: 1 887 | decay_mult: 1 888 | } 889 | param { 890 | lr_mult: 2 891 | decay_mult: 0 892 | } 893 | convolution_param { 894 | num_output: 128 895 | pad: 1 896 | kernel_size: 3 897 | stride: 1 898 | weight_filler { 899 | type: "xavier" 900 | } 901 | bias_filler { 902 | type: "constant" 903 | value: 0 904 | } 905 | } 906 | } 907 | layer { 908 | name: "conv8_2_relu" 909 | type: "ReLU" 910 | bottom: "conv8_2_h" 911 | top: "conv8_2_h" 912 | } 913 | layer { 914 | name: "conv9_1_h" 915 | type: "Convolution" 916 | bottom: "conv8_2_h" 917 | top: "conv9_1_h" 918 | param { 919 | lr_mult: 1 920 | decay_mult: 1 921 | } 922 | param { 923 | lr_mult: 2 924 | decay_mult: 0 925 | } 926 | convolution_param 
{ 927 | num_output: 64 928 | pad: 0 929 | kernel_size: 1 930 | stride: 1 931 | weight_filler { 932 | type: "xavier" 933 | } 934 | bias_filler { 935 | type: "constant" 936 | value: 0 937 | } 938 | } 939 | } 940 | layer { 941 | name: "conv9_1_relu" 942 | type: "ReLU" 943 | bottom: "conv9_1_h" 944 | top: "conv9_1_h" 945 | } 946 | layer { 947 | name: "conv9_2_h" 948 | type: "Convolution" 949 | bottom: "conv9_1_h" 950 | top: "conv9_2_h" 951 | param { 952 | lr_mult: 1 953 | decay_mult: 1 954 | } 955 | param { 956 | lr_mult: 2 957 | decay_mult: 0 958 | } 959 | convolution_param { 960 | num_output: 128 961 | pad: 1 962 | kernel_size: 3 963 | stride: 1 964 | weight_filler { 965 | type: "xavier" 966 | } 967 | bias_filler { 968 | type: "constant" 969 | value: 0 970 | } 971 | } 972 | } 973 | layer { 974 | name: "conv9_2_relu" 975 | type: "ReLU" 976 | bottom: "conv9_2_h" 977 | top: "conv9_2_h" 978 | } 979 | layer { 980 | name: "conv4_3_norm" 981 | type: "Normalize" 982 | bottom: "layer_256_1_bn1" 983 | top: "conv4_3_norm" 984 | norm_param { 985 | across_spatial: false 986 | scale_filler { 987 | type: "constant" 988 | value: 20 989 | } 990 | channel_shared: false 991 | } 992 | } 993 | layer { 994 | name: "conv4_3_norm_mbox_loc" 995 | type: "Convolution" 996 | bottom: "conv4_3_norm" 997 | top: "conv4_3_norm_mbox_loc" 998 | param { 999 | lr_mult: 1 1000 | decay_mult: 1 1001 | } 1002 | param { 1003 | lr_mult: 2 1004 | decay_mult: 0 1005 | } 1006 | convolution_param { 1007 | num_output: 16 1008 | pad: 1 1009 | kernel_size: 3 1010 | stride: 1 1011 | weight_filler { 1012 | type: "xavier" 1013 | } 1014 | bias_filler { 1015 | type: "constant" 1016 | value: 0 1017 | } 1018 | } 1019 | } 1020 | layer { 1021 | name: "conv4_3_norm_mbox_loc_perm" 1022 | type: "Permute" 1023 | bottom: "conv4_3_norm_mbox_loc" 1024 | top: "conv4_3_norm_mbox_loc_perm" 1025 | permute_param { 1026 | order: 0 1027 | order: 2 1028 | order: 3 1029 | order: 1 1030 | } 1031 | } 1032 | layer { 1033 | name: 
"conv4_3_norm_mbox_loc_flat" 1034 | type: "Flatten" 1035 | bottom: "conv4_3_norm_mbox_loc_perm" 1036 | top: "conv4_3_norm_mbox_loc_flat" 1037 | flatten_param { 1038 | axis: 1 1039 | } 1040 | } 1041 | layer { 1042 | name: "conv4_3_norm_mbox_conf" 1043 | type: "Convolution" 1044 | bottom: "conv4_3_norm" 1045 | top: "conv4_3_norm_mbox_conf" 1046 | param { 1047 | lr_mult: 1 1048 | decay_mult: 1 1049 | } 1050 | param { 1051 | lr_mult: 2 1052 | decay_mult: 0 1053 | } 1054 | convolution_param { 1055 | num_output: 8 # 84 1056 | pad: 1 1057 | kernel_size: 3 1058 | stride: 1 1059 | weight_filler { 1060 | type: "xavier" 1061 | } 1062 | bias_filler { 1063 | type: "constant" 1064 | value: 0 1065 | } 1066 | } 1067 | } 1068 | layer { 1069 | name: "conv4_3_norm_mbox_conf_perm" 1070 | type: "Permute" 1071 | bottom: "conv4_3_norm_mbox_conf" 1072 | top: "conv4_3_norm_mbox_conf_perm" 1073 | permute_param { 1074 | order: 0 1075 | order: 2 1076 | order: 3 1077 | order: 1 1078 | } 1079 | } 1080 | layer { 1081 | name: "conv4_3_norm_mbox_conf_flat" 1082 | type: "Flatten" 1083 | bottom: "conv4_3_norm_mbox_conf_perm" 1084 | top: "conv4_3_norm_mbox_conf_flat" 1085 | flatten_param { 1086 | axis: 1 1087 | } 1088 | } 1089 | layer { 1090 | name: "conv4_3_norm_mbox_priorbox" 1091 | type: "PriorBox" 1092 | bottom: "conv4_3_norm" 1093 | bottom: "data" 1094 | top: "conv4_3_norm_mbox_priorbox" 1095 | prior_box_param { 1096 | min_size: 30.0 1097 | max_size: 60.0 1098 | aspect_ratio: 2 1099 | flip: true 1100 | clip: false 1101 | variance: 0.1 1102 | variance: 0.1 1103 | variance: 0.2 1104 | variance: 0.2 1105 | step: 8 1106 | offset: 0.5 1107 | } 1108 | } 1109 | layer { 1110 | name: "fc7_mbox_loc" 1111 | type: "Convolution" 1112 | bottom: "fc7" 1113 | top: "fc7_mbox_loc" 1114 | param { 1115 | lr_mult: 1 1116 | decay_mult: 1 1117 | } 1118 | param { 1119 | lr_mult: 2 1120 | decay_mult: 0 1121 | } 1122 | convolution_param { 1123 | num_output: 24 1124 | pad: 1 1125 | kernel_size: 3 1126 | stride: 1 1127 | 
weight_filler { 1128 | type: "xavier" 1129 | } 1130 | bias_filler { 1131 | type: "constant" 1132 | value: 0 1133 | } 1134 | } 1135 | } 1136 | layer { 1137 | name: "fc7_mbox_loc_perm" 1138 | type: "Permute" 1139 | bottom: "fc7_mbox_loc" 1140 | top: "fc7_mbox_loc_perm" 1141 | permute_param { 1142 | order: 0 1143 | order: 2 1144 | order: 3 1145 | order: 1 1146 | } 1147 | } 1148 | layer { 1149 | name: "fc7_mbox_loc_flat" 1150 | type: "Flatten" 1151 | bottom: "fc7_mbox_loc_perm" 1152 | top: "fc7_mbox_loc_flat" 1153 | flatten_param { 1154 | axis: 1 1155 | } 1156 | } 1157 | layer { 1158 | name: "fc7_mbox_conf" 1159 | type: "Convolution" 1160 | bottom: "fc7" 1161 | top: "fc7_mbox_conf" 1162 | param { 1163 | lr_mult: 1 1164 | decay_mult: 1 1165 | } 1166 | param { 1167 | lr_mult: 2 1168 | decay_mult: 0 1169 | } 1170 | convolution_param { 1171 | num_output: 12 # 126 1172 | pad: 1 1173 | kernel_size: 3 1174 | stride: 1 1175 | weight_filler { 1176 | type: "xavier" 1177 | } 1178 | bias_filler { 1179 | type: "constant" 1180 | value: 0 1181 | } 1182 | } 1183 | } 1184 | layer { 1185 | name: "fc7_mbox_conf_perm" 1186 | type: "Permute" 1187 | bottom: "fc7_mbox_conf" 1188 | top: "fc7_mbox_conf_perm" 1189 | permute_param { 1190 | order: 0 1191 | order: 2 1192 | order: 3 1193 | order: 1 1194 | } 1195 | } 1196 | layer { 1197 | name: "fc7_mbox_conf_flat" 1198 | type: "Flatten" 1199 | bottom: "fc7_mbox_conf_perm" 1200 | top: "fc7_mbox_conf_flat" 1201 | flatten_param { 1202 | axis: 1 1203 | } 1204 | } 1205 | layer { 1206 | name: "fc7_mbox_priorbox" 1207 | type: "PriorBox" 1208 | bottom: "fc7" 1209 | bottom: "data" 1210 | top: "fc7_mbox_priorbox" 1211 | prior_box_param { 1212 | min_size: 60.0 1213 | max_size: 111.0 1214 | aspect_ratio: 2 1215 | aspect_ratio: 3 1216 | flip: true 1217 | clip: false 1218 | variance: 0.1 1219 | variance: 0.1 1220 | variance: 0.2 1221 | variance: 0.2 1222 | step: 16 1223 | offset: 0.5 1224 | } 1225 | } 1226 | layer { 1227 | name: "conv6_2_mbox_loc" 1228 | type: 
"Convolution" 1229 | bottom: "conv6_2_h" 1230 | top: "conv6_2_mbox_loc" 1231 | param { 1232 | lr_mult: 1 1233 | decay_mult: 1 1234 | } 1235 | param { 1236 | lr_mult: 2 1237 | decay_mult: 0 1238 | } 1239 | convolution_param { 1240 | num_output: 24 1241 | pad: 1 1242 | kernel_size: 3 1243 | stride: 1 1244 | weight_filler { 1245 | type: "xavier" 1246 | } 1247 | bias_filler { 1248 | type: "constant" 1249 | value: 0 1250 | } 1251 | } 1252 | } 1253 | layer { 1254 | name: "conv6_2_mbox_loc_perm" 1255 | type: "Permute" 1256 | bottom: "conv6_2_mbox_loc" 1257 | top: "conv6_2_mbox_loc_perm" 1258 | permute_param { 1259 | order: 0 1260 | order: 2 1261 | order: 3 1262 | order: 1 1263 | } 1264 | } 1265 | layer { 1266 | name: "conv6_2_mbox_loc_flat" 1267 | type: "Flatten" 1268 | bottom: "conv6_2_mbox_loc_perm" 1269 | top: "conv6_2_mbox_loc_flat" 1270 | flatten_param { 1271 | axis: 1 1272 | } 1273 | } 1274 | layer { 1275 | name: "conv6_2_mbox_conf" 1276 | type: "Convolution" 1277 | bottom: "conv6_2_h" 1278 | top: "conv6_2_mbox_conf" 1279 | param { 1280 | lr_mult: 1 1281 | decay_mult: 1 1282 | } 1283 | param { 1284 | lr_mult: 2 1285 | decay_mult: 0 1286 | } 1287 | convolution_param { 1288 | num_output: 12 # 126 1289 | pad: 1 1290 | kernel_size: 3 1291 | stride: 1 1292 | weight_filler { 1293 | type: "xavier" 1294 | } 1295 | bias_filler { 1296 | type: "constant" 1297 | value: 0 1298 | } 1299 | } 1300 | } 1301 | layer { 1302 | name: "conv6_2_mbox_conf_perm" 1303 | type: "Permute" 1304 | bottom: "conv6_2_mbox_conf" 1305 | top: "conv6_2_mbox_conf_perm" 1306 | permute_param { 1307 | order: 0 1308 | order: 2 1309 | order: 3 1310 | order: 1 1311 | } 1312 | } 1313 | layer { 1314 | name: "conv6_2_mbox_conf_flat" 1315 | type: "Flatten" 1316 | bottom: "conv6_2_mbox_conf_perm" 1317 | top: "conv6_2_mbox_conf_flat" 1318 | flatten_param { 1319 | axis: 1 1320 | } 1321 | } 1322 | layer { 1323 | name: "conv6_2_mbox_priorbox" 1324 | type: "PriorBox" 1325 | bottom: "conv6_2_h" 1326 | bottom: "data" 1327 
| top: "conv6_2_mbox_priorbox" 1328 | prior_box_param { 1329 | min_size: 111.0 1330 | max_size: 162.0 1331 | aspect_ratio: 2 1332 | aspect_ratio: 3 1333 | flip: true 1334 | clip: false 1335 | variance: 0.1 1336 | variance: 0.1 1337 | variance: 0.2 1338 | variance: 0.2 1339 | step: 32 1340 | offset: 0.5 1341 | } 1342 | } 1343 | layer { 1344 | name: "conv7_2_mbox_loc" 1345 | type: "Convolution" 1346 | bottom: "conv7_2_h" 1347 | top: "conv7_2_mbox_loc" 1348 | param { 1349 | lr_mult: 1 1350 | decay_mult: 1 1351 | } 1352 | param { 1353 | lr_mult: 2 1354 | decay_mult: 0 1355 | } 1356 | convolution_param { 1357 | num_output: 24 1358 | pad: 1 1359 | kernel_size: 3 1360 | stride: 1 1361 | weight_filler { 1362 | type: "xavier" 1363 | } 1364 | bias_filler { 1365 | type: "constant" 1366 | value: 0 1367 | } 1368 | } 1369 | } 1370 | layer { 1371 | name: "conv7_2_mbox_loc_perm" 1372 | type: "Permute" 1373 | bottom: "conv7_2_mbox_loc" 1374 | top: "conv7_2_mbox_loc_perm" 1375 | permute_param { 1376 | order: 0 1377 | order: 2 1378 | order: 3 1379 | order: 1 1380 | } 1381 | } 1382 | layer { 1383 | name: "conv7_2_mbox_loc_flat" 1384 | type: "Flatten" 1385 | bottom: "conv7_2_mbox_loc_perm" 1386 | top: "conv7_2_mbox_loc_flat" 1387 | flatten_param { 1388 | axis: 1 1389 | } 1390 | } 1391 | layer { 1392 | name: "conv7_2_mbox_conf" 1393 | type: "Convolution" 1394 | bottom: "conv7_2_h" 1395 | top: "conv7_2_mbox_conf" 1396 | param { 1397 | lr_mult: 1 1398 | decay_mult: 1 1399 | } 1400 | param { 1401 | lr_mult: 2 1402 | decay_mult: 0 1403 | } 1404 | convolution_param { 1405 | num_output: 12 # 126 1406 | pad: 1 1407 | kernel_size: 3 1408 | stride: 1 1409 | weight_filler { 1410 | type: "xavier" 1411 | } 1412 | bias_filler { 1413 | type: "constant" 1414 | value: 0 1415 | } 1416 | } 1417 | } 1418 | layer { 1419 | name: "conv7_2_mbox_conf_perm" 1420 | type: "Permute" 1421 | bottom: "conv7_2_mbox_conf" 1422 | top: "conv7_2_mbox_conf_perm" 1423 | permute_param { 1424 | order: 0 1425 | order: 2 1426 | 
order: 3 1427 | order: 1 1428 | } 1429 | } 1430 | layer { 1431 | name: "conv7_2_mbox_conf_flat" 1432 | type: "Flatten" 1433 | bottom: "conv7_2_mbox_conf_perm" 1434 | top: "conv7_2_mbox_conf_flat" 1435 | flatten_param { 1436 | axis: 1 1437 | } 1438 | } 1439 | layer { 1440 | name: "conv7_2_mbox_priorbox" 1441 | type: "PriorBox" 1442 | bottom: "conv7_2_h" 1443 | bottom: "data" 1444 | top: "conv7_2_mbox_priorbox" 1445 | prior_box_param { 1446 | min_size: 162.0 1447 | max_size: 213.0 1448 | aspect_ratio: 2 1449 | aspect_ratio: 3 1450 | flip: true 1451 | clip: false 1452 | variance: 0.1 1453 | variance: 0.1 1454 | variance: 0.2 1455 | variance: 0.2 1456 | step: 64 1457 | offset: 0.5 1458 | } 1459 | } 1460 | layer { 1461 | name: "conv8_2_mbox_loc" 1462 | type: "Convolution" 1463 | bottom: "conv8_2_h" 1464 | top: "conv8_2_mbox_loc" 1465 | param { 1466 | lr_mult: 1 1467 | decay_mult: 1 1468 | } 1469 | param { 1470 | lr_mult: 2 1471 | decay_mult: 0 1472 | } 1473 | convolution_param { 1474 | num_output: 16 1475 | pad: 1 1476 | kernel_size: 3 1477 | stride: 1 1478 | weight_filler { 1479 | type: "xavier" 1480 | } 1481 | bias_filler { 1482 | type: "constant" 1483 | value: 0 1484 | } 1485 | } 1486 | } 1487 | layer { 1488 | name: "conv8_2_mbox_loc_perm" 1489 | type: "Permute" 1490 | bottom: "conv8_2_mbox_loc" 1491 | top: "conv8_2_mbox_loc_perm" 1492 | permute_param { 1493 | order: 0 1494 | order: 2 1495 | order: 3 1496 | order: 1 1497 | } 1498 | } 1499 | layer { 1500 | name: "conv8_2_mbox_loc_flat" 1501 | type: "Flatten" 1502 | bottom: "conv8_2_mbox_loc_perm" 1503 | top: "conv8_2_mbox_loc_flat" 1504 | flatten_param { 1505 | axis: 1 1506 | } 1507 | } 1508 | layer { 1509 | name: "conv8_2_mbox_conf" 1510 | type: "Convolution" 1511 | bottom: "conv8_2_h" 1512 | top: "conv8_2_mbox_conf" 1513 | param { 1514 | lr_mult: 1 1515 | decay_mult: 1 1516 | } 1517 | param { 1518 | lr_mult: 2 1519 | decay_mult: 0 1520 | } 1521 | convolution_param { 1522 | num_output: 8 # 84 1523 | pad: 1 1524 | 
kernel_size: 3 1525 | stride: 1 1526 | weight_filler { 1527 | type: "xavier" 1528 | } 1529 | bias_filler { 1530 | type: "constant" 1531 | value: 0 1532 | } 1533 | } 1534 | } 1535 | layer { 1536 | name: "conv8_2_mbox_conf_perm" 1537 | type: "Permute" 1538 | bottom: "conv8_2_mbox_conf" 1539 | top: "conv8_2_mbox_conf_perm" 1540 | permute_param { 1541 | order: 0 1542 | order: 2 1543 | order: 3 1544 | order: 1 1545 | } 1546 | } 1547 | layer { 1548 | name: "conv8_2_mbox_conf_flat" 1549 | type: "Flatten" 1550 | bottom: "conv8_2_mbox_conf_perm" 1551 | top: "conv8_2_mbox_conf_flat" 1552 | flatten_param { 1553 | axis: 1 1554 | } 1555 | } 1556 | layer { 1557 | name: "conv8_2_mbox_priorbox" 1558 | type: "PriorBox" 1559 | bottom: "conv8_2_h" 1560 | bottom: "data" 1561 | top: "conv8_2_mbox_priorbox" 1562 | prior_box_param { 1563 | min_size: 213.0 1564 | max_size: 264.0 1565 | aspect_ratio: 2 1566 | flip: true 1567 | clip: false 1568 | variance: 0.1 1569 | variance: 0.1 1570 | variance: 0.2 1571 | variance: 0.2 1572 | step: 100 1573 | offset: 0.5 1574 | } 1575 | } 1576 | layer { 1577 | name: "conv9_2_mbox_loc" 1578 | type: "Convolution" 1579 | bottom: "conv9_2_h" 1580 | top: "conv9_2_mbox_loc" 1581 | param { 1582 | lr_mult: 1 1583 | decay_mult: 1 1584 | } 1585 | param { 1586 | lr_mult: 2 1587 | decay_mult: 0 1588 | } 1589 | convolution_param { 1590 | num_output: 16 1591 | pad: 1 1592 | kernel_size: 3 1593 | stride: 1 1594 | weight_filler { 1595 | type: "xavier" 1596 | } 1597 | bias_filler { 1598 | type: "constant" 1599 | value: 0 1600 | } 1601 | } 1602 | } 1603 | layer { 1604 | name: "conv9_2_mbox_loc_perm" 1605 | type: "Permute" 1606 | bottom: "conv9_2_mbox_loc" 1607 | top: "conv9_2_mbox_loc_perm" 1608 | permute_param { 1609 | order: 0 1610 | order: 2 1611 | order: 3 1612 | order: 1 1613 | } 1614 | } 1615 | layer { 1616 | name: "conv9_2_mbox_loc_flat" 1617 | type: "Flatten" 1618 | bottom: "conv9_2_mbox_loc_perm" 1619 | top: "conv9_2_mbox_loc_flat" 1620 | flatten_param { 1621 | 
axis: 1 1622 | } 1623 | } 1624 | layer { 1625 | name: "conv9_2_mbox_conf" 1626 | type: "Convolution" 1627 | bottom: "conv9_2_h" 1628 | top: "conv9_2_mbox_conf" 1629 | param { 1630 | lr_mult: 1 1631 | decay_mult: 1 1632 | } 1633 | param { 1634 | lr_mult: 2 1635 | decay_mult: 0 1636 | } 1637 | convolution_param { 1638 | num_output: 8 # 84 1639 | pad: 1 1640 | kernel_size: 3 1641 | stride: 1 1642 | weight_filler { 1643 | type: "xavier" 1644 | } 1645 | bias_filler { 1646 | type: "constant" 1647 | value: 0 1648 | } 1649 | } 1650 | } 1651 | layer { 1652 | name: "conv9_2_mbox_conf_perm" 1653 | type: "Permute" 1654 | bottom: "conv9_2_mbox_conf" 1655 | top: "conv9_2_mbox_conf_perm" 1656 | permute_param { 1657 | order: 0 1658 | order: 2 1659 | order: 3 1660 | order: 1 1661 | } 1662 | } 1663 | layer { 1664 | name: "conv9_2_mbox_conf_flat" 1665 | type: "Flatten" 1666 | bottom: "conv9_2_mbox_conf_perm" 1667 | top: "conv9_2_mbox_conf_flat" 1668 | flatten_param { 1669 | axis: 1 1670 | } 1671 | } 1672 | layer { 1673 | name: "conv9_2_mbox_priorbox" 1674 | type: "PriorBox" 1675 | bottom: "conv9_2_h" 1676 | bottom: "data" 1677 | top: "conv9_2_mbox_priorbox" 1678 | prior_box_param { 1679 | min_size: 264.0 1680 | max_size: 315.0 1681 | aspect_ratio: 2 1682 | flip: true 1683 | clip: false 1684 | variance: 0.1 1685 | variance: 0.1 1686 | variance: 0.2 1687 | variance: 0.2 1688 | step: 300 1689 | offset: 0.5 1690 | } 1691 | } 1692 | layer { 1693 | name: "mbox_loc" 1694 | type: "Concat" 1695 | bottom: "conv4_3_norm_mbox_loc_flat" 1696 | bottom: "fc7_mbox_loc_flat" 1697 | bottom: "conv6_2_mbox_loc_flat" 1698 | bottom: "conv7_2_mbox_loc_flat" 1699 | bottom: "conv8_2_mbox_loc_flat" 1700 | bottom: "conv9_2_mbox_loc_flat" 1701 | top: "mbox_loc" 1702 | concat_param { 1703 | axis: 1 1704 | } 1705 | } 1706 | layer { 1707 | name: "mbox_conf" 1708 | type: "Concat" 1709 | bottom: "conv4_3_norm_mbox_conf_flat" 1710 | bottom: "fc7_mbox_conf_flat" 1711 | bottom: "conv6_2_mbox_conf_flat" 1712 | bottom: 
"conv7_2_mbox_conf_flat" 1713 | bottom: "conv8_2_mbox_conf_flat" 1714 | bottom: "conv9_2_mbox_conf_flat" 1715 | top: "mbox_conf" 1716 | concat_param { 1717 | axis: 1 1718 | } 1719 | } 1720 | layer { 1721 | name: "mbox_priorbox" 1722 | type: "Concat" 1723 | bottom: "conv4_3_norm_mbox_priorbox" 1724 | bottom: "fc7_mbox_priorbox" 1725 | bottom: "conv6_2_mbox_priorbox" 1726 | bottom: "conv7_2_mbox_priorbox" 1727 | bottom: "conv8_2_mbox_priorbox" 1728 | bottom: "conv9_2_mbox_priorbox" 1729 | top: "mbox_priorbox" 1730 | concat_param { 1731 | axis: 2 1732 | } 1733 | } 1734 | 1735 | layer { 1736 | name: "mbox_conf_reshape" 1737 | type: "Reshape" 1738 | bottom: "mbox_conf" 1739 | top: "mbox_conf_reshape" 1740 | reshape_param { 1741 | shape { 1742 | dim: 0 1743 | dim: -1 1744 | dim: 2 1745 | } 1746 | } 1747 | } 1748 | layer { 1749 | name: "mbox_conf_softmax" 1750 | type: "Softmax" 1751 | bottom: "mbox_conf_reshape" 1752 | top: "mbox_conf_softmax" 1753 | softmax_param { 1754 | axis: 2 1755 | } 1756 | } 1757 | layer { 1758 | name: "mbox_conf_flatten" 1759 | type: "Flatten" 1760 | bottom: "mbox_conf_softmax" 1761 | top: "mbox_conf_flatten" 1762 | flatten_param { 1763 | axis: 1 1764 | } 1765 | } 1766 | 1767 | layer { 1768 | name: "detection_out" 1769 | type: "DetectionOutput" 1770 | bottom: "mbox_loc" 1771 | bottom: "mbox_conf_flatten" 1772 | bottom: "mbox_priorbox" 1773 | top: "detection_out" 1774 | include { 1775 | phase: TEST 1776 | } 1777 | detection_output_param { 1778 | num_classes: 2 1779 | share_location: true 1780 | background_label_id: 0 1781 | nms_param { 1782 | nms_threshold: 0.45 1783 | top_k: 400 1784 | } 1785 | code_type: CENTER_SIZE 1786 | keep_top_k: 200 1787 | confidence_threshold: 0.01 1788 | } 1789 | } 1790 | -------------------------------------------------------------------------------- /FaceDetectionOpenCV/face_detection_ssd_parallel.py: -------------------------------------------------------------------------------- 1 | # importing required libaries 2 | 
import cv2
import numpy as np
from imutils.video import WebcamVideoStream, FPS  # pip install imutils (if imutils library not already installed)

# parameters for performing face detection
model_config_filepath = 'deploy.prototxt.txt'
model_weights_filepath = 'res10_300x300_ssd_iter_140000.caffemodel'
confidence_thresh = 0.9  # threshold for filtering weak detections

# loading inference model using cv2's dnn module
model = cv2.dnn.readNet(model=model_weights_filepath, config=model_config_filepath)


def detect_faces(frame):
    """Detect faces in a single image/video frame and box them in red.

    The frame is resized to 300x300, mean-subtracted, and run through the
    module-level ``model``. Every detection whose confidence is at least
    ``confidence_thresh`` is drawn onto ``frame`` as a rectangle.

    Args:
        frame: image array whose first two dimensions are (height, width).
            NOTE(review): presumably BGR, since the mean values below are
            given in BGR order - confirm against the caller.

    Returns:
        The same ``frame`` object, annotated in place.
    """
    # original frame resolution, needed to scale boxes back up
    orig_h, orig_w = frame.shape[:2]

    # preprocessing input frame
    h, w = 300, 300  # required height and width after resizing
    resized_frame = cv2.resize(frame, (w, h))
    # mean subtraction and conversion to a blob; mean values are in BGR ordering
    blob = cv2.dnn.blobFromImage(resized_frame, scalefactor=1, size=(w, h), mean=(104, 177, 123))

    # performing inference
    model.setInput(blob)
    detections = model.forward()  # indexed below as detections[0, 0, i, :]

    # box coordinates are multiplied by these factors to map them back to
    # the original frame's pixel dimensions
    scale = np.array([orig_w, orig_h, orig_w, orig_h])

    # looping over all detections and annotating the frame with confident ones
    for i in range(detections.shape[2]):
        confidence = detections[0, 0, i, 2]  # index 2 stores the confidence of the detection
        if confidence < confidence_thresh:
            continue

        # indices 3..6 store [xmin, ymin, xmax, ymax]
        bbox = detections[0, 0, i, 3:7] * scale
        # `np.int` was removed in NumPy 1.24; the builtin `int` is the
        # supported spelling. astype() truncates toward zero (it does not
        # round), and tolist() hands plain Python ints to OpenCV.
        xmin, ymin, xmax, ymax = bbox.astype(int).tolist()
        cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (0, 0, 255), 2)

    return frame


# setting up input video stream for reading from webcam
webcam_stream = WebcamVideoStream(0)  # opening video stream from primary camera
webcam_stream.start()
fps = FPS()  # for computing frames processed per second

# processing video frames
fps.start()
while True:
    # reading next frame from input stream
    frame = webcam_stream.read()
    if frame is None:
        # no frame available (e.g. camera could not be read); stop cleanly
        # instead of crashing on `frame.shape` inside detect_faces
        print("[Exiting] No frame received from webcam stream.")
        break
    fps.update()

    # detecting faces in the read frame
    frame_with_detections = detect_faces(frame)

    # displaying the frame
    cv2.imshow('Detected Faces', frame_with_detections)
    key_pressed = cv2.waitKey(1)  # a 1 millisecond delay
    if key_pressed == ord('q'):
        break
fps.stop()

# closing open streams, etc
webcam_stream.stop()
cv2.destroyAllWindows()

# printing stats - fps
print("FPS:{}".format(fps.fps()))

# --------------------------------------------------------------------------------
# /FaceDetectionOpenCV/res10_300x300_ssd_iter_140000.caffemodel:
# --------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasugupta9/DeepLearningProjects/f6396195d04cf91b59d1b78bddac27ba0a607119/FaceDetectionOpenCV/res10_300x300_ssd_iter_140000.caffemodel -------------------------------------------------------------------------------- /MultiThreadedVideoProcessing/Readme.md: -------------------------------------------------------------------------------- 1 | # Faster Real-Time Video Processing using Multi-Threading in Python 2 | 1. Two Python scripts are included for processing video frames from a webcam connected to a laptop or desktop. One script uses a non-threaded implementation and the second script uses a threaded implementation. 3 | 2. The multi-threaded implementation can help achieve a higher FPS. 4 | 3. The `delay` variable in the code can be used to simulate the time taken by a video processing task, such as running a deep learning model for face detection. Different delay values can be used to evaluate performance. 5 | 4. [Link to Medium blog post with more details](https://gvasu.medium.com/faster-real-time-video-processing-using-multi-threading-in-python-8902589e1055) 6 | 7 | ## Programming language and libraries used 8 | 1. Python programming language 9 | 2. OpenCV library 10 | 3.
# Other Python libraries including threading library
#
# --------------------------------------------------------------------------------
# /MultiThreadedVideoProcessing/video_processing_parallel.py:
# --------------------------------------------------------------------------------
# importing required libraries
import sys
import time
from threading import Thread, current_thread  # library for implementing multi-threaded processing

import cv2


# helper class for implementing multi-threaded frame capture
class WebcamStream:
    """Read frames from a video capture device on a background thread.

    A worker thread keeps calling ``VideoCapture.read()`` and stores the most
    recent successfully grabbed frame, so ``read()`` returns immediately with
    the latest frame instead of blocking on the camera.

    Attributes set by the constructor:
        stream_id: device index passed to ``cv2.VideoCapture``.
        vcap: the opened ``cv2.VideoCapture``.
        grabbed: success flag of the most recent ``vcap.read()``.
        frame: most recent successfully grabbed frame.
        stopped: True while the reader loop is not (or no longer) running.
        t: the reader ``Thread`` (created here, started by ``start()``).
    """

    def __init__(self, stream_id=0):
        """Open the capture device and grab one frame to initialise state.

        Exits the process (``SystemExit``) if the device cannot be opened or
        the first frame cannot be read.
        """
        self.stream_id = stream_id  # default is 0 for primary camera

        # opening video capture stream
        self.vcap = cv2.VideoCapture(self.stream_id)
        if not self.vcap.isOpened():
            print("[Exiting]: Error accessing webcam stream.")
            sys.exit(1)  # non-zero status: this is a failure, not a clean exit
        # CAP_PROP_FPS is the named constant for property id 5
        fps_input_stream = int(self.vcap.get(cv2.CAP_PROP_FPS))
        print("FPS of webcam hardware/input stream: {}".format(fps_input_stream))

        # reading a single frame from vcap stream for initializing
        self.grabbed, self.frame = self.vcap.read()
        if not self.grabbed:
            print('[Exiting] No more frames to read')
            self.vcap.release()  # do not leave the device open on the way out
            sys.exit(1)

        # self.stopped is True until start() is called, and again once the
        # reader loop has been asked to stop or has run out of frames
        self.stopped = True

        # reference to the thread for reading next available frame from input stream
        self.t = Thread(target=self.update, args=())
        self.t.daemon = True  # a daemon thread does not keep the program alive at exit

    # method for starting the thread for grabbing next available frame in input stream
    def start(self):
        """Mark the stream as running and start the reader thread."""
        self.stopped = False
        self.t.start()

    # method for reading next frame (runs on the reader thread)
    def update(self):
        """Keep grabbing frames until stopped or the stream runs dry.

        Releases the capture device when the loop ends.
        """
        while not self.stopped:
            grabbed, frame = self.vcap.read()
            self.grabbed = grabbed
            if not grabbed:
                print('[Exiting] No more frames to read')
                self.stopped = True
                break
            # publish only successfully grabbed frames, so read() never hands
            # the consumer a None frame from a failed grab
            self.frame = frame
        self.vcap.release()

    # method for returning latest read frame
    def read(self):
        """Return the most recent successfully grabbed frame."""
        return self.frame

    # method called to stop reading frames
    def stop(self):
        """Ask the reader loop to stop and briefly wait for it to finish.

        The wait gives ``update()`` a chance to release the capture device
        before the program moves on; it is bounded so a blocked camera read
        cannot hang shutdown.
        """
        self.stopped = True
        if self.t.is_alive() and self.t is not current_thread():
            self.t.join(timeout=1.0)


# initializing and starting multi-threaded webcam capture input stream
webcam_stream = WebcamStream(stream_id=0)  # stream_id = 0 is for primary camera
webcam_stream.start()

# delay for simulating time taken for processing a frame
delay = 0.03  # delay value in seconds. so, delay=1 is equivalent to 1 second

# processing frames in input stream
num_frames_processed = 0
start = time.time()
while not webcam_stream.stopped:
    frame = webcam_stream.read()

    time.sleep(delay)
    num_frames_processed += 1

    cv2.imshow('frame', frame)
    key = cv2.waitKey(1)
    if key == ord('q'):
        break
end = time.time()
webcam_stream.stop()  # stop the webcam stream

# printing time elapsed and fps
elapsed = end - start
# guard against a zero interval on coarse clocks when the loop exits at once
fps = num_frames_processed / elapsed if elapsed > 0 else 0.0
print("FPS: {} , Elapsed Time: {} , Frames Processed: {}".format(fps, elapsed, num_frames_processed))

# closing all windows
cv2.destroyAllWindows()

# --------------------------------------------------------------------------------
# /MultiThreadedVideoProcessing/video_processing_simple.py:
# --------------------------------------------------------------------------------
# (start of the non-threaded script, unchanged; it continues after this block)
# importing required libraries
import cv2
import time

# opening video capture stream
vcap = cv2.VideoCapture(0)
if vcap.isOpened() is False :
    print("[Exiting]: Error accessing webcam stream.")
    exit(0)
fps_input_stream = int(vcap.get(5))
print("FPS of webcam hardware/input stream: {}".format(fps_input_stream))
grabbed, frame = vcap.read() # reading single frame for initialization/ hardware warm-up

# processing frames in input
stream 15 | num_frames_processed = 0 16 | start = time.time() 17 | while True : 18 | grabbed, frame = vcap.read() 19 | if grabbed is False : 20 | print('[Exiting] No more frames to read') 21 | break 22 | 23 | # adding a delay for simulating time taken for processing a frame 24 | delay = 0.03 # delay value in seconds. so, delay=1 is equivalent to 1 second 25 | time.sleep(delay) 26 | num_frames_processed += 1 27 | 28 | cv2.imshow('frame' , frame) 29 | key = cv2.waitKey(1) 30 | if key == ord('q'): 31 | break 32 | end = time.time() 33 | 34 | # printing time elapsed and fps 35 | elapsed = end-start 36 | fps = num_frames_processed/elapsed 37 | print("FPS: {} , Elapsed Time: {} , Frames Processed: {}".format(fps, elapsed, num_frames_processed)) 38 | 39 | # releasing input stream , closing all windows 40 | vcap.release() 41 | cv2.destroyAllWindows() 42 | -------------------------------------------------------------------------------- /NeuralStyleTransfer/README.md: -------------------------------------------------------------------------------- 1 | # Neural Style Transfer using TensorFlow 2 | Neural Style Transfer is a technique for generating new artistic images from existing content and style images. This project uses an unsupervised deep learning algorithm for performing neural style transfer. 3 | 4 | ![Generated Style Transferred Image](neural_style_transfer.png) 5 | 6 | ## Programming Language and Libraries used 7 | 1. Python 3.x 8 | 2. Tensorflow 2.x and Keras API 9 | 3. Other python libraries like Numpy, OpenCV 10 | 11 | ## Notes 12 | 1. 
[Link to Medium Article](https://medium.com/@vasu.gupta9/neural-style-transfer-using-tensorflow-7e0f3e789e0c) presenting the main ideas and implementation details of Neural Style Transfer using TensorFlow 13 | 14 | -------------------------------------------------------------------------------- /NeuralStyleTransfer/content.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasugupta9/DeepLearningProjects/f6396195d04cf91b59d1b78bddac27ba0a607119/NeuralStyleTransfer/content.jpg -------------------------------------------------------------------------------- /NeuralStyleTransfer/neural_style_transfer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasugupta9/DeepLearningProjects/f6396195d04cf91b59d1b78bddac27ba0a607119/NeuralStyleTransfer/neural_style_transfer.png -------------------------------------------------------------------------------- /NeuralStyleTransfer/style_mosaic.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasugupta9/DeepLearningProjects/f6396195d04cf91b59d1b78bddac27ba0a607119/NeuralStyleTransfer/style_mosaic.jpg -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Computer Vision and Deep Learning Projects 2 | This repo contains code for some of my computer vision and deep learning projects.
3 | 4 | ## List of Projects 5 | | Project | 6 | | ------------- | 7 | | [Image Classifier for The Street View House Numbers (SVHN) Dataset](StreetViewHouseNumbers_Classifier/) | 8 | | [A Simple CNN Image Classifier for CIFAR-10 dataset](CIFAR10_Image_Classifier/) | 9 | | [Convolutional Neural Network Visualizer](ConvolutionalNeuralNetworkVisualizer/) | 10 | | [Transfer Learning example using ResNet-50 architecture](TransferLearningResnet/) | 11 | | [Neural Style Transfer using TensorFlow](NeuralStyleTransfer/) [[ Blog ]](https://medium.com/@vasu.gupta9/neural-style-transfer-using-tensorflow-7e0f3e789e0c) | 12 | | [Video Activity Recognition using Pretrained 3D ResNet model](VideoActivityRecognition3DResnet/) [[ Blog ]](https://gvasu.medium.com/recognizing-400-different-activities-in-videos-using-python-and-opencv-ee59cc6d61f6) | 13 | | [Faster Real-Time Video Processing using Multi-Threading in Python](MultiThreadedVideoProcessing/) [[ Blog ]](https://gvasu.medium.com/faster-real-time-video-processing-using-multi-threading-in-python-8902589e1055) | 14 | | [Real-Time Face Detection Using OpenCV](FaceDetectionOpenCV/) | -------------------------------------------------------------------------------- /StreetViewHouseNumbers_Classifier/Readme.md: -------------------------------------------------------------------------------- 1 | # This project is an Image Classifier for the Street View House Numbers (SVHN) Dataset obtained from house numbers in Google Street View images. 2 | 3 | ![SVHN overview image](SVHN_Overview_Image.png) 4 | [*Image Source*](http://ufldl.stanford.edu/housenumbers/) 5 | 6 | ## Project folder includes 7 | 1. Readme.md (*this file*) 8 | 2. Google Colab notebook for building, training and testing a Convolutional Neural Network on the SVHN dataset. 9 | 3. Misc 10 | * SVHN Overview Image 11 | 12 | ## Libraries used 13 | 1. Python programming language 14 | 2. Deep learning libraries TensorFlow 2.x and Keras API 15 | 3. 
Other python libraries including numpy, etc 16 | 17 | ## Notes 18 | 1. The SVHN dataset has 2 formats. This project uses Format 2: Cropped Digits 19 | -------------------------------------------------------------------------------- /StreetViewHouseNumbers_Classifier/SVHN_Overview_Image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasugupta9/DeepLearningProjects/f6396195d04cf91b59d1b78bddac27ba0a607119/StreetViewHouseNumbers_Classifier/SVHN_Overview_Image.png -------------------------------------------------------------------------------- /TransferLearningResnet/Readme.md: -------------------------------------------------------------------------------- 1 | # Purpose of this project is to demonstrate how to build and train a Deep Neural Network using Transfer Learning 2 | 1. Dataset used is Cifar-10 which contains images from 10 different classes (https://www.cs.toronto.edu/~kriz/cifar.html) 3 | 2. Architecture used is ResNet-50 (https://keras.io/api/applications/) 4 | 5 | ![Example images from cifar-10 dataset](cifar10_img.png) 6 | [*Image Source*](https://www.cs.toronto.edu/~kriz/cifar.html) 7 | 8 | ## Project folder includes 9 | 1. Readme.md (this file) 10 | 2. Google colab notebook for building, training and testing a ResNet50 architecture using transfer learning on the CIFAR-10 dataset. 11 | 3. Misc 12 | - CIFAR-10 overview image 13 | 14 | ## Programming language and Libraries used 15 | 1. Python programming language 16 | 2. Deep learning libraries tensorflow 2.x and Keras API 17 | 3. 
Python libraries including numpy, etc 18 | -------------------------------------------------------------------------------- /TransferLearningResnet/Transfer_Learning_ResNet_ImageClassifier.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "Transfer_Learning_ResNet_ImageClassifier.ipynb", 7 | "provenance": [], 8 | "collapsed_sections": [], 9 | "toc_visible": true, 10 | "authorship_tag": "ABX9TyMgQ4Qi0SYj/dBzG221AxvF", 11 | "include_colab_link": true 12 | }, 13 | "kernelspec": { 14 | "name": "python3", 15 | "display_name": "Python 3" 16 | }, 17 | "language_info": { 18 | "name": "python" 19 | }, 20 | "accelerator": "GPU" 21 | }, 22 | "cells": [ 23 | { 24 | "cell_type": "markdown", 25 | "metadata": { 26 | "id": "view-in-github", 27 | "colab_type": "text" 28 | }, 29 | "source": [ 30 | "\"Open" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": { 36 | "id": "ZdvsU9L3T_8P" 37 | }, 38 | "source": [ 39 | "# Purpose of this notebook is to show an example of training a Deep Neural Network using Transfer Learning\n", 40 | "1. Dataset used is Cifar-10 which contains images from 10 different classes (https://www.cs.toronto.edu/~kriz/cifar.html)\n", 41 | "2. 
Architecture used is ResNet-50 (https://keras.io/api/applications/)\n" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "metadata": { 47 | "id": "5puSqI9GSmk2" 48 | }, 49 | "source": [ 50 | "# importing required libraries\n", 51 | "import numpy as np \n", 52 | "import cv2\n", 53 | "import matplotlib.pyplot as plt \n", 54 | "import tensorflow as tf \n", 55 | "from tensorflow.keras.models import Model \n", 56 | "from tensorflow.keras.layers import Dense , Input\n", 57 | "from tensorflow.keras.datasets import cifar10\n", 58 | "from tensorflow.keras.applications import ResNet50\n", 59 | "from tensorflow.keras.applications.resnet import preprocess_input as resnet_preprocess_input\n" 60 | ], 61 | "execution_count": 5, 62 | "outputs": [] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "metadata": { 67 | "colab": { 68 | "base_uri": "https://localhost:8080/" 69 | }, 70 | "id": "oKNeyDZ9VrKj", 71 | "outputId": "dd9cfa2e-d9e1-437f-ea17-695d7c39356c" 72 | }, 73 | "source": [ 74 | "# loading cifar-10 dataset \n", 75 | "(x_train, y_train) , (x_test, y_test) = cifar10.load_data()\n", 76 | "\n", 77 | "# reshaping y_train, y_test to 1D arrays (since later using loss function as sparse categorical crossentropy)\n", 78 | "y_train = y_train.reshape(-1)\n", 79 | "y_test = y_test.reshape(-1)\n", 80 | "\n", 81 | "label_names = ['airplane','automobile','bird','cat', 'deer' , 'dog', 'frog', 'horse', 'ship', 'truck' ] # reference - https://www.cs.toronto.edu/~kriz/cifar.html" 82 | ], 83 | "execution_count": 6, 84 | "outputs": [ 85 | { 86 | "output_type": "stream", 87 | "text": [ 88 | "Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz\n", 89 | "170500096/170498071 [==============================] - 11s 0us/step\n" 90 | ], 91 | "name": "stdout" 92 | } 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "metadata": { 98 | "colab": { 99 | "base_uri": "https://localhost:8080/", 100 | "height": 366 101 | }, 102 | "id": "pteamvelXP3s", 103 | "outputId": 
"6d520fec-39df-4f7d-cf62-b26ecbc9b90e" 104 | }, 105 | "source": [ 106 | "# printing useful dataset related information \n", 107 | "\n", 108 | "# printing dataset shapes\n", 109 | "print(\"x_train:{} , y_train:{} , x_test:{} , y_test:{} \".format(x_train.shape, y_train.shape, x_test.shape, y_test.shape ))\n", 110 | "\n", 111 | "# printing range of values in x_train, x_test and unique values in y_train, y_test\n", 112 | "print(\"x_train values are in range:{}-{}\".format(np.min(x_train), np.max(x_train)))\n", 113 | "print(\"x_test values are in range:{}-{}\".format(np.min(x_test), np.max(x_test)))\n", 114 | "print(\"unique values in y_train:{}\".format(np.unique(y_train)))\n", 115 | "print(\"unique values in y_test:{}\".format(np.unique(y_test)))\n", 116 | "\n", 117 | "# visualizing sample image \n", 118 | "idx = 0\n", 119 | "img = x_train[idx]\n", 120 | "label_id = y_train[idx]\n", 121 | "plt.title( \"image label {}:{}\".format( label_names[label_id] , label_id ) )\n", 122 | "plt.imshow(img)\n", 123 | "plt.show()" 124 | ], 125 | "execution_count": 7, 126 | "outputs": [ 127 | { 128 | "output_type": "stream", 129 | "text": [ 130 | "x_train:(50000, 32, 32, 3) , y_train:(50000,) , x_test:(10000, 32, 32, 3) , y_test:(10000,) \n", 131 | "x_train values are in range:0-255\n", 132 | "x_test values are in range:0-255\n", 133 | "unique values in y_train:[0 1 2 3 4 5 6 7 8 9]\n", 134 | "unique values in y_test:[0 1 2 3 4 5 6 7 8 9]\n" 135 | ], 136 | "name": "stdout" 137 | }, 138 | { 139 | "output_type": "display_data", 140 | "data": { 141 | "image/png": "\n", 142 | "text/plain": [ 143 | "
" 144 | ] 145 | }, 146 | "metadata": { 147 | "tags": [], 148 | "needs_background": "light" 149 | } 150 | } 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "metadata": { 156 | "colab": { 157 | "base_uri": "https://localhost:8080/" 158 | }, 159 | "id": "b5C73dgyZ0zG", 160 | "outputId": "c8a82177-b95e-4c63-88ba-084e5118e9e1" 161 | }, 162 | "source": [ 163 | "# pre-processing the image for classification using resnet model (expects a specific kind of input preprocessing for resnet)\n", 164 | "x_train_pp = x_train.astype('float32')\n", 165 | "x_train_pp = resnet_preprocess_input(x_train_pp)\n", 166 | "x_test_pp = x_test.astype('float32')\n", 167 | "x_test_pp = resnet_preprocess_input(x_test_pp)\n", 168 | "\n", 169 | "# printing range of values in x_train_pp, x_test_pp\n", 170 | "print(\"x_train_pp values are in range:{} to {}\".format(np.min(x_train_pp), np.max(x_train_pp)))\n", 171 | "print(\"x_test_pp values are in range:{} to {}\".format(np.min(x_test_pp), np.max(x_test_pp)))" 172 | ], 173 | "execution_count": 8, 174 | "outputs": [ 175 | { 176 | "output_type": "stream", 177 | "text": [ 178 | "x_train_pp values are in range:-123.68000030517578 to 151.06100463867188\n", 179 | "x_test_pp values are in range:-123.68000030517578 to 151.06100463867188\n" 180 | ], 181 | "name": "stdout" 182 | } 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "metadata": { 188 | "colab": { 189 | "base_uri": "https://localhost:8080/" 190 | }, 191 | "id": "W0GZNNXocHaM", 192 | "outputId": "de3fc1e4-5b6f-4e8a-c3b5-afe6f9b14503" 193 | }, 194 | "source": [ 195 | "# loading the resnet model \n", 196 | "# 1. top layer (global avg pooling + output dense layer) is not loaded\n", 197 | "# 2. weights pre-trained on imagenet are used \n", 198 | "# 3. input image shape is (32,32,3)\n", 199 | "# 4. 
global average pooling is added at the top \n", 200 | "model_resnet = ResNet50(include_top=False, weights='imagenet', input_shape=(32,32,3) , pooling='avg')\n", 201 | "model_resnet.summary()" 202 | ], 203 | "execution_count": 9, 204 | "outputs": [ 205 | { 206 | "output_type": "stream", 207 | "text": [ 208 | "Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5\n", 209 | "94773248/94765736 [==============================] - 1s 0us/step\n", 210 | "Model: \"resnet50\"\n", 211 | "__________________________________________________________________________________________________\n", 212 | "Layer (type) Output Shape Param # Connected to \n", 213 | "==================================================================================================\n", 214 | "input_1 (InputLayer) [(None, 32, 32, 3)] 0 \n", 215 | "__________________________________________________________________________________________________\n", 216 | "conv1_pad (ZeroPadding2D) (None, 38, 38, 3) 0 input_1[0][0] \n", 217 | "__________________________________________________________________________________________________\n", 218 | "conv1_conv (Conv2D) (None, 16, 16, 64) 9472 conv1_pad[0][0] \n", 219 | "__________________________________________________________________________________________________\n", 220 | "conv1_bn (BatchNormalization) (None, 16, 16, 64) 256 conv1_conv[0][0] \n", 221 | "__________________________________________________________________________________________________\n", 222 | "conv1_relu (Activation) (None, 16, 16, 64) 0 conv1_bn[0][0] \n", 223 | "__________________________________________________________________________________________________\n", 224 | "pool1_pad (ZeroPadding2D) (None, 18, 18, 64) 0 conv1_relu[0][0] \n", 225 | "__________________________________________________________________________________________________\n", 226 | "pool1_pool (MaxPooling2D) (None, 8, 8, 64) 0 pool1_pad[0][0] 
\n", 227 | "__________________________________________________________________________________________________\n", 228 | "conv2_block1_1_conv (Conv2D) (None, 8, 8, 64) 4160 pool1_pool[0][0] \n", 229 | "__________________________________________________________________________________________________\n", 230 | "conv2_block1_1_bn (BatchNormali (None, 8, 8, 64) 256 conv2_block1_1_conv[0][0] \n", 231 | "__________________________________________________________________________________________________\n", 232 | "conv2_block1_1_relu (Activation (None, 8, 8, 64) 0 conv2_block1_1_bn[0][0] \n", 233 | "__________________________________________________________________________________________________\n", 234 | "conv2_block1_2_conv (Conv2D) (None, 8, 8, 64) 36928 conv2_block1_1_relu[0][0] \n", 235 | "__________________________________________________________________________________________________\n", 236 | "conv2_block1_2_bn (BatchNormali (None, 8, 8, 64) 256 conv2_block1_2_conv[0][0] \n", 237 | "__________________________________________________________________________________________________\n", 238 | "conv2_block1_2_relu (Activation (None, 8, 8, 64) 0 conv2_block1_2_bn[0][0] \n", 239 | "__________________________________________________________________________________________________\n", 240 | "conv2_block1_0_conv (Conv2D) (None, 8, 8, 256) 16640 pool1_pool[0][0] \n", 241 | "__________________________________________________________________________________________________\n", 242 | "conv2_block1_3_conv (Conv2D) (None, 8, 8, 256) 16640 conv2_block1_2_relu[0][0] \n", 243 | "__________________________________________________________________________________________________\n", 244 | "conv2_block1_0_bn (BatchNormali (None, 8, 8, 256) 1024 conv2_block1_0_conv[0][0] \n", 245 | "__________________________________________________________________________________________________\n", 246 | "conv2_block1_3_bn (BatchNormali (None, 8, 8, 256) 1024 conv2_block1_3_conv[0][0] \n", 247 | 
"__________________________________________________________________________________________________\n", 248 | "conv2_block1_add (Add) (None, 8, 8, 256) 0 conv2_block1_0_bn[0][0] \n", 249 | " conv2_block1_3_bn[0][0] \n", 250 | "__________________________________________________________________________________________________\n", 251 | "conv2_block1_out (Activation) (None, 8, 8, 256) 0 conv2_block1_add[0][0] \n", 252 | "__________________________________________________________________________________________________\n", 253 | "conv2_block2_1_conv (Conv2D) (None, 8, 8, 64) 16448 conv2_block1_out[0][0] \n", 254 | "__________________________________________________________________________________________________\n", 255 | "conv2_block2_1_bn (BatchNormali (None, 8, 8, 64) 256 conv2_block2_1_conv[0][0] \n", 256 | "__________________________________________________________________________________________________\n", 257 | "conv2_block2_1_relu (Activation (None, 8, 8, 64) 0 conv2_block2_1_bn[0][0] \n", 258 | "__________________________________________________________________________________________________\n", 259 | "conv2_block2_2_conv (Conv2D) (None, 8, 8, 64) 36928 conv2_block2_1_relu[0][0] \n", 260 | "__________________________________________________________________________________________________\n", 261 | "conv2_block2_2_bn (BatchNormali (None, 8, 8, 64) 256 conv2_block2_2_conv[0][0] \n", 262 | "__________________________________________________________________________________________________\n", 263 | "conv2_block2_2_relu (Activation (None, 8, 8, 64) 0 conv2_block2_2_bn[0][0] \n", 264 | "__________________________________________________________________________________________________\n", 265 | "conv2_block2_3_conv (Conv2D) (None, 8, 8, 256) 16640 conv2_block2_2_relu[0][0] \n", 266 | "__________________________________________________________________________________________________\n", 267 | "conv2_block2_3_bn (BatchNormali (None, 8, 8, 256) 1024 
conv2_block2_3_conv[0][0] \n", 268 | "__________________________________________________________________________________________________\n", 269 | "conv2_block2_add (Add) (None, 8, 8, 256) 0 conv2_block1_out[0][0] \n", 270 | " conv2_block2_3_bn[0][0] \n", 271 | "__________________________________________________________________________________________________\n", 272 | "conv2_block2_out (Activation) (None, 8, 8, 256) 0 conv2_block2_add[0][0] \n", 273 | "__________________________________________________________________________________________________\n", 274 | "conv2_block3_1_conv (Conv2D) (None, 8, 8, 64) 16448 conv2_block2_out[0][0] \n", 275 | "__________________________________________________________________________________________________\n", 276 | "conv2_block3_1_bn (BatchNormali (None, 8, 8, 64) 256 conv2_block3_1_conv[0][0] \n", 277 | "__________________________________________________________________________________________________\n", 278 | "conv2_block3_1_relu (Activation (None, 8, 8, 64) 0 conv2_block3_1_bn[0][0] \n", 279 | "__________________________________________________________________________________________________\n", 280 | "conv2_block3_2_conv (Conv2D) (None, 8, 8, 64) 36928 conv2_block3_1_relu[0][0] \n", 281 | "__________________________________________________________________________________________________\n", 282 | "conv2_block3_2_bn (BatchNormali (None, 8, 8, 64) 256 conv2_block3_2_conv[0][0] \n", 283 | "__________________________________________________________________________________________________\n", 284 | "conv2_block3_2_relu (Activation (None, 8, 8, 64) 0 conv2_block3_2_bn[0][0] \n", 285 | "__________________________________________________________________________________________________\n", 286 | "conv2_block3_3_conv (Conv2D) (None, 8, 8, 256) 16640 conv2_block3_2_relu[0][0] \n", 287 | "__________________________________________________________________________________________________\n", 288 | "conv2_block3_3_bn (BatchNormali 
(None, 8, 8, 256) 1024 conv2_block3_3_conv[0][0] \n", 289 | "__________________________________________________________________________________________________\n", 290 | "conv2_block3_add (Add) (None, 8, 8, 256) 0 conv2_block2_out[0][0] \n", 291 | " conv2_block3_3_bn[0][0] \n", 292 | "__________________________________________________________________________________________________\n", 293 | "conv2_block3_out (Activation) (None, 8, 8, 256) 0 conv2_block3_add[0][0] \n", 294 | "__________________________________________________________________________________________________\n", 295 | "conv3_block1_1_conv (Conv2D) (None, 4, 4, 128) 32896 conv2_block3_out[0][0] \n", 296 | "__________________________________________________________________________________________________\n", 297 | "conv3_block1_1_bn (BatchNormali (None, 4, 4, 128) 512 conv3_block1_1_conv[0][0] \n", 298 | "__________________________________________________________________________________________________\n", 299 | "conv3_block1_1_relu (Activation (None, 4, 4, 128) 0 conv3_block1_1_bn[0][0] \n", 300 | "__________________________________________________________________________________________________\n", 301 | "conv3_block1_2_conv (Conv2D) (None, 4, 4, 128) 147584 conv3_block1_1_relu[0][0] \n", 302 | "__________________________________________________________________________________________________\n", 303 | "conv3_block1_2_bn (BatchNormali (None, 4, 4, 128) 512 conv3_block1_2_conv[0][0] \n", 304 | "__________________________________________________________________________________________________\n", 305 | "conv3_block1_2_relu (Activation (None, 4, 4, 128) 0 conv3_block1_2_bn[0][0] \n", 306 | "__________________________________________________________________________________________________\n", 307 | "conv3_block1_0_conv (Conv2D) (None, 4, 4, 512) 131584 conv2_block3_out[0][0] \n", 308 | "__________________________________________________________________________________________________\n", 309 | 
"conv3_block1_3_conv (Conv2D) (None, 4, 4, 512) 66048 conv3_block1_2_relu[0][0] \n", 310 | "__________________________________________________________________________________________________\n", 311 | "conv3_block1_0_bn (BatchNormali (None, 4, 4, 512) 2048 conv3_block1_0_conv[0][0] \n", 312 | "__________________________________________________________________________________________________\n", 313 | "conv3_block1_3_bn (BatchNormali (None, 4, 4, 512) 2048 conv3_block1_3_conv[0][0] \n", 314 | "__________________________________________________________________________________________________\n", 315 | "conv3_block1_add (Add) (None, 4, 4, 512) 0 conv3_block1_0_bn[0][0] \n", 316 | " conv3_block1_3_bn[0][0] \n", 317 | "__________________________________________________________________________________________________\n", 318 | "conv3_block1_out (Activation) (None, 4, 4, 512) 0 conv3_block1_add[0][0] \n", 319 | "__________________________________________________________________________________________________\n", 320 | "conv3_block2_1_conv (Conv2D) (None, 4, 4, 128) 65664 conv3_block1_out[0][0] \n", 321 | "__________________________________________________________________________________________________\n", 322 | "conv3_block2_1_bn (BatchNormali (None, 4, 4, 128) 512 conv3_block2_1_conv[0][0] \n", 323 | "__________________________________________________________________________________________________\n", 324 | "conv3_block2_1_relu (Activation (None, 4, 4, 128) 0 conv3_block2_1_bn[0][0] \n", 325 | "__________________________________________________________________________________________________\n", 326 | "conv3_block2_2_conv (Conv2D) (None, 4, 4, 128) 147584 conv3_block2_1_relu[0][0] \n", 327 | "__________________________________________________________________________________________________\n", 328 | "conv3_block2_2_bn (BatchNormali (None, 4, 4, 128) 512 conv3_block2_2_conv[0][0] \n", 329 | 
"__________________________________________________________________________________________________\n", 330 | "conv3_block2_2_relu (Activation (None, 4, 4, 128) 0 conv3_block2_2_bn[0][0] \n", 331 | "__________________________________________________________________________________________________\n", 332 | "conv3_block2_3_conv (Conv2D) (None, 4, 4, 512) 66048 conv3_block2_2_relu[0][0] \n", 333 | "__________________________________________________________________________________________________\n", 334 | "conv3_block2_3_bn (BatchNormali (None, 4, 4, 512) 2048 conv3_block2_3_conv[0][0] \n", 335 | "__________________________________________________________________________________________________\n", 336 | "conv3_block2_add (Add) (None, 4, 4, 512) 0 conv3_block1_out[0][0] \n", 337 | " conv3_block2_3_bn[0][0] \n", 338 | "__________________________________________________________________________________________________\n", 339 | "conv3_block2_out (Activation) (None, 4, 4, 512) 0 conv3_block2_add[0][0] \n", 340 | "__________________________________________________________________________________________________\n", 341 | "conv3_block3_1_conv (Conv2D) (None, 4, 4, 128) 65664 conv3_block2_out[0][0] \n", 342 | "__________________________________________________________________________________________________\n", 343 | "conv3_block3_1_bn (BatchNormali (None, 4, 4, 128) 512 conv3_block3_1_conv[0][0] \n", 344 | "__________________________________________________________________________________________________\n", 345 | "conv3_block3_1_relu (Activation (None, 4, 4, 128) 0 conv3_block3_1_bn[0][0] \n", 346 | "__________________________________________________________________________________________________\n", 347 | "conv3_block3_2_conv (Conv2D) (None, 4, 4, 128) 147584 conv3_block3_1_relu[0][0] \n", 348 | "__________________________________________________________________________________________________\n", 349 | "conv3_block3_2_bn (BatchNormali (None, 4, 4, 128) 512 
conv3_block3_2_conv[0][0] \n", 350 | "__________________________________________________________________________________________________\n", 351 | "conv3_block3_2_relu (Activation (None, 4, 4, 128) 0 conv3_block3_2_bn[0][0] \n", 352 | "__________________________________________________________________________________________________\n", 353 | "conv3_block3_3_conv (Conv2D) (None, 4, 4, 512) 66048 conv3_block3_2_relu[0][0] \n", 354 | "__________________________________________________________________________________________________\n", 355 | "conv3_block3_3_bn (BatchNormali (None, 4, 4, 512) 2048 conv3_block3_3_conv[0][0] \n", 356 | "__________________________________________________________________________________________________\n", 357 | "conv3_block3_add (Add) (None, 4, 4, 512) 0 conv3_block2_out[0][0] \n", 358 | " conv3_block3_3_bn[0][0] \n", 359 | "__________________________________________________________________________________________________\n", 360 | "conv3_block3_out (Activation) (None, 4, 4, 512) 0 conv3_block3_add[0][0] \n", 361 | "__________________________________________________________________________________________________\n", 362 | "conv3_block4_1_conv (Conv2D) (None, 4, 4, 128) 65664 conv3_block3_out[0][0] \n", 363 | "__________________________________________________________________________________________________\n", 364 | "conv3_block4_1_bn (BatchNormali (None, 4, 4, 128) 512 conv3_block4_1_conv[0][0] \n", 365 | "__________________________________________________________________________________________________\n", 366 | "conv3_block4_1_relu (Activation (None, 4, 4, 128) 0 conv3_block4_1_bn[0][0] \n", 367 | "__________________________________________________________________________________________________\n", 368 | "conv3_block4_2_conv (Conv2D) (None, 4, 4, 128) 147584 conv3_block4_1_relu[0][0] \n", 369 | "__________________________________________________________________________________________________\n", 370 | "conv3_block4_2_bn 
(BatchNormali (None, 4, 4, 128) 512 conv3_block4_2_conv[0][0] \n", 371 | "__________________________________________________________________________________________________\n", 372 | "conv3_block4_2_relu (Activation (None, 4, 4, 128) 0 conv3_block4_2_bn[0][0] \n", 373 | "__________________________________________________________________________________________________\n", 374 | "conv3_block4_3_conv (Conv2D) (None, 4, 4, 512) 66048 conv3_block4_2_relu[0][0] \n", 375 | "__________________________________________________________________________________________________\n", 376 | "conv3_block4_3_bn (BatchNormali (None, 4, 4, 512) 2048 conv3_block4_3_conv[0][0] \n", 377 | "__________________________________________________________________________________________________\n", 378 | "conv3_block4_add (Add) (None, 4, 4, 512) 0 conv3_block3_out[0][0] \n", 379 | " conv3_block4_3_bn[0][0] \n", 380 | "__________________________________________________________________________________________________\n", 381 | "conv3_block4_out (Activation) (None, 4, 4, 512) 0 conv3_block4_add[0][0] \n", 382 | "__________________________________________________________________________________________________\n", 383 | "conv4_block1_1_conv (Conv2D) (None, 2, 2, 256) 131328 conv3_block4_out[0][0] \n", 384 | "__________________________________________________________________________________________________\n", 385 | "conv4_block1_1_bn (BatchNormali (None, 2, 2, 256) 1024 conv4_block1_1_conv[0][0] \n", 386 | "__________________________________________________________________________________________________\n", 387 | "conv4_block1_1_relu (Activation (None, 2, 2, 256) 0 conv4_block1_1_bn[0][0] \n", 388 | "__________________________________________________________________________________________________\n", 389 | "conv4_block1_2_conv (Conv2D) (None, 2, 2, 256) 590080 conv4_block1_1_relu[0][0] \n", 390 | 
"__________________________________________________________________________________________________\n", 391 | "conv4_block1_2_bn (BatchNormali (None, 2, 2, 256) 1024 conv4_block1_2_conv[0][0] \n", 392 | "__________________________________________________________________________________________________\n", 393 | "conv4_block1_2_relu (Activation (None, 2, 2, 256) 0 conv4_block1_2_bn[0][0] \n", 394 | "__________________________________________________________________________________________________\n", 395 | "conv4_block1_0_conv (Conv2D) (None, 2, 2, 1024) 525312 conv3_block4_out[0][0] \n", 396 | "__________________________________________________________________________________________________\n", 397 | "conv4_block1_3_conv (Conv2D) (None, 2, 2, 1024) 263168 conv4_block1_2_relu[0][0] \n", 398 | "__________________________________________________________________________________________________\n", 399 | "conv4_block1_0_bn (BatchNormali (None, 2, 2, 1024) 4096 conv4_block1_0_conv[0][0] \n", 400 | "__________________________________________________________________________________________________\n", 401 | "conv4_block1_3_bn (BatchNormali (None, 2, 2, 1024) 4096 conv4_block1_3_conv[0][0] \n", 402 | "__________________________________________________________________________________________________\n", 403 | "conv4_block1_add (Add) (None, 2, 2, 1024) 0 conv4_block1_0_bn[0][0] \n", 404 | " conv4_block1_3_bn[0][0] \n", 405 | "__________________________________________________________________________________________________\n", 406 | "conv4_block1_out (Activation) (None, 2, 2, 1024) 0 conv4_block1_add[0][0] \n", 407 | "__________________________________________________________________________________________________\n", 408 | "conv4_block2_1_conv (Conv2D) (None, 2, 2, 256) 262400 conv4_block1_out[0][0] \n", 409 | "__________________________________________________________________________________________________\n", 410 | "conv4_block2_1_bn (BatchNormali (None, 2, 2, 256) 1024 
conv4_block2_1_conv[0][0] \n", 411 | "__________________________________________________________________________________________________\n", 412 | "conv4_block2_1_relu (Activation (None, 2, 2, 256) 0 conv4_block2_1_bn[0][0] \n", 413 | "__________________________________________________________________________________________________\n", 414 | "conv4_block2_2_conv (Conv2D) (None, 2, 2, 256) 590080 conv4_block2_1_relu[0][0] \n", 415 | "__________________________________________________________________________________________________\n", 416 | "conv4_block2_2_bn (BatchNormali (None, 2, 2, 256) 1024 conv4_block2_2_conv[0][0] \n", 417 | "__________________________________________________________________________________________________\n", 418 | "conv4_block2_2_relu (Activation (None, 2, 2, 256) 0 conv4_block2_2_bn[0][0] \n", 419 | "__________________________________________________________________________________________________\n", 420 | "conv4_block2_3_conv (Conv2D) (None, 2, 2, 1024) 263168 conv4_block2_2_relu[0][0] \n", 421 | "__________________________________________________________________________________________________\n", 422 | "conv4_block2_3_bn (BatchNormali (None, 2, 2, 1024) 4096 conv4_block2_3_conv[0][0] \n", 423 | "__________________________________________________________________________________________________\n", 424 | "conv4_block2_add (Add) (None, 2, 2, 1024) 0 conv4_block1_out[0][0] \n", 425 | " conv4_block2_3_bn[0][0] \n", 426 | "__________________________________________________________________________________________________\n", 427 | "conv4_block2_out (Activation) (None, 2, 2, 1024) 0 conv4_block2_add[0][0] \n", 428 | "__________________________________________________________________________________________________\n", 429 | "conv4_block3_1_conv (Conv2D) (None, 2, 2, 256) 262400 conv4_block2_out[0][0] \n", 430 | "__________________________________________________________________________________________________\n", 431 | "conv4_block3_1_bn 
(BatchNormali (None, 2, 2, 256) 1024 conv4_block3_1_conv[0][0] \n", 432 | "__________________________________________________________________________________________________\n", 433 | "conv4_block3_1_relu (Activation (None, 2, 2, 256) 0 conv4_block3_1_bn[0][0] \n", 434 | "__________________________________________________________________________________________________\n", 435 | "conv4_block3_2_conv (Conv2D) (None, 2, 2, 256) 590080 conv4_block3_1_relu[0][0] \n", 436 | "__________________________________________________________________________________________________\n", 437 | "conv4_block3_2_bn (BatchNormali (None, 2, 2, 256) 1024 conv4_block3_2_conv[0][0] \n", 438 | "__________________________________________________________________________________________________\n", 439 | "conv4_block3_2_relu (Activation (None, 2, 2, 256) 0 conv4_block3_2_bn[0][0] \n", 440 | "__________________________________________________________________________________________________\n", 441 | "conv4_block3_3_conv (Conv2D) (None, 2, 2, 1024) 263168 conv4_block3_2_relu[0][0] \n", 442 | "__________________________________________________________________________________________________\n", 443 | "conv4_block3_3_bn (BatchNormali (None, 2, 2, 1024) 4096 conv4_block3_3_conv[0][0] \n", 444 | "__________________________________________________________________________________________________\n", 445 | "conv4_block3_add (Add) (None, 2, 2, 1024) 0 conv4_block2_out[0][0] \n", 446 | " conv4_block3_3_bn[0][0] \n", 447 | "__________________________________________________________________________________________________\n", 448 | "conv4_block3_out (Activation) (None, 2, 2, 1024) 0 conv4_block3_add[0][0] \n", 449 | "__________________________________________________________________________________________________\n", 450 | "conv4_block4_1_conv (Conv2D) (None, 2, 2, 256) 262400 conv4_block3_out[0][0] \n", 451 | 
"__________________________________________________________________________________________________\n", 452 | "conv4_block4_1_bn (BatchNormali (None, 2, 2, 256) 1024 conv4_block4_1_conv[0][0] \n", 453 | "__________________________________________________________________________________________________\n", 454 | "conv4_block4_1_relu (Activation (None, 2, 2, 256) 0 conv4_block4_1_bn[0][0] \n", 455 | "__________________________________________________________________________________________________\n", 456 | "conv4_block4_2_conv (Conv2D) (None, 2, 2, 256) 590080 conv4_block4_1_relu[0][0] \n", 457 | "__________________________________________________________________________________________________\n", 458 | "conv4_block4_2_bn (BatchNormali (None, 2, 2, 256) 1024 conv4_block4_2_conv[0][0] \n", 459 | "__________________________________________________________________________________________________\n", 460 | "conv4_block4_2_relu (Activation (None, 2, 2, 256) 0 conv4_block4_2_bn[0][0] \n", 461 | "__________________________________________________________________________________________________\n", 462 | "conv4_block4_3_conv (Conv2D) (None, 2, 2, 1024) 263168 conv4_block4_2_relu[0][0] \n", 463 | "__________________________________________________________________________________________________\n", 464 | "conv4_block4_3_bn (BatchNormali (None, 2, 2, 1024) 4096 conv4_block4_3_conv[0][0] \n", 465 | "__________________________________________________________________________________________________\n", 466 | "conv4_block4_add (Add) (None, 2, 2, 1024) 0 conv4_block3_out[0][0] \n", 467 | " conv4_block4_3_bn[0][0] \n", 468 | "__________________________________________________________________________________________________\n", 469 | "conv4_block4_out (Activation) (None, 2, 2, 1024) 0 conv4_block4_add[0][0] \n", 470 | "__________________________________________________________________________________________________\n", 471 | "conv4_block5_1_conv (Conv2D) (None, 2, 2, 256) 262400 
conv4_block4_out[0][0] \n", 472 | "__________________________________________________________________________________________________\n", 473 | "conv4_block5_1_bn (BatchNormali (None, 2, 2, 256) 1024 conv4_block5_1_conv[0][0] \n", 474 | "__________________________________________________________________________________________________\n", 475 | "conv4_block5_1_relu (Activation (None, 2, 2, 256) 0 conv4_block5_1_bn[0][0] \n", 476 | "__________________________________________________________________________________________________\n", 477 | "conv4_block5_2_conv (Conv2D) (None, 2, 2, 256) 590080 conv4_block5_1_relu[0][0] \n", 478 | "__________________________________________________________________________________________________\n", 479 | "conv4_block5_2_bn (BatchNormali (None, 2, 2, 256) 1024 conv4_block5_2_conv[0][0] \n", 480 | "__________________________________________________________________________________________________\n", 481 | "conv4_block5_2_relu (Activation (None, 2, 2, 256) 0 conv4_block5_2_bn[0][0] \n", 482 | "__________________________________________________________________________________________________\n", 483 | "conv4_block5_3_conv (Conv2D) (None, 2, 2, 1024) 263168 conv4_block5_2_relu[0][0] \n", 484 | "__________________________________________________________________________________________________\n", 485 | "conv4_block5_3_bn (BatchNormali (None, 2, 2, 1024) 4096 conv4_block5_3_conv[0][0] \n", 486 | "__________________________________________________________________________________________________\n", 487 | "conv4_block5_add (Add) (None, 2, 2, 1024) 0 conv4_block4_out[0][0] \n", 488 | " conv4_block5_3_bn[0][0] \n", 489 | "__________________________________________________________________________________________________\n", 490 | "conv4_block5_out (Activation) (None, 2, 2, 1024) 0 conv4_block5_add[0][0] \n", 491 | "__________________________________________________________________________________________________\n", 492 | "conv4_block6_1_conv 
(Conv2D) (None, 2, 2, 256) 262400 conv4_block5_out[0][0] \n", 493 | "__________________________________________________________________________________________________\n", 494 | "conv4_block6_1_bn (BatchNormali (None, 2, 2, 256) 1024 conv4_block6_1_conv[0][0] \n", 495 | "__________________________________________________________________________________________________\n", 496 | "conv4_block6_1_relu (Activation (None, 2, 2, 256) 0 conv4_block6_1_bn[0][0] \n", 497 | "__________________________________________________________________________________________________\n", 498 | "conv4_block6_2_conv (Conv2D) (None, 2, 2, 256) 590080 conv4_block6_1_relu[0][0] \n", 499 | "__________________________________________________________________________________________________\n", 500 | "conv4_block6_2_bn (BatchNormali (None, 2, 2, 256) 1024 conv4_block6_2_conv[0][0] \n", 501 | "__________________________________________________________________________________________________\n", 502 | "conv4_block6_2_relu (Activation (None, 2, 2, 256) 0 conv4_block6_2_bn[0][0] \n", 503 | "__________________________________________________________________________________________________\n", 504 | "conv4_block6_3_conv (Conv2D) (None, 2, 2, 1024) 263168 conv4_block6_2_relu[0][0] \n", 505 | "__________________________________________________________________________________________________\n", 506 | "conv4_block6_3_bn (BatchNormali (None, 2, 2, 1024) 4096 conv4_block6_3_conv[0][0] \n", 507 | "__________________________________________________________________________________________________\n", 508 | "conv4_block6_add (Add) (None, 2, 2, 1024) 0 conv4_block5_out[0][0] \n", 509 | " conv4_block6_3_bn[0][0] \n", 510 | "__________________________________________________________________________________________________\n", 511 | "conv4_block6_out (Activation) (None, 2, 2, 1024) 0 conv4_block6_add[0][0] \n", 512 | 
"__________________________________________________________________________________________________\n", 513 | "conv5_block1_1_conv (Conv2D) (None, 1, 1, 512) 524800 conv4_block6_out[0][0] \n", 514 | "__________________________________________________________________________________________________\n", 515 | "conv5_block1_1_bn (BatchNormali (None, 1, 1, 512) 2048 conv5_block1_1_conv[0][0] \n", 516 | "__________________________________________________________________________________________________\n", 517 | "conv5_block1_1_relu (Activation (None, 1, 1, 512) 0 conv5_block1_1_bn[0][0] \n", 518 | "__________________________________________________________________________________________________\n", 519 | "conv5_block1_2_conv (Conv2D) (None, 1, 1, 512) 2359808 conv5_block1_1_relu[0][0] \n", 520 | "__________________________________________________________________________________________________\n", 521 | "conv5_block1_2_bn (BatchNormali (None, 1, 1, 512) 2048 conv5_block1_2_conv[0][0] \n", 522 | "__________________________________________________________________________________________________\n", 523 | "conv5_block1_2_relu (Activation (None, 1, 1, 512) 0 conv5_block1_2_bn[0][0] \n", 524 | "__________________________________________________________________________________________________\n", 525 | "conv5_block1_0_conv (Conv2D) (None, 1, 1, 2048) 2099200 conv4_block6_out[0][0] \n", 526 | "__________________________________________________________________________________________________\n", 527 | "conv5_block1_3_conv (Conv2D) (None, 1, 1, 2048) 1050624 conv5_block1_2_relu[0][0] \n", 528 | "__________________________________________________________________________________________________\n", 529 | "conv5_block1_0_bn (BatchNormali (None, 1, 1, 2048) 8192 conv5_block1_0_conv[0][0] \n", 530 | "__________________________________________________________________________________________________\n", 531 | "conv5_block1_3_bn (BatchNormali (None, 1, 1, 2048) 8192 
conv5_block1_3_conv[0][0] \n", 532 | "__________________________________________________________________________________________________\n", 533 | "conv5_block1_add (Add) (None, 1, 1, 2048) 0 conv5_block1_0_bn[0][0] \n", 534 | " conv5_block1_3_bn[0][0] \n", 535 | "__________________________________________________________________________________________________\n", 536 | "conv5_block1_out (Activation) (None, 1, 1, 2048) 0 conv5_block1_add[0][0] \n", 537 | "__________________________________________________________________________________________________\n", 538 | "conv5_block2_1_conv (Conv2D) (None, 1, 1, 512) 1049088 conv5_block1_out[0][0] \n", 539 | "__________________________________________________________________________________________________\n", 540 | "conv5_block2_1_bn (BatchNormali (None, 1, 1, 512) 2048 conv5_block2_1_conv[0][0] \n", 541 | "__________________________________________________________________________________________________\n", 542 | "conv5_block2_1_relu (Activation (None, 1, 1, 512) 0 conv5_block2_1_bn[0][0] \n", 543 | "__________________________________________________________________________________________________\n", 544 | "conv5_block2_2_conv (Conv2D) (None, 1, 1, 512) 2359808 conv5_block2_1_relu[0][0] \n", 545 | "__________________________________________________________________________________________________\n", 546 | "conv5_block2_2_bn (BatchNormali (None, 1, 1, 512) 2048 conv5_block2_2_conv[0][0] \n", 547 | "__________________________________________________________________________________________________\n", 548 | "conv5_block2_2_relu (Activation (None, 1, 1, 512) 0 conv5_block2_2_bn[0][0] \n", 549 | "__________________________________________________________________________________________________\n", 550 | "conv5_block2_3_conv (Conv2D) (None, 1, 1, 2048) 1050624 conv5_block2_2_relu[0][0] \n", 551 | "__________________________________________________________________________________________________\n", 552 | "conv5_block2_3_bn 
(BatchNormali (None, 1, 1, 2048) 8192 conv5_block2_3_conv[0][0] \n", 553 | "__________________________________________________________________________________________________\n", 554 | "conv5_block2_add (Add) (None, 1, 1, 2048) 0 conv5_block1_out[0][0] \n", 555 | " conv5_block2_3_bn[0][0] \n", 556 | "__________________________________________________________________________________________________\n", 557 | "conv5_block2_out (Activation) (None, 1, 1, 2048) 0 conv5_block2_add[0][0] \n", 558 | "__________________________________________________________________________________________________\n", 559 | "conv5_block3_1_conv (Conv2D) (None, 1, 1, 512) 1049088 conv5_block2_out[0][0] \n", 560 | "__________________________________________________________________________________________________\n", 561 | "conv5_block3_1_bn (BatchNormali (None, 1, 1, 512) 2048 conv5_block3_1_conv[0][0] \n", 562 | "__________________________________________________________________________________________________\n", 563 | "conv5_block3_1_relu (Activation (None, 1, 1, 512) 0 conv5_block3_1_bn[0][0] \n", 564 | "__________________________________________________________________________________________________\n", 565 | "conv5_block3_2_conv (Conv2D) (None, 1, 1, 512) 2359808 conv5_block3_1_relu[0][0] \n", 566 | "__________________________________________________________________________________________________\n", 567 | "conv5_block3_2_bn (BatchNormali (None, 1, 1, 512) 2048 conv5_block3_2_conv[0][0] \n", 568 | "__________________________________________________________________________________________________\n", 569 | "conv5_block3_2_relu (Activation (None, 1, 1, 512) 0 conv5_block3_2_bn[0][0] \n", 570 | "__________________________________________________________________________________________________\n", 571 | "conv5_block3_3_conv (Conv2D) (None, 1, 1, 2048) 1050624 conv5_block3_2_relu[0][0] \n", 572 | 
"__________________________________________________________________________________________________\n", 573 | "conv5_block3_3_bn (BatchNormali (None, 1, 1, 2048) 8192 conv5_block3_3_conv[0][0] \n", 574 | "__________________________________________________________________________________________________\n", 575 | "conv5_block3_add (Add) (None, 1, 1, 2048) 0 conv5_block2_out[0][0] \n", 576 | " conv5_block3_3_bn[0][0] \n", 577 | "__________________________________________________________________________________________________\n", 578 | "conv5_block3_out (Activation) (None, 1, 1, 2048) 0 conv5_block3_add[0][0] \n", 579 | "__________________________________________________________________________________________________\n", 580 | "avg_pool (GlobalAveragePooling2 (None, 2048) 0 conv5_block3_out[0][0] \n", 581 | "==================================================================================================\n", 582 | "Total params: 23,587,712\n", 583 | "Trainable params: 23,534,592\n", 584 | "Non-trainable params: 53,120\n", 585 | "__________________________________________________________________________________________________\n" 586 | ], 587 | "name": "stdout" 588 | } 589 | ] 590 | }, 591 | { 592 | "cell_type": "code", 593 | "metadata": { 594 | "colab": { 595 | "base_uri": "https://localhost:8080/" 596 | }, 597 | "id": "JTVzf85zd6fF", 598 | "outputId": "bf1822ea-43ab-4694-ba10-3338babdf12e" 599 | }, 600 | "source": [ 601 | "# defining the model for transfer learning with 10 different output classes \n", 602 | "num_classes = 10\n", 603 | "model_resnet.trainable = False # freezing the weights in model_resnet (these weights will not be updated during training)\n", 604 | "inputs = Input(shape=(32,32,3))\n", 605 | "x = model_resnet(inputs, training=False)\n", 606 | "outputs = Dense(units=num_classes, activation='softmax')(x)\n", 607 | "model = Model(inputs=inputs, outputs=outputs)\n", 608 | "model.summary()" 609 | ], 610 | "execution_count": 10, 611 | "outputs": [ 612 | 
{ 613 | "output_type": "stream", 614 | "text": [ 615 | "Model: \"model\"\n", 616 | "_________________________________________________________________\n", 617 | "Layer (type) Output Shape Param # \n", 618 | "=================================================================\n", 619 | "input_2 (InputLayer) [(None, 32, 32, 3)] 0 \n", 620 | "_________________________________________________________________\n", 621 | "resnet50 (Functional) (None, 2048) 23587712 \n", 622 | "_________________________________________________________________\n", 623 | "dense (Dense) (None, 10) 20490 \n", 624 | "=================================================================\n", 625 | "Total params: 23,608,202\n", 626 | "Trainable params: 20,490\n", 627 | "Non-trainable params: 23,587,712\n", 628 | "_________________________________________________________________\n" 629 | ], 630 | "name": "stdout" 631 | } 632 | ] 633 | }, 634 | { 635 | "cell_type": "code", 636 | "metadata": { 637 | "id": "k7gT0FjofIFf" 638 | }, 639 | "source": [ 640 | "# compiling the model \n", 641 | "model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics='accuracy')" 642 | ], 643 | "execution_count": 11, 644 | "outputs": [] 645 | }, 646 | { 647 | "cell_type": "code", 648 | "metadata": { 649 | "colab": { 650 | "base_uri": "https://localhost:8080/" 651 | }, 652 | "id": "DaU1wI1gfQ3O", 653 | "outputId": "879d7cae-38a3-46dd-af9e-582c19776065" 654 | }, 655 | "source": [ 656 | "# training the model \n", 657 | "H = model.fit(x_train_pp, y_train, validation_split=0.1, epochs=10, verbose=2)" 658 | ], 659 | "execution_count": 12, 660 | "outputs": [ 661 | { 662 | "output_type": "stream", 663 | "text": [ 664 | "Epoch 1/10\n", 665 | "1407/1407 - 69s - loss: 1.5887 - accuracy: 0.5685 - val_loss: 1.3398 - val_accuracy: 0.6276\n", 666 | "Epoch 2/10\n", 667 | "1407/1407 - 34s - loss: 1.2304 - accuracy: 0.6419 - val_loss: 1.3710 - val_accuracy: 0.6300\n", 668 | "Epoch 3/10\n", 669 | "1407/1407 - 34s - loss: 1.1648 
- accuracy: 0.6585 - val_loss: 1.3614 - val_accuracy: 0.6254\n", 670 | "Epoch 4/10\n", 671 | "1407/1407 - 34s - loss: 1.1258 - accuracy: 0.6690 - val_loss: 1.4487 - val_accuracy: 0.6216\n", 672 | "Epoch 5/10\n", 673 | "1407/1407 - 34s - loss: 1.1015 - accuracy: 0.6775 - val_loss: 1.4542 - val_accuracy: 0.6178\n", 674 | "Epoch 6/10\n", 675 | "1407/1407 - 34s - loss: 1.1143 - accuracy: 0.6785 - val_loss: 1.4831 - val_accuracy: 0.6294\n", 676 | "Epoch 7/10\n", 677 | "1407/1407 - 34s - loss: 1.0876 - accuracy: 0.6824 - val_loss: 1.4893 - val_accuracy: 0.6412\n", 678 | "Epoch 8/10\n", 679 | "1407/1407 - 34s - loss: 1.0887 - accuracy: 0.6838 - val_loss: 1.6309 - val_accuracy: 0.6074\n", 680 | "Epoch 9/10\n", 681 | "1407/1407 - 34s - loss: 1.0838 - accuracy: 0.6880 - val_loss: 1.5115 - val_accuracy: 0.6274\n", 682 | "Epoch 10/10\n", 683 | "1407/1407 - 34s - loss: 1.0830 - accuracy: 0.6887 - val_loss: 1.5944 - val_accuracy: 0.6218\n" 684 | ], 685 | "name": "stdout" 686 | } 687 | ] 688 | }, 689 | { 690 | "cell_type": "code", 691 | "metadata": { 692 | "colab": { 693 | "base_uri": "https://localhost:8080/", 694 | "height": 581 695 | }, 696 | "id": "P5dxn52RfdEL", 697 | "outputId": "e295538f-5262-46e3-c05c-38865247f8b9" 698 | }, 699 | "source": [ 700 | "# plotting the training and validation set loss and accuracy results\n", 701 | "plt.style.use('ggplot')\n", 702 | "plt.figure()\n", 703 | "num_epochs=10\n", 704 | "epochs = np.arange(num_epochs)\n", 705 | "train_loss = H.history['loss']\n", 706 | "val_loss = H.history['val_loss']\n", 707 | "plt.plot(epochs, train_loss, label='train loss')\n", 708 | "plt.plot(epochs, val_loss, label='val_loss')\n", 709 | "plt.title('training and testing loss')\n", 710 | "plt.legend()\n", 711 | "plt.xlabel('#epochs')\n", 712 | "plt.ylabel('loss')\n", 713 | "\n", 714 | "plt.figure()\n", 715 | "train_acc = H.history['accuracy']\n", 716 | "val_acc = H.history['val_accuracy']\n", 717 | "plt.plot(epochs, train_acc, label='train accuracy')\n", 718 | 
"plt.plot(epochs, val_acc, label='val_accuracy')\n", 719 | "plt.title('training and testing accuracy')\n", 720 | "plt.legend()\n", 721 | "plt.xlabel('#epochs')\n", 722 | "plt.ylabel('accuracy')\n", 723 | "\n", 724 | "plt.show()\n" 725 | ], 726 | "execution_count": 16, 727 | "outputs": [ 728 | { 729 | "output_type": "display_data", 730 | "data": { 731 | "image/png": "\n", 732 | "text/plain": [ 733 | "
" 734 | ] 735 | }, 736 | "metadata": { 737 | "tags": [] 738 | } 739 | }, 740 | { 741 | "output_type": "display_data", 742 | "data": { 743 | "image/png": "\n", 744 | "text/plain": [ 745 | "
" 746 | ] 747 | }, 748 | "metadata": { 749 | "tags": [] 750 | } 751 | } 752 | ] 753 | }, 754 | { 755 | "cell_type": "code", 756 | "metadata": { 757 | "colab": { 758 | "base_uri": "https://localhost:8080/" 759 | }, 760 | "id": "nMBd-COFf_yI", 761 | "outputId": "8cef5860-2e2b-4b8e-b3bb-d780e7962fd7" 762 | }, 763 | "source": [ 764 | "# evaluating the model on the test set \n", 765 | "model.evaluate(x_test_pp, y_test)" 766 | ], 767 | "execution_count": 17, 768 | "outputs": [ 769 | { 770 | "output_type": "stream", 771 | "text": [ 772 | "313/313 [==============================] - 8s 26ms/step - loss: 1.6533 - accuracy: 0.6125\n" 773 | ], 774 | "name": "stdout" 775 | }, 776 | { 777 | "output_type": "execute_result", 778 | "data": { 779 | "text/plain": [ 780 | "[1.653288722038269, 0.612500011920929]" 781 | ] 782 | }, 783 | "metadata": { 784 | "tags": [] 785 | }, 786 | "execution_count": 17 787 | } 788 | ] 789 | } 790 | ] 791 | } -------------------------------------------------------------------------------- /TransferLearningResnet/cifar10_img.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasugupta9/DeepLearningProjects/f6396195d04cf91b59d1b78bddac27ba0a607119/TransferLearningResnet/cifar10_img.png -------------------------------------------------------------------------------- /VideoActivityRecognition3DResnet/Readme.md: -------------------------------------------------------------------------------- 1 | # Performing activity recognition in videos using a pre-trained 3D ResNet model 2 | 1. Pre-trained model used for recognition is from https://github.com/kenshohara 3 | 2. Model can recognize 400 different activities 4 | 3. Running the code notebook requires 3 files - the pre-trained model file, the class names text file, and a sample video file. These files are automatically downloaded from my Google Drive when the associated ipynb notebook is run 5 | 4. 
Link to Medium blog post with more details 6 | 7 | ![Image snapshot for Video Activity Recognition](img_activity_recognition.jpg) 8 | 9 | ## Programming language and libraries used 10 | 1. Python programming language 11 | 2. OpenCV library 12 | 3. Other Python libraries, including NumPy, etc. 13 | 14 | -------------------------------------------------------------------------------- /VideoActivityRecognition3DResnet/img_activity_recognition.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasugupta9/DeepLearningProjects/f6396195d04cf91b59d1b78bddac27ba0a607119/VideoActivityRecognition3DResnet/img_activity_recognition.jpg --------------------------------------------------------------------------------