├── .gitignore
├── CIFAR10_Image_Classifier
├── Cifar10_ImageClassifier.ipynb
├── Readme.md
└── cifar10_img.png
├── ConvolutionalNeuralNetworkVisualizer
├── ConvolutionalNeuralNetwork_Visualizer.ipynb
├── Readme.md
└── cool_cat.jpg
├── FaceDetectionOpenCV
├── Readme.md
├── deploy.prototxt.txt
├── face_detection_ssd_parallel.py
└── res10_300x300_ssd_iter_140000.caffemodel
├── MultiThreadedVideoProcessing
├── Readme.md
├── video_processing_parallel.py
└── video_processing_simple.py
├── NeuralStyleTransfer
├── Neural_Style_Transfer_Tensorflow.ipynb
├── README.md
├── content.jpg
├── neural_style_transfer.png
└── style_mosaic.jpg
├── README.md
├── StreetViewHouseNumbers_Classifier
├── Colab_ImageClassifier_SVHN.ipynb
├── Readme.md
└── SVHN_Overview_Image.png
├── TransferLearningResnet
├── Readme.md
├── Transfer_Learning_ResNet_ImageClassifier.ipynb
└── cifar10_img.png
└── VideoActivityRecognition3DResnet
├── Readme.md
├── img_activity_recognition.jpg
└── video_activity_recognition_3DResnets.ipynb
/.gitignore:
--------------------------------------------------------------------------------
1 | CIFAR10_Image_Classifier.zip
2 | ConvolutionalNeuralNetworkVisualizer.zip
3 | NeuralStyleTransfer.zip
4 | StreetViewHouseNumbers_Classifier.zip
5 | TransferLearningResnet.zip
--------------------------------------------------------------------------------
/CIFAR10_Image_Classifier/Cifar10_ImageClassifier.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "name": "Cifar10_ImageClassifier_v2.ipynb",
7 | "provenance": [],
8 | "collapsed_sections": [],
9 | "toc_visible": true,
10 | "machine_shape": "hm",
11 | "authorship_tag": "ABX9TyPCgLrK1cIJtC5kaIsuHVGK",
12 | "include_colab_link": true
13 | },
14 | "kernelspec": {
15 | "display_name": "Python 3",
16 | "name": "python3"
17 | },
18 | "accelerator": "GPU"
19 | },
20 | "cells": [
21 | {
22 | "cell_type": "markdown",
23 | "metadata": {
24 | "id": "view-in-github",
25 | "colab_type": "text"
26 | },
27 | "source": [
28 | "
"
29 | ]
30 | },
31 | {
32 | "cell_type": "code",
33 | "metadata": {
34 | "id": "MjzPfXdR50ff"
35 | },
36 | "source": [
37 | "# importing required libraries\n",
38 | "import numpy as np\n",
39 | "import matplotlib.pyplot as plt \n",
40 | "from tensorflow.keras.datasets import cifar10\n",
41 | "from tensorflow.keras.models import Sequential\n",
42 | "from tensorflow.keras.layers import Conv2D, Dense, Flatten, MaxPooling2D, BatchNormalization, Dropout"
43 | ],
44 | "execution_count": 1,
45 | "outputs": []
46 | },
47 | {
48 | "cell_type": "code",
49 | "metadata": {
50 | "colab": {
51 | "base_uri": "https://localhost:8080/"
52 | },
53 | "id": "xwJrg32a7itw",
54 | "outputId": "b9c76392-242f-435b-d63e-c2c906f8698a"
55 | },
56 | "source": [
57 | "# Loading the cifar-10 dataset \n",
58 | "# cifar10 is a dataset of 50,000 32x32 color training images and 10,000 test images, labeled over 10 categories. \n",
59 | "(x_train, y_train), (x_test, y_test) = cifar10.load_data()\n",
60 | "print(x_train.shape, y_train.shape)\n",
61 | "print(x_test.shape, y_test.shape)\n",
62 | "print(np.min(x_train), np.max(x_train)) # to check if scaling to the range 0-1 is needed\n",
63 | "print(np.min(y_train), np.max(y_train))"
64 | ],
65 | "execution_count": 2,
66 | "outputs": [
67 | {
68 | "output_type": "stream",
69 | "text": [
70 | "Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz\n",
71 | "170500096/170498071 [==============================] - 4s 0us/step\n",
72 | "(50000, 32, 32, 3) (50000, 1)\n",
73 | "(10000, 32, 32, 3) (10000, 1)\n",
74 | "0 255\n",
75 | "0 9\n"
76 | ],
77 | "name": "stdout"
78 | }
79 | ]
80 | },
81 | {
82 | "cell_type": "code",
83 | "metadata": {
84 | "id": "CL2-Mc0TE_he"
85 | },
86 | "source": [
87 | "# scaling x_train and x_test values to the range 0-1\n",
88 | "x_train_scaled = x_train/255.\n",
89 | "x_test_scaled = x_test/255.\n",
90 | "# y_train, y_test values are already labelled as integers from 0 to 9 , so no preprocessing required"
91 | ],
92 | "execution_count": 3,
93 | "outputs": []
94 | },
95 | {
96 | "cell_type": "code",
97 | "metadata": {
98 | "colab": {
99 | "base_uri": "https://localhost:8080/"
100 | },
101 | "id": "b2A2O8cjFhbE",
102 | "outputId": "56207d39-de18-44be-be46-75676649f9ec"
103 | },
104 | "source": [
105 | "# Defining the CNN architecture using keras Sequential API (a minified version of VGGNet)\n",
106 | "model = Sequential()\n",
107 | "model.add(Conv2D(32, kernel_size=(3,3), padding=\"same\", activation=\"relu\", input_shape=(32,32,3)))\n",
108 | "model.add(BatchNormalization())\n",
109 | "model.add(Conv2D(32, kernel_size=(3,3), padding=\"same\", activation=\"relu\"))\n",
110 | "model.add(BatchNormalization())\n",
111 | "model.add(MaxPooling2D(pool_size=(2,2), strides=(2,2)))\n",
112 | "model.add(Dropout(0.25))\n",
113 | "model.add(Conv2D(64, kernel_size=(3,3), padding=\"same\", activation=\"relu\"))\n",
114 | "model.add(BatchNormalization())\n",
115 | "model.add(Conv2D(64, kernel_size=(3,3), padding=\"same\", activation=\"relu\"))\n",
116 | "model.add(BatchNormalization())\n",
117 | "model.add(MaxPooling2D(pool_size=(2,2), strides=(2,2)))\n",
118 | "model.add(Dropout(0.25))\n",
119 | "model.add(Flatten())\n",
120 | "model.add(Dense(512, activation=\"relu\"))\n",
121 | "model.add(BatchNormalization())\n",
122 | "model.add(Dropout(0.5))\n",
123 | "model.add(Dense(10, activation=\"softmax\"))\n",
124 | "\n",
125 | "# printing model summary\n",
126 | "model.summary()\n",
127 | "\n",
128 | "# Compiling the model \n",
129 | "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"adam\", metrics=[\"accuracy\"])"
130 | ],
131 | "execution_count": 4,
132 | "outputs": [
133 | {
134 | "output_type": "stream",
135 | "text": [
136 | "Model: \"sequential\"\n",
137 | "_________________________________________________________________\n",
138 | "Layer (type) Output Shape Param # \n",
139 | "=================================================================\n",
140 | "conv2d (Conv2D) (None, 32, 32, 32) 896 \n",
141 | "_________________________________________________________________\n",
142 | "batch_normalization (BatchNo (None, 32, 32, 32) 128 \n",
143 | "_________________________________________________________________\n",
144 | "conv2d_1 (Conv2D) (None, 32, 32, 32) 9248 \n",
145 | "_________________________________________________________________\n",
146 | "batch_normalization_1 (Batch (None, 32, 32, 32) 128 \n",
147 | "_________________________________________________________________\n",
148 | "max_pooling2d (MaxPooling2D) (None, 16, 16, 32) 0 \n",
149 | "_________________________________________________________________\n",
150 | "dropout (Dropout) (None, 16, 16, 32) 0 \n",
151 | "_________________________________________________________________\n",
152 | "conv2d_2 (Conv2D) (None, 16, 16, 64) 18496 \n",
153 | "_________________________________________________________________\n",
154 | "batch_normalization_2 (Batch (None, 16, 16, 64) 256 \n",
155 | "_________________________________________________________________\n",
156 | "conv2d_3 (Conv2D) (None, 16, 16, 64) 36928 \n",
157 | "_________________________________________________________________\n",
158 | "batch_normalization_3 (Batch (None, 16, 16, 64) 256 \n",
159 | "_________________________________________________________________\n",
160 | "max_pooling2d_1 (MaxPooling2 (None, 8, 8, 64) 0 \n",
161 | "_________________________________________________________________\n",
162 | "dropout_1 (Dropout) (None, 8, 8, 64) 0 \n",
163 | "_________________________________________________________________\n",
164 | "flatten (Flatten) (None, 4096) 0 \n",
165 | "_________________________________________________________________\n",
166 | "dense (Dense) (None, 512) 2097664 \n",
167 | "_________________________________________________________________\n",
168 | "batch_normalization_4 (Batch (None, 512) 2048 \n",
169 | "_________________________________________________________________\n",
170 | "dropout_2 (Dropout) (None, 512) 0 \n",
171 | "_________________________________________________________________\n",
172 | "dense_1 (Dense) (None, 10) 5130 \n",
173 | "=================================================================\n",
174 | "Total params: 2,171,178\n",
175 | "Trainable params: 2,169,770\n",
176 | "Non-trainable params: 1,408\n",
177 | "_________________________________________________________________\n"
178 | ],
179 | "name": "stdout"
180 | }
181 | ]
182 | },
183 | {
184 | "cell_type": "code",
185 | "metadata": {
186 | "colab": {
187 | "base_uri": "https://localhost:8080/"
188 | },
189 | "id": "T1JX59wSF-Lv",
190 | "outputId": "a73c23f1-8bd2-43c6-9df9-726c890a66d7"
191 | },
192 | "source": [
193 | "# Training the model \n",
194 | "history = model.fit(x_train_scaled, y_train, batch_size=64, epochs=40, validation_data=(x_test_scaled, y_test), verbose=2)"
195 | ],
196 | "execution_count": 6,
197 | "outputs": [
198 | {
199 | "output_type": "stream",
200 | "text": [
201 | "Epoch 1/40\n",
202 | "782/782 - 20s - loss: 1.4748 - accuracy: 0.5088 - val_loss: 1.0989 - val_accuracy: 0.6155\n",
203 | "Epoch 2/40\n",
204 | "782/782 - 4s - loss: 0.9532 - accuracy: 0.6642 - val_loss: 3.6966 - val_accuracy: 0.2994\n",
205 | "Epoch 3/40\n",
206 | "782/782 - 4s - loss: 0.8142 - accuracy: 0.7133 - val_loss: 1.0699 - val_accuracy: 0.6588\n",
207 | "Epoch 4/40\n",
208 | "782/782 - 4s - loss: 0.7352 - accuracy: 0.7435 - val_loss: 0.6923 - val_accuracy: 0.7580\n",
209 | "Epoch 5/40\n",
210 | "782/782 - 4s - loss: 0.6651 - accuracy: 0.7642 - val_loss: 0.6709 - val_accuracy: 0.7713\n",
211 | "Epoch 6/40\n",
212 | "782/782 - 4s - loss: 0.6095 - accuracy: 0.7856 - val_loss: 0.7959 - val_accuracy: 0.7364\n",
213 | "Epoch 7/40\n",
214 | "782/782 - 4s - loss: 0.5581 - accuracy: 0.8040 - val_loss: 0.5927 - val_accuracy: 0.7970\n",
215 | "Epoch 8/40\n",
216 | "782/782 - 4s - loss: 0.5189 - accuracy: 0.8168 - val_loss: 0.6359 - val_accuracy: 0.7847\n",
217 | "Epoch 9/40\n",
218 | "782/782 - 4s - loss: 0.4788 - accuracy: 0.8311 - val_loss: 0.5534 - val_accuracy: 0.8137\n",
219 | "Epoch 10/40\n",
220 | "782/782 - 4s - loss: 0.4419 - accuracy: 0.8447 - val_loss: 0.6788 - val_accuracy: 0.7834\n",
221 | "Epoch 11/40\n",
222 | "782/782 - 4s - loss: 0.4076 - accuracy: 0.8566 - val_loss: 0.6475 - val_accuracy: 0.7919\n",
223 | "Epoch 12/40\n",
224 | "782/782 - 4s - loss: 0.3912 - accuracy: 0.8626 - val_loss: 0.6400 - val_accuracy: 0.7905\n",
225 | "Epoch 13/40\n",
226 | "782/782 - 4s - loss: 0.3676 - accuracy: 0.8695 - val_loss: 0.5595 - val_accuracy: 0.8173\n",
227 | "Epoch 14/40\n",
228 | "782/782 - 4s - loss: 0.3337 - accuracy: 0.8804 - val_loss: 0.6950 - val_accuracy: 0.7885\n",
229 | "Epoch 15/40\n",
230 | "782/782 - 4s - loss: 0.3138 - accuracy: 0.8882 - val_loss: 0.6289 - val_accuracy: 0.8036\n",
231 | "Epoch 16/40\n",
232 | "782/782 - 4s - loss: 0.3163 - accuracy: 0.8884 - val_loss: 0.5864 - val_accuracy: 0.8213\n",
233 | "Epoch 17/40\n",
234 | "782/782 - 4s - loss: 0.2875 - accuracy: 0.8976 - val_loss: 0.5803 - val_accuracy: 0.8184\n",
235 | "Epoch 18/40\n",
236 | "782/782 - 4s - loss: 0.2745 - accuracy: 0.9024 - val_loss: 0.6515 - val_accuracy: 0.8099\n",
237 | "Epoch 19/40\n",
238 | "782/782 - 4s - loss: 0.2605 - accuracy: 0.9081 - val_loss: 0.6302 - val_accuracy: 0.8155\n",
239 | "Epoch 20/40\n",
240 | "782/782 - 4s - loss: 0.2476 - accuracy: 0.9112 - val_loss: 0.5855 - val_accuracy: 0.8273\n",
241 | "Epoch 21/40\n",
242 | "782/782 - 4s - loss: 0.2432 - accuracy: 0.9142 - val_loss: 0.6192 - val_accuracy: 0.8196\n",
243 | "Epoch 22/40\n",
244 | "782/782 - 4s - loss: 0.2368 - accuracy: 0.9165 - val_loss: 0.6565 - val_accuracy: 0.8243\n",
245 | "Epoch 23/40\n",
246 | "782/782 - 4s - loss: 0.2182 - accuracy: 0.9230 - val_loss: 0.5994 - val_accuracy: 0.8281\n",
247 | "Epoch 24/40\n",
248 | "782/782 - 4s - loss: 0.2184 - accuracy: 0.9231 - val_loss: 0.6279 - val_accuracy: 0.8204\n",
249 | "Epoch 25/40\n",
250 | "782/782 - 4s - loss: 0.2067 - accuracy: 0.9278 - val_loss: 0.5764 - val_accuracy: 0.8358\n",
251 | "Epoch 26/40\n",
252 | "782/782 - 4s - loss: 0.1998 - accuracy: 0.9291 - val_loss: 0.5831 - val_accuracy: 0.8359\n",
253 | "Epoch 27/40\n",
254 | "782/782 - 4s - loss: 0.1929 - accuracy: 0.9316 - val_loss: 0.6074 - val_accuracy: 0.8339\n",
255 | "Epoch 28/40\n",
256 | "782/782 - 4s - loss: 0.1918 - accuracy: 0.9311 - val_loss: 0.6144 - val_accuracy: 0.8290\n",
257 | "Epoch 29/40\n",
258 | "782/782 - 4s - loss: 0.1897 - accuracy: 0.9332 - val_loss: 0.5774 - val_accuracy: 0.8377\n",
259 | "Epoch 30/40\n",
260 | "782/782 - 4s - loss: 0.1908 - accuracy: 0.9328 - val_loss: 0.6057 - val_accuracy: 0.8367\n",
261 | "Epoch 31/40\n",
262 | "782/782 - 4s - loss: 0.1724 - accuracy: 0.9394 - val_loss: 0.6022 - val_accuracy: 0.8353\n",
263 | "Epoch 32/40\n",
264 | "782/782 - 4s - loss: 0.1679 - accuracy: 0.9412 - val_loss: 0.6457 - val_accuracy: 0.8297\n",
265 | "Epoch 33/40\n",
266 | "782/782 - 4s - loss: 0.1659 - accuracy: 0.9401 - val_loss: 0.7429 - val_accuracy: 0.8173\n",
267 | "Epoch 34/40\n",
268 | "782/782 - 4s - loss: 0.1665 - accuracy: 0.9422 - val_loss: 0.6386 - val_accuracy: 0.8347\n",
269 | "Epoch 35/40\n",
270 | "782/782 - 4s - loss: 0.1641 - accuracy: 0.9411 - val_loss: 0.6328 - val_accuracy: 0.8298\n",
271 | "Epoch 36/40\n",
272 | "782/782 - 4s - loss: 0.1527 - accuracy: 0.9462 - val_loss: 0.6865 - val_accuracy: 0.8236\n",
273 | "Epoch 37/40\n",
274 | "782/782 - 4s - loss: 0.1524 - accuracy: 0.9456 - val_loss: 0.6909 - val_accuracy: 0.8270\n",
275 | "Epoch 38/40\n",
276 | "782/782 - 4s - loss: 0.1472 - accuracy: 0.9484 - val_loss: 0.7240 - val_accuracy: 0.8218\n",
277 | "Epoch 39/40\n",
278 | "782/782 - 4s - loss: 0.1456 - accuracy: 0.9484 - val_loss: 0.6283 - val_accuracy: 0.8368\n",
279 | "Epoch 40/40\n",
280 | "782/782 - 4s - loss: 0.1409 - accuracy: 0.9503 - val_loss: 0.6493 - val_accuracy: 0.8256\n"
281 | ],
282 | "name": "stdout"
283 | }
284 | ]
285 | },
286 | {
287 | "cell_type": "code",
288 | "metadata": {
289 | "id": "tChTuxzcGUcA",
290 | "colab": {
291 | "base_uri": "https://localhost:8080/",
292 | "height": 299
293 | },
294 | "outputId": "3f1a63ee-0d41-433c-a70d-1bce7aef8ded"
295 | },
296 | "source": [
297 | "# plotting loss and accuracy values \n",
298 | "num_epochs=40\n",
299 | "plt.style.use('ggplot')\n",
300 | "plt.figure()\n",
301 | "plt.plot(np.arange(num_epochs), history.history[\"loss\"], label=\"train_loss\")\n",
302 | "plt.plot(np.arange(num_epochs), history.history[\"val_loss\"], label=\"val_loss\")\n",
303 | "plt.plot(np.arange(num_epochs), history.history[\"accuracy\"], label=\"train_acc\")\n",
304 | "plt.plot(np.arange(num_epochs), history.history[\"val_accuracy\"], label=\"val_acc\")\n",
305 | "plt.title(\"Accuracy and Loss Curves\")\n",
306 | "plt.xlabel(\"Epoch #\")\n",
307 | "plt.ylabel(\"Accuracy/Loss\")\n",
308 | "plt.legend()\n",
309 | "plt.show()\n"
310 | ],
311 | "execution_count": 7,
312 | "outputs": [
313 | {
314 | "output_type": "display_data",
315 | "data": {
316 | "image/png": "\n",
317 | "text/plain": [
318 | ""
319 | ]
320 | },
321 | "metadata": {
322 | "tags": []
323 | }
324 | }
325 | ]
326 | }
327 | ]
328 | }
--------------------------------------------------------------------------------
/CIFAR10_Image_Classifier/Readme.md:
--------------------------------------------------------------------------------
1 | # This project is an Image Classifier for the CIFAR-10 dataset containing 10 different image classes - airplanes, automobiles, birds, cats, deer, dogs, frogs, horses, ships, and trucks.
2 |
3 | ![CIFAR-10 overview image](cifar10_img.png)
4 | [*Image Source*](https://www.cs.toronto.edu/~kriz/cifar.html)
5 |
6 | ## Project folder includes
7 | 1. Readme.md (this file)
8 | 2. Google colab notebook for building, training and testing a Convolutional Neural Network on the CIFAR-10 dataset.
9 | 3. Misc
10 | - CIFAR-10 overview image
11 |
12 | ## Programming language and Libraries used
13 | 1. Python programming language
14 | 2. Deep learning libraries tensorflow 2.x and Keras API
15 | 3. Python libraries including numpy and matplotlib
16 |
17 | ## Notes
18 | 1. The purpose of this project is to demonstrate how to build, train and evaluate a simple CNN model for classifying images in the CIFAR-10 dataset.
19 |
--------------------------------------------------------------------------------
/CIFAR10_Image_Classifier/cifar10_img.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasugupta9/DeepLearningProjects/f6396195d04cf91b59d1b78bddac27ba0a607119/CIFAR10_Image_Classifier/cifar10_img.png
--------------------------------------------------------------------------------
/ConvolutionalNeuralNetworkVisualizer/Readme.md:
--------------------------------------------------------------------------------
1 | # This project is a Convolutional Neural Network Visualizer for visualizing outputs from inner convolutional layers within a CNN model.
2 |
3 | ![Cool Cat image](cool_cat.jpg)
4 | [*Image Source*](https://i.pinimg.com/originals/73/fd/d4/73fdd4752a176af8f388b31a67e93d87.jpg)
5 |
6 | ## Project folder includes
7 | 1. Readme.md (this file)
8 | 2. Google colab notebook for visualizing the CNN layer outputs on the Cool Cat image
9 | 3. Misc
10 | - Cool Cat Image -
11 | In Zambia’s South Luangwa National Park, a photographer had been watching a pride of lions while they slept off a feast from a buffalo kill. When this female walked away, he anticipated that she might be going for a drink and so he positioned his vehicle on the opposite side of the waterhole. The Cool Cat picture is one of the highly commended 2018 images from Wildlife Photographer of the Year.
12 |
13 | ## Programming language and Libraries used
14 | 1. Python programming language
15 | 2. Deep learning libraries tensorflow 2.x and Keras API
16 | 3. Python libraries including numpy, etc
17 |
18 | ## Notes
19 | 1. The purpose of this project is to demonstrate how to use the functional API in Keras to visualize the outputs of inner layers within a CNN.
20 |
--------------------------------------------------------------------------------
/ConvolutionalNeuralNetworkVisualizer/cool_cat.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasugupta9/DeepLearningProjects/f6396195d04cf91b59d1b78bddac27ba0a607119/ConvolutionalNeuralNetworkVisualizer/cool_cat.jpg
--------------------------------------------------------------------------------
/FaceDetectionOpenCV/Readme.md:
--------------------------------------------------------------------------------
1 | # Real-Time Face Detection Using OpenCV
2 | 1. The face_detection_ssd_parallel.py script performs real-time face detection on the input stream from a webcam connected to a laptop or desktop
3 | 2. A pre-trained deep learning model for face detection from OpenCV is used (Link)
4 | 3. The imutils library is used to read frames from the webcam using a multi-threaded approach to achieve higher FPS (Link)
5 | 4. Model architecture is a Single Shot Detector (SSD) framework with a ResNet backbone
6 | 5. Model files are in Caffe format
7 | * deploy.prototxt.txt - defines model architecture
8 | * res10_300x300_ssd_iter_140000.caffemodel - contains trained model weights
9 | 6. Model files can also be directly downloaded from OpenCV repository. Useful links: Link1 Link2 Link3
10 |
11 | ## Programming language and libraries used
12 | 1. Python programming language
13 | 2. OpenCV library
14 | 3. Imutils library (install using -> pip install imutils)
15 | 4. Other Python libraries including Numpy library
16 |
17 |
--------------------------------------------------------------------------------
/FaceDetectionOpenCV/deploy.prototxt.txt:
--------------------------------------------------------------------------------
1 | input: "data"
2 | input_shape {
3 | dim: 1
4 | dim: 3
5 | dim: 300
6 | dim: 300
7 | }
8 |
9 | layer {
10 | name: "data_bn"
11 | type: "BatchNorm"
12 | bottom: "data"
13 | top: "data_bn"
14 | param {
15 | lr_mult: 0.0
16 | }
17 | param {
18 | lr_mult: 0.0
19 | }
20 | param {
21 | lr_mult: 0.0
22 | }
23 | }
24 | layer {
25 | name: "data_scale"
26 | type: "Scale"
27 | bottom: "data_bn"
28 | top: "data_bn"
29 | param {
30 | lr_mult: 1.0
31 | decay_mult: 1.0
32 | }
33 | param {
34 | lr_mult: 2.0
35 | decay_mult: 1.0
36 | }
37 | scale_param {
38 | bias_term: true
39 | }
40 | }
41 | layer {
42 | name: "conv1_h"
43 | type: "Convolution"
44 | bottom: "data_bn"
45 | top: "conv1_h"
46 | param {
47 | lr_mult: 1.0
48 | decay_mult: 1.0
49 | }
50 | param {
51 | lr_mult: 2.0
52 | decay_mult: 1.0
53 | }
54 | convolution_param {
55 | num_output: 32
56 | pad: 3
57 | kernel_size: 7
58 | stride: 2
59 | weight_filler {
60 | type: "msra"
61 | variance_norm: FAN_OUT
62 | }
63 | bias_filler {
64 | type: "constant"
65 | value: 0.0
66 | }
67 | }
68 | }
69 | layer {
70 | name: "conv1_bn_h"
71 | type: "BatchNorm"
72 | bottom: "conv1_h"
73 | top: "conv1_h"
74 | param {
75 | lr_mult: 0.0
76 | }
77 | param {
78 | lr_mult: 0.0
79 | }
80 | param {
81 | lr_mult: 0.0
82 | }
83 | }
84 | layer {
85 | name: "conv1_scale_h"
86 | type: "Scale"
87 | bottom: "conv1_h"
88 | top: "conv1_h"
89 | param {
90 | lr_mult: 1.0
91 | decay_mult: 1.0
92 | }
93 | param {
94 | lr_mult: 2.0
95 | decay_mult: 1.0
96 | }
97 | scale_param {
98 | bias_term: true
99 | }
100 | }
101 | layer {
102 | name: "conv1_relu"
103 | type: "ReLU"
104 | bottom: "conv1_h"
105 | top: "conv1_h"
106 | }
107 | layer {
108 | name: "conv1_pool"
109 | type: "Pooling"
110 | bottom: "conv1_h"
111 | top: "conv1_pool"
112 | pooling_param {
113 | kernel_size: 3
114 | stride: 2
115 | }
116 | }
117 | layer {
118 | name: "layer_64_1_conv1_h"
119 | type: "Convolution"
120 | bottom: "conv1_pool"
121 | top: "layer_64_1_conv1_h"
122 | param {
123 | lr_mult: 1.0
124 | decay_mult: 1.0
125 | }
126 | convolution_param {
127 | num_output: 32
128 | bias_term: false
129 | pad: 1
130 | kernel_size: 3
131 | stride: 1
132 | weight_filler {
133 | type: "msra"
134 | }
135 | bias_filler {
136 | type: "constant"
137 | value: 0.0
138 | }
139 | }
140 | }
141 | layer {
142 | name: "layer_64_1_bn2_h"
143 | type: "BatchNorm"
144 | bottom: "layer_64_1_conv1_h"
145 | top: "layer_64_1_conv1_h"
146 | param {
147 | lr_mult: 0.0
148 | }
149 | param {
150 | lr_mult: 0.0
151 | }
152 | param {
153 | lr_mult: 0.0
154 | }
155 | }
156 | layer {
157 | name: "layer_64_1_scale2_h"
158 | type: "Scale"
159 | bottom: "layer_64_1_conv1_h"
160 | top: "layer_64_1_conv1_h"
161 | param {
162 | lr_mult: 1.0
163 | decay_mult: 1.0
164 | }
165 | param {
166 | lr_mult: 2.0
167 | decay_mult: 1.0
168 | }
169 | scale_param {
170 | bias_term: true
171 | }
172 | }
173 | layer {
174 | name: "layer_64_1_relu2"
175 | type: "ReLU"
176 | bottom: "layer_64_1_conv1_h"
177 | top: "layer_64_1_conv1_h"
178 | }
179 | layer {
180 | name: "layer_64_1_conv2_h"
181 | type: "Convolution"
182 | bottom: "layer_64_1_conv1_h"
183 | top: "layer_64_1_conv2_h"
184 | param {
185 | lr_mult: 1.0
186 | decay_mult: 1.0
187 | }
188 | convolution_param {
189 | num_output: 32
190 | bias_term: false
191 | pad: 1
192 | kernel_size: 3
193 | stride: 1
194 | weight_filler {
195 | type: "msra"
196 | }
197 | bias_filler {
198 | type: "constant"
199 | value: 0.0
200 | }
201 | }
202 | }
203 | layer {
204 | name: "layer_64_1_sum"
205 | type: "Eltwise"
206 | bottom: "layer_64_1_conv2_h"
207 | bottom: "conv1_pool"
208 | top: "layer_64_1_sum"
209 | }
210 | layer {
211 | name: "layer_128_1_bn1_h"
212 | type: "BatchNorm"
213 | bottom: "layer_64_1_sum"
214 | top: "layer_128_1_bn1_h"
215 | param {
216 | lr_mult: 0.0
217 | }
218 | param {
219 | lr_mult: 0.0
220 | }
221 | param {
222 | lr_mult: 0.0
223 | }
224 | }
225 | layer {
226 | name: "layer_128_1_scale1_h"
227 | type: "Scale"
228 | bottom: "layer_128_1_bn1_h"
229 | top: "layer_128_1_bn1_h"
230 | param {
231 | lr_mult: 1.0
232 | decay_mult: 1.0
233 | }
234 | param {
235 | lr_mult: 2.0
236 | decay_mult: 1.0
237 | }
238 | scale_param {
239 | bias_term: true
240 | }
241 | }
242 | layer {
243 | name: "layer_128_1_relu1"
244 | type: "ReLU"
245 | bottom: "layer_128_1_bn1_h"
246 | top: "layer_128_1_bn1_h"
247 | }
248 | layer {
249 | name: "layer_128_1_conv1_h"
250 | type: "Convolution"
251 | bottom: "layer_128_1_bn1_h"
252 | top: "layer_128_1_conv1_h"
253 | param {
254 | lr_mult: 1.0
255 | decay_mult: 1.0
256 | }
257 | convolution_param {
258 | num_output: 128
259 | bias_term: false
260 | pad: 1
261 | kernel_size: 3
262 | stride: 2
263 | weight_filler {
264 | type: "msra"
265 | }
266 | bias_filler {
267 | type: "constant"
268 | value: 0.0
269 | }
270 | }
271 | }
272 | layer {
273 | name: "layer_128_1_bn2"
274 | type: "BatchNorm"
275 | bottom: "layer_128_1_conv1_h"
276 | top: "layer_128_1_conv1_h"
277 | param {
278 | lr_mult: 0.0
279 | }
280 | param {
281 | lr_mult: 0.0
282 | }
283 | param {
284 | lr_mult: 0.0
285 | }
286 | }
287 | layer {
288 | name: "layer_128_1_scale2"
289 | type: "Scale"
290 | bottom: "layer_128_1_conv1_h"
291 | top: "layer_128_1_conv1_h"
292 | param {
293 | lr_mult: 1.0
294 | decay_mult: 1.0
295 | }
296 | param {
297 | lr_mult: 2.0
298 | decay_mult: 1.0
299 | }
300 | scale_param {
301 | bias_term: true
302 | }
303 | }
304 | layer {
305 | name: "layer_128_1_relu2"
306 | type: "ReLU"
307 | bottom: "layer_128_1_conv1_h"
308 | top: "layer_128_1_conv1_h"
309 | }
310 | layer {
311 | name: "layer_128_1_conv2"
312 | type: "Convolution"
313 | bottom: "layer_128_1_conv1_h"
314 | top: "layer_128_1_conv2"
315 | param {
316 | lr_mult: 1.0
317 | decay_mult: 1.0
318 | }
319 | convolution_param {
320 | num_output: 128
321 | bias_term: false
322 | pad: 1
323 | kernel_size: 3
324 | stride: 1
325 | weight_filler {
326 | type: "msra"
327 | }
328 | bias_filler {
329 | type: "constant"
330 | value: 0.0
331 | }
332 | }
333 | }
334 | layer {
335 | name: "layer_128_1_conv_expand_h"
336 | type: "Convolution"
337 | bottom: "layer_128_1_bn1_h"
338 | top: "layer_128_1_conv_expand_h"
339 | param {
340 | lr_mult: 1.0
341 | decay_mult: 1.0
342 | }
343 | convolution_param {
344 | num_output: 128
345 | bias_term: false
346 | pad: 0
347 | kernel_size: 1
348 | stride: 2
349 | weight_filler {
350 | type: "msra"
351 | }
352 | bias_filler {
353 | type: "constant"
354 | value: 0.0
355 | }
356 | }
357 | }
358 | layer {
359 | name: "layer_128_1_sum"
360 | type: "Eltwise"
361 | bottom: "layer_128_1_conv2"
362 | bottom: "layer_128_1_conv_expand_h"
363 | top: "layer_128_1_sum"
364 | }
365 | layer {
366 | name: "layer_256_1_bn1"
367 | type: "BatchNorm"
368 | bottom: "layer_128_1_sum"
369 | top: "layer_256_1_bn1"
370 | param {
371 | lr_mult: 0.0
372 | }
373 | param {
374 | lr_mult: 0.0
375 | }
376 | param {
377 | lr_mult: 0.0
378 | }
379 | }
380 | layer {
381 | name: "layer_256_1_scale1"
382 | type: "Scale"
383 | bottom: "layer_256_1_bn1"
384 | top: "layer_256_1_bn1"
385 | param {
386 | lr_mult: 1.0
387 | decay_mult: 1.0
388 | }
389 | param {
390 | lr_mult: 2.0
391 | decay_mult: 1.0
392 | }
393 | scale_param {
394 | bias_term: true
395 | }
396 | }
397 | layer {
398 | name: "layer_256_1_relu1"
399 | type: "ReLU"
400 | bottom: "layer_256_1_bn1"
401 | top: "layer_256_1_bn1"
402 | }
403 | layer {
404 | name: "layer_256_1_conv1"
405 | type: "Convolution"
406 | bottom: "layer_256_1_bn1"
407 | top: "layer_256_1_conv1"
408 | param {
409 | lr_mult: 1.0
410 | decay_mult: 1.0
411 | }
412 | convolution_param {
413 | num_output: 256
414 | bias_term: false
415 | pad: 1
416 | kernel_size: 3
417 | stride: 2
418 | weight_filler {
419 | type: "msra"
420 | }
421 | bias_filler {
422 | type: "constant"
423 | value: 0.0
424 | }
425 | }
426 | }
427 | layer {
428 | name: "layer_256_1_bn2"
429 | type: "BatchNorm"
430 | bottom: "layer_256_1_conv1"
431 | top: "layer_256_1_conv1"
432 | param {
433 | lr_mult: 0.0
434 | }
435 | param {
436 | lr_mult: 0.0
437 | }
438 | param {
439 | lr_mult: 0.0
440 | }
441 | }
442 | layer {
443 | name: "layer_256_1_scale2"
444 | type: "Scale"
445 | bottom: "layer_256_1_conv1"
446 | top: "layer_256_1_conv1"
447 | param {
448 | lr_mult: 1.0
449 | decay_mult: 1.0
450 | }
451 | param {
452 | lr_mult: 2.0
453 | decay_mult: 1.0
454 | }
455 | scale_param {
456 | bias_term: true
457 | }
458 | }
459 | layer {
460 | name: "layer_256_1_relu2"
461 | type: "ReLU"
462 | bottom: "layer_256_1_conv1"
463 | top: "layer_256_1_conv1"
464 | }
465 | layer {
466 | name: "layer_256_1_conv2"
467 | type: "Convolution"
468 | bottom: "layer_256_1_conv1"
469 | top: "layer_256_1_conv2"
470 | param {
471 | lr_mult: 1.0
472 | decay_mult: 1.0
473 | }
474 | convolution_param {
475 | num_output: 256
476 | bias_term: false
477 | pad: 1
478 | kernel_size: 3
479 | stride: 1
480 | weight_filler {
481 | type: "msra"
482 | }
483 | bias_filler {
484 | type: "constant"
485 | value: 0.0
486 | }
487 | }
488 | }
489 | layer {
490 | name: "layer_256_1_conv_expand"
491 | type: "Convolution"
492 | bottom: "layer_256_1_bn1"
493 | top: "layer_256_1_conv_expand"
494 | param {
495 | lr_mult: 1.0
496 | decay_mult: 1.0
497 | }
498 | convolution_param {
499 | num_output: 256
500 | bias_term: false
501 | pad: 0
502 | kernel_size: 1
503 | stride: 2
504 | weight_filler {
505 | type: "msra"
506 | }
507 | bias_filler {
508 | type: "constant"
509 | value: 0.0
510 | }
511 | }
512 | }
513 | layer {
514 | name: "layer_256_1_sum"
515 | type: "Eltwise"
516 | bottom: "layer_256_1_conv2"
517 | bottom: "layer_256_1_conv_expand"
518 | top: "layer_256_1_sum"
519 | }
520 | layer {
521 | name: "layer_512_1_bn1"
522 | type: "BatchNorm"
523 | bottom: "layer_256_1_sum"
524 | top: "layer_512_1_bn1"
525 | param {
526 | lr_mult: 0.0
527 | }
528 | param {
529 | lr_mult: 0.0
530 | }
531 | param {
532 | lr_mult: 0.0
533 | }
534 | }
535 | layer {
536 | name: "layer_512_1_scale1"
537 | type: "Scale"
538 | bottom: "layer_512_1_bn1"
539 | top: "layer_512_1_bn1"
540 | param {
541 | lr_mult: 1.0
542 | decay_mult: 1.0
543 | }
544 | param {
545 | lr_mult: 2.0
546 | decay_mult: 1.0
547 | }
548 | scale_param {
549 | bias_term: true
550 | }
551 | }
552 | layer {
553 | name: "layer_512_1_relu1"
554 | type: "ReLU"
555 | bottom: "layer_512_1_bn1"
556 | top: "layer_512_1_bn1"
557 | }
558 | layer {
559 | name: "layer_512_1_conv1_h"
560 | type: "Convolution"
561 | bottom: "layer_512_1_bn1"
562 | top: "layer_512_1_conv1_h"
563 | param {
564 | lr_mult: 1.0
565 | decay_mult: 1.0
566 | }
567 | convolution_param {
568 | num_output: 128
569 | bias_term: false
570 | pad: 1
571 | kernel_size: 3
572 | stride: 1 # 2
573 | weight_filler {
574 | type: "msra"
575 | }
576 | bias_filler {
577 | type: "constant"
578 | value: 0.0
579 | }
580 | }
581 | }
582 | layer {
583 | name: "layer_512_1_bn2_h"
584 | type: "BatchNorm"
585 | bottom: "layer_512_1_conv1_h"
586 | top: "layer_512_1_conv1_h"
587 | param {
588 | lr_mult: 0.0
589 | }
590 | param {
591 | lr_mult: 0.0
592 | }
593 | param {
594 | lr_mult: 0.0
595 | }
596 | }
597 | layer {
598 | name: "layer_512_1_scale2_h"
599 | type: "Scale"
600 | bottom: "layer_512_1_conv1_h"
601 | top: "layer_512_1_conv1_h"
602 | param {
603 | lr_mult: 1.0
604 | decay_mult: 1.0
605 | }
606 | param {
607 | lr_mult: 2.0
608 | decay_mult: 1.0
609 | }
610 | scale_param {
611 | bias_term: true
612 | }
613 | }
614 | layer {
615 | name: "layer_512_1_relu2"
616 | type: "ReLU"
617 | bottom: "layer_512_1_conv1_h"
618 | top: "layer_512_1_conv1_h"
619 | }
620 | layer {
621 | name: "layer_512_1_conv2_h"
622 | type: "Convolution"
623 | bottom: "layer_512_1_conv1_h"
624 | top: "layer_512_1_conv2_h"
625 | param {
626 | lr_mult: 1.0
627 | decay_mult: 1.0
628 | }
629 | convolution_param {
630 | num_output: 256
631 | bias_term: false
632 | pad: 2 # 1
633 | kernel_size: 3
634 | stride: 1
635 | dilation: 2
636 | weight_filler {
637 | type: "msra"
638 | }
639 | bias_filler {
640 | type: "constant"
641 | value: 0.0
642 | }
643 | }
644 | }
645 | layer {
646 | name: "layer_512_1_conv_expand_h"
647 | type: "Convolution"
648 | bottom: "layer_512_1_bn1"
649 | top: "layer_512_1_conv_expand_h"
650 | param {
651 | lr_mult: 1.0
652 | decay_mult: 1.0
653 | }
654 | convolution_param {
655 | num_output: 256
656 | bias_term: false
657 | pad: 0
658 | kernel_size: 1
659 | stride: 1 # 2
660 | weight_filler {
661 | type: "msra"
662 | }
663 | bias_filler {
664 | type: "constant"
665 | value: 0.0
666 | }
667 | }
668 | }
669 | layer {
670 | name: "layer_512_1_sum"
671 | type: "Eltwise"
672 | bottom: "layer_512_1_conv2_h"
673 | bottom: "layer_512_1_conv_expand_h"
674 | top: "layer_512_1_sum"
675 | }
676 | layer {
677 | name: "last_bn_h"
678 | type: "BatchNorm"
679 | bottom: "layer_512_1_sum"
680 | top: "layer_512_1_sum"
681 | param {
682 | lr_mult: 0.0
683 | }
684 | param {
685 | lr_mult: 0.0
686 | }
687 | param {
688 | lr_mult: 0.0
689 | }
690 | }
691 | layer {
692 | name: "last_scale_h"
693 | type: "Scale"
694 | bottom: "layer_512_1_sum"
695 | top: "layer_512_1_sum"
696 | param {
697 | lr_mult: 1.0
698 | decay_mult: 1.0
699 | }
700 | param {
701 | lr_mult: 2.0
702 | decay_mult: 1.0
703 | }
704 | scale_param {
705 | bias_term: true
706 | }
707 | }
708 | layer {
709 | name: "last_relu"
710 | type: "ReLU"
711 | bottom: "layer_512_1_sum"
712 | top: "fc7"
713 | }
714 |
715 | layer {
716 | name: "conv6_1_h"
717 | type: "Convolution"
718 | bottom: "fc7"
719 | top: "conv6_1_h"
720 | param {
721 | lr_mult: 1
722 | decay_mult: 1
723 | }
724 | param {
725 | lr_mult: 2
726 | decay_mult: 0
727 | }
728 | convolution_param {
729 | num_output: 128
730 | pad: 0
731 | kernel_size: 1
732 | stride: 1
733 | weight_filler {
734 | type: "xavier"
735 | }
736 | bias_filler {
737 | type: "constant"
738 | value: 0
739 | }
740 | }
741 | }
742 | layer {
743 | name: "conv6_1_relu"
744 | type: "ReLU"
745 | bottom: "conv6_1_h"
746 | top: "conv6_1_h"
747 | }
748 | layer {
749 | name: "conv6_2_h"
750 | type: "Convolution"
751 | bottom: "conv6_1_h"
752 | top: "conv6_2_h"
753 | param {
754 | lr_mult: 1
755 | decay_mult: 1
756 | }
757 | param {
758 | lr_mult: 2
759 | decay_mult: 0
760 | }
761 | convolution_param {
762 | num_output: 256
763 | pad: 1
764 | kernel_size: 3
765 | stride: 2
766 | weight_filler {
767 | type: "xavier"
768 | }
769 | bias_filler {
770 | type: "constant"
771 | value: 0
772 | }
773 | }
774 | }
775 | layer {
776 | name: "conv6_2_relu"
777 | type: "ReLU"
778 | bottom: "conv6_2_h"
779 | top: "conv6_2_h"
780 | }
781 | layer {
782 | name: "conv7_1_h"
783 | type: "Convolution"
784 | bottom: "conv6_2_h"
785 | top: "conv7_1_h"
786 | param {
787 | lr_mult: 1
788 | decay_mult: 1
789 | }
790 | param {
791 | lr_mult: 2
792 | decay_mult: 0
793 | }
794 | convolution_param {
795 | num_output: 64
796 | pad: 0
797 | kernel_size: 1
798 | stride: 1
799 | weight_filler {
800 | type: "xavier"
801 | }
802 | bias_filler {
803 | type: "constant"
804 | value: 0
805 | }
806 | }
807 | }
808 | layer {
809 | name: "conv7_1_relu"
810 | type: "ReLU"
811 | bottom: "conv7_1_h"
812 | top: "conv7_1_h"
813 | }
814 | layer {
815 | name: "conv7_2_h"
816 | type: "Convolution"
817 | bottom: "conv7_1_h"
818 | top: "conv7_2_h"
819 | param {
820 | lr_mult: 1
821 | decay_mult: 1
822 | }
823 | param {
824 | lr_mult: 2
825 | decay_mult: 0
826 | }
827 | convolution_param {
828 | num_output: 128
829 | pad: 1
830 | kernel_size: 3
831 | stride: 2
832 | weight_filler {
833 | type: "xavier"
834 | }
835 | bias_filler {
836 | type: "constant"
837 | value: 0
838 | }
839 | }
840 | }
841 | layer {
842 | name: "conv7_2_relu"
843 | type: "ReLU"
844 | bottom: "conv7_2_h"
845 | top: "conv7_2_h"
846 | }
847 | layer {
848 | name: "conv8_1_h"
849 | type: "Convolution"
850 | bottom: "conv7_2_h"
851 | top: "conv8_1_h"
852 | param {
853 | lr_mult: 1
854 | decay_mult: 1
855 | }
856 | param {
857 | lr_mult: 2
858 | decay_mult: 0
859 | }
860 | convolution_param {
861 | num_output: 64
862 | pad: 0
863 | kernel_size: 1
864 | stride: 1
865 | weight_filler {
866 | type: "xavier"
867 | }
868 | bias_filler {
869 | type: "constant"
870 | value: 0
871 | }
872 | }
873 | }
874 | layer {
875 | name: "conv8_1_relu"
876 | type: "ReLU"
877 | bottom: "conv8_1_h"
878 | top: "conv8_1_h"
879 | }
880 | layer {
881 | name: "conv8_2_h"
882 | type: "Convolution"
883 | bottom: "conv8_1_h"
884 | top: "conv8_2_h"
885 | param {
886 | lr_mult: 1
887 | decay_mult: 1
888 | }
889 | param {
890 | lr_mult: 2
891 | decay_mult: 0
892 | }
893 | convolution_param {
894 | num_output: 128
895 | pad: 1
896 | kernel_size: 3
897 | stride: 1
898 | weight_filler {
899 | type: "xavier"
900 | }
901 | bias_filler {
902 | type: "constant"
903 | value: 0
904 | }
905 | }
906 | }
907 | layer {
908 | name: "conv8_2_relu"
909 | type: "ReLU"
910 | bottom: "conv8_2_h"
911 | top: "conv8_2_h"
912 | }
913 | layer {
914 | name: "conv9_1_h"
915 | type: "Convolution"
916 | bottom: "conv8_2_h"
917 | top: "conv9_1_h"
918 | param {
919 | lr_mult: 1
920 | decay_mult: 1
921 | }
922 | param {
923 | lr_mult: 2
924 | decay_mult: 0
925 | }
926 | convolution_param {
927 | num_output: 64
928 | pad: 0
929 | kernel_size: 1
930 | stride: 1
931 | weight_filler {
932 | type: "xavier"
933 | }
934 | bias_filler {
935 | type: "constant"
936 | value: 0
937 | }
938 | }
939 | }
940 | layer {
941 | name: "conv9_1_relu"
942 | type: "ReLU"
943 | bottom: "conv9_1_h"
944 | top: "conv9_1_h"
945 | }
946 | layer {
947 | name: "conv9_2_h"
948 | type: "Convolution"
949 | bottom: "conv9_1_h"
950 | top: "conv9_2_h"
951 | param {
952 | lr_mult: 1
953 | decay_mult: 1
954 | }
955 | param {
956 | lr_mult: 2
957 | decay_mult: 0
958 | }
959 | convolution_param {
960 | num_output: 128
961 | pad: 1
962 | kernel_size: 3
963 | stride: 1
964 | weight_filler {
965 | type: "xavier"
966 | }
967 | bias_filler {
968 | type: "constant"
969 | value: 0
970 | }
971 | }
972 | }
973 | layer {
974 | name: "conv9_2_relu"
975 | type: "ReLU"
976 | bottom: "conv9_2_h"
977 | top: "conv9_2_h"
978 | }
979 | layer {
980 | name: "conv4_3_norm"
981 | type: "Normalize"
982 | bottom: "layer_256_1_bn1"
983 | top: "conv4_3_norm"
984 | norm_param {
985 | across_spatial: false
986 | scale_filler {
987 | type: "constant"
988 | value: 20
989 | }
990 | channel_shared: false
991 | }
992 | }
993 | layer {
994 | name: "conv4_3_norm_mbox_loc"
995 | type: "Convolution"
996 | bottom: "conv4_3_norm"
997 | top: "conv4_3_norm_mbox_loc"
998 | param {
999 | lr_mult: 1
1000 | decay_mult: 1
1001 | }
1002 | param {
1003 | lr_mult: 2
1004 | decay_mult: 0
1005 | }
1006 | convolution_param {
1007 | num_output: 16
1008 | pad: 1
1009 | kernel_size: 3
1010 | stride: 1
1011 | weight_filler {
1012 | type: "xavier"
1013 | }
1014 | bias_filler {
1015 | type: "constant"
1016 | value: 0
1017 | }
1018 | }
1019 | }
1020 | layer {
1021 | name: "conv4_3_norm_mbox_loc_perm"
1022 | type: "Permute"
1023 | bottom: "conv4_3_norm_mbox_loc"
1024 | top: "conv4_3_norm_mbox_loc_perm"
1025 | permute_param {
1026 | order: 0
1027 | order: 2
1028 | order: 3
1029 | order: 1
1030 | }
1031 | }
1032 | layer {
1033 | name: "conv4_3_norm_mbox_loc_flat"
1034 | type: "Flatten"
1035 | bottom: "conv4_3_norm_mbox_loc_perm"
1036 | top: "conv4_3_norm_mbox_loc_flat"
1037 | flatten_param {
1038 | axis: 1
1039 | }
1040 | }
1041 | layer {
1042 | name: "conv4_3_norm_mbox_conf"
1043 | type: "Convolution"
1044 | bottom: "conv4_3_norm"
1045 | top: "conv4_3_norm_mbox_conf"
1046 | param {
1047 | lr_mult: 1
1048 | decay_mult: 1
1049 | }
1050 | param {
1051 | lr_mult: 2
1052 | decay_mult: 0
1053 | }
1054 | convolution_param {
1055 | num_output: 8 # 84
1056 | pad: 1
1057 | kernel_size: 3
1058 | stride: 1
1059 | weight_filler {
1060 | type: "xavier"
1061 | }
1062 | bias_filler {
1063 | type: "constant"
1064 | value: 0
1065 | }
1066 | }
1067 | }
1068 | layer {
1069 | name: "conv4_3_norm_mbox_conf_perm"
1070 | type: "Permute"
1071 | bottom: "conv4_3_norm_mbox_conf"
1072 | top: "conv4_3_norm_mbox_conf_perm"
1073 | permute_param {
1074 | order: 0
1075 | order: 2
1076 | order: 3
1077 | order: 1
1078 | }
1079 | }
1080 | layer {
1081 | name: "conv4_3_norm_mbox_conf_flat"
1082 | type: "Flatten"
1083 | bottom: "conv4_3_norm_mbox_conf_perm"
1084 | top: "conv4_3_norm_mbox_conf_flat"
1085 | flatten_param {
1086 | axis: 1
1087 | }
1088 | }
1089 | layer {
1090 | name: "conv4_3_norm_mbox_priorbox"
1091 | type: "PriorBox"
1092 | bottom: "conv4_3_norm"
1093 | bottom: "data"
1094 | top: "conv4_3_norm_mbox_priorbox"
1095 | prior_box_param {
1096 | min_size: 30.0
1097 | max_size: 60.0
1098 | aspect_ratio: 2
1099 | flip: true
1100 | clip: false
1101 | variance: 0.1
1102 | variance: 0.1
1103 | variance: 0.2
1104 | variance: 0.2
1105 | step: 8
1106 | offset: 0.5
1107 | }
1108 | }
1109 | layer {
1110 | name: "fc7_mbox_loc"
1111 | type: "Convolution"
1112 | bottom: "fc7"
1113 | top: "fc7_mbox_loc"
1114 | param {
1115 | lr_mult: 1
1116 | decay_mult: 1
1117 | }
1118 | param {
1119 | lr_mult: 2
1120 | decay_mult: 0
1121 | }
1122 | convolution_param {
1123 | num_output: 24
1124 | pad: 1
1125 | kernel_size: 3
1126 | stride: 1
1127 | weight_filler {
1128 | type: "xavier"
1129 | }
1130 | bias_filler {
1131 | type: "constant"
1132 | value: 0
1133 | }
1134 | }
1135 | }
1136 | layer {
1137 | name: "fc7_mbox_loc_perm"
1138 | type: "Permute"
1139 | bottom: "fc7_mbox_loc"
1140 | top: "fc7_mbox_loc_perm"
1141 | permute_param {
1142 | order: 0
1143 | order: 2
1144 | order: 3
1145 | order: 1
1146 | }
1147 | }
1148 | layer {
1149 | name: "fc7_mbox_loc_flat"
1150 | type: "Flatten"
1151 | bottom: "fc7_mbox_loc_perm"
1152 | top: "fc7_mbox_loc_flat"
1153 | flatten_param {
1154 | axis: 1
1155 | }
1156 | }
1157 | layer {
1158 | name: "fc7_mbox_conf"
1159 | type: "Convolution"
1160 | bottom: "fc7"
1161 | top: "fc7_mbox_conf"
1162 | param {
1163 | lr_mult: 1
1164 | decay_mult: 1
1165 | }
1166 | param {
1167 | lr_mult: 2
1168 | decay_mult: 0
1169 | }
1170 | convolution_param {
1171 | num_output: 12 # 126
1172 | pad: 1
1173 | kernel_size: 3
1174 | stride: 1
1175 | weight_filler {
1176 | type: "xavier"
1177 | }
1178 | bias_filler {
1179 | type: "constant"
1180 | value: 0
1181 | }
1182 | }
1183 | }
1184 | layer {
1185 | name: "fc7_mbox_conf_perm"
1186 | type: "Permute"
1187 | bottom: "fc7_mbox_conf"
1188 | top: "fc7_mbox_conf_perm"
1189 | permute_param {
1190 | order: 0
1191 | order: 2
1192 | order: 3
1193 | order: 1
1194 | }
1195 | }
1196 | layer {
1197 | name: "fc7_mbox_conf_flat"
1198 | type: "Flatten"
1199 | bottom: "fc7_mbox_conf_perm"
1200 | top: "fc7_mbox_conf_flat"
1201 | flatten_param {
1202 | axis: 1
1203 | }
1204 | }
1205 | layer {
1206 | name: "fc7_mbox_priorbox"
1207 | type: "PriorBox"
1208 | bottom: "fc7"
1209 | bottom: "data"
1210 | top: "fc7_mbox_priorbox"
1211 | prior_box_param {
1212 | min_size: 60.0
1213 | max_size: 111.0
1214 | aspect_ratio: 2
1215 | aspect_ratio: 3
1216 | flip: true
1217 | clip: false
1218 | variance: 0.1
1219 | variance: 0.1
1220 | variance: 0.2
1221 | variance: 0.2
1222 | step: 16
1223 | offset: 0.5
1224 | }
1225 | }
1226 | layer {
1227 | name: "conv6_2_mbox_loc"
1228 | type: "Convolution"
1229 | bottom: "conv6_2_h"
1230 | top: "conv6_2_mbox_loc"
1231 | param {
1232 | lr_mult: 1
1233 | decay_mult: 1
1234 | }
1235 | param {
1236 | lr_mult: 2
1237 | decay_mult: 0
1238 | }
1239 | convolution_param {
1240 | num_output: 24
1241 | pad: 1
1242 | kernel_size: 3
1243 | stride: 1
1244 | weight_filler {
1245 | type: "xavier"
1246 | }
1247 | bias_filler {
1248 | type: "constant"
1249 | value: 0
1250 | }
1251 | }
1252 | }
1253 | layer {
1254 | name: "conv6_2_mbox_loc_perm"
1255 | type: "Permute"
1256 | bottom: "conv6_2_mbox_loc"
1257 | top: "conv6_2_mbox_loc_perm"
1258 | permute_param {
1259 | order: 0
1260 | order: 2
1261 | order: 3
1262 | order: 1
1263 | }
1264 | }
1265 | layer {
1266 | name: "conv6_2_mbox_loc_flat"
1267 | type: "Flatten"
1268 | bottom: "conv6_2_mbox_loc_perm"
1269 | top: "conv6_2_mbox_loc_flat"
1270 | flatten_param {
1271 | axis: 1
1272 | }
1273 | }
1274 | layer {
1275 | name: "conv6_2_mbox_conf"
1276 | type: "Convolution"
1277 | bottom: "conv6_2_h"
1278 | top: "conv6_2_mbox_conf"
1279 | param {
1280 | lr_mult: 1
1281 | decay_mult: 1
1282 | }
1283 | param {
1284 | lr_mult: 2
1285 | decay_mult: 0
1286 | }
1287 | convolution_param {
1288 | num_output: 12 # 126
1289 | pad: 1
1290 | kernel_size: 3
1291 | stride: 1
1292 | weight_filler {
1293 | type: "xavier"
1294 | }
1295 | bias_filler {
1296 | type: "constant"
1297 | value: 0
1298 | }
1299 | }
1300 | }
1301 | layer {
1302 | name: "conv6_2_mbox_conf_perm"
1303 | type: "Permute"
1304 | bottom: "conv6_2_mbox_conf"
1305 | top: "conv6_2_mbox_conf_perm"
1306 | permute_param {
1307 | order: 0
1308 | order: 2
1309 | order: 3
1310 | order: 1
1311 | }
1312 | }
1313 | layer {
1314 | name: "conv6_2_mbox_conf_flat"
1315 | type: "Flatten"
1316 | bottom: "conv6_2_mbox_conf_perm"
1317 | top: "conv6_2_mbox_conf_flat"
1318 | flatten_param {
1319 | axis: 1
1320 | }
1321 | }
1322 | layer {
1323 | name: "conv6_2_mbox_priorbox"
1324 | type: "PriorBox"
1325 | bottom: "conv6_2_h"
1326 | bottom: "data"
1327 | top: "conv6_2_mbox_priorbox"
1328 | prior_box_param {
1329 | min_size: 111.0
1330 | max_size: 162.0
1331 | aspect_ratio: 2
1332 | aspect_ratio: 3
1333 | flip: true
1334 | clip: false
1335 | variance: 0.1
1336 | variance: 0.1
1337 | variance: 0.2
1338 | variance: 0.2
1339 | step: 32
1340 | offset: 0.5
1341 | }
1342 | }
1343 | layer {
1344 | name: "conv7_2_mbox_loc"
1345 | type: "Convolution"
1346 | bottom: "conv7_2_h"
1347 | top: "conv7_2_mbox_loc"
1348 | param {
1349 | lr_mult: 1
1350 | decay_mult: 1
1351 | }
1352 | param {
1353 | lr_mult: 2
1354 | decay_mult: 0
1355 | }
1356 | convolution_param {
1357 | num_output: 24
1358 | pad: 1
1359 | kernel_size: 3
1360 | stride: 1
1361 | weight_filler {
1362 | type: "xavier"
1363 | }
1364 | bias_filler {
1365 | type: "constant"
1366 | value: 0
1367 | }
1368 | }
1369 | }
1370 | layer {
1371 | name: "conv7_2_mbox_loc_perm"
1372 | type: "Permute"
1373 | bottom: "conv7_2_mbox_loc"
1374 | top: "conv7_2_mbox_loc_perm"
1375 | permute_param {
1376 | order: 0
1377 | order: 2
1378 | order: 3
1379 | order: 1
1380 | }
1381 | }
1382 | layer {
1383 | name: "conv7_2_mbox_loc_flat"
1384 | type: "Flatten"
1385 | bottom: "conv7_2_mbox_loc_perm"
1386 | top: "conv7_2_mbox_loc_flat"
1387 | flatten_param {
1388 | axis: 1
1389 | }
1390 | }
1391 | layer {
1392 | name: "conv7_2_mbox_conf"
1393 | type: "Convolution"
1394 | bottom: "conv7_2_h"
1395 | top: "conv7_2_mbox_conf"
1396 | param {
1397 | lr_mult: 1
1398 | decay_mult: 1
1399 | }
1400 | param {
1401 | lr_mult: 2
1402 | decay_mult: 0
1403 | }
1404 | convolution_param {
1405 | num_output: 12 # 126
1406 | pad: 1
1407 | kernel_size: 3
1408 | stride: 1
1409 | weight_filler {
1410 | type: "xavier"
1411 | }
1412 | bias_filler {
1413 | type: "constant"
1414 | value: 0
1415 | }
1416 | }
1417 | }
1418 | layer {
1419 | name: "conv7_2_mbox_conf_perm"
1420 | type: "Permute"
1421 | bottom: "conv7_2_mbox_conf"
1422 | top: "conv7_2_mbox_conf_perm"
1423 | permute_param {
1424 | order: 0
1425 | order: 2
1426 | order: 3
1427 | order: 1
1428 | }
1429 | }
1430 | layer {
1431 | name: "conv7_2_mbox_conf_flat"
1432 | type: "Flatten"
1433 | bottom: "conv7_2_mbox_conf_perm"
1434 | top: "conv7_2_mbox_conf_flat"
1435 | flatten_param {
1436 | axis: 1
1437 | }
1438 | }
1439 | layer {
1440 | name: "conv7_2_mbox_priorbox"
1441 | type: "PriorBox"
1442 | bottom: "conv7_2_h"
1443 | bottom: "data"
1444 | top: "conv7_2_mbox_priorbox"
1445 | prior_box_param {
1446 | min_size: 162.0
1447 | max_size: 213.0
1448 | aspect_ratio: 2
1449 | aspect_ratio: 3
1450 | flip: true
1451 | clip: false
1452 | variance: 0.1
1453 | variance: 0.1
1454 | variance: 0.2
1455 | variance: 0.2
1456 | step: 64
1457 | offset: 0.5
1458 | }
1459 | }
1460 | layer {
1461 | name: "conv8_2_mbox_loc"
1462 | type: "Convolution"
1463 | bottom: "conv8_2_h"
1464 | top: "conv8_2_mbox_loc"
1465 | param {
1466 | lr_mult: 1
1467 | decay_mult: 1
1468 | }
1469 | param {
1470 | lr_mult: 2
1471 | decay_mult: 0
1472 | }
1473 | convolution_param {
1474 | num_output: 16
1475 | pad: 1
1476 | kernel_size: 3
1477 | stride: 1
1478 | weight_filler {
1479 | type: "xavier"
1480 | }
1481 | bias_filler {
1482 | type: "constant"
1483 | value: 0
1484 | }
1485 | }
1486 | }
1487 | layer {
1488 | name: "conv8_2_mbox_loc_perm"
1489 | type: "Permute"
1490 | bottom: "conv8_2_mbox_loc"
1491 | top: "conv8_2_mbox_loc_perm"
1492 | permute_param {
1493 | order: 0
1494 | order: 2
1495 | order: 3
1496 | order: 1
1497 | }
1498 | }
1499 | layer {
1500 | name: "conv8_2_mbox_loc_flat"
1501 | type: "Flatten"
1502 | bottom: "conv8_2_mbox_loc_perm"
1503 | top: "conv8_2_mbox_loc_flat"
1504 | flatten_param {
1505 | axis: 1
1506 | }
1507 | }
1508 | layer {
1509 | name: "conv8_2_mbox_conf"
1510 | type: "Convolution"
1511 | bottom: "conv8_2_h"
1512 | top: "conv8_2_mbox_conf"
1513 | param {
1514 | lr_mult: 1
1515 | decay_mult: 1
1516 | }
1517 | param {
1518 | lr_mult: 2
1519 | decay_mult: 0
1520 | }
1521 | convolution_param {
1522 | num_output: 8 # 84
1523 | pad: 1
1524 | kernel_size: 3
1525 | stride: 1
1526 | weight_filler {
1527 | type: "xavier"
1528 | }
1529 | bias_filler {
1530 | type: "constant"
1531 | value: 0
1532 | }
1533 | }
1534 | }
1535 | layer {
1536 | name: "conv8_2_mbox_conf_perm"
1537 | type: "Permute"
1538 | bottom: "conv8_2_mbox_conf"
1539 | top: "conv8_2_mbox_conf_perm"
1540 | permute_param {
1541 | order: 0
1542 | order: 2
1543 | order: 3
1544 | order: 1
1545 | }
1546 | }
1547 | layer {
1548 | name: "conv8_2_mbox_conf_flat"
1549 | type: "Flatten"
1550 | bottom: "conv8_2_mbox_conf_perm"
1551 | top: "conv8_2_mbox_conf_flat"
1552 | flatten_param {
1553 | axis: 1
1554 | }
1555 | }
1556 | layer {
1557 | name: "conv8_2_mbox_priorbox"
1558 | type: "PriorBox"
1559 | bottom: "conv8_2_h"
1560 | bottom: "data"
1561 | top: "conv8_2_mbox_priorbox"
1562 | prior_box_param {
1563 | min_size: 213.0
1564 | max_size: 264.0
1565 | aspect_ratio: 2
1566 | flip: true
1567 | clip: false
1568 | variance: 0.1
1569 | variance: 0.1
1570 | variance: 0.2
1571 | variance: 0.2
1572 | step: 100
1573 | offset: 0.5
1574 | }
1575 | }
1576 | layer {
1577 | name: "conv9_2_mbox_loc"
1578 | type: "Convolution"
1579 | bottom: "conv9_2_h"
1580 | top: "conv9_2_mbox_loc"
1581 | param {
1582 | lr_mult: 1
1583 | decay_mult: 1
1584 | }
1585 | param {
1586 | lr_mult: 2
1587 | decay_mult: 0
1588 | }
1589 | convolution_param {
1590 | num_output: 16
1591 | pad: 1
1592 | kernel_size: 3
1593 | stride: 1
1594 | weight_filler {
1595 | type: "xavier"
1596 | }
1597 | bias_filler {
1598 | type: "constant"
1599 | value: 0
1600 | }
1601 | }
1602 | }
1603 | layer {
1604 | name: "conv9_2_mbox_loc_perm"
1605 | type: "Permute"
1606 | bottom: "conv9_2_mbox_loc"
1607 | top: "conv9_2_mbox_loc_perm"
1608 | permute_param {
1609 | order: 0
1610 | order: 2
1611 | order: 3
1612 | order: 1
1613 | }
1614 | }
1615 | layer {
1616 | name: "conv9_2_mbox_loc_flat"
1617 | type: "Flatten"
1618 | bottom: "conv9_2_mbox_loc_perm"
1619 | top: "conv9_2_mbox_loc_flat"
1620 | flatten_param {
1621 | axis: 1
1622 | }
1623 | }
1624 | layer {
1625 | name: "conv9_2_mbox_conf"
1626 | type: "Convolution"
1627 | bottom: "conv9_2_h"
1628 | top: "conv9_2_mbox_conf"
1629 | param {
1630 | lr_mult: 1
1631 | decay_mult: 1
1632 | }
1633 | param {
1634 | lr_mult: 2
1635 | decay_mult: 0
1636 | }
1637 | convolution_param {
1638 | num_output: 8 # 84
1639 | pad: 1
1640 | kernel_size: 3
1641 | stride: 1
1642 | weight_filler {
1643 | type: "xavier"
1644 | }
1645 | bias_filler {
1646 | type: "constant"
1647 | value: 0
1648 | }
1649 | }
1650 | }
1651 | layer {
1652 | name: "conv9_2_mbox_conf_perm"
1653 | type: "Permute"
1654 | bottom: "conv9_2_mbox_conf"
1655 | top: "conv9_2_mbox_conf_perm"
1656 | permute_param {
1657 | order: 0
1658 | order: 2
1659 | order: 3
1660 | order: 1
1661 | }
1662 | }
1663 | layer {
1664 | name: "conv9_2_mbox_conf_flat"
1665 | type: "Flatten"
1666 | bottom: "conv9_2_mbox_conf_perm"
1667 | top: "conv9_2_mbox_conf_flat"
1668 | flatten_param {
1669 | axis: 1
1670 | }
1671 | }
1672 | layer {
1673 | name: "conv9_2_mbox_priorbox"
1674 | type: "PriorBox"
1675 | bottom: "conv9_2_h"
1676 | bottom: "data"
1677 | top: "conv9_2_mbox_priorbox"
1678 | prior_box_param {
1679 | min_size: 264.0
1680 | max_size: 315.0
1681 | aspect_ratio: 2
1682 | flip: true
1683 | clip: false
1684 | variance: 0.1
1685 | variance: 0.1
1686 | variance: 0.2
1687 | variance: 0.2
1688 | step: 300
1689 | offset: 0.5
1690 | }
1691 | }
1692 | layer {
1693 | name: "mbox_loc"
1694 | type: "Concat"
1695 | bottom: "conv4_3_norm_mbox_loc_flat"
1696 | bottom: "fc7_mbox_loc_flat"
1697 | bottom: "conv6_2_mbox_loc_flat"
1698 | bottom: "conv7_2_mbox_loc_flat"
1699 | bottom: "conv8_2_mbox_loc_flat"
1700 | bottom: "conv9_2_mbox_loc_flat"
1701 | top: "mbox_loc"
1702 | concat_param {
1703 | axis: 1
1704 | }
1705 | }
1706 | layer {
1707 | name: "mbox_conf"
1708 | type: "Concat"
1709 | bottom: "conv4_3_norm_mbox_conf_flat"
1710 | bottom: "fc7_mbox_conf_flat"
1711 | bottom: "conv6_2_mbox_conf_flat"
1712 | bottom: "conv7_2_mbox_conf_flat"
1713 | bottom: "conv8_2_mbox_conf_flat"
1714 | bottom: "conv9_2_mbox_conf_flat"
1715 | top: "mbox_conf"
1716 | concat_param {
1717 | axis: 1
1718 | }
1719 | }
1720 | layer {
1721 | name: "mbox_priorbox"
1722 | type: "Concat"
1723 | bottom: "conv4_3_norm_mbox_priorbox"
1724 | bottom: "fc7_mbox_priorbox"
1725 | bottom: "conv6_2_mbox_priorbox"
1726 | bottom: "conv7_2_mbox_priorbox"
1727 | bottom: "conv8_2_mbox_priorbox"
1728 | bottom: "conv9_2_mbox_priorbox"
1729 | top: "mbox_priorbox"
1730 | concat_param {
1731 | axis: 2
1732 | }
1733 | }
1734 |
1735 | layer {
1736 | name: "mbox_conf_reshape"
1737 | type: "Reshape"
1738 | bottom: "mbox_conf"
1739 | top: "mbox_conf_reshape"
1740 | reshape_param {
1741 | shape {
1742 | dim: 0
1743 | dim: -1
1744 | dim: 2
1745 | }
1746 | }
1747 | }
1748 | layer {
1749 | name: "mbox_conf_softmax"
1750 | type: "Softmax"
1751 | bottom: "mbox_conf_reshape"
1752 | top: "mbox_conf_softmax"
1753 | softmax_param {
1754 | axis: 2
1755 | }
1756 | }
1757 | layer {
1758 | name: "mbox_conf_flatten"
1759 | type: "Flatten"
1760 | bottom: "mbox_conf_softmax"
1761 | top: "mbox_conf_flatten"
1762 | flatten_param {
1763 | axis: 1
1764 | }
1765 | }
1766 |
1767 | layer {
1768 | name: "detection_out"
1769 | type: "DetectionOutput"
1770 | bottom: "mbox_loc"
1771 | bottom: "mbox_conf_flatten"
1772 | bottom: "mbox_priorbox"
1773 | top: "detection_out"
1774 | include {
1775 | phase: TEST
1776 | }
1777 | detection_output_param {
1778 | num_classes: 2
1779 | share_location: true
1780 | background_label_id: 0
1781 | nms_param {
1782 | nms_threshold: 0.45
1783 | top_k: 400
1784 | }
1785 | code_type: CENTER_SIZE
1786 | keep_top_k: 200
1787 | confidence_threshold: 0.01
1788 | }
1789 | }
1790 |
--------------------------------------------------------------------------------
/FaceDetectionOpenCV/face_detection_ssd_parallel.py:
--------------------------------------------------------------------------------
1 | # importing required libraries
2 | import cv2
3 | import numpy as np
4 | from imutils.video import WebcamVideoStream, FPS # pip install imutils (if imutils library not already installed)
5 |
# defining parameters and helper functions for performing face detection
# Both paths are relative, so the script presumably has to be launched from the
# directory that holds these two files (FaceDetectionOpenCV) -- TODO confirm.
model_config_filepath = 'deploy.prototxt.txt'  # network architecture description
model_weights_filepath = 'res10_300x300_ssd_iter_140000.caffemodel'  # trained weights
confidence_thresh = 0.9 # threshold for filtering weak detections; detect_faces skips detections scoring below it

# loading inference model using cv2's dnn module
# The network is loaded once at module level and reused by detect_faces for every frame.
model = cv2.dnn.readNet(model=model_weights_filepath, config=model_config_filepath)
13 |
# defining function for detecting faces in a single input image/video frame
def detect_faces(frame):
    """Run the face detector on ``frame`` and draw a red box around each confident hit.

    The frame is resized to 300x300, mean-subtracted and passed through the
    module-level ``model``. Every detection whose confidence is at least
    ``confidence_thresh`` is drawn onto ``frame`` itself.

    Args:
        frame: image array whose first two dimensions are (height, width);
            the mean values used below are in BGR ordering.

    Returns:
        The same ``frame`` object, annotated in place.
    """
    # original frame resolution
    orig_h, orig_w = frame.shape[:2]

    # preprocessing input frame
    h, w = 300, 300  # required height and width after resizing
    resized_frame = cv2.resize(frame, (w, h))  # performing resizing
    # performing mean subtraction and reshaping to a blob/image of shape 1x3x300x300
    blob = cv2.dnn.blobFromImage(
        resized_frame, scalefactor=1, size=(w, h), mean=(104, 177, 123)
    )  # mean values are in BGR ordering

    # performing inference
    model.setInput(blob)
    detections = model.forward()  # returned detections are of shape (1,1,num_detections,7).

    # factors that map the normalised [xmin, ymin, xmax, ymax] back to original frame pixels
    scale = np.array([orig_w, orig_h, orig_w, orig_h])

    # looping over all detections and annotating input frame with high confidence detections
    for detection in detections[0, 0]:
        confidence = detection[2]  # index 2 stores the confidence/probability of the detection
        if confidence < confidence_thresh:
            continue

        # indices 3,4,5,6 store the bounding box coordinates in order
        # [xmin, ymin, xmax, ymax] with values in the range 0-1
        bbox = detection[3:7] * scale
        # The builtin ``int`` replaces the ``np.int`` alias, which was removed in
        # NumPy 1.24. The cast truncates toward zero (it does not round), and
        # ``tolist()`` yields plain Python ints for cv2.rectangle.
        xmin, ymin, xmax, ymax = bbox.astype(int).tolist()
        # drawing rectangular bounding boxes around detections
        cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (0, 0, 255), 2)

    return frame
41 |
# setting up input video stream for reading from webcam
webcam_stream = WebcamVideoStream(0)  # opening video stream from primary camera
webcam_stream.start()
fps = FPS()  # for computing frames processed per second

# processing video frames
fps.start()
try:
    while True:
        # reading next frame from input stream
        frame = webcam_stream.read()
        # NOTE(review): the stream presumably hands back None when the camera
        # produced no frame; detect_faces would crash on frame.shape, so stop.
        if frame is None:
            print("[Exiting] No frame received from webcam stream.")
            break
        fps.update()

        # detecting faces in the read frame
        frame_with_detections = detect_faces(frame)

        # displaying the frame
        cv2.imshow('Detected Faces', frame_with_detections)
        key_pressed = cv2.waitKey(1)  # a 1 millisecond delay
        if key_pressed == ord('q'):
            break
    fps.stop()
finally:
    # closing open streams, etc -- runs even if processing raised, so the
    # reader is told to stop and no window is left open
    webcam_stream.stop()
    cv2.destroyAllWindows()

# printing stats - fps
print("FPS:{}".format(fps.fps()))
70 |
--------------------------------------------------------------------------------
/FaceDetectionOpenCV/res10_300x300_ssd_iter_140000.caffemodel:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasugupta9/DeepLearningProjects/f6396195d04cf91b59d1b78bddac27ba0a607119/FaceDetectionOpenCV/res10_300x300_ssd_iter_140000.caffemodel
--------------------------------------------------------------------------------
/MultiThreadedVideoProcessing/Readme.md:
--------------------------------------------------------------------------------
1 | # Faster Real-Time Video Processing using Multi-Threading in Python
2 | 1. Two python scripts are included for processing video frames from a webcam connected to a laptop or desktop. One script uses a non-threaded implementation and the second script uses a threaded implementation.
3 | 2. The multi-threaded implementation can help achieve a higher FPS.
4 | 3. The `delay` variable in the code simulates the time taken by a video processing task, such as running a deep learning model for face detection. Different delay values can be used to evaluate performance.
5 | 4. Link to medium blog post with more details
6 |
7 | ## Programming language and libraries used
8 | 1. Python programming language
9 | 2. OpenCV library
10 | 3. Other Python libraries including threading library
11 |
12 |
--------------------------------------------------------------------------------
/MultiThreadedVideoProcessing/video_processing_parallel.py:
--------------------------------------------------------------------------------
1 | # importing required libraries
2 | import cv2
3 | import time
4 | from threading import Thread # library for implementing multi-threaded processing
5 |
# defining a helper class for implementing multi-threaded processing
class WebcamStream:
    """Read frames from a cv2.VideoCapture on a background thread.

    The reader thread keeps overwriting ``self.frame`` with the most recent
    successfully read frame, so ``read()`` returns immediately instead of
    waiting on the camera.

    ``stop()`` only requests termination; the capture itself is released by
    the reader thread when its loop ends.
    """

    def __init__(self, stream_id=0):
        """Open the capture, read one initial frame and prepare the reader thread.

        Args:
            stream_id: value passed to cv2.VideoCapture (default 0 for primary camera).

        Raises:
            SystemExit: with status 1 if the stream cannot be opened or the
                initial frame cannot be read.
        """
        self.stream_id = stream_id  # default is 0 for primary camera

        # opening video capture stream
        self.vcap = cv2.VideoCapture(self.stream_id)
        if not self.vcap.isOpened():
            print("[Exiting]: Error accessing webcam stream.")
            # non-zero status so a failure is not reported as success;
            # SystemExit is raised directly instead of the interactive exit() helper
            raise SystemExit(1)
        # CAP_PROP_FPS is the named constant for property id 5
        fps_input_stream = int(self.vcap.get(cv2.CAP_PROP_FPS))
        print("FPS of webcam hardware/input stream: {}".format(fps_input_stream))

        # reading a single frame from vcap stream for initializing
        self.grabbed, self.frame = self.vcap.read()
        if not self.grabbed:
            print('[Exiting] No more frames to read')
            self.vcap.release()  # do not leave the device open on the failure path
            raise SystemExit(1)

        # self.stopped is set to False when frames are being read from self.vcap stream
        self.stopped = True

        # reference to the thread for reading next available frame from input stream
        self.t = Thread(target=self.update, args=())
        self.t.daemon = True  # a daemon thread does not keep the program alive at exit

    # method for starting the thread for grabbing next available frame in input stream
    def start(self):
        """Mark the stream as running and start the reader thread."""
        self.stopped = False
        self.t.start()

    # method for reading next frame
    def update(self):
        """Reader-thread loop: keep grabbing frames until stopped or a read fails."""
        while not self.stopped:
            grabbed, frame = self.vcap.read()
            self.grabbed = grabbed
            if not grabbed:
                print('[Exiting] No more frames to read')
                self.stopped = True
                break
            # publish only successful reads, so read() never returns the empty
            # result of a failed grab to a consumer that is about to display it
            self.frame = frame
        self.vcap.release()

    # method for returning latest read frame
    def read(self):
        """Return the most recent successfully read frame."""
        return self.frame

    # method called to stop reading frames
    def stop(self):
        """Ask the reader thread to finish; it releases the capture on its way out."""
        self.stopped = True
56 |
57 |
# create the threaded webcam reader (stream_id = 0 selects the primary camera) and launch it
webcam_stream = WebcamStream(stream_id=0)
webcam_stream.start()

# consume frames until the reader reports it has stopped or the user presses 'q'
num_frames_processed = 0
start = time.time()
while not webcam_stream.stopped:
    # latest frame published by the reader thread
    frame = webcam_stream.read()

    # simulated per-frame processing cost, in seconds (delay=1 would be one second)
    delay = 0.03
    time.sleep(delay)
    num_frames_processed += 1

    cv2.imshow('frame', frame)
    if cv2.waitKey(1) == ord('q'):
        break
end = time.time()
webcam_stream.stop()  # ask the reader thread to finish

# report throughput for the processing loop
elapsed = end - start
fps = num_frames_processed / elapsed
print("FPS: {} , Elapsed Time: {} , Frames Processed: {}".format(fps, elapsed, num_frames_processed))

# tear down the display window(s)
cv2.destroyAllWindows()
90 |
--------------------------------------------------------------------------------
/MultiThreadedVideoProcessing/video_processing_simple.py:
--------------------------------------------------------------------------------
1 | # importing required libraries
2 | import cv2
3 | import time
4 |
# opening video capture stream
vcap = cv2.VideoCapture(0)
if not vcap.isOpened():
    print("[Exiting]: Error accessing webcam stream.")
    # non-zero status so the failure is visible to the caller; SystemExit is
    # raised directly instead of relying on the interactive exit() helper
    raise SystemExit(1)
# CAP_PROP_FPS is the named constant for property id 5
fps_input_stream = int(vcap.get(cv2.CAP_PROP_FPS))
print("FPS of webcam hardware/input stream: {}".format(fps_input_stream))
grabbed, frame = vcap.read()  # reading single frame for initialization/ hardware warm-up

# delay for simulating time taken for processing a frame
delay = 0.03  # delay value in seconds. so, delay=1 is equivalent to 1 second

# processing frames in input stream
num_frames_processed = 0
start = time.time()
try:
    while True:
        grabbed, frame = vcap.read()
        if not grabbed:
            print('[Exiting] No more frames to read')
            break

        time.sleep(delay)
        num_frames_processed += 1

        cv2.imshow('frame', frame)
        key = cv2.waitKey(1)
        if key == ord('q'):
            break
    end = time.time()

    # printing time elapsed and fps
    elapsed = end - start
    fps = num_frames_processed / elapsed
    print("FPS: {} , Elapsed Time: {} , Frames Processed: {}".format(fps, elapsed, num_frames_processed))
finally:
    # releasing input stream , closing all windows -- also when the loop raised
    # (e.g. Ctrl+C), so the camera is not left open
    vcap.release()
    cv2.destroyAllWindows()
42 |
--------------------------------------------------------------------------------
/NeuralStyleTransfer/README.md:
--------------------------------------------------------------------------------
1 | # Neural Style Transfer using TensorFlow
2 | Neural Style Transfer is a technique for generating new artistic images from existing content and style images. This project uses an unsupervised deep learning algorithm for performing neural style transfer.
3 |
4 | ![Neural Style Transfer](neural_style_transfer.png)
5 |
6 | ## Programming Language and Libraries used
7 | 1. Python 3.x
8 | 2. Tensorflow 2.x and Keras API
9 | 3. Other python libraries like Numpy, OpenCV
10 |
11 | ## Notes
12 | 1. [Link to Medium Article](https://medium.com/@vasu.gupta9/neural-style-transfer-using-tensorflow-7e0f3e789e0c) presenting the main ideas and implementation details of Neural Style Transfer using TensorFlow
13 |
14 |
--------------------------------------------------------------------------------
/NeuralStyleTransfer/content.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasugupta9/DeepLearningProjects/f6396195d04cf91b59d1b78bddac27ba0a607119/NeuralStyleTransfer/content.jpg
--------------------------------------------------------------------------------
/NeuralStyleTransfer/neural_style_transfer.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasugupta9/DeepLearningProjects/f6396195d04cf91b59d1b78bddac27ba0a607119/NeuralStyleTransfer/neural_style_transfer.png
--------------------------------------------------------------------------------
/NeuralStyleTransfer/style_mosaic.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasugupta9/DeepLearningProjects/f6396195d04cf91b59d1b78bddac27ba0a607119/NeuralStyleTransfer/style_mosaic.jpg
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Computer Vision and Deep Learning Projects
2 | This repo contains code for some of my computer vision and deep learning projects.
3 |
4 | ## List of Projects
5 | | Project |
6 | | ------------- |
7 | | [Image Classifier for The Street View House Numbers (SVHN) Dataset](StreetViewHouseNumbers_Classifier/) |
8 | | [A Simple CNN Image Classifier for Cifar-10 dataset](CIFAR10_Image_Classifier/) |
9 | | [Convolutional Neural Network Visualizer](ConvolutionalNeuralNetworkVisualizer/) |
10 | | [Transfer Learning example using ResNet-50 architecture](TransferLearningResnet/) |
11 | | [Neural Style Transfer using TensorFlow](NeuralStyleTransfer/) [[ Blog ]](https://medium.com/@vasu.gupta9/neural-style-transfer-using-tensorflow-7e0f3e789e0c) |
12 | | [Video Activity Recognition using Pretrained 3D ResNet model](VideoActivityRecognition3DResnet/) [[ Blog ]](https://gvasu.medium.com/recognizing-400-different-activities-in-videos-using-python-and-opencv-ee59cc6d61f6) |
13 | | [Faster Real-Time Video Processing using Multi-Threading in Python](MultiThreadedVideoProcessing/) [[ Blog ]](https://gvasu.medium.com/faster-real-time-video-processing-using-multi-threading-in-python-8902589e1055) |
14 | | [Real-Time Face Detection Using OpenCV](FaceDetectionOpenCV/) |
--------------------------------------------------------------------------------
/StreetViewHouseNumbers_Classifier/Readme.md:
--------------------------------------------------------------------------------
1 | # This project is an Image Classifier for the Street View House Numbers (SVHN) Dataset obtained from house numbers in Google Street View images.
2 |
3 | ![SVHN Overview Image](SVHN_Overview_Image.png)
4 | [*Image Source*](http://ufldl.stanford.edu/housenumbers/)
5 |
6 | ## Project folder includes
7 | 1. Readme.md (*this file*)
8 | 2. Google colab notebook for building, training and testing a Convolutional Neural Network on the SVHN dataset.
9 | 3. Misc
10 | * SVHN Overview Image
11 |
12 | ## Libraries used
13 | 1. Python programming language is used
14 | 2. Deep learning libraries Tensorflow 2.x and Keras API
15 | 3. Other python libraries including numpy, etc
16 |
17 | ## Notes
18 | 1. The SVHN dataset has 2 formats. This project uses Format 2: Cropped Digits
19 |
--------------------------------------------------------------------------------
/StreetViewHouseNumbers_Classifier/SVHN_Overview_Image.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasugupta9/DeepLearningProjects/f6396195d04cf91b59d1b78bddac27ba0a607119/StreetViewHouseNumbers_Classifier/SVHN_Overview_Image.png
--------------------------------------------------------------------------------
/TransferLearningResnet/Readme.md:
--------------------------------------------------------------------------------
1 | # Purpose of this project is to demonstrate how to build and train a Deep Neural Network using Transfer Learning
2 | 1. Dataset used is Cifar-10 which contains images from 10 different classes (https://www.cs.toronto.edu/~kriz/cifar.html)
3 | 2. Architecture used is ResNet-50 (https://keras.io/api/applications/)
4 |
5 | ![CIFAR-10 Overview Image](cifar10_img.png)
6 | [*Image Source*](https://www.cs.toronto.edu/~kriz/cifar.html)
7 |
8 | ## Project folder includes
9 | 1. Readme.md (this file)
10 | 2. Google colab notebook for building, training and testing a ResNet50 architecture using transfer learning on the CIFAR-10 dataset.
11 | 3. Misc
12 | - CIFAR-10 overview image
13 |
14 | ## Programming language and Libraries used
15 | 1. Python programming language
16 | 2. Deep learning libraries tensorflow 2.x and Keras API
17 | 3. Python libraries including numpy, etc
18 |
--------------------------------------------------------------------------------
/TransferLearningResnet/Transfer_Learning_ResNet_ImageClassifier.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "name": "Transfer_Learning_ResNet_ImageClassifier.ipynb",
7 | "provenance": [],
8 | "collapsed_sections": [],
9 | "toc_visible": true,
10 | "authorship_tag": "ABX9TyMgQ4Qi0SYj/dBzG221AxvF",
11 | "include_colab_link": true
12 | },
13 | "kernelspec": {
14 | "name": "python3",
15 | "display_name": "Python 3"
16 | },
17 | "language_info": {
18 | "name": "python"
19 | },
20 | "accelerator": "GPU"
21 | },
22 | "cells": [
23 | {
24 | "cell_type": "markdown",
25 | "metadata": {
26 | "id": "view-in-github",
27 | "colab_type": "text"
28 | },
29 | "source": [
30 | "
"
31 | ]
32 | },
33 | {
34 | "cell_type": "markdown",
35 | "metadata": {
36 | "id": "ZdvsU9L3T_8P"
37 | },
38 | "source": [
39 | "# Purpose of this notebook is to show an example of training a Deep Neural Network using Transfer Learning\n",
40 | "1. Dataset used is Cifar-10 which contains images from 10 different classes (https://www.cs.toronto.edu/~kriz/cifar.html)\n",
41 | "2. Architecture used is ResNet-50 (https://keras.io/api/applications/)\n"
42 | ]
43 | },
44 | {
45 | "cell_type": "code",
46 | "metadata": {
47 | "id": "5puSqI9GSmk2"
48 | },
49 | "source": [
50 | "# importing required libraries\n",
51 | "import numpy as np \n",
52 | "import cv2\n",
53 | "import matplotlib.pyplot as plt \n",
54 | "import tensorflow as tf \n",
55 | "from tensorflow.keras.models import Model \n",
56 | "from tensorflow.keras.layers import Dense , Input\n",
57 | "from tensorflow.keras.datasets import cifar10\n",
58 | "from tensorflow.keras.applications import ResNet50\n",
59 | "from tensorflow.keras.applications.resnet import preprocess_input as resnet_preprocess_input\n"
60 | ],
61 | "execution_count": 5,
62 | "outputs": []
63 | },
64 | {
65 | "cell_type": "code",
66 | "metadata": {
67 | "colab": {
68 | "base_uri": "https://localhost:8080/"
69 | },
70 | "id": "oKNeyDZ9VrKj",
71 | "outputId": "dd9cfa2e-d9e1-437f-ea17-695d7c39356c"
72 | },
73 | "source": [
74 | "# loading cifar-10 dataset \n",
75 | "(x_train, y_train) , (x_test, y_test) = cifar10.load_data()\n",
76 | "\n",
77 | "# reshaping y_train, y_test to 1D arrays (since later using loss function as sparse categorical crossentropy)\n",
78 | "y_train = y_train.reshape(-1)\n",
79 | "y_test = y_test.reshape(-1)\n",
80 | "\n",
81 | "label_names = ['airplane','automobile','bird','cat', 'deer' , 'dog', 'frog', 'horse', 'ship', 'truck' ] # reference - https://www.cs.toronto.edu/~kriz/cifar.html"
82 | ],
83 | "execution_count": 6,
84 | "outputs": [
85 | {
86 | "output_type": "stream",
87 | "text": [
88 | "Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz\n",
89 | "170500096/170498071 [==============================] - 11s 0us/step\n"
90 | ],
91 | "name": "stdout"
92 | }
93 | ]
94 | },
95 | {
96 | "cell_type": "code",
97 | "metadata": {
98 | "colab": {
99 | "base_uri": "https://localhost:8080/",
100 | "height": 366
101 | },
102 | "id": "pteamvelXP3s",
103 | "outputId": "6d520fec-39df-4f7d-cf62-b26ecbc9b90e"
104 | },
105 | "source": [
106 | "# printing useful dataset related information \n",
107 | "\n",
108 | "# printing dataset shapes\n",
109 | "print(\"x_train:{} , y_train:{} , x_test:{} , y_test:{} \".format(x_train.shape, y_train.shape, x_test.shape, y_test.shape ))\n",
110 | "\n",
111 | "# printing range of values in x_train, x_test and unique values in y_train, y_test\n",
112 | "print(\"x_train values are in range:{}-{}\".format(np.min(x_train), np.max(x_train)))\n",
113 | "print(\"x_test values are in range:{}-{}\".format(np.min(x_test), np.max(x_test)))\n",
114 | "print(\"unique values in y_train:{}\".format(np.unique(y_train)))\n",
115 | "print(\"unique values in y_test:{}\".format(np.unique(y_test)))\n",
116 | "\n",
117 | "# visualizing sample image \n",
118 | "idx = 0\n",
119 | "img = x_train[idx]\n",
120 | "label_id = y_train[idx]\n",
121 | "plt.title( \"image label {}:{}\".format( label_names[label_id] , label_id ) )\n",
122 | "plt.imshow(img)\n",
123 | "plt.show()"
124 | ],
125 | "execution_count": 7,
126 | "outputs": [
127 | {
128 | "output_type": "stream",
129 | "text": [
130 | "x_train:(50000, 32, 32, 3) , y_train:(50000,) , x_test:(10000, 32, 32, 3) , y_test:(10000,) \n",
131 | "x_train values are in range:0-255\n",
132 | "x_test values are in range:0-255\n",
133 | "unique values in y_train:[0 1 2 3 4 5 6 7 8 9]\n",
134 | "unique values in y_test:[0 1 2 3 4 5 6 7 8 9]\n"
135 | ],
136 | "name": "stdout"
137 | },
138 | {
139 | "output_type": "display_data",
140 | "data": {
141 | "image/png": "\n",
142 | "text/plain": [
143 | ""
144 | ]
145 | },
146 | "metadata": {
147 | "tags": [],
148 | "needs_background": "light"
149 | }
150 | }
151 | ]
152 | },
153 | {
154 | "cell_type": "code",
155 | "metadata": {
156 | "colab": {
157 | "base_uri": "https://localhost:8080/"
158 | },
159 | "id": "b5C73dgyZ0zG",
160 | "outputId": "c8a82177-b95e-4c63-88ba-084e5118e9e1"
161 | },
162 | "source": [
163 | "# pre-processing the image for classification using resnet model (expects a specific kind of input preprocessing for resnet)\n",
164 | "x_train_pp = x_train.astype('float32')\n",
165 | "x_train_pp = resnet_preprocess_input(x_train_pp)\n",
166 | "x_test_pp = x_test.astype('float32')\n",
167 | "x_test_pp = resnet_preprocess_input(x_test_pp)\n",
168 | "\n",
169 | "# printing range of values in x_train_pp, x_test_pp\n",
170 | "print(\"x_train_pp values are in range:{} to {}\".format(np.min(x_train_pp), np.max(x_train_pp)))\n",
171 | "print(\"x_test_pp values are in range:{} to {}\".format(np.min(x_test_pp), np.max(x_test_pp)))"
172 | ],
173 | "execution_count": 8,
174 | "outputs": [
175 | {
176 | "output_type": "stream",
177 | "text": [
178 | "x_train_pp values are in range:-123.68000030517578 to 151.06100463867188\n",
179 | "x_test_pp values are in range:-123.68000030517578 to 151.06100463867188\n"
180 | ],
181 | "name": "stdout"
182 | }
183 | ]
184 | },
185 | {
186 | "cell_type": "code",
187 | "metadata": {
188 | "colab": {
189 | "base_uri": "https://localhost:8080/"
190 | },
191 | "id": "W0GZNNXocHaM",
192 | "outputId": "de3fc1e4-5b6f-4e8a-c3b5-afe6f9b14503"
193 | },
194 | "source": [
195 | "# loading the resnet model \n",
196 | "# 1. top layer (global avg pooling + output dense layer) is not loaded\n",
197 | "# 2. weights pre-trained on imagenet are used \n",
198 | "# 3. input image shape is (32,32,3)\n",
199 | "# 4. global average pooling is added at the top \n",
200 | "model_resnet = ResNet50(include_top=False, weights='imagenet', input_shape=(32,32,3) , pooling='avg')\n",
201 | "model_resnet.summary()"
202 | ],
203 | "execution_count": 9,
204 | "outputs": [
205 | {
206 | "output_type": "stream",
207 | "text": [
208 | "Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5\n",
209 | "94773248/94765736 [==============================] - 1s 0us/step\n",
210 | "Model: \"resnet50\"\n",
211 | "__________________________________________________________________________________________________\n",
212 | "Layer (type) Output Shape Param # Connected to \n",
213 | "==================================================================================================\n",
214 | "input_1 (InputLayer) [(None, 32, 32, 3)] 0 \n",
215 | "__________________________________________________________________________________________________\n",
216 | "conv1_pad (ZeroPadding2D) (None, 38, 38, 3) 0 input_1[0][0] \n",
217 | "__________________________________________________________________________________________________\n",
218 | "conv1_conv (Conv2D) (None, 16, 16, 64) 9472 conv1_pad[0][0] \n",
219 | "__________________________________________________________________________________________________\n",
220 | "conv1_bn (BatchNormalization) (None, 16, 16, 64) 256 conv1_conv[0][0] \n",
221 | "__________________________________________________________________________________________________\n",
222 | "conv1_relu (Activation) (None, 16, 16, 64) 0 conv1_bn[0][0] \n",
223 | "__________________________________________________________________________________________________\n",
224 | "pool1_pad (ZeroPadding2D) (None, 18, 18, 64) 0 conv1_relu[0][0] \n",
225 | "__________________________________________________________________________________________________\n",
226 | "pool1_pool (MaxPooling2D) (None, 8, 8, 64) 0 pool1_pad[0][0] \n",
227 | "__________________________________________________________________________________________________\n",
228 | "conv2_block1_1_conv (Conv2D) (None, 8, 8, 64) 4160 pool1_pool[0][0] \n",
229 | "__________________________________________________________________________________________________\n",
230 | "conv2_block1_1_bn (BatchNormali (None, 8, 8, 64) 256 conv2_block1_1_conv[0][0] \n",
231 | "__________________________________________________________________________________________________\n",
232 | "conv2_block1_1_relu (Activation (None, 8, 8, 64) 0 conv2_block1_1_bn[0][0] \n",
233 | "__________________________________________________________________________________________________\n",
234 | "conv2_block1_2_conv (Conv2D) (None, 8, 8, 64) 36928 conv2_block1_1_relu[0][0] \n",
235 | "__________________________________________________________________________________________________\n",
236 | "conv2_block1_2_bn (BatchNormali (None, 8, 8, 64) 256 conv2_block1_2_conv[0][0] \n",
237 | "__________________________________________________________________________________________________\n",
238 | "conv2_block1_2_relu (Activation (None, 8, 8, 64) 0 conv2_block1_2_bn[0][0] \n",
239 | "__________________________________________________________________________________________________\n",
240 | "conv2_block1_0_conv (Conv2D) (None, 8, 8, 256) 16640 pool1_pool[0][0] \n",
241 | "__________________________________________________________________________________________________\n",
242 | "conv2_block1_3_conv (Conv2D) (None, 8, 8, 256) 16640 conv2_block1_2_relu[0][0] \n",
243 | "__________________________________________________________________________________________________\n",
244 | "conv2_block1_0_bn (BatchNormali (None, 8, 8, 256) 1024 conv2_block1_0_conv[0][0] \n",
245 | "__________________________________________________________________________________________________\n",
246 | "conv2_block1_3_bn (BatchNormali (None, 8, 8, 256) 1024 conv2_block1_3_conv[0][0] \n",
247 | "__________________________________________________________________________________________________\n",
248 | "conv2_block1_add (Add) (None, 8, 8, 256) 0 conv2_block1_0_bn[0][0] \n",
249 | " conv2_block1_3_bn[0][0] \n",
250 | "__________________________________________________________________________________________________\n",
251 | "conv2_block1_out (Activation) (None, 8, 8, 256) 0 conv2_block1_add[0][0] \n",
252 | "__________________________________________________________________________________________________\n",
253 | "conv2_block2_1_conv (Conv2D) (None, 8, 8, 64) 16448 conv2_block1_out[0][0] \n",
254 | "__________________________________________________________________________________________________\n",
255 | "conv2_block2_1_bn (BatchNormali (None, 8, 8, 64) 256 conv2_block2_1_conv[0][0] \n",
256 | "__________________________________________________________________________________________________\n",
257 | "conv2_block2_1_relu (Activation (None, 8, 8, 64) 0 conv2_block2_1_bn[0][0] \n",
258 | "__________________________________________________________________________________________________\n",
259 | "conv2_block2_2_conv (Conv2D) (None, 8, 8, 64) 36928 conv2_block2_1_relu[0][0] \n",
260 | "__________________________________________________________________________________________________\n",
261 | "conv2_block2_2_bn (BatchNormali (None, 8, 8, 64) 256 conv2_block2_2_conv[0][0] \n",
262 | "__________________________________________________________________________________________________\n",
263 | "conv2_block2_2_relu (Activation (None, 8, 8, 64) 0 conv2_block2_2_bn[0][0] \n",
264 | "__________________________________________________________________________________________________\n",
265 | "conv2_block2_3_conv (Conv2D) (None, 8, 8, 256) 16640 conv2_block2_2_relu[0][0] \n",
266 | "__________________________________________________________________________________________________\n",
267 | "conv2_block2_3_bn (BatchNormali (None, 8, 8, 256) 1024 conv2_block2_3_conv[0][0] \n",
268 | "__________________________________________________________________________________________________\n",
269 | "conv2_block2_add (Add) (None, 8, 8, 256) 0 conv2_block1_out[0][0] \n",
270 | " conv2_block2_3_bn[0][0] \n",
271 | "__________________________________________________________________________________________________\n",
272 | "conv2_block2_out (Activation) (None, 8, 8, 256) 0 conv2_block2_add[0][0] \n",
273 | "__________________________________________________________________________________________________\n",
274 | "conv2_block3_1_conv (Conv2D) (None, 8, 8, 64) 16448 conv2_block2_out[0][0] \n",
275 | "__________________________________________________________________________________________________\n",
276 | "conv2_block3_1_bn (BatchNormali (None, 8, 8, 64) 256 conv2_block3_1_conv[0][0] \n",
277 | "__________________________________________________________________________________________________\n",
278 | "conv2_block3_1_relu (Activation (None, 8, 8, 64) 0 conv2_block3_1_bn[0][0] \n",
279 | "__________________________________________________________________________________________________\n",
280 | "conv2_block3_2_conv (Conv2D) (None, 8, 8, 64) 36928 conv2_block3_1_relu[0][0] \n",
281 | "__________________________________________________________________________________________________\n",
282 | "conv2_block3_2_bn (BatchNormali (None, 8, 8, 64) 256 conv2_block3_2_conv[0][0] \n",
283 | "__________________________________________________________________________________________________\n",
284 | "conv2_block3_2_relu (Activation (None, 8, 8, 64) 0 conv2_block3_2_bn[0][0] \n",
285 | "__________________________________________________________________________________________________\n",
286 | "conv2_block3_3_conv (Conv2D) (None, 8, 8, 256) 16640 conv2_block3_2_relu[0][0] \n",
287 | "__________________________________________________________________________________________________\n",
288 | "conv2_block3_3_bn (BatchNormali (None, 8, 8, 256) 1024 conv2_block3_3_conv[0][0] \n",
289 | "__________________________________________________________________________________________________\n",
290 | "conv2_block3_add (Add) (None, 8, 8, 256) 0 conv2_block2_out[0][0] \n",
291 | " conv2_block3_3_bn[0][0] \n",
292 | "__________________________________________________________________________________________________\n",
293 | "conv2_block3_out (Activation) (None, 8, 8, 256) 0 conv2_block3_add[0][0] \n",
294 | "__________________________________________________________________________________________________\n",
295 | "conv3_block1_1_conv (Conv2D) (None, 4, 4, 128) 32896 conv2_block3_out[0][0] \n",
296 | "__________________________________________________________________________________________________\n",
297 | "conv3_block1_1_bn (BatchNormali (None, 4, 4, 128) 512 conv3_block1_1_conv[0][0] \n",
298 | "__________________________________________________________________________________________________\n",
299 | "conv3_block1_1_relu (Activation (None, 4, 4, 128) 0 conv3_block1_1_bn[0][0] \n",
300 | "__________________________________________________________________________________________________\n",
301 | "conv3_block1_2_conv (Conv2D) (None, 4, 4, 128) 147584 conv3_block1_1_relu[0][0] \n",
302 | "__________________________________________________________________________________________________\n",
303 | "conv3_block1_2_bn (BatchNormali (None, 4, 4, 128) 512 conv3_block1_2_conv[0][0] \n",
304 | "__________________________________________________________________________________________________\n",
305 | "conv3_block1_2_relu (Activation (None, 4, 4, 128) 0 conv3_block1_2_bn[0][0] \n",
306 | "__________________________________________________________________________________________________\n",
307 | "conv3_block1_0_conv (Conv2D) (None, 4, 4, 512) 131584 conv2_block3_out[0][0] \n",
308 | "__________________________________________________________________________________________________\n",
309 | "conv3_block1_3_conv (Conv2D) (None, 4, 4, 512) 66048 conv3_block1_2_relu[0][0] \n",
310 | "__________________________________________________________________________________________________\n",
311 | "conv3_block1_0_bn (BatchNormali (None, 4, 4, 512) 2048 conv3_block1_0_conv[0][0] \n",
312 | "__________________________________________________________________________________________________\n",
313 | "conv3_block1_3_bn (BatchNormali (None, 4, 4, 512) 2048 conv3_block1_3_conv[0][0] \n",
314 | "__________________________________________________________________________________________________\n",
315 | "conv3_block1_add (Add) (None, 4, 4, 512) 0 conv3_block1_0_bn[0][0] \n",
316 | " conv3_block1_3_bn[0][0] \n",
317 | "__________________________________________________________________________________________________\n",
318 | "conv3_block1_out (Activation) (None, 4, 4, 512) 0 conv3_block1_add[0][0] \n",
319 | "__________________________________________________________________________________________________\n",
320 | "conv3_block2_1_conv (Conv2D) (None, 4, 4, 128) 65664 conv3_block1_out[0][0] \n",
321 | "__________________________________________________________________________________________________\n",
322 | "conv3_block2_1_bn (BatchNormali (None, 4, 4, 128) 512 conv3_block2_1_conv[0][0] \n",
323 | "__________________________________________________________________________________________________\n",
324 | "conv3_block2_1_relu (Activation (None, 4, 4, 128) 0 conv3_block2_1_bn[0][0] \n",
325 | "__________________________________________________________________________________________________\n",
326 | "conv3_block2_2_conv (Conv2D) (None, 4, 4, 128) 147584 conv3_block2_1_relu[0][0] \n",
327 | "__________________________________________________________________________________________________\n",
328 | "conv3_block2_2_bn (BatchNormali (None, 4, 4, 128) 512 conv3_block2_2_conv[0][0] \n",
329 | "__________________________________________________________________________________________________\n",
330 | "conv3_block2_2_relu (Activation (None, 4, 4, 128) 0 conv3_block2_2_bn[0][0] \n",
331 | "__________________________________________________________________________________________________\n",
332 | "conv3_block2_3_conv (Conv2D) (None, 4, 4, 512) 66048 conv3_block2_2_relu[0][0] \n",
333 | "__________________________________________________________________________________________________\n",
334 | "conv3_block2_3_bn (BatchNormali (None, 4, 4, 512) 2048 conv3_block2_3_conv[0][0] \n",
335 | "__________________________________________________________________________________________________\n",
336 | "conv3_block2_add (Add) (None, 4, 4, 512) 0 conv3_block1_out[0][0] \n",
337 | " conv3_block2_3_bn[0][0] \n",
338 | "__________________________________________________________________________________________________\n",
339 | "conv3_block2_out (Activation) (None, 4, 4, 512) 0 conv3_block2_add[0][0] \n",
340 | "__________________________________________________________________________________________________\n",
341 | "conv3_block3_1_conv (Conv2D) (None, 4, 4, 128) 65664 conv3_block2_out[0][0] \n",
342 | "__________________________________________________________________________________________________\n",
343 | "conv3_block3_1_bn (BatchNormali (None, 4, 4, 128) 512 conv3_block3_1_conv[0][0] \n",
344 | "__________________________________________________________________________________________________\n",
345 | "conv3_block3_1_relu (Activation (None, 4, 4, 128) 0 conv3_block3_1_bn[0][0] \n",
346 | "__________________________________________________________________________________________________\n",
347 | "conv3_block3_2_conv (Conv2D) (None, 4, 4, 128) 147584 conv3_block3_1_relu[0][0] \n",
348 | "__________________________________________________________________________________________________\n",
349 | "conv3_block3_2_bn (BatchNormali (None, 4, 4, 128) 512 conv3_block3_2_conv[0][0] \n",
350 | "__________________________________________________________________________________________________\n",
351 | "conv3_block3_2_relu (Activation (None, 4, 4, 128) 0 conv3_block3_2_bn[0][0] \n",
352 | "__________________________________________________________________________________________________\n",
353 | "conv3_block3_3_conv (Conv2D) (None, 4, 4, 512) 66048 conv3_block3_2_relu[0][0] \n",
354 | "__________________________________________________________________________________________________\n",
355 | "conv3_block3_3_bn (BatchNormali (None, 4, 4, 512) 2048 conv3_block3_3_conv[0][0] \n",
356 | "__________________________________________________________________________________________________\n",
357 | "conv3_block3_add (Add) (None, 4, 4, 512) 0 conv3_block2_out[0][0] \n",
358 | " conv3_block3_3_bn[0][0] \n",
359 | "__________________________________________________________________________________________________\n",
360 | "conv3_block3_out (Activation) (None, 4, 4, 512) 0 conv3_block3_add[0][0] \n",
361 | "__________________________________________________________________________________________________\n",
362 | "conv3_block4_1_conv (Conv2D) (None, 4, 4, 128) 65664 conv3_block3_out[0][0] \n",
363 | "__________________________________________________________________________________________________\n",
364 | "conv3_block4_1_bn (BatchNormali (None, 4, 4, 128) 512 conv3_block4_1_conv[0][0] \n",
365 | "__________________________________________________________________________________________________\n",
366 | "conv3_block4_1_relu (Activation (None, 4, 4, 128) 0 conv3_block4_1_bn[0][0] \n",
367 | "__________________________________________________________________________________________________\n",
368 | "conv3_block4_2_conv (Conv2D) (None, 4, 4, 128) 147584 conv3_block4_1_relu[0][0] \n",
369 | "__________________________________________________________________________________________________\n",
370 | "conv3_block4_2_bn (BatchNormali (None, 4, 4, 128) 512 conv3_block4_2_conv[0][0] \n",
371 | "__________________________________________________________________________________________________\n",
372 | "conv3_block4_2_relu (Activation (None, 4, 4, 128) 0 conv3_block4_2_bn[0][0] \n",
373 | "__________________________________________________________________________________________________\n",
374 | "conv3_block4_3_conv (Conv2D) (None, 4, 4, 512) 66048 conv3_block4_2_relu[0][0] \n",
375 | "__________________________________________________________________________________________________\n",
376 | "conv3_block4_3_bn (BatchNormali (None, 4, 4, 512) 2048 conv3_block4_3_conv[0][0] \n",
377 | "__________________________________________________________________________________________________\n",
378 | "conv3_block4_add (Add) (None, 4, 4, 512) 0 conv3_block3_out[0][0] \n",
379 | " conv3_block4_3_bn[0][0] \n",
380 | "__________________________________________________________________________________________________\n",
381 | "conv3_block4_out (Activation) (None, 4, 4, 512) 0 conv3_block4_add[0][0] \n",
382 | "__________________________________________________________________________________________________\n",
383 | "conv4_block1_1_conv (Conv2D) (None, 2, 2, 256) 131328 conv3_block4_out[0][0] \n",
384 | "__________________________________________________________________________________________________\n",
385 | "conv4_block1_1_bn (BatchNormali (None, 2, 2, 256) 1024 conv4_block1_1_conv[0][0] \n",
386 | "__________________________________________________________________________________________________\n",
387 | "conv4_block1_1_relu (Activation (None, 2, 2, 256) 0 conv4_block1_1_bn[0][0] \n",
388 | "__________________________________________________________________________________________________\n",
389 | "conv4_block1_2_conv (Conv2D) (None, 2, 2, 256) 590080 conv4_block1_1_relu[0][0] \n",
390 | "__________________________________________________________________________________________________\n",
391 | "conv4_block1_2_bn (BatchNormali (None, 2, 2, 256) 1024 conv4_block1_2_conv[0][0] \n",
392 | "__________________________________________________________________________________________________\n",
393 | "conv4_block1_2_relu (Activation (None, 2, 2, 256) 0 conv4_block1_2_bn[0][0] \n",
394 | "__________________________________________________________________________________________________\n",
395 | "conv4_block1_0_conv (Conv2D) (None, 2, 2, 1024) 525312 conv3_block4_out[0][0] \n",
396 | "__________________________________________________________________________________________________\n",
397 | "conv4_block1_3_conv (Conv2D) (None, 2, 2, 1024) 263168 conv4_block1_2_relu[0][0] \n",
398 | "__________________________________________________________________________________________________\n",
399 | "conv4_block1_0_bn (BatchNormali (None, 2, 2, 1024) 4096 conv4_block1_0_conv[0][0] \n",
400 | "__________________________________________________________________________________________________\n",
401 | "conv4_block1_3_bn (BatchNormali (None, 2, 2, 1024) 4096 conv4_block1_3_conv[0][0] \n",
402 | "__________________________________________________________________________________________________\n",
403 | "conv4_block1_add (Add) (None, 2, 2, 1024) 0 conv4_block1_0_bn[0][0] \n",
404 | " conv4_block1_3_bn[0][0] \n",
405 | "__________________________________________________________________________________________________\n",
406 | "conv4_block1_out (Activation) (None, 2, 2, 1024) 0 conv4_block1_add[0][0] \n",
407 | "__________________________________________________________________________________________________\n",
408 | "conv4_block2_1_conv (Conv2D) (None, 2, 2, 256) 262400 conv4_block1_out[0][0] \n",
409 | "__________________________________________________________________________________________________\n",
410 | "conv4_block2_1_bn (BatchNormali (None, 2, 2, 256) 1024 conv4_block2_1_conv[0][0] \n",
411 | "__________________________________________________________________________________________________\n",
412 | "conv4_block2_1_relu (Activation (None, 2, 2, 256) 0 conv4_block2_1_bn[0][0] \n",
413 | "__________________________________________________________________________________________________\n",
414 | "conv4_block2_2_conv (Conv2D) (None, 2, 2, 256) 590080 conv4_block2_1_relu[0][0] \n",
415 | "__________________________________________________________________________________________________\n",
416 | "conv4_block2_2_bn (BatchNormali (None, 2, 2, 256) 1024 conv4_block2_2_conv[0][0] \n",
417 | "__________________________________________________________________________________________________\n",
418 | "conv4_block2_2_relu (Activation (None, 2, 2, 256) 0 conv4_block2_2_bn[0][0] \n",
419 | "__________________________________________________________________________________________________\n",
420 | "conv4_block2_3_conv (Conv2D) (None, 2, 2, 1024) 263168 conv4_block2_2_relu[0][0] \n",
421 | "__________________________________________________________________________________________________\n",
422 | "conv4_block2_3_bn (BatchNormali (None, 2, 2, 1024) 4096 conv4_block2_3_conv[0][0] \n",
423 | "__________________________________________________________________________________________________\n",
424 | "conv4_block2_add (Add) (None, 2, 2, 1024) 0 conv4_block1_out[0][0] \n",
425 | " conv4_block2_3_bn[0][0] \n",
426 | "__________________________________________________________________________________________________\n",
427 | "conv4_block2_out (Activation) (None, 2, 2, 1024) 0 conv4_block2_add[0][0] \n",
428 | "__________________________________________________________________________________________________\n",
429 | "conv4_block3_1_conv (Conv2D) (None, 2, 2, 256) 262400 conv4_block2_out[0][0] \n",
430 | "__________________________________________________________________________________________________\n",
431 | "conv4_block3_1_bn (BatchNormali (None, 2, 2, 256) 1024 conv4_block3_1_conv[0][0] \n",
432 | "__________________________________________________________________________________________________\n",
433 | "conv4_block3_1_relu (Activation (None, 2, 2, 256) 0 conv4_block3_1_bn[0][0] \n",
434 | "__________________________________________________________________________________________________\n",
435 | "conv4_block3_2_conv (Conv2D) (None, 2, 2, 256) 590080 conv4_block3_1_relu[0][0] \n",
436 | "__________________________________________________________________________________________________\n",
437 | "conv4_block3_2_bn (BatchNormali (None, 2, 2, 256) 1024 conv4_block3_2_conv[0][0] \n",
438 | "__________________________________________________________________________________________________\n",
439 | "conv4_block3_2_relu (Activation (None, 2, 2, 256) 0 conv4_block3_2_bn[0][0] \n",
440 | "__________________________________________________________________________________________________\n",
441 | "conv4_block3_3_conv (Conv2D) (None, 2, 2, 1024) 263168 conv4_block3_2_relu[0][0] \n",
442 | "__________________________________________________________________________________________________\n",
443 | "conv4_block3_3_bn (BatchNormali (None, 2, 2, 1024) 4096 conv4_block3_3_conv[0][0] \n",
444 | "__________________________________________________________________________________________________\n",
445 | "conv4_block3_add (Add) (None, 2, 2, 1024) 0 conv4_block2_out[0][0] \n",
446 | " conv4_block3_3_bn[0][0] \n",
447 | "__________________________________________________________________________________________________\n",
448 | "conv4_block3_out (Activation) (None, 2, 2, 1024) 0 conv4_block3_add[0][0] \n",
449 | "__________________________________________________________________________________________________\n",
450 | "conv4_block4_1_conv (Conv2D) (None, 2, 2, 256) 262400 conv4_block3_out[0][0] \n",
451 | "__________________________________________________________________________________________________\n",
452 | "conv4_block4_1_bn (BatchNormali (None, 2, 2, 256) 1024 conv4_block4_1_conv[0][0] \n",
453 | "__________________________________________________________________________________________________\n",
454 | "conv4_block4_1_relu (Activation (None, 2, 2, 256) 0 conv4_block4_1_bn[0][0] \n",
455 | "__________________________________________________________________________________________________\n",
456 | "conv4_block4_2_conv (Conv2D) (None, 2, 2, 256) 590080 conv4_block4_1_relu[0][0] \n",
457 | "__________________________________________________________________________________________________\n",
458 | "conv4_block4_2_bn (BatchNormali (None, 2, 2, 256) 1024 conv4_block4_2_conv[0][0] \n",
459 | "__________________________________________________________________________________________________\n",
460 | "conv4_block4_2_relu (Activation (None, 2, 2, 256) 0 conv4_block4_2_bn[0][0] \n",
461 | "__________________________________________________________________________________________________\n",
462 | "conv4_block4_3_conv (Conv2D) (None, 2, 2, 1024) 263168 conv4_block4_2_relu[0][0] \n",
463 | "__________________________________________________________________________________________________\n",
464 | "conv4_block4_3_bn (BatchNormali (None, 2, 2, 1024) 4096 conv4_block4_3_conv[0][0] \n",
465 | "__________________________________________________________________________________________________\n",
466 | "conv4_block4_add (Add) (None, 2, 2, 1024) 0 conv4_block3_out[0][0] \n",
467 | " conv4_block4_3_bn[0][0] \n",
468 | "__________________________________________________________________________________________________\n",
469 | "conv4_block4_out (Activation) (None, 2, 2, 1024) 0 conv4_block4_add[0][0] \n",
470 | "__________________________________________________________________________________________________\n",
471 | "conv4_block5_1_conv (Conv2D) (None, 2, 2, 256) 262400 conv4_block4_out[0][0] \n",
472 | "__________________________________________________________________________________________________\n",
473 | "conv4_block5_1_bn (BatchNormali (None, 2, 2, 256) 1024 conv4_block5_1_conv[0][0] \n",
474 | "__________________________________________________________________________________________________\n",
475 | "conv4_block5_1_relu (Activation (None, 2, 2, 256) 0 conv4_block5_1_bn[0][0] \n",
476 | "__________________________________________________________________________________________________\n",
477 | "conv4_block5_2_conv (Conv2D) (None, 2, 2, 256) 590080 conv4_block5_1_relu[0][0] \n",
478 | "__________________________________________________________________________________________________\n",
479 | "conv4_block5_2_bn (BatchNormali (None, 2, 2, 256) 1024 conv4_block5_2_conv[0][0] \n",
480 | "__________________________________________________________________________________________________\n",
481 | "conv4_block5_2_relu (Activation (None, 2, 2, 256) 0 conv4_block5_2_bn[0][0] \n",
482 | "__________________________________________________________________________________________________\n",
483 | "conv4_block5_3_conv (Conv2D) (None, 2, 2, 1024) 263168 conv4_block5_2_relu[0][0] \n",
484 | "__________________________________________________________________________________________________\n",
485 | "conv4_block5_3_bn (BatchNormali (None, 2, 2, 1024) 4096 conv4_block5_3_conv[0][0] \n",
486 | "__________________________________________________________________________________________________\n",
487 | "conv4_block5_add (Add) (None, 2, 2, 1024) 0 conv4_block4_out[0][0] \n",
488 | " conv4_block5_3_bn[0][0] \n",
489 | "__________________________________________________________________________________________________\n",
490 | "conv4_block5_out (Activation) (None, 2, 2, 1024) 0 conv4_block5_add[0][0] \n",
491 | "__________________________________________________________________________________________________\n",
492 | "conv4_block6_1_conv (Conv2D) (None, 2, 2, 256) 262400 conv4_block5_out[0][0] \n",
493 | "__________________________________________________________________________________________________\n",
494 | "conv4_block6_1_bn (BatchNormali (None, 2, 2, 256) 1024 conv4_block6_1_conv[0][0] \n",
495 | "__________________________________________________________________________________________________\n",
496 | "conv4_block6_1_relu (Activation (None, 2, 2, 256) 0 conv4_block6_1_bn[0][0] \n",
497 | "__________________________________________________________________________________________________\n",
498 | "conv4_block6_2_conv (Conv2D) (None, 2, 2, 256) 590080 conv4_block6_1_relu[0][0] \n",
499 | "__________________________________________________________________________________________________\n",
500 | "conv4_block6_2_bn (BatchNormali (None, 2, 2, 256) 1024 conv4_block6_2_conv[0][0] \n",
501 | "__________________________________________________________________________________________________\n",
502 | "conv4_block6_2_relu (Activation (None, 2, 2, 256) 0 conv4_block6_2_bn[0][0] \n",
503 | "__________________________________________________________________________________________________\n",
504 | "conv4_block6_3_conv (Conv2D) (None, 2, 2, 1024) 263168 conv4_block6_2_relu[0][0] \n",
505 | "__________________________________________________________________________________________________\n",
506 | "conv4_block6_3_bn (BatchNormali (None, 2, 2, 1024) 4096 conv4_block6_3_conv[0][0] \n",
507 | "__________________________________________________________________________________________________\n",
508 | "conv4_block6_add (Add) (None, 2, 2, 1024) 0 conv4_block5_out[0][0] \n",
509 | " conv4_block6_3_bn[0][0] \n",
510 | "__________________________________________________________________________________________________\n",
511 | "conv4_block6_out (Activation) (None, 2, 2, 1024) 0 conv4_block6_add[0][0] \n",
512 | "__________________________________________________________________________________________________\n",
513 | "conv5_block1_1_conv (Conv2D) (None, 1, 1, 512) 524800 conv4_block6_out[0][0] \n",
514 | "__________________________________________________________________________________________________\n",
515 | "conv5_block1_1_bn (BatchNormali (None, 1, 1, 512) 2048 conv5_block1_1_conv[0][0] \n",
516 | "__________________________________________________________________________________________________\n",
517 | "conv5_block1_1_relu (Activation (None, 1, 1, 512) 0 conv5_block1_1_bn[0][0] \n",
518 | "__________________________________________________________________________________________________\n",
519 | "conv5_block1_2_conv (Conv2D) (None, 1, 1, 512) 2359808 conv5_block1_1_relu[0][0] \n",
520 | "__________________________________________________________________________________________________\n",
521 | "conv5_block1_2_bn (BatchNormali (None, 1, 1, 512) 2048 conv5_block1_2_conv[0][0] \n",
522 | "__________________________________________________________________________________________________\n",
523 | "conv5_block1_2_relu (Activation (None, 1, 1, 512) 0 conv5_block1_2_bn[0][0] \n",
524 | "__________________________________________________________________________________________________\n",
525 | "conv5_block1_0_conv (Conv2D) (None, 1, 1, 2048) 2099200 conv4_block6_out[0][0] \n",
526 | "__________________________________________________________________________________________________\n",
527 | "conv5_block1_3_conv (Conv2D) (None, 1, 1, 2048) 1050624 conv5_block1_2_relu[0][0] \n",
528 | "__________________________________________________________________________________________________\n",
529 | "conv5_block1_0_bn (BatchNormali (None, 1, 1, 2048) 8192 conv5_block1_0_conv[0][0] \n",
530 | "__________________________________________________________________________________________________\n",
531 | "conv5_block1_3_bn (BatchNormali (None, 1, 1, 2048) 8192 conv5_block1_3_conv[0][0] \n",
532 | "__________________________________________________________________________________________________\n",
533 | "conv5_block1_add (Add) (None, 1, 1, 2048) 0 conv5_block1_0_bn[0][0] \n",
534 | " conv5_block1_3_bn[0][0] \n",
535 | "__________________________________________________________________________________________________\n",
536 | "conv5_block1_out (Activation) (None, 1, 1, 2048) 0 conv5_block1_add[0][0] \n",
537 | "__________________________________________________________________________________________________\n",
538 | "conv5_block2_1_conv (Conv2D) (None, 1, 1, 512) 1049088 conv5_block1_out[0][0] \n",
539 | "__________________________________________________________________________________________________\n",
540 | "conv5_block2_1_bn (BatchNormali (None, 1, 1, 512) 2048 conv5_block2_1_conv[0][0] \n",
541 | "__________________________________________________________________________________________________\n",
542 | "conv5_block2_1_relu (Activation (None, 1, 1, 512) 0 conv5_block2_1_bn[0][0] \n",
543 | "__________________________________________________________________________________________________\n",
544 | "conv5_block2_2_conv (Conv2D) (None, 1, 1, 512) 2359808 conv5_block2_1_relu[0][0] \n",
545 | "__________________________________________________________________________________________________\n",
546 | "conv5_block2_2_bn (BatchNormali (None, 1, 1, 512) 2048 conv5_block2_2_conv[0][0] \n",
547 | "__________________________________________________________________________________________________\n",
548 | "conv5_block2_2_relu (Activation (None, 1, 1, 512) 0 conv5_block2_2_bn[0][0] \n",
549 | "__________________________________________________________________________________________________\n",
550 | "conv5_block2_3_conv (Conv2D) (None, 1, 1, 2048) 1050624 conv5_block2_2_relu[0][0] \n",
551 | "__________________________________________________________________________________________________\n",
552 | "conv5_block2_3_bn (BatchNormali (None, 1, 1, 2048) 8192 conv5_block2_3_conv[0][0] \n",
553 | "__________________________________________________________________________________________________\n",
554 | "conv5_block2_add (Add) (None, 1, 1, 2048) 0 conv5_block1_out[0][0] \n",
555 | " conv5_block2_3_bn[0][0] \n",
556 | "__________________________________________________________________________________________________\n",
557 | "conv5_block2_out (Activation) (None, 1, 1, 2048) 0 conv5_block2_add[0][0] \n",
558 | "__________________________________________________________________________________________________\n",
559 | "conv5_block3_1_conv (Conv2D) (None, 1, 1, 512) 1049088 conv5_block2_out[0][0] \n",
560 | "__________________________________________________________________________________________________\n",
561 | "conv5_block3_1_bn (BatchNormali (None, 1, 1, 512) 2048 conv5_block3_1_conv[0][0] \n",
562 | "__________________________________________________________________________________________________\n",
563 | "conv5_block3_1_relu (Activation (None, 1, 1, 512) 0 conv5_block3_1_bn[0][0] \n",
564 | "__________________________________________________________________________________________________\n",
565 | "conv5_block3_2_conv (Conv2D) (None, 1, 1, 512) 2359808 conv5_block3_1_relu[0][0] \n",
566 | "__________________________________________________________________________________________________\n",
567 | "conv5_block3_2_bn (BatchNormali (None, 1, 1, 512) 2048 conv5_block3_2_conv[0][0] \n",
568 | "__________________________________________________________________________________________________\n",
569 | "conv5_block3_2_relu (Activation (None, 1, 1, 512) 0 conv5_block3_2_bn[0][0] \n",
570 | "__________________________________________________________________________________________________\n",
571 | "conv5_block3_3_conv (Conv2D) (None, 1, 1, 2048) 1050624 conv5_block3_2_relu[0][0] \n",
572 | "__________________________________________________________________________________________________\n",
573 | "conv5_block3_3_bn (BatchNormali (None, 1, 1, 2048) 8192 conv5_block3_3_conv[0][0] \n",
574 | "__________________________________________________________________________________________________\n",
575 | "conv5_block3_add (Add) (None, 1, 1, 2048) 0 conv5_block2_out[0][0] \n",
576 | " conv5_block3_3_bn[0][0] \n",
577 | "__________________________________________________________________________________________________\n",
578 | "conv5_block3_out (Activation) (None, 1, 1, 2048) 0 conv5_block3_add[0][0] \n",
579 | "__________________________________________________________________________________________________\n",
580 | "avg_pool (GlobalAveragePooling2 (None, 2048) 0 conv5_block3_out[0][0] \n",
581 | "==================================================================================================\n",
582 | "Total params: 23,587,712\n",
583 | "Trainable params: 23,534,592\n",
584 | "Non-trainable params: 53,120\n",
585 | "__________________________________________________________________________________________________\n"
586 | ],
587 | "name": "stdout"
588 | }
589 | ]
590 | },
591 | {
592 | "cell_type": "code",
593 | "metadata": {
594 | "colab": {
595 | "base_uri": "https://localhost:8080/"
596 | },
597 | "id": "JTVzf85zd6fF",
598 | "outputId": "bf1822ea-43ab-4694-ba10-3338babdf12e"
599 | },
600 | "source": [
601 | "# defining the model for transfer learning with 10 different output classes \n",
602 | "num_classes = 10\n",
603 | "model_resnet.trainable = False # freezing the weights in model_resnet (these weights will not be updated during training)\n",
604 | "inputs = Input(shape=(32,32,3))\n",
605 | "x = model_resnet(inputs, training=False)\n",
606 | "outputs = Dense(units=num_classes, activation='softmax')(x)\n",
607 | "model = Model(inputs=inputs, outputs=outputs)\n",
608 | "model.summary()"
609 | ],
610 | "execution_count": 10,
611 | "outputs": [
612 | {
613 | "output_type": "stream",
614 | "text": [
615 | "Model: \"model\"\n",
616 | "_________________________________________________________________\n",
617 | "Layer (type) Output Shape Param # \n",
618 | "=================================================================\n",
619 | "input_2 (InputLayer) [(None, 32, 32, 3)] 0 \n",
620 | "_________________________________________________________________\n",
621 | "resnet50 (Functional) (None, 2048) 23587712 \n",
622 | "_________________________________________________________________\n",
623 | "dense (Dense) (None, 10) 20490 \n",
624 | "=================================================================\n",
625 | "Total params: 23,608,202\n",
626 | "Trainable params: 20,490\n",
627 | "Non-trainable params: 23,587,712\n",
628 | "_________________________________________________________________\n"
629 | ],
630 | "name": "stdout"
631 | }
632 | ]
633 | },
634 | {
635 | "cell_type": "code",
636 | "metadata": {
637 | "id": "k7gT0FjofIFf"
638 | },
639 | "source": [
640 | "# compiling the model \n",
641 | "model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics='accuracy')"
642 | ],
643 | "execution_count": 11,
644 | "outputs": []
645 | },
646 | {
647 | "cell_type": "code",
648 | "metadata": {
649 | "colab": {
650 | "base_uri": "https://localhost:8080/"
651 | },
652 | "id": "DaU1wI1gfQ3O",
653 | "outputId": "879d7cae-38a3-46dd-af9e-582c19776065"
654 | },
655 | "source": [
656 | "# training the model \n",
657 | "H = model.fit(x_train_pp, y_train, validation_split=0.1, epochs=10, verbose=2)"
658 | ],
659 | "execution_count": 12,
660 | "outputs": [
661 | {
662 | "output_type": "stream",
663 | "text": [
664 | "Epoch 1/10\n",
665 | "1407/1407 - 69s - loss: 1.5887 - accuracy: 0.5685 - val_loss: 1.3398 - val_accuracy: 0.6276\n",
666 | "Epoch 2/10\n",
667 | "1407/1407 - 34s - loss: 1.2304 - accuracy: 0.6419 - val_loss: 1.3710 - val_accuracy: 0.6300\n",
668 | "Epoch 3/10\n",
669 | "1407/1407 - 34s - loss: 1.1648 - accuracy: 0.6585 - val_loss: 1.3614 - val_accuracy: 0.6254\n",
670 | "Epoch 4/10\n",
671 | "1407/1407 - 34s - loss: 1.1258 - accuracy: 0.6690 - val_loss: 1.4487 - val_accuracy: 0.6216\n",
672 | "Epoch 5/10\n",
673 | "1407/1407 - 34s - loss: 1.1015 - accuracy: 0.6775 - val_loss: 1.4542 - val_accuracy: 0.6178\n",
674 | "Epoch 6/10\n",
675 | "1407/1407 - 34s - loss: 1.1143 - accuracy: 0.6785 - val_loss: 1.4831 - val_accuracy: 0.6294\n",
676 | "Epoch 7/10\n",
677 | "1407/1407 - 34s - loss: 1.0876 - accuracy: 0.6824 - val_loss: 1.4893 - val_accuracy: 0.6412\n",
678 | "Epoch 8/10\n",
679 | "1407/1407 - 34s - loss: 1.0887 - accuracy: 0.6838 - val_loss: 1.6309 - val_accuracy: 0.6074\n",
680 | "Epoch 9/10\n",
681 | "1407/1407 - 34s - loss: 1.0838 - accuracy: 0.6880 - val_loss: 1.5115 - val_accuracy: 0.6274\n",
682 | "Epoch 10/10\n",
683 | "1407/1407 - 34s - loss: 1.0830 - accuracy: 0.6887 - val_loss: 1.5944 - val_accuracy: 0.6218\n"
684 | ],
685 | "name": "stdout"
686 | }
687 | ]
688 | },
689 | {
690 | "cell_type": "code",
691 | "metadata": {
692 | "colab": {
693 | "base_uri": "https://localhost:8080/",
694 | "height": 581
695 | },
696 | "id": "P5dxn52RfdEL",
697 | "outputId": "e295538f-5262-46e3-c05c-38865247f8b9"
698 | },
699 | "source": [
700 | "# plotting the training and validation set loss and accuracy results\n",
701 | "plt.style.use('ggplot')\n",
702 | "plt.figure()\n",
703 | "num_epochs=10\n",
704 | "epochs = np.arange(num_epochs)\n",
705 | "train_loss = H.history['loss']\n",
706 | "val_loss = H.history['val_loss']\n",
707 | "plt.plot(epochs, train_loss, label='train loss')\n",
708 | "plt.plot(epochs, val_loss, label='val_loss')\n",
709 | "plt.title('training and testing loss')\n",
710 | "plt.legend()\n",
711 | "plt.xlabel('#epochs')\n",
712 | "plt.ylabel('loss')\n",
713 | "\n",
714 | "plt.figure()\n",
715 | "train_acc = H.history['accuracy']\n",
716 | "val_acc = H.history['val_accuracy']\n",
717 | "plt.plot(epochs, train_acc, label='train accuracy')\n",
718 | "plt.plot(epochs, val_acc, label='val_accuracy')\n",
719 | "plt.title('training and testing accuracy')\n",
720 | "plt.legend()\n",
721 | "plt.xlabel('#epochs')\n",
722 | "plt.ylabel('accuracy')\n",
723 | "\n",
724 | "plt.show()\n"
725 | ],
726 | "execution_count": 16,
727 | "outputs": [
728 | {
729 | "output_type": "display_data",
730 | "data": {
731 | "image/png": "\n",
732 | "text/plain": [
733 | ""
734 | ]
735 | },
736 | "metadata": {
737 | "tags": []
738 | }
739 | },
740 | {
741 | "output_type": "display_data",
742 | "data": {
743 | "image/png": "\n",
744 | "text/plain": [
745 | ""
746 | ]
747 | },
748 | "metadata": {
749 | "tags": []
750 | }
751 | }
752 | ]
753 | },
754 | {
755 | "cell_type": "code",
756 | "metadata": {
757 | "colab": {
758 | "base_uri": "https://localhost:8080/"
759 | },
760 | "id": "nMBd-COFf_yI",
761 | "outputId": "8cef5860-2e2b-4b8e-b3bb-d780e7962fd7"
762 | },
763 | "source": [
764 | "# evaluating the model on the test set \n",
765 | "model.evaluate(x_test_pp, y_test)"
766 | ],
767 | "execution_count": 17,
768 | "outputs": [
769 | {
770 | "output_type": "stream",
771 | "text": [
772 | "313/313 [==============================] - 8s 26ms/step - loss: 1.6533 - accuracy: 0.6125\n"
773 | ],
774 | "name": "stdout"
775 | },
776 | {
777 | "output_type": "execute_result",
778 | "data": {
779 | "text/plain": [
780 | "[1.653288722038269, 0.612500011920929]"
781 | ]
782 | },
783 | "metadata": {
784 | "tags": []
785 | },
786 | "execution_count": 17
787 | }
788 | ]
789 | }
790 | ]
791 | }
--------------------------------------------------------------------------------
/TransferLearningResnet/cifar10_img.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasugupta9/DeepLearningProjects/f6396195d04cf91b59d1b78bddac27ba0a607119/TransferLearningResnet/cifar10_img.png
--------------------------------------------------------------------------------
/VideoActivityRecognition3DResnet/Readme.md:
--------------------------------------------------------------------------------
1 | # Performing activity recognition in videos using a pre-trained 3D ResNet model
2 | 1. Pre-trained model used for recognition is from https://github.com/kenshohara
3 | 2. Model can recognize 400 different activities
4 | 3. Running the notebook requires three files: the pre-trained model file, the class-names text file, and a sample video file. These files are downloaded automatically from my Google Drive when the associated .ipynb notebook is run
5 | 4. Link to Medium blog post with more details
6 |
7 | 
8 |
9 | ## Programming language and Libraries used
10 | 1. Python programming language
11 | 2. OpenCV library
12 | 3. Other Python libraries such as NumPy
13 |
14 |
--------------------------------------------------------------------------------
/VideoActivityRecognition3DResnet/img_activity_recognition.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasugupta9/DeepLearningProjects/f6396195d04cf91b59d1b78bddac27ba0a607119/VideoActivityRecognition3DResnet/img_activity_recognition.jpg
--------------------------------------------------------------------------------