├── .gitignore ├── BiCycleGAN_Toward_Multimodal_Image-to-Image_Translation.ipynb ├── CycleGAN_From_Scratch_PyTorch_FINAL_Entire_NB.ipynb ├── DCGAN-from-Scratch-with-PyTorch ├── DCGAN_ONLY_train.ipynb ├── Readme.md ├── dcgan.py ├── train.py └── utils.py ├── DCGAN_Generator_Function_Understanding_Filter_Size_and_Input_Shape.ipynb ├── DCGAN_Tensorflow_Celeb_A_Dataset.ipynb ├── GoogLeNet_Incepton-v1-PyTorch.ipynb ├── LeNet5_PyTorch.ipynb ├── README.md ├── ResNet56_PyTorch.ipynb ├── Unet_Brain_segmentation_unet_with_keras ├── brain_segmentation_unet_with_keras.ipynb ├── unet.py └── utils.py ├── WGAN_Pytorch_From_Scratch_Full_Notebook.ipynb └── assets ├── 2.png ├── 3.png ├── Youtube_Cover.jpg └── yt_logo.png /.gitignore: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/ignore-files/ for more about ignoring files. 2 | 3 | # Below for all Kaggle and Other Dataset, as they will all be inside input 4 | # On 22-April, have put all the data inside input 5 | input/ 6 | *input 7 | /*input 8 | */*input 9 | **/*input 10 | /input 11 | /input/* 12 | */input/* 13 | 14 | 15 | .vscode/ 16 | *.vscode 17 | /*.vscode 18 | */*.vscode 19 | **/*.vscode 20 | /.vscode 21 | /.vscode/* 22 | */.vscode/* 23 | 24 | input_dataset/ 25 | *input_dataset 26 | /*input_dataset 27 | */*input_dataset 28 | **/*input_dataset 29 | /input_dataset 30 | /input_dataset/* 31 | */input_dataset/* 32 | 33 | 34 | *input_dataset_2 35 | /*input_dataset_2 36 | */*input_dataset_2 37 | **/*input_dataset_2 38 | /input_dataset_2 39 | /input_dataset_2/* 40 | */input_dataset_2/* 41 | 42 | # ABOVE ONLY FOR THIS REPO AS I KEPT ALL DATASET UNDER THE ABOVE FOLDER NAME 43 | 44 | node_modules/ 45 | Colab_Model_Download/ 46 | wandb/ 47 | mlruns 48 | # The above is the MOST EFFECTIVE ONE as per 49 | # https://stackoverflow.com/a/1470664/1902852 50 | # The way to ignore all directories called "node_modules" 51 | # anywhere below the current level in a directory tree 52 | 53 | # Further if I have already pushed a directory to remote then remove that with below 54 | # Execute a folder remove (rm) from index only (--cached) recursivelly (-r). Command line example for root bin folder: 55 | # `git rm -r --cached full_absolute_path` 56 | # e.g. I had to run the below kind of command to remove the ".next" folder that was pushed to remote github 57 | # passing the full path of the .next folder 58 | # git rm -r --cached /media/veracrypt2/014-agency-classic-next/.next 59 | 60 | 61 | # See https://help.github.com/ignore-files/ for more about ignoring files. 62 | 63 | HF_Models/ 64 | 65 | *HF_Models 66 | /*HF_Models 67 | */*HF_Models 68 | **/*HF_Models 69 | /HF_Models 70 | /HF_Models/* 71 | 72 | node_modules/ 73 | # The above is the MOST EFFECTIVE ONE as per 74 | # https://stackoverflow.com/a/1470664/1902852 75 | # The way to ignore all directories called "node_modules" 76 | # anywhere below the current level in a directory tree 77 | 78 | *node_modules 79 | /*node_modules 80 | */*node_modules 81 | **/*node_modules 82 | /node_modules 83 | /node_modules/* 84 | */node_modules/*in 85 | 86 | # If you have performed a task, such as adding a new line item to your .gitignore file, I MUST need to clear out your git repo's cache in order for the changes to take place. Here are the commands for doing that: 87 | 88 | # git rm -r --cached . && git add . 
&& git commit -am 'git cache cleared' && git push 89 | 90 | __MACOSX/ 91 | *__MACOSX 92 | /*__MACOSX 93 | */*__MACOSX 94 | **/*__MACOSXc 95 | /__MACOSX 96 | /__MACOSX/* 97 | */__MACOSX/* 98 | 99 | __pycache__/ 100 | *__pycache__ 101 | /*__pycache__ 102 | */*__pycache__ 103 | **/*__pycache__c 104 | /__pycache__ 105 | /__pycache__/* 106 | */__pycache__/* 107 | 108 | .ipynb_checkpoints/ 109 | *.ipynb_checkpoints 110 | /*.ipynb_checkpoints 111 | */*.ipynb_checkpoints 112 | **/*.ipynb_checkpointsc 113 | /.ipynb_checkpoints 114 | /.ipynb_checkpoints/* 115 | */.ipynb_checkpoints/* 116 | 117 | -checkpoint.ipynb/ 118 | *-checkpoint.ipynb 119 | /*-checkpoint.ipynb 120 | */*-checkpoint.ipynb 121 | **/*-checkpoint.ipynb 122 | /-checkpoint.ipynb 123 | 124 | .h5/ 125 | *.h5 126 | /*.h5 127 | */*.h5 128 | **/*.h5 129 | /.h5 130 | 131 | .pyc/ 132 | *.pyc 133 | /*.pyc 134 | */*.pyc 135 | **/*.pyc 136 | /.pyc 137 | 138 | 139 | .bin/ 140 | *.bin 141 | /*.bin 142 | */*.bin 143 | **/*.bin 144 | /.bin 145 | 146 | .json/ 147 | *.json 148 | /*.json 149 | */*.json 150 | **/*.json 151 | /.json 152 | 153 | .next/ 154 | *.next 155 | /*.next 156 | */*.next 157 | **/*.next 158 | /.next 159 | /.next/* 160 | */.next/* 161 | 162 | 163 | .npy/ 164 | *.npy 165 | /*.npy 166 | */*.npy 167 | **/*.npy 168 | /.npy 169 | 170 | # testing 171 | coverage/ 172 | *coverage 173 | /*coverage 174 | */*coverage 175 | **/*coverage 176 | /coverage 177 | /coverage/* 178 | */coverage/* 179 | 180 | # production 181 | build/ 182 | *build 183 | /*build 184 | */*build 185 | **/*build 186 | /buildgs 187 | /build/* 188 | */build/* 189 | 190 | 191 | .db 192 | .db/ 193 | *.db 194 | /*.db 195 | */*.db 196 | **/*.db 197 | /.db 198 | 199 | 200 | .pkl 201 | .pkl/ 202 | *.pkl 203 | /*.pkl 204 | */*.pkl 205 | **/*.pkl 206 | /.pkl 207 | 208 | .hdf5 209 | .hdf5/ 210 | *.hdf5 211 | /*.hdf5 212 | */*.hdf5 213 | **/*.hdf5 214 | /.hdf5 215 | 216 | .pt 217 | .pt/ 218 | *.pt 219 | /*.pt 220 | */*.pt 221 | **/*.pt 222 | /.pt 223 | 224 | 225 | .pyc 226 | .pyc/ 227 | *.pyc 228 | /*.pyc 229 | */*.pyc 230 | **/*.pyc 231 | /.pyc 232 | 233 | 234 | .txt 235 | *.txt 236 | /*.txt 237 | */*.txt 238 | **/*.txt 239 | /.txt 240 | 241 | 242 | .csv 243 | *.csv 244 | /*.csv 245 | */*.csv 246 | **/*.csv 247 | /.csv 248 | 249 | .index 250 | *.index 251 | /*.index 252 | */*.index 253 | **/*.index 254 | /.index 255 | 256 | .mp4/ 257 | *.mp4 258 | /*.mp4 259 | */*.mp4 260 | **/*.mp4 261 | /.mp4 262 | 263 | .srt/ 264 | *.srt 265 | /*.srt 266 | */*.srt 267 | **/*.srt 268 | /.srt 269 | 270 | glove_vectors 271 | *glove_vectors 272 | /glove_vectors 273 | */*glove_vectors 274 | **/*glove_vectors 275 | /glove_vectors 276 | 277 | 278 | 279 | .zip 280 | *.zip 281 | /*.zip 282 | */*.zip 283 | **/*.zip 284 | /.zip 285 | 286 | 287 | .gz 288 | *.gz 289 | /*.gz 290 | */*.gz 291 | **/*.gz 292 | /.gz 293 | 294 | 295 | # Commenting out all .png files as I would need them for image pasting in .md files / notes 296 | #.png/ 297 | #*.png 298 | #/*.png 299 | #*/*.png 300 | #**/*.png 301 | #/.png 302 | 303 | 304 | *.jpg 305 | /*.jpg 306 | */*.jpg 307 | **/*.jpg 308 | /.jpg 309 | 310 | *.jpeg 311 | /*.jpeg 312 | */*.jpeg 313 | **/*.jpeg 314 | /.jpeg 315 | 316 | *.rar 317 | /*.rar 318 | */*.rar 319 | **/*.rar 320 | /.rar 321 | 322 | 323 | .tgz/ 324 | *.tgz 325 | /*.tgz 326 | */*.tgz 327 | **/*.tgz 328 | /.tgz 329 | 330 | .tar/ 331 | *.tar 332 | /*.tar 333 | */*.tar 334 | **/*.tar 335 | /.tar 336 | 337 | *.7z 338 | /*.7z 339 | */*.7z 340 | **/*.7z 341 | /.7z 342 | 343 | .dcm/ 344 | *.dcm 345 | /*.dcm 346 | 
*/*.dcm 347 | **/*.dcm 348 | /.dcm 349 | 350 | .tiff/ 351 | *.tiff 352 | /*.tiff 353 | */*.tiff 354 | **/*.tiff 355 | /.tiff 356 | 357 | .nii/ 358 | *.nii 359 | /*.nii 360 | */*.nii 361 | **/*.nii 362 | /.nii 363 | 364 | LARGE_Datasets 365 | */LARGE_Datasets 366 | */LARGE_Datasets/** 367 | **/LARGE_Datasets/** 368 | 369 | /Others_Code_gitignore 370 | /Others_Code_gitignore/* 371 | */Others_Code_gitignore/* 372 | 373 | /YouTube_Experiments_Scripts 374 | /YouTube_Experiments_Scripts/* 375 | */YouTube_Experiments_Scripts/* 376 | 377 | # *** END OF FILES SPECIFIC TO ML Projects **** 378 | 379 | 380 | # misc 381 | DS_Store/ 382 | .DS_Store 383 | .env.local 384 | .env.development.local 385 | .env.test.local 386 | .env.production.local 387 | 388 | npm-debug.log* 389 | yarn-debug.log* 390 | yarn-error.log* 391 | 392 | # Ignore docs files 393 | _gh_pages 394 | .ruby-version 395 | 396 | # Numerous always-ignore extensions 397 | *.diff 398 | *.err 399 | *.orig 400 | *.log 401 | *.rej 402 | *.swo 403 | *.swp 404 | *.zip 405 | *.vi 406 | *~ 407 | *.~lock* 408 | .~lock* 409 | 410 | # OS or Editor folders 411 | .DS_Store 412 | ._* 413 | Thumbs.db 414 | .cache 415 | .project 416 | .settings 417 | .tmproj 418 | *.esproj 419 | nbproject 420 | *.sublime-project 421 | *.sublime-workspace 422 | .idea 423 | 424 | # Komodo 425 | *.komodoproject 426 | .komodotools 427 | 428 | # grunt-html-validation 429 | validation-status.json 430 | validation-report.json 431 | 432 | 433 | # Ignore all logfiles and tempfiles. 434 | !/log/.keep 435 | /tmp 436 | /.gems 437 | 438 | CountDownTimer-Note.odt 439 | random-code-1.js 440 | random-code-2.js 441 | random-code-3.js 442 | performance-1.js 443 | 444 | test.html 445 | test1.html 446 | test2.html 447 | test3.html 448 | 449 | #ignore file name ending in "-bkp.js" OR "-bkp.ts" OR "-bkp.py" or "-test.js" OR "-test.ts" in its name. So I will have to put "-test.js" at all files that is just for my development-time random testing code . 
450 | **/*-bkp.js 451 | **/*-bkp.ts 452 | **/*-bkp.py 453 | **/*-test.js 454 | **/*-test.ts 455 | **/*-test.py 456 | **/*-test.ipynb 457 | **/*-test.md 458 | **/*-test.json 459 | 460 | # OS or Editor folders 461 | .DS_Store 462 | ._* 463 | Thumbs.db 464 | .cache 465 | .project 466 | .settings 467 | .tmproj 468 | *.esproj 469 | nbproject 470 | *.sublime-project 471 | *.sublime-workspace 472 | .idea 473 | node_modules 474 | Others_Code_gitignore 475 | Project-Note-PAUL 476 | .vscode 477 | 478 | # Local Netlify folder 479 | .netlify 480 | -------------------------------------------------------------------------------- /BiCycleGAN_Toward_Multimodal_Image-to-Image_Translation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## BiCycleGAN - Paper - Toward Multimodal Image-to-Image Translation - PyTorch Implementation from Scratch\n", 8 | "\n", 9 | "# [Link to my Youtube Video Explaining this whole Notebook](https://www.youtube.com/watch?v=iCXruj3slIk&list=PLxqBkZuBynVRyOJs4RWmB_fKlOVe5S8CR&index=17)\n", 10 | "\n", 11 | "[![Imgur](https://imgur.com/n7xTxVm.png)](https://www.youtube.com/watch?v=iCXruj3slIk&list=PLxqBkZuBynVRyOJs4RWmB_fKlOVe5S8CR&index=17)\n" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "### 👉 The Maps - Dataset link - http://efrosgans.eecs.berkeley.edu/pix2pix/datasets/\n", 19 | "\n", 20 | "## About the Satellite to Map Image Translation Dataset\n", 21 | "\n", 22 | "This is a dataset comprised of satellite images of New York and their corresponding Google maps pages. The image translation problem involves converting satellite photos to Google maps format, or the reverse, Google maps images to Satellite photos.\n", 23 | "\n", 24 | "The dataset is provided on the pix2pix website and can be downloaded as a 255-megabyte zip file.\n", 25 | "\n", 26 | "maps\n", 27 | "├── train\n", 28 | "└── val\n", 29 | "\n", 30 | "The train folder contains 1,097 images, whereas the validation dataset contains 1,099 images.\n", 31 | "\n", 32 | "Images have a digit filename and are in JPEG format. Each image is 1,200 pixels wide and 600 pixels tall and contains both the satellite image on the left and the Google maps image on the right." 
33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": null, 38 | "metadata": { 39 | "colab": { 40 | "base_uri": "https://localhost:8080/" 41 | }, 42 | "id": "jjIvEuipGqxu", 43 | "outputId": "fa2c5913-6374-4a01-d292-e4c85d1ca3e9" 44 | }, 45 | "outputs": [], 46 | "source": [ 47 | "!pip install icecream\n", 48 | "import os\n", 49 | "import numpy as np\n", 50 | "import math\n", 51 | "import itertools\n", 52 | "import scipy\n", 53 | "import sys\n", 54 | "import time\n", 55 | "import datetime\n", 56 | "\n", 57 | "import torchvision.transforms as transforms\n", 58 | "from torchvision.utils import save_image\n", 59 | "\n", 60 | "from torch.utils.data import DataLoader\n", 61 | "from torchvision import datasets\n", 62 | "from torch.autograd import Variable\n", 63 | "import torch.autograd as autograd\n", 64 | "from torchvision.utils import make_grid\n", 65 | "\n", 66 | "import torch.nn as nn\n", 67 | "import torch.nn.functional as F\n", 68 | "import torch\n", 69 | "\n", 70 | "import matplotlib.pyplot as plt\n", 71 | "from matplotlib.pyplot import figure\n", 72 | "from IPython.display import clear_output\n", 73 | "from icecream import ic" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": 48, 79 | "metadata": { 80 | "colab": { 81 | "base_uri": "https://localhost:8080/" 82 | }, 83 | "id": "UokJxbOYHuAi", 84 | "outputId": "d98cf4ea-3cba-411d-a329-e89ef1f48bf0" 85 | }, 86 | "outputs": [ 87 | { 88 | "name": "stdout", 89 | "output_type": "stream", 90 | "text": [ 91 | "Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n" 92 | ] 93 | } 94 | ], 95 | "source": [ 96 | "from google.colab import drive\n", 97 | "drive.mount('/content/drive')" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 50, 103 | "metadata": { 104 | "id": "rVBTY_4wwfbq" 105 | }, 106 | "outputs": [], 107 | "source": [ 108 | "class Hyperparameters(object):\n", 109 | " def __init__(self, **kwargs):\n", 110 | " self.__dict__.update(kwargs)\n", 111 | "\n", 112 | "hp = Hyperparameters(\n", 113 | " epoch=0,\n", 114 | " n_epochs=200,\n", 115 | " batch_size=8, \n", 116 | " dataset_train_mode=\"train\",\n", 117 | " dataset_test_mode=\"val\", \n", 118 | " lr=.0002, \n", 119 | " b1=.5,\n", 120 | " b2=0.999,\n", 121 | " n_cpu=8,\n", 122 | " img_size=128,\n", 123 | " channels=3,\n", 124 | " latent_dim=8,\n", 125 | " n_critic=5,\n", 126 | " sample_interval=400,\n", 127 | " lambda_pixel=10,\n", 128 | " lambda_latent=.5,\n", 129 | " lambda_kl=.01)\n", 130 | "\n", 131 | "img_root_folder = '/content/drive/MyDrive/All_Datasets/Maps-UCBerkeley-CycleGAN/maps'" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": 53, 137 | "metadata": { 138 | "id": "XKrlGFNzgjyr" 139 | }, 140 | "outputs": [], 141 | "source": [ 142 | "# Just print to check that the full file paths of the images are printed indeed\n", 143 | "# sorted(glob.glob(os.path.join(img_root_folder, 'train') + \"/*.*\"))" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": 54, 149 | "metadata": { 150 | "cellView": "form", 151 | "id": "yZaSsdJa_xqH" 152 | }, 153 | "outputs": [], 154 | "source": [ 155 | "import glob\n", 156 | "import random\n", 157 | "import os\n", 158 | "from torch.utils.data import Dataset\n", 159 | "from PIL import Image\n", 160 | "\n", 161 | "class ImageDataset(Dataset):\n", 162 | " def __init__(self, root, transforms_=None, mode=\"train\"):\n", 163 | " self.transform = 
transforms.Compose(transforms_)\n", 164 | "\n", 165 | " # Using the glob and sorted functions to load all the images and sort them.\n", 166 | " self.files = sorted(glob.glob(os.path.join(root, mode) + \"/*.*\"))\n", 167 | " if mode == \"train\":\n", 168 | " self.files.extend(sorted(glob.glob(os.path.join(root, \"test\") + \"/*.*\")))\n", 169 | " # `extends()` method adds the specified list elements to the end of the current list.\n", 170 | "\n", 171 | " def __getitem__(self, index):\n", 172 | "\n", 173 | " img = Image.open(self.files[index % len(self.files)])\n", 174 | " ''' In above line, my target is to find index item in a list based on the length of a variable in the list\n", 175 | " So, in case such indexes do not exist, None is returned. \n", 176 | " a % b => a is divided by b, and the remainder of that division is returned.\n", 177 | " 5 % 100 => 5 '''\n", 178 | " w, h = img.size # Pillow.Image.open() returns width, height\n", 179 | " img_A = img.crop((0, 0, w / 2, h)) # (left, upper, right, lower)\n", 180 | " img_B = img.crop((w / 2, 0, w, h)) # (left, upper, right, lower) \n", 181 | "\n", 182 | " ''' Below code block implements Horizontal Flipping or Mirroring Image based on randomly generated probability '''\n", 183 | " if np.random.random() < 0.5:\n", 184 | " # Converting from numpy arrays to a RGB image\n", 185 | " # Call PIL.Image.fromarray(obj, mode) with obj as a 3-D array and mode as \"RGB\" to convert obj into an image.\n", 186 | " # Image.fromarray creates an image memory from an object exporting the array interface\n", 187 | " img_A = Image.fromarray(np.array(img_A)[:, ::-1, :], \"RGB\") # Mirror in x direction (flip horizontally)\n", 188 | " img_B = Image.fromarray(np.array(img_B)[:, ::-1, :], \"RGB\") # Mirror in x direction (flip horizontally)\n", 189 | " ''' a[::-1] # all items in the array, reversed. \n", 190 | " Pillow image returns tuple of (width, height) \n", 191 | " The data has 3 dimensions: height, width and color. Numpy shape of the image is a tuple of (row (height), column (width), color(3) )\n", 192 | " So ::-1 effectively reverses the order of the width. 
The height and color are not affected.\n", 193 | " '''\n", 194 | "\n", 195 | " img_A = self.transform(img_A)\n", 196 | " img_B = self.transform(img_B)\n", 197 | "\n", 198 | " return {\"A\": img_A, \"B\": img_B}\n", 199 | "\n", 200 | " def __len__(self):\n", 201 | " return len(self.files)" 202 | ] 203 | }, 204 | { 205 | "cell_type": "markdown", 206 | "metadata": {}, 207 | "source": [ 208 | "### Explanation of Horizontal Image Flipping with `Image.fromarray(np.array(img_A)[:, ::-1, :], \"RGB\")`\n", 209 | "\n", 210 | "Basically, its implementing, the official code's mechanism\n", 211 | "\n", 212 | "https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix/blob/master/data/base_dataset.py#L144\n", 213 | "\n", 214 | "```py\n", 215 | "def __flip(img, flip):\n", 216 | " if flip:\n", 217 | " return img.transpose(Image.FLIP_LEFT_RIGHT)\n", 218 | " return \n", 219 | "\n", 220 | "```" 221 | ] 222 | }, 223 | { 224 | "cell_type": "markdown", 225 | "metadata": {}, 226 | "source": [ 227 | "#### Note - numpy arrays and PIL images have different shape, in Numpy its (H,W) and in PIL and (W,H)\n", 228 | "\n", 229 | "===========================================================================\n", 230 | "\n", 231 | "#### Why `img = Image.open(self.files[index % len(self.files)])`\n", 232 | "\n", 233 | "Because, here my target is to find index item in a list based on the length of a variable in the list\n", 234 | "\n", 235 | "So, in case such indexes do not exist, None is returned. " 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": 56, 241 | "metadata": { 242 | "cellView": "form", 243 | "id": "VWcpYFFnWAbv" 244 | }, 245 | "outputs": [], 246 | "source": [ 247 | "# IMAGE VISUALIZER HELPERS\n", 248 | "def imshow(img,size=10):\n", 249 | " img = img / 2 + 0.5 # de-normalizing\n", 250 | " npimg = img.numpy()\n", 251 | " plt.figure(figsize=(size, size))\n", 252 | " plt.imshow(np.transpose(npimg, (1, 2, 0)))\n", 253 | " plt.show()\n", 254 | "\n", 255 | "\n", 256 | "import matplotlib.image as mpimg\n", 257 | "\n", 258 | "def visualise_output(path, x, y):\n", 259 | " img = mpimg.imread(path)\n", 260 | " plt.figure(figsize=(x,y))\n", 261 | " plt.imshow(img) \n", 262 | " plt.show()" 263 | ] 264 | }, 265 | { 266 | "cell_type": "markdown", 267 | "metadata": {}, 268 | "source": [ 269 | "## Explanation of line `img = img / 2 + 0.5` # de-normalizing\n", 270 | "\n", 271 | "Since the normalization process is actually z = (x - mean) / sigma \n", 272 | "\n", 273 | "Where both mean and sigma is 0.5\n", 274 | "\n", 275 | "The inverse normalization should be x = z*sigma + mean\n", 276 | "\n", 277 | "\n", 278 | "https://discuss.pytorch.org/t/simple-way-to-inverse-transform-normalization/4821/7" 279 | ] 280 | }, 281 | { 282 | "cell_type": "code", 283 | "execution_count": 57, 284 | "metadata": { 285 | "cellView": "form", 286 | "colab": { 287 | "base_uri": "https://localhost:8080/" 288 | }, 289 | "id": "pR5LjEFQARFm", 290 | "outputId": "3bedecbb-57bf-43ec-ad89-22337d77f320" 291 | }, 292 | "outputs": [ 293 | { 294 | "name": "stderr", 295 | "output_type": "stream", 296 | "text": [ 297 | "/usr/local/lib/python3.7/dist-packages/torchvision/transforms/transforms.py:288: UserWarning: Argument interpolation should be of type InterpolationMode instead of int. Please, use InterpolationMode enum.\n", 298 | " \"Argument interpolation should be of type InterpolationMode instead of int. 
\"\n" 299 | ] 300 | } 301 | ], 302 | "source": [ 303 | "# CONFIGURE DATALOADERS\n", 304 | "transforms_ = [\n", 305 | " transforms.Resize((hp.img_size, hp.img_size), Image.BICUBIC),\n", 306 | " transforms.ToTensor(),\n", 307 | " transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),\n", 308 | "]\n", 309 | "\n", 310 | "train_dataloader = DataLoader(\n", 311 | " ImageDataset(img_root_folder, mode=hp.dataset_train_mode, transforms_=transforms_),\n", 312 | " batch_size=hp.batch_size,\n", 313 | " shuffle=True,\n", 314 | " num_workers=1,\n", 315 | ")\n", 316 | "val_dataloader = DataLoader(\n", 317 | " ImageDataset(img_root_folder, mode=hp.dataset_test_mode, transforms_=transforms_),\n", 318 | " batch_size=16,\n", 319 | " shuffle=True,\n", 320 | " num_workers=1,\n", 321 | ")" 322 | ] 323 | }, 324 | { 325 | "cell_type": "code", 326 | "execution_count": null, 327 | "metadata": { 328 | "colab": { 329 | "base_uri": "https://localhost:8080/", 330 | "height": 1000 331 | }, 332 | "id": "7vp2cbB5okgD", 333 | "outputId": "1afdc064-3be4-4174-f895-503a44657601" 334 | }, 335 | "outputs": [], 336 | "source": [ 337 | "# VISUALING SAMPLE DATA { run: \"auto\" }\n", 338 | "pic_size = 16\n", 339 | "\n", 340 | "dataiter = iter(train_dataloader)\n", 341 | "images = dataiter.next()\n", 342 | "\n", 343 | "for i in range(len(images[\"A\"])):\n", 344 | " imshow(make_grid([images[\"A\"][i],images[\"B\"][i]]), size=pic_size)" 345 | ] 346 | }, 347 | { 348 | "cell_type": "markdown", 349 | "metadata": {}, 350 | "source": [ 351 | "## Unet\n", 352 | "\n", 353 | "Unet is an end-to-end fully convolutional network (FCN), i.e. it only contains Convolutional layers and does not contain any Dense layer because of which it can accept image of any size.\n", 354 | "\n", 355 | "The left hand side is the contraction path (Encoder) where we apply regular convolutions and max pooling layers.\n", 356 | "\n", 357 | "In the Encoder, the size of the image gradually reduces while the depth gradually increases. (e.g. Starting from 128x128x3 to 8x8x256 )\n", 358 | "\n", 359 | "\n", 360 | "The right hand side is the expansion path (Decoder) where we apply transposed convolutions along with regular convolutions\n", 361 | "\n", 362 | "In the decoder, the size of the image gradually increases and the depth gradually decreases." 363 | ] 364 | }, 365 | { 366 | "cell_type": "code", 367 | "execution_count": 60, 368 | "metadata": { 369 | "id": "EEb5TdBmIy7l" 370 | }, 371 | "outputs": [], 372 | "source": [ 373 | "from torchvision.models import resnet18\n", 374 | "\n", 375 | "########################################################\n", 376 | "# Initialize convolution layer weights to N(0,0.02)\n", 377 | "########################################################\n", 378 | "def weights_init_normal(m):\n", 379 | " classname = m.__class__.__name__\n", 380 | " if classname.find(\"Conv\") != -1:\n", 381 | " torch.nn.init.normal_(m.weight.data, 0.0, 0.02)\n", 382 | " elif classname.find(\"BatchNorm2d\") != -1:\n", 383 | " torch.nn.init.normal_(m.weight.data, 1.0, 0.02)\n", 384 | " torch.nn.init.constant_(m.bias.data, 0.0)\n", 385 | "\n", 386 | "\n", 387 | "##############################\n", 388 | "# U-NET\n", 389 | "##############################\n", 390 | "''' As per the Paper - \"For generator G, we use the U-Net, which contains an encoder-decoder\n", 391 | "architecture, with symmetric skip connections.\" \n", 392 | "In the Encoder, the size of the image gradually reduces while the depth gradually increases. 
\n", 393 | "'''\n", 394 | "\n", 395 | "class UNetDown(nn.Module):\n", 396 | " def __init__(self, in_size, out_size, normalize=True, dropout=0.0):\n", 397 | " super(UNetDown, self).__init__()\n", 398 | " layers = [nn.Conv2d(in_size, out_size, 3, stride=2, padding=1, bias=False)]\n", 399 | " if normalize:\n", 400 | " layers.append(nn.BatchNorm2d(out_size, 0.8))\n", 401 | " layers.append(nn.LeakyReLU(0.2))\n", 402 | " self.model = nn.Sequential(*layers)\n", 403 | "\n", 404 | " def forward(self, x):\n", 405 | " return self.model(x)\n", 406 | "\n", 407 | "''' In the decoder, the size of the image gradually increases and the depth gradually decreases. '''\n", 408 | "class UNetUp(nn.Module):\n", 409 | " def __init__(self, in_size, out_size):\n", 410 | " super(UNetUp, self).__init__()\n", 411 | " self.model = nn.Sequential(\n", 412 | " nn.Upsample(scale_factor=2), # Upsampling by a scaling factor of 2\n", 413 | " nn.Conv2d(in_size, out_size, 3, stride=1, padding=1, bias=False),\n", 414 | " nn.BatchNorm2d(out_size, 0.8),\n", 415 | " nn.ReLU(inplace=True),\n", 416 | " )\n", 417 | "\n", 418 | " def forward(self, x, skip_input):\n", 419 | " x = self.model(x)\n", 420 | " x = torch.cat((x, skip_input), 1)\n", 421 | " return x\n", 422 | "\n", 423 | "\n", 424 | "class Generator(nn.Module):\n", 425 | " def __init__(self, latent_dim, img_shape):\n", 426 | " super(Generator, self).__init__()\n", 427 | " channels, self.h, self.w = img_shape\n", 428 | "\n", 429 | " self.fc = nn.Linear(latent_dim, self.h * self.w)\n", 430 | "\n", 431 | " self.down1 = UNetDown(channels + 1, 64, normalize=False)\n", 432 | " self.down2 = UNetDown(64, 128)\n", 433 | " self.down3 = UNetDown(128, 256)\n", 434 | " self.down4 = UNetDown(256, 512)\n", 435 | " self.down5 = UNetDown(512, 512)\n", 436 | " self.down6 = UNetDown(512, 512)\n", 437 | " self.down7 = UNetDown(512, 512, normalize=False)\n", 438 | " self.up1 = UNetUp(512, 512)\n", 439 | " self.up2 = UNetUp(1024, 512)\n", 440 | " self.up3 = UNetUp(1024, 512)\n", 441 | " self.up4 = UNetUp(1024, 256)\n", 442 | " self.up5 = UNetUp(512, 128)\n", 443 | " self.up6 = UNetUp(256, 64)\n", 444 | "\n", 445 | " self.final = nn.Sequential(\n", 446 | " nn.Upsample(scale_factor=2), \n", 447 | " nn.Conv2d(128, channels, 3, stride=1, padding=1), nn.Tanh()\n", 448 | " )\n", 449 | "\n", 450 | " def forward(self, x, z):\n", 451 | " # Propagate noise through fc layer and reshape to img shape\n", 452 | " z = self.fc(z).view(z.size(0), 1, self.h, self.w)\n", 453 | " d1 = self.down1(torch.cat((x, z), 1))\n", 454 | " d2 = self.down2(d1)\n", 455 | " d3 = self.down3(d2)\n", 456 | " d4 = self.down4(d3)\n", 457 | " d5 = self.down5(d4)\n", 458 | " d6 = self.down6(d5)\n", 459 | " d7 = self.down7(d6)\n", 460 | " u1 = self.up1(d7, d6)\n", 461 | " u2 = self.up2(u1, d5)\n", 462 | " u3 = self.up3(u2, d4)\n", 463 | " u4 = self.up4(u3, d3)\n", 464 | " u5 = self.up5(u4, d2)\n", 465 | " u6 = self.up6(u5, d1)\n", 466 | "\n", 467 | " return self.final(u6)\n", 468 | "\n", 469 | "\n", 470 | "##############################\n", 471 | "# Encoder\n", 472 | "##############################\n", 473 | "\n", 474 | "class Encoder(nn.Module):\n", 475 | " # 1. Use this encoder and get mu and log_var\n", 476 | " # 2. std = exp(log_var / 2)\n", 477 | " # 3. random_z = N(0, 1)\n", 478 | " # 4. 
encoded_z = random_z * std + mu (Reparameterization trick)\n", 479 | " def __init__(self, latent_dim, input_shape):\n", 480 | " super(Encoder, self).__init__()\n", 481 | " resnet18_model = resnet18(pretrained=False)\n", 482 | " self.feature_extractor = nn.Sequential(*list(resnet18_model.children())[:-3])\n", 483 | " # [:-3] => Everything except the last 3 items\n", 484 | " self.pooling = nn.AvgPool2d(kernel_size=8, stride=8, padding=0)\n", 485 | " # Output is mu and log(var) for reparameterization trick used in VAEs\n", 486 | " # mu and logvar assigned the same value (the encoder’s last layer output)\n", 487 | " self.fc_mu = nn.Linear(256, latent_dim)\n", 488 | " self.fc_logvar = nn.Linear(256, latent_dim)\n", 489 | "\n", 490 | " def forward(self, img):\n", 491 | " out = self.feature_extractor(img)\n", 492 | " out = self.pooling(out)\n", 493 | " out = out.view(out.size(0), -1)\n", 494 | " mu = self.fc_mu(out)\n", 495 | " logvar = self.fc_logvar(out)\n", 496 | " return mu, logvar\n", 497 | "\n", 498 | "\n", 499 | "##############################\n", 500 | "# Discriminator\n", 501 | "##############################\n", 502 | "\n", 503 | "class MultiDiscriminator(nn.Module):\n", 504 | " def __init__(self, input_shape):\n", 505 | " super(MultiDiscriminator, self).__init__()\n", 506 | "\n", 507 | " def discriminator_block(in_filters, out_filters, normalize=True):\n", 508 | " \"\"\"Returns downsampling layers of each discriminator block\"\"\"\n", 509 | " layers = [nn.Conv2d(in_filters, out_filters, 4, stride=2, padding=1)]\n", 510 | " if normalize:\n", 511 | " layers.append(nn.BatchNorm2d(out_filters, 0.8))\n", 512 | " layers.append(nn.LeakyReLU(0.2))\n", 513 | " return layers\n", 514 | "\n", 515 | " channels, _, _ = input_shape\n", 516 | " # Extracts discriminator models\n", 517 | " self.models = nn.ModuleList()\n", 518 | " for i in range(3):\n", 519 | " self.models.add_module(\n", 520 | " \"disc_%d\" % i,\n", 521 | " nn.Sequential(\n", 522 | " *discriminator_block(channels, 64, normalize=False),\n", 523 | " *discriminator_block(64, 128),\n", 524 | " *discriminator_block(128, 256),\n", 525 | " *discriminator_block(256, 512),\n", 526 | " nn.Conv2d(512, 1, 3, padding=1)\n", 527 | " ),\n", 528 | " )\n", 529 | "\n", 530 | " self.downsample = nn.AvgPool2d(channels, stride=2, padding=[1, 1], count_include_pad=False)\n", 531 | "\n", 532 | " def compute_loss(self, x, ground_truth):\n", 533 | " \"\"\"Computes the MSE between model output and scalar ground_truth\"\"\"\n", 534 | " loss = sum([torch.mean((out - ground_truth) ** 2) for out in self.forward(x)])\n", 535 | " return loss\n", 536 | "\n", 537 | " def forward(self, x):\n", 538 | " outputs = []\n", 539 | " for m in self.models:\n", 540 | " outputs.append(m(x))\n", 541 | " x = self.downsample(x)\n", 542 | " return outputs" 543 | ] 544 | }, 545 | { 546 | "cell_type": "code", 547 | "execution_count": 61, 548 | "metadata": { 549 | "colab": { 550 | "base_uri": "https://localhost:8080/" 551 | }, 552 | "id": "YxFuX3PCKOVW", 553 | "outputId": "c0a5bccd-572c-444d-d805-226d29a71b99" 554 | }, 555 | "outputs": [ 556 | { 557 | "name": "stdout", 558 | "output_type": "stream", 559 | "text": [ 560 | "Using CUDA\n" 561 | ] 562 | }, 563 | { 564 | "data": { 565 | "text/plain": [ 566 | "MultiDiscriminator(\n", 567 | " (models): ModuleList(\n", 568 | " (disc_0): Sequential(\n", 569 | " (0): Conv2d(3, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", 570 | " (1): LeakyReLU(negative_slope=0.2)\n", 571 | " (2): Conv2d(64, 128, kernel_size=(4, 4), stride=(2, 2), 
padding=(1, 1))\n", 572 | " (3): BatchNorm2d(128, eps=0.8, momentum=0.1, affine=True, track_running_stats=True)\n", 573 | " (4): LeakyReLU(negative_slope=0.2)\n", 574 | " (5): Conv2d(128, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", 575 | " (6): BatchNorm2d(256, eps=0.8, momentum=0.1, affine=True, track_running_stats=True)\n", 576 | " (7): LeakyReLU(negative_slope=0.2)\n", 577 | " (8): Conv2d(256, 512, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", 578 | " (9): BatchNorm2d(512, eps=0.8, momentum=0.1, affine=True, track_running_stats=True)\n", 579 | " (10): LeakyReLU(negative_slope=0.2)\n", 580 | " (11): Conv2d(512, 1, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", 581 | " )\n", 582 | " (disc_1): Sequential(\n", 583 | " (0): Conv2d(3, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", 584 | " (1): LeakyReLU(negative_slope=0.2)\n", 585 | " (2): Conv2d(64, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", 586 | " (3): BatchNorm2d(128, eps=0.8, momentum=0.1, affine=True, track_running_stats=True)\n", 587 | " (4): LeakyReLU(negative_slope=0.2)\n", 588 | " (5): Conv2d(128, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", 589 | " (6): BatchNorm2d(256, eps=0.8, momentum=0.1, affine=True, track_running_stats=True)\n", 590 | " (7): LeakyReLU(negative_slope=0.2)\n", 591 | " (8): Conv2d(256, 512, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", 592 | " (9): BatchNorm2d(512, eps=0.8, momentum=0.1, affine=True, track_running_stats=True)\n", 593 | " (10): LeakyReLU(negative_slope=0.2)\n", 594 | " (11): Conv2d(512, 1, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", 595 | " )\n", 596 | " (disc_2): Sequential(\n", 597 | " (0): Conv2d(3, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", 598 | " (1): LeakyReLU(negative_slope=0.2)\n", 599 | " (2): Conv2d(64, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", 600 | " (3): BatchNorm2d(128, eps=0.8, momentum=0.1, affine=True, track_running_stats=True)\n", 601 | " (4): LeakyReLU(negative_slope=0.2)\n", 602 | " (5): Conv2d(128, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", 603 | " (6): BatchNorm2d(256, eps=0.8, momentum=0.1, affine=True, track_running_stats=True)\n", 604 | " (7): LeakyReLU(negative_slope=0.2)\n", 605 | " (8): Conv2d(256, 512, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", 606 | " (9): BatchNorm2d(512, eps=0.8, momentum=0.1, affine=True, track_running_stats=True)\n", 607 | " (10): LeakyReLU(negative_slope=0.2)\n", 608 | " (11): Conv2d(512, 1, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", 609 | " )\n", 610 | " )\n", 611 | " (downsample): AvgPool2d(kernel_size=3, stride=2, padding=[1, 1])\n", 612 | ")" 613 | ] 614 | }, 615 | "execution_count": 61, 616 | "metadata": {}, 617 | "output_type": "execute_result" 618 | } 619 | ], 620 | "source": [ 621 | "##############################################\n", 622 | "# SETUP, LOSS, INITIALIZE MODELS and BUFFERS\n", 623 | "##############################################\n", 624 | "cuda = True if torch.cuda.is_available() else False\n", 625 | "print(\"Using CUDA\" if cuda else \"Not using CUDA\")\n", 626 | "\n", 627 | "# Loss functions\n", 628 | "mae_loss = torch.nn.L1Loss()\n", 629 | "input_shape = (hp.channels, hp.img_size, hp.img_size)\n", 630 | "\n", 631 | "# Initialize generator, encoder and discriminators\n", 632 | "generator = Generator(hp.latent_dim, input_shape)\n", 633 | "encoder = Encoder(hp.latent_dim, input_shape)\n", 634 | "\n", 635 | "D_VAE = MultiDiscriminator(input_shape)\n", 636 
| "D_LR = MultiDiscriminator(input_shape)\n", 637 | "\n", 638 | "if cuda:\n", 639 | " generator = generator.cuda()\n", 640 | " encoder.cuda()\n", 641 | " D_VAE = D_VAE.cuda()\n", 642 | " D_LR = D_LR.cuda()\n", 643 | " mae_loss.cuda()\n", 644 | "\n", 645 | "# Initialize weights\n", 646 | "generator.apply(weights_init_normal)\n", 647 | "D_VAE.apply(weights_init_normal)\n", 648 | "D_LR.apply(weights_init_normal)" 649 | ] 650 | }, 651 | { 652 | "cell_type": "code", 653 | "execution_count": 70, 654 | "metadata": { 655 | "id": "TpEIFH7BIRwA" 656 | }, 657 | "outputs": [], 658 | "source": [ 659 | "# SAMPLING IMAGES\n", 660 | "def sample_images(batches_done):\n", 661 | " \"\"\"From the validation set this method will create images and \n", 662 | " save those Generated samples in a path \"\"\"\n", 663 | " generator.eval()\n", 664 | " imgs = next(iter(val_dataloader))\n", 665 | " # next() will supply each subsequent element from the iterable\n", 666 | " # So in this case each subsequent set of images from val_dataloader\n", 667 | " img_samples = None\n", 668 | " # For below line to work, I need to create a folder named 'maps' in the root_path\n", 669 | " path = \"/content/%s/%s.png\" % ('maps', batches_done)\n", 670 | " for img_A, img_B in zip(imgs[\"A\"], imgs[\"B\"]):\n", 671 | " # Repeat input image by number of desired columns\n", 672 | " real_A = img_A.view(1, *img_A.shape).repeat(hp.latent_dim, 1, 1, 1)\n", 673 | " real_A = Variable(real_A.type(Tensor))\n", 674 | " # Sample latent representations\n", 675 | " sampled_z = Variable(Tensor(np.random.normal(0, 1, (hp.latent_dim, hp.latent_dim))))\n", 676 | " # Generate samples\n", 677 | " fake_B = generator(real_A, sampled_z)\n", 678 | " # Concatenate samples horizontally\n", 679 | " fake_B = torch.cat([x for x in fake_B.data.cpu()], -1)\n", 680 | " img_sample = torch.cat((img_A, fake_B), -1)\n", 681 | " img_sample = img_sample.view(1, *img_sample.shape)\n", 682 | " # Concatenate with previous samples vertically\n", 683 | " img_samples = img_sample if img_samples is None else torch.cat((img_samples, img_sample), -2)\n", 684 | " save_image(img_samples, path, nrow=8, normalize=True)\n", 685 | " generator.train() \n", 686 | " return path\n", 687 | " " 688 | ] 689 | }, 690 | { 691 | "cell_type": "code", 692 | "execution_count": 71, 693 | "metadata": { 694 | "id": "fgRoX2DbHSwk" 695 | }, 696 | "outputs": [], 697 | "source": [ 698 | "# OPTIMIZERS\n", 699 | "optimizer_E = torch.optim.Adam(encoder.parameters(), lr=hp.lr, betas=(hp.b1, hp.b2))\n", 700 | "optimizer_G = torch.optim.Adam(generator.parameters(), lr=hp.lr, betas=(hp.b1, hp.b2))\n", 701 | "\n", 702 | "optimizer_D_VAE = torch.optim.Adam(D_VAE.parameters(), lr=hp.lr, betas=(hp.b1, hp.b2))\n", 703 | "optimizer_D_LR = torch.optim.Adam(D_LR.parameters(), lr=hp.lr, betas=(hp.b1, hp.b2))\n", 704 | "\n", 705 | "Tensor = torch.cuda.FloatTensor if cuda else torch.Tensor" 706 | ] 707 | }, 708 | { 709 | "cell_type": "markdown", 710 | "metadata": {}, 711 | "source": [ 712 | "## The reparameterization trick\n", 713 | "\n", 714 | "#### The reparameterization trick is to learn two vectors σ and μ, \n", 715 | "\n", 716 | "First, sample ϵ from N(0,1) and then your latent vector Z would be (where ⊙ symbol or notation is the element-wise product.):\n", 717 | "\n", 718 | "## Z = μ + ϵ ⊙ σ\n", 719 | "\n", 720 | "\n", 721 | "So, if an input data point is to be mapped into a latent variable `z` via sampling (after getting passed through a neural network), it has to follow the following equation:\n", 722 | "\n", 723 | "## 
z = z_mean + epsilon * std \n", 724 | "\n", 725 | "where \n", 726 | "\n", 727 | "### std = torch.exp(z_log_var / 2)\n", 728 | "\n", 729 | "----\n" 730 | ] 731 | }, 732 | { 733 | "cell_type": "markdown", 734 | "metadata": {}, 735 | "source": [ 736 | "## Reconciliation between 2 mathematical expression for z\n", 737 | "\n", 738 | "## 1st -> z = z_mean + epsilon * std\n", 739 | "\n", 740 | "## 2nd (where the std is expressed as below )\n", 741 | "\n", 742 | "![Imgur](https://imgur.com/1G3iT3m.png)\n", 743 | "\n", 744 | "\n", 745 | "The σ in the first equation is the standard deviation which as you know is the square root of the variance. Then you can see that the multiplication of 0.5 outside the log equates to raising the variance inside of the log to the power of 0.5:\n", 746 | "\n", 747 | "\n", 748 | "![Imgur](https://imgur.com/CQxP66m.png)\n", 749 | "\n", 750 | "\n", 751 | "So they are the same." 752 | ] 753 | }, 754 | { 755 | "cell_type": "code", 756 | "execution_count": 72, 757 | "metadata": { 758 | "id": "45hmQrQZLoe4" 759 | }, 760 | "outputs": [], 761 | "source": [ 762 | "''' REPARAMETERIZE \n", 763 | "\n", 764 | "The reparameterization trick is to learn two vectors σ and μ, sample ϵ from N(0,1) and \n", 765 | "then your latent vector Z would be as below (where ⊙ is the element-wise product.):\n", 766 | "\n", 767 | "z = μ + ϵ ⊙ σ\n", 768 | "\n", 769 | "z = z_mean + epsilon * sigma\n", 770 | "\n", 771 | "'''\n", 772 | "\n", 773 | "def reparameterization(z_mean, z_log_var):\n", 774 | " std = torch.exp(z_log_var / 2)\n", 775 | " sampled_z = Variable(Tensor(np.random.normal(0, 1, (z_mean.size(0), hp.latent_dim))))\n", 776 | " z = z_mean + sampled_z * std\n", 777 | " return z" 778 | ] 779 | }, 780 | { 781 | "cell_type": "markdown", 782 | "metadata": {}, 783 | "source": [ 784 | "So in above implementation the `sampled_z` is replacing 'epsilon'\n", 785 | "\n" 786 | ] 787 | }, 788 | { 789 | "cell_type": "markdown", 790 | "metadata": {}, 791 | "source": [ 792 | "### `np.random.normal()`\n", 793 | "\n", 794 | "The `random.normal` method has the following syntax:\n", 795 | "\n", 796 | "`numpy.random.normal(m,s,n)`\n", 797 | "\n", 798 | "The random.normal function takes in three parameters:\n", 799 | "\n", 800 | "* m: the mean of the normal distribution.\n", 801 | "* s: the standard deviation of the distribution.\n", 802 | "* n: the total number of samples to be drawn." 803 | ] 804 | }, 805 | { 806 | "cell_type": "code", 807 | "execution_count": null, 808 | "metadata": { 809 | "colab": { 810 | "base_uri": "https://localhost:8080/", 811 | "height": 975 812 | }, 813 | "id": "GiWH4SwoI7jA", 814 | "outputId": "a9c5a587-ce74-407b-b1ab-af378ba0d570" 815 | }, 816 | "outputs": [], 817 | "source": [ 818 | "# TRAINING\n", 819 | "# Adversarial loss\n", 820 | "valid = 1\n", 821 | "fake = 0\n", 822 | "\n", 823 | "prev_time = time.time()\n", 824 | "for epoch in range(hp.epoch, hp.n_epochs):\n", 825 | " for i, batch in enumerate(train_dataloader):\n", 826 | "\n", 827 | " # Set model input\n", 828 | " real_A = Variable(batch[\"A\"].type(Tensor))\n", 829 | " real_B = Variable(batch[\"B\"].type(Tensor))\n", 830 | "\n", 831 | " ################################\n", 832 | " # Train Generator and Encoder\n", 833 | " #################################\n", 834 | " ''' The first component of Bicycle-GAN is cVAE-GAN. It first encodes the ground truth image(B) into the latent space using encoder E.\n", 835 | "\n", 836 | " ### Then input image(A) and encoded ground truth image(i.e. 
latent vector - Z) are passed into the Generator G which produces the output image(B^). That is, The generator attempts to map the input image A along with a sampled z back into the original image B.\n", 837 | " the flow of cVAE-GAN => B -> Z -> B^\n", 838 | " '''\n", 839 | "\n", 840 | " optimizer_E.zero_grad()\n", 841 | " optimizer_G.zero_grad()\n", 842 | "\n", 843 | " #################\n", 844 | " # cVAE-GAN\n", 845 | " #################\n", 846 | "\n", 847 | " # Produce output using encoding of B (cVAE-GAN)\n", 848 | " mu, logvar = encoder(real_B)\n", 849 | " # reparameterize so backprogation can be done on the\n", 850 | " # stochastically generated z variable\n", 851 | " encoded_z = reparameterization(mu, logvar)\n", 852 | " fake_B = generator(real_A, encoded_z)\n", 853 | "\n", 854 | " # Pixelwise loss of translated image by VAE\n", 855 | " loss_pixel_L1_vae = mae_loss(fake_B, real_B)\n", 856 | " \n", 857 | " # Kullback-Leibler divergence of encoded B\n", 858 | " # Refer - https://stackoverflow.com/questions/61597340/how-is-kl-divergence-in-pytorch-code-related-to-the-formula\n", 859 | " loss_kl = 0.5 * torch.sum(torch.exp(logvar) + mu ** 2 - logvar - 1)\n", 860 | " # Adversarial loss\n", 861 | " # compute_loss() - Computes the MSE between model output and scalar ground_truth\n", 862 | " loss_VAE_GAN = D_VAE.compute_loss(fake_B, valid)\n", 863 | "\n", 864 | " ####################################\n", 865 | " # cLR-GAN\n", 866 | " # Conditional Latent Regressor GAN\n", 867 | " ####################################\n", 868 | " \n", 869 | " ''' This is the second component of the Bicycle-GAN. Here a randomly drawn latent vector (sampled_z below) along with the input image(A) is provided to the generator. The generated output(B^) may not look like ground truth image(B), but it should look realistic.\n", 870 | " Then the generated output is passed through the encoder, encoder tries to regain the latent vector from the output image. \n", 871 | " \n", 872 | " the flow of cLR-GAN is Z -> B^ -> Z^\n", 873 | " '''\n", 874 | "\n", 875 | " # real_A need to be a 4-D Tensor of Batch_size, Channel, Height, Width\n", 876 | " # ic(real_A.size()) # torch.Size([8, 3, 128, 128])\n", 877 | " # Produce output using sampled z (cLR-GAN)\n", 878 | " # sampled_z need to be a 2-D Tensor of Batch_size (i.e. 
8) and Latent_dim (i.e 8)\n", 879 | " sampled_z = Variable(Tensor(np.random.normal(0, 1, (real_A.size(0), hp.latent_dim))))\n", 880 | " # Draw random samples from a normal (Gaussian) distribution.\n", 881 | " # ic(sampled_z.size()) # torch.Size([8, 8])\n", 882 | " ''' ic(sampled_z) will output below\n", 883 | " tensor([[ 0.2202, 0.3848, -1.0489, -0.5884, -0.0094, 0.1678, -1.5106, -0.2802],\n", 884 | " [ 1.9893, -0.8738, -2.4284, 1.0219, 0.4162, 0.3345, -1.7501, 0.0511],\n", 885 | " [-0.5878, 0.0200, -0.9107, 0.8697, -1.8777, 0.3819, 0.4788, -3.0111],\n", 886 | " [ 0.9784, 0.7068, -0.2902, 0.9084, 0.2643, -0.0090, 0.6117, -1.4820],\n", 887 | " [-0.1651, -0.5930, 0.1434, 1.6912, -0.6974, -1.7704, -3.2449, -0.4491],\n", 888 | " [-1.1474, -1.6918, 1.6326, -0.7691, 0.4847, 2.0244, -0.3476, -1.1350],\n", 889 | " [ 0.3965, 0.3789, 0.3177, -1.5840, 0.6150, -1.4996, 0.2707, -0.7499],\n", 890 | " [-0.1608, -0.8812, -0.0048, -0.6618, -0.5037, -0.8252, 0.7017, 0.4248]],\n", 891 | " device='cuda:0') '''\n", 892 | " \n", 893 | " _fake_B = generator(real_A, sampled_z)\n", 894 | " # cLR Loss: Adversarial loss\n", 895 | " loss_CLR_GAN = D_LR.compute_loss(_fake_B, valid)\n", 896 | "\n", 897 | " #########################################\n", 898 | " # Total Loss (Generator + Encoder)\n", 899 | " #########################################\n", 900 | "\n", 901 | " loss_total_gen_encoder = loss_VAE_GAN + loss_CLR_GAN + hp.lambda_pixel * loss_pixel_L1_vae + hp.lambda_kl * loss_kl\n", 902 | "\n", 903 | " loss_total_gen_encoder.backward(retain_graph=True)\n", 904 | " optimizer_E.step()\n", 905 | "\n", 906 | " ######################\n", 907 | " # Generator Only Loss\n", 908 | " ######################\n", 909 | " '''Under CLR-GAN - Then the generated output (_fake_B) is passed through the encoder, encoder tries to regain the latent vector from the output image. 
'''\n", 910 | "\n", 911 | " # Latent L1 loss\n", 912 | " _mu, _ = encoder(_fake_B)\n", 913 | " loss_latent = hp.lambda_latent * mae_loss(_mu, sampled_z)\n", 914 | "\n", 915 | " loss_latent.backward()\n", 916 | " optimizer_G.step()\n", 917 | "\n", 918 | " #######################################\n", 919 | " # Train Discriminator (cVAE-GAN)\n", 920 | " #######################################\n", 921 | "\n", 922 | " optimizer_D_VAE.zero_grad()\n", 923 | "\n", 924 | " loss_D_VAE = D_VAE.compute_loss(real_B, valid) + D_VAE.compute_loss(fake_B.detach(), fake)\n", 925 | "\n", 926 | " loss_D_VAE.backward()\n", 927 | " optimizer_D_VAE.step()\n", 928 | "\n", 929 | " ####################################\n", 930 | " # Train Discriminator (cLR-GAN)\n", 931 | " # Conditional Latent Regressor GAN\n", 932 | " ####################################\n", 933 | "\n", 934 | " optimizer_D_LR.zero_grad()\n", 935 | "\n", 936 | " loss_D_LR = D_LR.compute_loss(real_B, valid) + D_LR.compute_loss(_fake_B.detach(), fake)\n", 937 | "\n", 938 | " loss_D_LR.backward()\n", 939 | " optimizer_D_LR.step()\n", 940 | "\n", 941 | " #################\n", 942 | " # Log Progress\n", 943 | " #################\n", 944 | "\n", 945 | " # Determine approximate time left\n", 946 | " batches_done = epoch * len(train_dataloader) + i\n", 947 | " batches_left = hp.n_epochs * len(train_dataloader) - batches_done\n", 948 | " time_left = datetime.timedelta(seconds=batches_left * (time.time() - prev_time))\n", 949 | " prev_time = time.time()\n", 950 | "\n", 951 | " # Print log\n", 952 | " sys.stdout.write(\n", 953 | " \"\\r[Epoch %d/%d] [Batch %d/%d] [D VAE_loss: %f, LR_loss: %f] [G loss: %f, pixel: %f, kl: %f, latent: %f] ETA: %s\"\n", 954 | " % (\n", 955 | " epoch,\n", 956 | " hp.n_epochs,\n", 957 | " i,\n", 958 | " len(train_dataloader),\n", 959 | " loss_D_VAE.item(),\n", 960 | " loss_D_LR.item(),\n", 961 | " loss_total_gen_encoder.item(),\n", 962 | " loss_pixel_L1_vae.item(),\n", 963 | " loss_kl.item(),\n", 964 | " loss_latent.item(),\n", 965 | " time_left,\n", 966 | " )\n", 967 | " )\n", 968 | " \n", 969 | " # If at sample interval save image\n", 970 | " if batches_done % hp.sample_interval == 0:\n", 971 | " clear_output()\n", 972 | " visualise_output(sample_images(batches_done), 30, 10) " 973 | ] 974 | } 975 | ], 976 | "metadata": { 977 | "accelerator": "GPU", 978 | "colab": { 979 | "collapsed_sections": [], 980 | "name": "GEN_5_BiCycleGAN.ipynb", 981 | "provenance": [] 982 | }, 983 | "kernelspec": { 984 | "display_name": "Python 3", 985 | "name": "python3" 986 | }, 987 | "language_info": { 988 | "codemirror_mode": { 989 | "name": "ipython", 990 | "version": 3 991 | }, 992 | "file_extension": ".py", 993 | "mimetype": "text/x-python", 994 | "name": "python", 995 | "nbconvert_exporter": "python", 996 | "pygments_lexer": "ipython3", 997 | "version": "3.9.12" 998 | } 999 | }, 1000 | "nbformat": 4, 1001 | "nbformat_minor": 0 1002 | } 1003 | -------------------------------------------------------------------------------- /DCGAN-from-Scratch-with-PyTorch/DCGAN_ONLY_train.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "pycharm": { 7 | "name": "#%% md\n" 8 | } 9 | }, 10 | "source": [ 11 | "# [Link to my Youtube Video Explaining this whole Notebook](https://www.youtube.com/watch?v=yOtdN6W4O_0&list=PLxqBkZuBynVRyOJs4RWmB_fKlOVe5S8CR&index=13)\n", 12 | "\n", 13 | 
"[![Imgur](https://imgur.com/kL41U2b.png)](https://www.youtube.com/watch?v=yOtdN6W4O_0&list=PLxqBkZuBynVRyOJs4RWmB_fKlOVe5S8CR&index=13)\n", 14 | "\n" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": { 21 | "pycharm": { 22 | "name": "#%%\n" 23 | } 24 | }, 25 | "outputs": [], 26 | "source": [ 27 | "from google.colab import drive\n", 28 | "drive.mount(\"/content/drive\")" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": { 35 | "pycharm": { 36 | "name": "#%%\n" 37 | } 38 | }, 39 | "outputs": [], 40 | "source": [ 41 | "# !python3 \"/content/train.py\"\n", 42 | "!python train.py" 43 | ] 44 | } 45 | ], 46 | "metadata": { 47 | "interpreter": { 48 | "hash": "36cf16204b8548560b1c020c4e8fb5b57f0e4c58016f52f2d4be01e192833930" 49 | }, 50 | "kernelspec": { 51 | "display_name": "Python 3.9.10 64-bit", 52 | "language": "python", 53 | "name": "python3" 54 | }, 55 | "language_info": { 56 | "name": "python", 57 | "version": "3.9.10" 58 | }, 59 | "orig_nbformat": 4 60 | }, 61 | "nbformat": 4, 62 | "nbformat_minor": 2 63 | } -------------------------------------------------------------------------------- /DCGAN-from-Scratch-with-PyTorch/Readme.md: -------------------------------------------------------------------------------- 1 | ### To run this notebook, 2 | 3 | - you can just run the Jupyter Notebook, as this notebook imports the train.py file and which in turn imports all the other modules from the other files (i.e. dcgan.py and utils.py) 4 | 5 | For this notebook the directory where the MNIST dataset was downloaded is in Google Drive, so you only have to change the path of the directory where you want your MNIST to get downloaded. 6 | 7 | And also the variable 'output_path' where the generated fake images will be saved. 8 | 9 | # [Link to my Youtube Video Explaining this whole Notebook](https://www.youtube.com/watch?v=yOtdN6W4O_0&list=PLxqBkZuBynVRyOJs4RWmB_fKlOVe5S8CR&index=13) 10 | 11 | [![Imgur](https://imgur.com/kL41U2b.png)](https://www.youtube.com/watch?v=yOtdN6W4O_0&list=PLxqBkZuBynVRyOJs4RWmB_fKlOVe5S8CR&index=13) -------------------------------------------------------------------------------- /DCGAN-from-Scratch-with-PyTorch/dcgan.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import torch.optim as optim 5 | import numpy as np 6 | from torch.autograd import Variable 7 | from torchvision.utils import save_image 8 | 9 | import matplotlib.pyplot as plt 10 | from torchvision import datasets, transforms 11 | import math 12 | import itertools 13 | from glob import glob 14 | 15 | """ 16 | torch.nn.ConvTranspose2d(in_channels, out_channels, kernel_size, stride=1, padding=0, output_padding=0, groups=1, bias=True, dilation=1, padding_mode='zeros', device=None, dtype=None) 17 | 18 | """ 19 | 20 | 21 | class Generator(nn.Module): 22 | """ 23 | noise_vector: is the length of the z input vector. 24 | 25 | num_gen_filter: size of the feature maps that are propagated through the generator, 26 | 27 | num_ch: The number of channels in the output image (set to 1 for Grayscale images). 28 | 29 | Here, the height and width dimension of the image does not change, only the channel dimension decreases. 
30 | 31 | For the Conv and ConvTranspose layers: 32 | * in_channels (int) – Number of channels/filters in the input image 33 | * out_channels (int) – Number of channels/filters produced by the convolution 34 | 35 | """ 36 | 37 | def __init__(self, num_ch, noise_vector, num_gen_filter): 38 | super(Generator, self).__init__() 39 | self.network = nn.Sequential( 40 | nn.ConvTranspose2d( 41 | in_channels=noise_vector, 42 | out_channels=num_gen_filter * 4, 43 | kernel_size=4, 44 | stride=1, 45 | padding=0, 46 | bias=False, 47 | ), 48 | nn.BatchNorm2d(num_gen_filter * 4), 49 | nn.ReLU(True), 50 | nn.ConvTranspose2d( 51 | in_channels=num_gen_filter * 4, 52 | out_channels=num_gen_filter * 2, 53 | kernel_size=3, 54 | stride=2, 55 | padding=1, 56 | bias=False, 57 | ), 58 | nn.BatchNorm2d(num_gen_filter * 2), 59 | nn.ReLU(True), 60 | nn.ConvTranspose2d( 61 | in_channels=num_gen_filter * 2, 62 | out_channels=num_gen_filter, 63 | kernel_size=4, 64 | stride=2, 65 | padding=1, 66 | bias=False, 67 | ), 68 | nn.BatchNorm2d(num_gen_filter), 69 | nn.ReLU(True), 70 | nn.ConvTranspose2d( 71 | in_channels=num_gen_filter, 72 | out_channels=num_ch, 73 | kernel_size=4, 74 | stride=2, 75 | padding=1, 76 | bias=False, 77 | ), 78 | nn.Tanh(), 79 | ) 80 | 81 | def forward(self, input): 82 | output = self.network(input) 83 | return output 84 | 85 | 86 | class Discriminator(nn.Module): 87 | """ 88 | torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros', device=None, dtype=None) 89 | Here, the height and width dimension of the image does not change, only the channel dimension increases. 90 | """ 91 | 92 | def __init__(self, num_ch, num_disc_filter): 93 | super(Discriminator, self).__init__() 94 | self.network = nn.Sequential( 95 | nn.Conv2d( 96 | in_channels=num_ch, 97 | out_channels=num_disc_filter, 98 | kernel_size=4, 99 | stride=2, 100 | padding=1, 101 | bias=False, 102 | ), 103 | nn.LeakyReLU(0.2, inplace=True), 104 | nn.Conv2d( 105 | in_channels=num_disc_filter, 106 | out_channels=num_disc_filter * 2, 107 | kernel_size=4, 108 | stride=2, 109 | padding=1, 110 | bias=False, 111 | ), 112 | nn.BatchNorm2d(num_disc_filter * 2), 113 | nn.LeakyReLU(0.2, inplace=True), 114 | nn.Conv2d( 115 | in_channels=num_disc_filter * 2, 116 | out_channels=num_disc_filter * 4, 117 | kernel_size=3, 118 | stride=2, 119 | padding=1, 120 | bias=False, 121 | ), 122 | nn.BatchNorm2d(num_disc_filter * 4), 123 | nn.LeakyReLU(0.2, inplace=True), 124 | nn.Conv2d( 125 | in_channels=num_disc_filter * 4, 126 | out_channels=1, 127 | kernel_size=4, 128 | stride=1, 129 | padding=0, 130 | bias=False, 131 | ), 132 | nn.Sigmoid(), 133 | ) 134 | 135 | # The Discriminator outputs a scalar probability to classify the input image as real or fake. 
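    # With 28x28 MNIST inputs, the strided convolutions above shrink the feature map 28 -> 14 -> 7 -> 4 -> 1,
    # so forward() gets a (batch, 1, 1, 1) tensor from self.network and flattens it with
    # view(-1, 1).squeeze(1) into a 1-D vector of per-image real/fake probabilities of shape (batch,).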
136 | def forward(self, input): 137 | output = self.network(input) 138 | return output.view(-1, 1).squeeze(1) 139 | -------------------------------------------------------------------------------- /DCGAN-from-Scratch-with-PyTorch/train.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import torch.optim as optim 5 | import numpy as np 6 | from torch.autograd import Variable 7 | from torchvision.utils import save_image 8 | from torchsummary import summary 9 | 10 | import matplotlib.pyplot as plt 11 | from torchvision import datasets, transforms 12 | 13 | # The below 2 lines should be in a separate Jupyter NB file 14 | # for train.py to run in Google Colab 15 | # Keeping this line inside train.py will give error while 16 | # running in Colab with just `python train.py` 17 | # from google.colab import drive 18 | # drive.mount("/content/drive") 19 | 20 | # Device configuration 21 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 22 | print("Using", device) 23 | 24 | from utils import * 25 | from dcgan import * 26 | 27 | 28 | def train( 29 | num_epochs, 30 | discriminator_net, 31 | generator_net, 32 | optimizerD, 33 | optimizerG, 34 | train_loader, 35 | fake_label, 36 | real_label, 37 | criterion, 38 | output_path, 39 | num_test_samples, 40 | device, 41 | noise_vector, 42 | ): 43 | num_batches = len(train_loader) 44 | 45 | for epoch in range(num_epochs): 46 | for i, (real_images, _) in enumerate(train_loader): 47 | batch_size_real_imgs = real_images.shape[0] 48 | 49 | ############################ 50 | # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z))) 51 | ########################### 52 | """ The standard process to train a DCGAN network is to first train 53 | the discriminator on the batch of samples. 54 | """ 55 | discriminator_net.zero_grad() 56 | 57 | real_images = real_images.to(device) 58 | 59 | # First training on real image, hence fill it with 1 60 | # Create Labels 61 | label = torch.full((batch_size_real_imgs,), real_label, device=device) 62 | 63 | """ The discriminator is used to classify real images (drawn from the training set) 64 | and fake images (produced by the generator). 65 | So, next, train the discriminator network on real images and real labels: 66 | """ 67 | output = discriminator_net(real_images) 68 | 69 | loss_disc_real = criterion(output, label) 70 | 71 | loss_disc_real.backward() 72 | 73 | D_x = output.mean().item() 74 | 75 | # Creating noise variables for the input to whole adversarial network 76 | noise = torch.randn(batch_size_real_imgs, noise_vector, 1, 1, device=device) 77 | 78 | # Generate a batch of fake images using the generator network 79 | fake_images = generator_net(noise) 80 | 81 | # As now training on fake image, fill label with 0's 82 | label.fill_(fake_label) 83 | 84 | # Now train Discriminator on fake images 85 | output = discriminator_net(fake_images.detach()) 86 | 87 | loss_disc_fake = criterion(output, label) 88 | loss_disc_fake.backward() 89 | 90 | D_G_z1 = output.mean().item() 91 | 92 | # Total Discriminator Loss 93 | loss_disc_total = loss_disc_real + loss_disc_fake 94 | 95 | optimizerD.step() 96 | 97 | ############################ 98 | # (2) Update Generator network: maximize log(D(G(z))) 99 | ########################### 100 | 101 | """ When we train the generator network we have to 102 | freeze the discriminator network, as we have already trained it. 
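        In this implementation, "freezing" simply means that only optimizerG.step() is called during
        this phase: gradients still flow back through the discriminator into the generator (note that
        fake_images is NOT detached here, unlike in the discriminator update above), but the
        discriminator's weights are not updated, and any gradients it accumulates are cleared by
        discriminator_net.zero_grad() at the start of the next iteration.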
""" 103 | 104 | generator_net.zero_grad() 105 | 106 | # Now, set Image Label vector values equal to 1 107 | # To fool the Discriminator Network 108 | label.fill_(real_label) 109 | 110 | # After filling all labels with 1 (representing real labels), run discriminator network with fake images to fool it 111 | # To classify real images (drawn from the training set) and fakes images (produced by the generator). 112 | output = discriminator_net(fake_images) 113 | 114 | # And now after I tried to fool discriminator, check how much it was fooled. 115 | # so to the extent above output does not match with "labels" variable (which were all filed up with 1) 116 | # That will be the failure of Generator Network i.e. Generator Loss 117 | loss_generator = criterion(output, label) 118 | 119 | loss_generator.backward() 120 | 121 | D_G_z2 = output.mean().item() 122 | 123 | optimizerG.step() 124 | 125 | if (i + 1) % 100 == 0: 126 | print( 127 | "Epoch [{}/{}], step [{}/{}], d_loss: {:.4f}, g_loss: {:.4f}, D(x): {:.2f}, Discriminator - D(G(x)): {:.2f}, Generator - D(G(x)): {:.2f}".format( 128 | epoch + 1, 129 | num_epochs, 130 | i + 1, 131 | num_batches, 132 | loss_disc_total.item(), 133 | loss_generator.item(), 134 | D_x, 135 | D_G_z1, 136 | D_G_z2, 137 | ) 138 | ) 139 | generator_net.eval() 140 | plot_images( 141 | epoch, 142 | output_path, 143 | num_test_samples, 144 | generator_net, 145 | device, 146 | ) 147 | generator_net.train() 148 | 149 | 150 | ########################################## 151 | # Initialize all the necessary variables 152 | ######################################### 153 | 154 | batch_size = 256 155 | 156 | output_path = "/content/sample_data/" 157 | 158 | # Gather MNIST Dataset 159 | train_loader = get_data_loader(batch_size) 160 | 161 | # Create the Discriminator and Generator network 162 | discriminator_net = Discriminator(num_ch=1, num_disc_filter=32).to(device) 163 | 164 | generator_net = Generator(num_ch=1, noise_vector=100, num_gen_filter=32).to(device) 165 | 166 | # loss function 167 | criterion = nn.BCELoss() 168 | 169 | # optimizers 170 | optimizerD = optim.Adam(discriminator_net.parameters(), lr=0.001) 171 | optimizerG = optim.Adam(generator_net.parameters(), lr=0.001) 172 | 173 | # initialize variables required for training 174 | real_label = 1.0 175 | fake_label = 0.0 176 | # num_batches = len(train_loader) 177 | 178 | num_test_samples = 16 179 | 180 | fixed_noise = torch.randn(num_test_samples, 100, 1, 1, device=device) 181 | 182 | num_epochs = 70 183 | 184 | noise_vector = 100 185 | 186 | ########################################## 187 | # Execute the train Function 188 | ######################################### 189 | 190 | train( 191 | num_epochs=num_epochs, 192 | discriminator_net=discriminator_net, 193 | generator_net=generator_net, 194 | optimizerD=optimizerD, 195 | optimizerG=optimizerG, 196 | train_loader=train_loader, 197 | fake_label=fake_label, 198 | real_label=real_label, 199 | criterion=criterion, 200 | output_path=output_path, 201 | num_test_samples=num_test_samples, 202 | device=device, 203 | noise_vector=noise_vector, 204 | ) 205 | -------------------------------------------------------------------------------- /DCGAN-from-Scratch-with-PyTorch/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import torch.optim as optim 5 | import numpy as np 6 | from torch.autograd import Variable 7 | from torchvision.utils import save_image 8 | 9 | 
import matplotlib.pyplot as plt 10 | from torchvision import datasets, transforms 11 | import math 12 | import itertools 13 | from glob import glob 14 | 15 | 16 | def get_data_loader(batch_size): 17 | # MNIST Dataset 18 | transform = transforms.Compose( 19 | [transforms.ToTensor(), transforms.Normalize(mean=(0.1307,), std=(0.3081,))] 20 | ) 21 | 22 | train_dataset = datasets.MNIST( 23 | root="/content/drive/MyDrive/All_Datasets/MNIST", 24 | train=True, 25 | transform=transform, 26 | download=True, 27 | ) 28 | 29 | # Data Loader 30 | train_loader = torch.utils.data.DataLoader( 31 | dataset=train_dataset, batch_size=batch_size, shuffle=True 32 | ) 33 | return train_loader 34 | 35 | 36 | def plot_images(epoch, path, num_test_samples, generator, device): 37 | z = torch.randn(num_test_samples, 100, 1, 1, device=device) 38 | 39 | plot_grid_size = int(math.sqrt(num_test_samples)) 40 | 41 | title = None 42 | 43 | generated_fake_images = generator(z) 44 | 45 | path += "variable_noise/" 46 | 47 | title = "Variable Noise" 48 | 49 | fig, ax = plt.subplots(plot_grid_size, plot_grid_size, figsize=(6, 6)) 50 | 51 | for i, j in itertools.product(range(plot_grid_size), range(plot_grid_size)): 52 | ax[i, j].get_xaxis().set_visible(False) 53 | ax[i, j].get_yaxis().set_visible(False) 54 | 55 | for sample in range(num_test_samples): 56 | i = sample // 4 57 | j = sample % 4 58 | ax[i, j].cla() 59 | ax[i, j].imshow( 60 | generated_fake_images[sample].data.cpu().numpy().reshape(28, 28), 61 | cmap="Greys", 62 | ) 63 | 64 | label = "Epoch_{}".format(epoch + 1) 65 | fig.text(0.5, 0.04, label, ha="center") 66 | fig.suptitle(title) 67 | -------------------------------------------------------------------------------- /LeNet5_PyTorch.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Build the architecture of LeNet5 from Scratch\n", 8 | "\n", 9 | "# [Link to my Youtube Video Explaining this whole Notebook](https://www.youtube.com/watch?v=Uq5sQUoLXpA&list=PLxqBkZuBynVRyOJs4RWmB_fKlOVe5S8CR&index=9)\n", 10 | "\n", 11 | "[![Imgur](https://imgur.com/yEmSfK0.png)](https://www.youtube.com/watch?v=Uq5sQUoLXpA&list=PLxqBkZuBynVRyOJs4RWmB_fKlOVe5S8CR&index=9)\n", 12 | "\n", 13 | "There are two main steps after that. \n", 14 | "\n", 15 | "First is initializing the layers that we are going to use in our CNN inside __init__ , and \n", 16 | "\n", 17 | "Then the other is to define the sequence in which those layers will process the image. This is defined inside the forward function." 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "![Imgur](https://imgur.com/yrIrojL.png)\n", 25 | "\n", 26 | "The above diagram shows a description of the LeNet-5 architecture as shown in the original document.\n", 27 | "\n", 28 | " \n", 29 | "**Layer 1**- The first layer is the input layer; It is generally not considered a layer of the network as nothing is learned on that layer. The input layer supports 32x32, and these are the dimensions of the images that will be passed to the next layer.\n", 30 | "\n", 31 | "The grayscale images used in the research paper had their pixel values normalized from 0 to 255, to values between -0.1 and 1.175. The reason for normalization is to ensure that the batch of images have a mean of 0 and a standard deviation of 1, the benefits of this is seen in the reduction in the amount of training time. 
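The normalization described above can be expressed directly as a torchvision transform. This is a hedged sketch, not the exact preprocessing of the original 1998 paper: it standardizes MNIST digits with the commonly used dataset mean and std (0.1307, 0.3081), which is also what the data-loading cell later in this notebook does.

```python
import torchvision.transforms as transforms

# Resize to the 32x32 input LeNet-5 expects, convert to [0, 1] tensors,
# then standardize to roughly zero mean / unit std using the MNIST statistics.
lenet_transform = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.ToTensor(),                       # scales pixels to [0, 1]
    transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
])
```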
In the image-classification example with LeNet-5 below, we will normalize the pixel values of the images to values between 0 and 1.\n", 32 |     "\n", 33 |     " \n", 34 |     "**Layer 2** - Layer C1 is a convolutional layer with six 5×5 convolution kernels, producing feature maps of size 28×28; the slight shrinkage at the borders prevents input image information from falling off the edge of the feature maps.\n", 35 |     "\n", 36 |     " \n", 37 |     "**Layer 3** - Layer S2 is the subsampling/pooling layer, which outputs 6 feature maps of size 14×14. Each unit in every feature map is connected to a 2×2 neighborhood in the corresponding feature map in C1. \n", 38 |     " \n", 39 |     "**Layer 4** - Convolutional layer C3 comprises sixteen 5×5 convolution kernels. The input of the first six feature maps of C3 is every contiguous subset of three feature maps in S2; the next six feature maps take their input from contiguous subsets of four feature maps; the next three feature maps take their input from discontiguous subsets of four feature maps. Finally, the input for the last feature map comes from all of the S2 feature maps. \n", 40 |     " \n", 41 |     "**Layer 5** - Layer S4 is similar to S2, with a pooling size of 2×2, and outputs sixteen 5×5 feature maps. \n", 42 |     "\n", 43 |     "**Layer 6** - Layer C5 is a convolutional layer with one hundred twenty convolution kernels of size 5×5. Each unit is connected to the 5×5 neighborhood on all sixteen S4 feature maps. Since the feature map size of S4 is also 5×5, the output size of C5 is 1×1, so S4 and C5 are fully connected.\n", 44 |     "\n", 45 |     "It is referred to as a convolutional layer rather than a fully connected layer because, if the input of LeNet-5 were larger while its architecture stayed unchanged, its output size would be bigger than 1×1, i.e. no longer a fully connected layer.\n", 46 |     " \n", 47 |     "\n", 48 |     "**Layer 7** - The F6 layer is connected to C5 and produces 84 feature maps. In the grayscale images used in the research, the pixel values from 0 to 255 were normalized to values between -0.1 and 1.175. The reason for normalization is to ensure the image batch has a mean of 0 and a standard deviation of 1.\n", 49 |     "\n", 50 |     "The advantage of this is a reduction in training time. In the following example we will normalize the pixel values of the images to take values between 0 and 1."
51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 27, 56 | "metadata": { 57 | "colab": { 58 | "base_uri": "https://localhost:8080/" 59 | }, 60 | "id": "Vx23hoSrw87A", 61 | "outputId": "cfd18dde-d1a9-48ca-8107-a93b87d70c9f" 62 | }, 63 | "outputs": [ 64 | { 65 | "data": { 66 | "text/plain": [ 67 | "device(type='cuda')" 68 | ] 69 | }, 70 | "execution_count": 27, 71 | "metadata": {}, 72 | "output_type": "execute_result" 73 | } 74 | ], 75 | "source": [ 76 | "import torch\n", 77 | "import torch.nn as nn\n", 78 | "import torchvision\n", 79 | "import torchvision.transforms as transforms\n", 80 | "import matplotlib.pyplot as plt\n", 81 | "\n", 82 | "import warnings\n", 83 | "warnings.filterwarnings('ignore')\n", 84 | "\n", 85 | "# Device will determine whether to run the training on GPU or CPU.\n", 86 | "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n", 87 | "# device = 'cpu'\n", 88 | "device" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 17, 94 | "metadata": { 95 | "colab": { 96 | "base_uri": "https://localhost:8080/" 97 | }, 98 | "id": "SBNQhORMxEpf", 99 | "outputId": "ab8eca2e-2d95-4535-8de0-c465fedf4577" 100 | }, 101 | "outputs": [ 102 | { 103 | "name": "stdout", 104 | "output_type": "stream", 105 | "text": [ 106 | "Wed Feb 16 15:15:37 2022 \n", 107 | "+-----------------------------------------------------------------------------+\n", 108 | "| NVIDIA-SMI 460.32.03 Driver Version: 460.32.03 CUDA Version: 11.2 |\n", 109 | "|-------------------------------+----------------------+----------------------+\n", 110 | "| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\n", 111 | "| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\n", 112 | "| | | MIG M. |\n", 113 | "|===============================+======================+======================|\n", 114 | "| 0 Tesla P100-PCIE... 
Off | 00000000:00:04.0 Off | 0 |\n", 115 | "| N/A 48C P0 35W / 250W | 1065MiB / 16280MiB | 0% Default |\n", 116 | "| | | N/A |\n", 117 | "+-------------------------------+----------------------+----------------------+\n", 118 | " \n", 119 | "+-----------------------------------------------------------------------------+\n", 120 | "| Processes: |\n", 121 | "| GPU GI CI PID Type Process name GPU Memory |\n", 122 | "| ID ID Usage |\n", 123 | "|=============================================================================|\n", 124 | "+-----------------------------------------------------------------------------+\n" 125 | ] 126 | } 127 | ], 128 | "source": [ 129 | "!nvidia-smi" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": 18, 135 | "metadata": { 136 | "id": "Dt8ZQ7JeGcVL" 137 | }, 138 | "outputs": [], 139 | "source": [ 140 | "batch_size = 64\n", 141 | "num_classes = 10" 142 | ] 143 | }, 144 | { 145 | "cell_type": "markdown", 146 | "metadata": {}, 147 | "source": [ 148 | "![Imgur](https://imgur.com/yrIrojL.png)" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": null, 154 | "metadata": {}, 155 | "outputs": [], 156 | "source": [ 157 | "#Defining the convolutional neural network\n", 158 | "class LeNet5(nn.Module):\n", 159 | " def __init__(self, num_classes):\n", 160 | " super(LeNet5, self).__init__()\n", 161 | " self.layer1 = nn.Sequential(\n", 162 | " nn.Conv2d(1, 6, kernel_size=5, stride=1, padding=0),\n", 163 | " # Above is Layer-1 - The input for LeNet-5 is a 32×32 grayscale image which passes through the first convolutional layer with 6 feature maps or filters having size 5×5 and a stride of one. The image dimensions changes from 32x32x1 to 28x28x6.\n", 164 | " \n", 165 | " nn.BatchNorm2d(6),\n", 166 | " nn.ReLU(),\n", 167 | " # Layer-2 - Then average pooling layer or sub-sampling layer with a filter size 2×2 and a stride of two. The resulting image dimensions will be reduced to 14x14x6. \n", 168 | " nn.MaxPool2d(kernel_size = 2, stride = 2)\n", 169 | " ) \n", 170 | " \n", 171 | " self.layer2 = nn.Sequential(\n", 172 | " nn.Conv2d(6, 16, kernel_size=5, stride=1, padding=0),\n", 173 | " # Layer-3 Next, there is a second convolutional layer with 16 feature maps having size 5×5 and a stride of 1. In this layer, only 10 out of 16 feature maps are connected to 6 feature maps of the previous layer\n", 174 | " nn.BatchNorm2d(16),\n", 175 | " nn.ReLU(),\n", 176 | " # Layer-4 The fourth layer (S4) is again an average pooling layer with filter size 2×2 and a stride of 2. This layer is the same as the second layer (S2) except it has 16 feature maps so the output will be reduced to 5x5x16.\n", 177 | " nn.MaxPool2d(kernel_size = 2, stride = 2)\n", 178 | " )\n", 179 | " \n", 180 | " self.fc = nn.Linear(400, 120)\n", 181 | " ''' Layer-5 The fifth layer (C5) is a fully connected convolutional layer with 120 feature maps each of size 1×1. Each of the 120 units in C5 is connected to all the 400 nodes (5x5x16) in the fourth layer S4. '''\n", 182 | " self.relu = nn.ReLU()\n", 183 | " '''Layer-6 The sixth layer is a fully connected layer (F6) with 84 units. '''\n", 184 | " self.fc1 = nn.Linear(120, 84)\n", 185 | " self.relu1 = nn.ReLU()\n", 186 | " ''' Layer-7 - Finally, a fully connected layer ŷ with 10 possible values corresponding to the digits from 0 to 9. Since the MNIST data has 10 classes for each of the 10 numerical digits. 
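The dimension bookkeeping in the class above can be verified numerically with the standard convolution output formula, out = floor((in + 2*padding - kernel) / stride) + 1. The small helper below is a sketch (not part of the notebook) that traces a 32x32 input through the LeNet-5 layers defined here.

```python
def conv_out(size, kernel, stride=1, padding=0):
    """Spatial output size of a Conv2d/MaxPool2d layer."""
    return (size + 2 * padding - kernel) // stride + 1

s = 32
s = conv_out(s, kernel=5)              # C1: 32 -> 28
s = conv_out(s, kernel=2, stride=2)    # S2: 28 -> 14
s = conv_out(s, kernel=5)              # C3: 14 -> 10
s = conv_out(s, kernel=2, stride=2)    # S4: 10 -> 5
print(s, 16 * s * s)                   # 5, 400  -> matches nn.Linear(400, 120)
```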
'''\n", 187 | " self.fc2 = nn.Linear(84, num_classes)\n", 188 | " \n", 189 | " def forward(self, x):\n", 190 | " output = self.layer1(x)\n", 191 | " output = self.layer2(output)\n", 192 | " print('output after layer2', output.size()) # torch.Size([32, 16, 5, 5]\n", 193 | " # inside forward method image dimension are : [batch_size, channels, height, width]\n", 194 | " output = output.reshape(output.size(0), -1)\n", 195 | " # print('output after resize', output.size()) # torch.Size([32, 400])\n", 196 | " output = self.fc(output)\n", 197 | " output = self.relu(output)\n", 198 | " output = self.fc1(output)\n", 199 | " output = self.relu1(output)\n", 200 | " output = self.fc2(output)\n", 201 | " return output" 202 | ] 203 | }, 204 | { 205 | "cell_type": "markdown", 206 | "metadata": {}, 207 | "source": [ 208 | "\n", 209 | "## Reason to reshape after `output = self.layer2(output)`\n", 210 | "\n", 211 | "In PyTorch, images are represented as [channels, height, width], so a color image would be [3, 256, 256].\n", 212 | "\n", 213 | "During the training you will get batches of images, so your shape in the forward method will get an additional batch dimension at dim0: [batch_size, channels, height, width].\n", 214 | "\n", 215 | "So before applying the fully connected layer with output-shape of 400, I have to convert the above 4-D Tensor to a 2-D Tensor. \n", 216 | "And I definitely have to keep the first dimension, which is the batch-size, hence to the `.reshape()` function I am passing the unchanged first dimension of `output.size(0)`\n", 217 | "\n", 218 | "But for the second dimension, I am letting PyTorch to decide based on the Matrix calculation. i.e. the second dimension will be inferred given the first dimension.\n", 219 | "\n", 220 | "\n", 221 | "-----------------\n", 222 | "\n", 223 | "## where is softmax in above ? \n", 224 | "\n", 225 | "If you thought that the last layer in a Neural Network should be some sort of activation function like sigmoid() or softmax(), and we dont see that happening here in the above function.\n", 226 | "\n", 227 | "So, where is softmax? And its right here:\n", 228 | "\n", 229 | "```py\n", 230 | "criterion = nn.CrossEntropyLoss()\n", 231 | "\n", 232 | "```\n", 233 | "\n", 234 | "Inside `nn.CrossEntropyLoss()` function is handled the softmax computation which, of course, works with the raw output of your last layer\n", 235 | "\n", 236 | "---\n", 237 | "\n", 238 | "## Setting Hyperparameters\n" 239 | ] 240 | }, 241 | { 242 | "cell_type": "markdown", 243 | "metadata": { 244 | "id": "XHOaF28qw87C" 245 | }, 246 | "source": [ 247 | "### Loading the Dataset\n", 248 | "Using torchvision , we will load the dataset as this will allow us to perform any pre-processing steps easily." 
249 | ] 250 | }, 251 | { 252 | "cell_type": "code", 253 | "execution_count": 19, 254 | "metadata": { 255 | "id": "kqUweCDjGe2R" 256 | }, 257 | "outputs": [], 258 | "source": [ 259 | "class LeNet5(nn.Module):\n", 260 | " def __init__(self,num_classes):\n", 261 | " super(LeNet5,self).__init__()\n", 262 | " self.layer1 = nn.Sequential(\n", 263 | " nn.Conv2d(in_channels=1, out_channels=6, kernel_size=(5,5), stride=(1,1), padding=(0,0)), #Layer 1\n", 264 | " nn.BatchNorm2d(6),\n", 265 | " nn.ReLU(),\n", 266 | " nn.MaxPool2d(kernel_size=(2, 2), stride=(2)) #Layer-2\n", 267 | " ) \n", 268 | " \n", 269 | " self.layer2 = nn.Sequential(\n", 270 | " nn.Conv2d(in_channels=6, out_channels=16, kernel_size=(5,5), stride=(1,1), padding=(0,0)), #Layer 3\n", 271 | " nn.BatchNorm2d(16),\n", 272 | " nn.ReLU(),\n", 273 | " nn.MaxPool2d(kernel_size=(2, 2), stride=(2,2)) #Layer 4\n", 274 | " )\n", 275 | " self.fc = nn.Linear(400, 120) # Layer 5\n", 276 | " self.relu = nn.ReLU()\n", 277 | " self.fc1 = nn.Linear(120, 84) # Layer 6\n", 278 | " self.relu1 = nn.ReLU()\n", 279 | " self.fc2 = nn.Linear(84, num_classes) # Final Layer\n", 280 | " \n", 281 | " def forward(self, x):\n", 282 | " output = self.layer1(x)\n", 283 | " output = self.layer2(output)\n", 284 | " # print('output after layer2', output.size()) # torch.Size([32, 16, 5, 5]\n", 285 | " output = output.reshape(output.size(0), -1) # See note below for this line\n", 286 | " # print('output after resize', output.size()) # torch.Size([32, 400])\n", 287 | " output = self.fc(output)\n", 288 | " output = self.relu(output)\n", 289 | " output = self.fc1(output)\n", 290 | " output = self.relu1(output)\n", 291 | " output = self.fc2(output)\n", 292 | " return output\n", 293 | " " 294 | ] 295 | }, 296 | { 297 | "cell_type": "code", 298 | "execution_count": 20, 299 | "metadata": { 300 | "id": "hg3ntm7uw87E" 301 | }, 302 | "outputs": [], 303 | "source": [ 304 | "# Define relevant variables for the ML task\n", 305 | "\n", 306 | "''' Keeping num_classes at 10, as this will be the output shape from the final Layer of the LeNet5 Neural Network model, because, the output layer will have 10 output neurons, since the MNIST data has 10 classes for each of the 10 numerical digits. 
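The reshape-before-fully-connected step discussed above can also be checked in isolation. A minimal sketch (assuming a batch of 32 MNIST-sized inputs) showing why `output.size(0)` must be preserved while the remaining dimensions are flattened to 400:

```python
import torch

x = torch.randn(32, 16, 5, 5)          # [batch, channels, height, width] after layer2
flat = x.reshape(x.size(0), -1)        # keep batch dim, infer the rest (16*5*5 = 400)
print(flat.shape)                      # torch.Size([32, 400]) -> feeds nn.Linear(400, 120)
```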
'''\n", 307 | "\n", 308 | "train_dataset = torchvision.datasets.MNIST(root = './data',\n", 309 | " train = True,\n", 310 | " transform = transforms.Compose([\n", 311 | " transforms.Resize((32,32)),\n", 312 | " transforms.ToTensor(),\n", 313 | " transforms.Normalize(mean = (0.1307,), std = (0.3081,))]),\n", 314 | " download = True)\n", 315 | "\n", 316 | "\n", 317 | "test_dataset = torchvision.datasets.MNIST(root = './data',\n", 318 | " train = False,\n", 319 | " transform = transforms.Compose([\n", 320 | " transforms.Resize((32,32)),\n", 321 | " transforms.ToTensor(),\n", 322 | " transforms.Normalize(mean = (0.1325,), std = (0.3105,))]),\n", 323 | " download=True)\n", 324 | "\n", 325 | "\n", 326 | "train_dataloader = torch.utils.data.DataLoader(dataset = train_dataset,\n", 327 | " batch_size = batch_size,\n", 328 | " shuffle = True)\n", 329 | "\n", 330 | "\n", 331 | "test_dataloader = torch.utils.data.DataLoader(dataset = test_dataset,\n", 332 | " batch_size = batch_size,\n", 333 | " shuffle = True)" 334 | ] 335 | }, 336 | { 337 | "cell_type": "markdown", 338 | "metadata": { 339 | "id": "PJSNQLNAw87G" 340 | }, 341 | "source": [ 342 | "## Build the architecture of LeNet5 from Scratch" 343 | ] 344 | }, 345 | { 346 | "cell_type": "code", 347 | "execution_count": 21, 348 | "metadata": { 349 | "id": "QuJAdibTw87G" 350 | }, 351 | "outputs": [], 352 | "source": [ 353 | "learning_rate = 0.001\n", 354 | "\n", 355 | "model = LeNet5(num_classes).to(device)\n", 356 | "\n", 357 | "#Setting the loss function\n", 358 | "criterion = nn.CrossEntropyLoss()\n", 359 | "\n", 360 | "#Setting the optimizer with the model parameters and learning rate\n", 361 | "optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)\n", 362 | "\n", 363 | "#this is defined to print how many steps are remaining when training\n", 364 | "total_step = len(train_dataloader)" 365 | ] 366 | }, 367 | { 368 | "cell_type": "code", 369 | "execution_count": 22, 370 | "metadata": { 371 | "colab": { 372 | "base_uri": "https://localhost:8080/" 373 | }, 374 | "id": "cLjcifcvGj1H", 375 | "outputId": "22c12c91-675b-4536-c44b-267fa911dcf1" 376 | }, 377 | "outputs": [ 378 | { 379 | "name": "stdout", 380 | "output_type": "stream", 381 | "text": [ 382 | "Length of train_dataloader 938\n", 383 | "Length of test_dataloader 157\n" 384 | ] 385 | } 386 | ], 387 | "source": [ 388 | "print('Length of train_dataloader ', len(train_dataloader))\n", 389 | "print('Length of test_dataloader ', len(test_dataloader))" 390 | ] 391 | }, 392 | { 393 | "cell_type": "markdown", 394 | "metadata": { 395 | "id": "ZbZJsUIvw87H" 396 | }, 397 | "source": [ 398 | "## Setting Hyperparameters\n" 399 | ] 400 | }, 401 | { 402 | "cell_type": "code", 403 | "execution_count": 23, 404 | "metadata": { 405 | "id": "Q3f-rT9_w87J" 406 | }, 407 | "outputs": [], 408 | "source": [ 409 | "def train(model, criterion, optimizer, num_epochs=10):\n", 410 | " total_training_loss = []\n", 411 | " total_step = len(train_dataloader)\n", 412 | " \n", 413 | " for epoch in range(num_epochs):\n", 414 | " running_loss = 0.0\n", 415 | " \n", 416 | " for i, (images, labels) in enumerate(train_dataloader): \n", 417 | " images = images.to(device)\n", 418 | " labels = labels.to(device)\n", 419 | " \n", 420 | " #Forward pass\n", 421 | " outputs = model(images)\n", 422 | " loss = criterion(outputs, labels)\n", 423 | " \n", 424 | " # Backward and optimize\n", 425 | " optimizer.zero_grad() # Clear the past gradient by set the gradients to zero before every update\n", 426 | " loss.backward() # calculate 
the new gradients\n", 427 | " # print('images.size ', images.size(0))\n", 428 | " ''' \"\"\" Update the running loss \n", 429 | " we need to use, loss.item() instead of loss alone in running_loss calculation and averaging. \n", 430 | " Because loss gives you a grad_function, not a float value. \n", 431 | " The item() method extracts the loss’s value as a Python float.\n", 432 | " \"\"\" '''\n", 433 | " running_loss += loss.item() * images.size(0)\n", 434 | " optimizer.step() # we update the weights \n", 435 | " \n", 436 | " \n", 437 | " if (i+1) % 400 == 0:\n", 438 | " print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' \n", 439 | " .format(epoch+1, num_epochs, i+1, total_step, loss.item()))\n", 440 | " epoch_loss = running_loss / len(train_dataloader)\n", 441 | " total_training_loss.append(epoch_loss)\n", 442 | " \"\"\" To summarize, in the above code what I did is\n", 443 | " \n", 444 | " 1. => multiply each average batch loss with batch-length. \n", 445 | " The batch-length is inputs.size(0) which gives the number total images in each batch. \n", 446 | " Essentially I am un-averaging the Batch-Loss\n", 447 | "\n", 448 | " Do this all the batches inside the batch-running loop.\n", 449 | " And then after that loop, i.e. outside the batch-loop and coming back to my epoch-loop\n", 450 | " \n", 451 | " 2. => Divide this accumulated un-averaged Batch-loss from all batches, \n", 452 | " by the number of samples (len(train_dataloader)) to get the exact train loss average for the epoch \"\"\"\n", 453 | " return total_training_loss" 454 | ] 455 | }, 456 | { 457 | "cell_type": "code", 458 | "execution_count": 24, 459 | "metadata": { 460 | "colab": { 461 | "base_uri": "https://localhost:8080/" 462 | }, 463 | "id": "KHSSd1-gC9n4", 464 | "outputId": "49b9194c-9e42-471e-a3e4-f2b449d018bc" 465 | }, 466 | "outputs": [ 467 | { 468 | "name": "stdout", 469 | "output_type": "stream", 470 | "text": [ 471 | "Epoch [1/30], Step [400/938], Loss: 0.0270\n", 472 | "Epoch [1/30], Step [800/938], Loss: 0.1064\n", 473 | "Epoch [2/30], Step [400/938], Loss: 0.0322\n", 474 | "Epoch [2/30], Step [800/938], Loss: 0.0453\n", 475 | "Epoch [3/30], Step [400/938], Loss: 0.0066\n", 476 | "Epoch [3/30], Step [800/938], Loss: 0.0386\n", 477 | "Epoch [4/30], Step [400/938], Loss: 0.0346\n", 478 | "Epoch [4/30], Step [800/938], Loss: 0.0306\n", 479 | "Epoch [5/30], Step [400/938], Loss: 0.0016\n", 480 | "Epoch [5/30], Step [800/938], Loss: 0.0295\n", 481 | "Epoch [6/30], Step [400/938], Loss: 0.0009\n", 482 | "Epoch [6/30], Step [800/938], Loss: 0.0145\n", 483 | "Epoch [7/30], Step [400/938], Loss: 0.0020\n", 484 | "Epoch [7/30], Step [800/938], Loss: 0.0363\n", 485 | "Epoch [8/30], Step [400/938], Loss: 0.0061\n", 486 | "Epoch [8/30], Step [800/938], Loss: 0.0026\n", 487 | "Epoch [9/30], Step [400/938], Loss: 0.0335\n", 488 | "Epoch [9/30], Step [800/938], Loss: 0.0120\n", 489 | "Epoch [10/30], Step [400/938], Loss: 0.0395\n", 490 | "Epoch [10/30], Step [800/938], Loss: 0.0007\n", 491 | "Epoch [11/30], Step [400/938], Loss: 0.0007\n", 492 | "Epoch [11/30], Step [800/938], Loss: 0.0060\n", 493 | "Epoch [12/30], Step [400/938], Loss: 0.0012\n", 494 | "Epoch [12/30], Step [800/938], Loss: 0.0011\n", 495 | "Epoch [13/30], Step [400/938], Loss: 0.0221\n", 496 | "Epoch [13/30], Step [800/938], Loss: 0.0421\n", 497 | "Epoch [14/30], Step [400/938], Loss: 0.0032\n", 498 | "Epoch [14/30], Step [800/938], Loss: 0.0059\n", 499 | "Epoch [15/30], Step [400/938], Loss: 0.0036\n", 500 | "Epoch [15/30], Step [800/938], Loss: 0.0000\n", 501 | 
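The epoch-loss averaging commented inside the training loop above is a common pattern; note that dividing by `len(train_dataloader)` averages over the number of batches. If you instead want the average loss per sample, divide by `len(train_dataloader.dataset)`. A minimal sketch of the per-sample variant (assumption: same variable names as the notebook):

```python
# running_loss accumulates loss.item() * images.size(0), i.e. un-averaged batch losses
epoch_loss_per_sample = running_loss / len(train_dataloader.dataset)  # 60,000 for MNIST train
epoch_loss_per_batch = running_loss / len(train_dataloader)           # 938 batches at batch_size=64
```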
"Epoch [16/30], Step [400/938], Loss: 0.0000\n", 502 | "Epoch [16/30], Step [800/938], Loss: 0.0059\n", 503 | "Epoch [17/30], Step [400/938], Loss: 0.0016\n", 504 | "Epoch [17/30], Step [800/938], Loss: 0.0001\n", 505 | "Epoch [18/30], Step [400/938], Loss: 0.0006\n", 506 | "Epoch [18/30], Step [800/938], Loss: 0.0001\n", 507 | "Epoch [19/30], Step [400/938], Loss: 0.0001\n", 508 | "Epoch [19/30], Step [800/938], Loss: 0.0032\n", 509 | "Epoch [20/30], Step [400/938], Loss: 0.0004\n", 510 | "Epoch [20/30], Step [800/938], Loss: 0.0001\n", 511 | "Epoch [21/30], Step [400/938], Loss: 0.0021\n", 512 | "Epoch [21/30], Step [800/938], Loss: 0.0004\n", 513 | "Epoch [22/30], Step [400/938], Loss: 0.0050\n", 514 | "Epoch [22/30], Step [800/938], Loss: 0.0082\n", 515 | "Epoch [23/30], Step [400/938], Loss: 0.0019\n", 516 | "Epoch [23/30], Step [800/938], Loss: 0.0000\n", 517 | "Epoch [24/30], Step [400/938], Loss: 0.0072\n", 518 | "Epoch [24/30], Step [800/938], Loss: 0.0000\n", 519 | "Epoch [25/30], Step [400/938], Loss: 0.0002\n", 520 | "Epoch [25/30], Step [800/938], Loss: 0.0003\n", 521 | "Epoch [26/30], Step [400/938], Loss: 0.0723\n", 522 | "Epoch [26/30], Step [800/938], Loss: 0.0000\n", 523 | "Epoch [27/30], Step [400/938], Loss: 0.0012\n", 524 | "Epoch [27/30], Step [800/938], Loss: 0.0442\n", 525 | "Epoch [28/30], Step [400/938], Loss: 0.0257\n", 526 | "Epoch [28/30], Step [800/938], Loss: 0.0037\n", 527 | "Epoch [29/30], Step [400/938], Loss: 0.0000\n", 528 | "Epoch [29/30], Step [800/938], Loss: 0.0000\n", 529 | "Epoch [30/30], Step [400/938], Loss: 0.0001\n", 530 | "Epoch [30/30], Step [800/938], Loss: 0.0021\n" 531 | ] 532 | } 533 | ], 534 | "source": [ 535 | "total_training_loss = train(model, criterion, optimizer, num_epochs=30)" 536 | ] 537 | }, 538 | { 539 | "cell_type": "code", 540 | "execution_count": 25, 541 | "metadata": { 542 | "colab": { 543 | "base_uri": "https://localhost:8080/" 544 | }, 545 | "id": "KrAEczZ6DAWx", 546 | "outputId": "9e683e9f-e5e3-4d80-99cb-ad55338f0657" 547 | }, 548 | "outputs": [ 549 | { 550 | "data": { 551 | "text/plain": [ 552 | "[10.568266467943882,\n", 553 | " 3.527540081909407,\n", 554 | " 2.6088559687145545,\n", 555 | " 2.1823721174309565,\n", 556 | " 1.924547830021688,\n", 557 | " 1.6057489041481285,\n", 558 | " 1.3537017107074425,\n", 559 | " 1.1566425667095708,\n", 560 | " 1.1153176984784983,\n", 561 | " 0.983036599652807,\n", 562 | " 0.8899051300895366,\n", 563 | " 0.6929740591073604,\n", 564 | " 0.7318501428482301,\n", 565 | " 0.7322920497585915,\n", 566 | " 0.4553440813554328,\n", 567 | " 0.6452689993842532,\n", 568 | " 0.5951177723426396,\n", 569 | " 0.46549444908607823,\n", 570 | " 0.48881708133601715,\n", 571 | " 0.4589629755210131,\n", 572 | " 0.40898956688392407,\n", 573 | " 0.433147037635138,\n", 574 | " 0.3640874308061105,\n", 575 | " 0.3657881085964735,\n", 576 | " 0.3087785066473603,\n", 577 | " 0.3707788242852709,\n", 578 | " 0.24622227986580741,\n", 579 | " 0.282929215015277,\n", 580 | " 0.3211896866044501,\n", 581 | " 0.24679545621306762]" 582 | ] 583 | }, 584 | "execution_count": 25, 585 | "metadata": {}, 586 | "output_type": "execute_result" 587 | } 588 | ], 589 | "source": [ 590 | "total_training_loss" 591 | ] 592 | }, 593 | { 594 | "cell_type": "code", 595 | "execution_count": 28, 596 | "metadata": { 597 | "colab": { 598 | "base_uri": "https://localhost:8080/", 599 | "height": 278 600 | }, 601 | "id": "gVWgnhLgDACb", 602 | "outputId": "48899b3d-1325-437f-ea99-d7b4e7a2e617" 603 | }, 604 | "outputs": [ 605 | { 606 | "data": { 607 
| "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX4AAAEGCAYAAABiq/5QAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO3de3RU9bn/8fcDieES5RYQBSqgFASk2AZE9GjwwlHkFD2K1aL10h6tB0Xpr4jVVYtdUi+tWmm11lZae1TUini0tmJFIVY5argpN6vlIlGEJEIgChjC8/vjOyHhnpDM7Mzsz2utvWZm75nsZzP67O98r+buiIhIfDSLOgAREUktJX4RkZhR4hcRiRklfhGRmFHiFxGJmayoA6iLvLw87969e9RhiIiklXnz5pW6e8fd96dF4u/evTtFRUVRhyEiklbMbPXe9quqR0QkZpT4RURiRolfRCRm0qKOX0SalsrKSoqLi9m6dWvUoQjQokULunbtSnZ2dp3er8QvIvVWXFzMoYceSvfu3TGzqMOJNXenrKyM4uJievToUafPqKpHROpt69atdOjQQUm/CTAzOnToUK9fX0r8InJQlPSbjvp+F0r8IiIxk9mJ/7bb4NvfjjoKEWlkZWVlDBw4kIEDB9K5c2e6dOmy8/WXX365388WFRUxbty4A55j6NChjRLr7NmzGTlyZKP8rcaS2Y27H30EhYVRRyEijaxDhw4sXLgQgEmTJpGbm8sPf/jDnce3b99OVtbe01t+fj75+fkHPMebb77ZOME2QZld4s/Lg9JS0CpjIhnv8ssv5/vf/z4nnHACN954I2+//TYnnngixx9/PEOHDuX9998Hdi2BT5o0iSuvvJKCggJ69uzJlClTdv693Nzcne8vKCjgggsuoE+fPowZM4bqlQv/+te/0qdPH77xjW8wbty4epXsp02bxnHHHUf//v2ZOHEiAFVVVVx++eX079+f4447jvvuuw+AKVOm0LdvXwYMGMBFF13U4H+rzC7x5+XBtm3w+eeQ+BJFJAkKCvbcd+GF8N//DV98ASNG7Hn88svDVloKF1yw67HZsw8qjOLiYt58802aN2/Opk2beP3118nKyuKVV17h5ptvZvr06Xt8Zvny5bz22mts3ryZ3r17c8011+zRH37BggUsWbKEI488kpNOOok33niD/Px8rr76agoLC+nRowcXX3xxneP85JNPmDhxIvPmzaNdu3YMHz6c5557jm7duvHxxx+zePFiADZu3AjAnXfeycqVK8nJydm5ryEyv8QP4T8sEcl4o0ePpnnz5gCUl5czevRo+vfvz/jx41myZMleP3POOeeQk5NDXl4enTp1Yt26dXu8Z/DgwXTt2pVmzZoxcOBAVq1axfLly+nZs+fOvvP1SfzvvPMOBQUFdOzYkaysLMaMGUNhYSE9e/ZkxYoVXHfddbz00kscdthhAAwYMIAxY8bw2GOP7bMKqz4yu8TfvTuceCJUVkYdiUhm218JvVWr/R/PyzvoEv7uWrduvfP5j3/8Y4YNG8aMGTNYtWoVBXv7VQLk5OTsfN68eXO2b99+UO9pDO3atWPRokXMnDmThx56iKeffpqpU6fy4osvUlhYyAsvvMDkyZN57733GnQDyOwS/7Bh8Oab0KtX1JGISIqVl5fTpUsXAP74xz82+t/v3bs3K1asYNWqVQA89dRTdf7s4MGDmTNnDqWlpVRVVTFt2jROPfVUSktL2bFjB+effz6333478+fPZ8eOHaxZs4Zhw4Zx1113UV5eTkVFRYNiz+wSv4jE1o033shll13G7bffzjnnnNPof79ly5Y8+OCDnHXWWbRu3ZpBgwbt872zZs2ia9euO1//+c9/5s4772TYsGG4O+eccw6jRo1i0aJFXHHFFezYsQOAO+64g6qqKi655BLKy8txd8aNG0fbtm0bFLt5knq8mNlUYCSw3t37J/a1B54CugOrgAvdfcOB/lZ+fr4f1EIsFRUwdChcfz1897v1/7yI7NWyZcs49thjow4jchUVFeTm5uLujB07ll69ejF+/PhIYtnbd2Jm89x9j76ryazq+SNw1m77bgJmuXsvYFbidfK0agVLl0Lip5iISGP63e9+x8CBA+nXrx/l5eVcffXVUYdUJ0mr6nH3QjPrvtvuUUBB4vmjwGxgYrJioFkz6NBBvXpEJCnGjx8fWQm/IVLduHu4u69NPP8UOHxfbzSzq8ysyMyKSkpKDv6MSvwiSZGsamKpv/p+F5H16vEQ6T6jdfeH3T3f3fM7dtxjkfi6y8uDhtw4RGQPLVq0oKysTMm/Caiej79FixZ1/kyqe/WsM7Mj3H2tmR0BrE/6GQsKQiOviDSarl27UlxcTIN+jUujqV6Bq65SnfifBy4D7kw8/m/Sz/jTnyb9FCJxk52dXefVnqTpSVpVj5lNA+YCvc2s2My+S0j4Z5rZB8AZidciIpJCyezVs6+JK05P1jn3aupUmDABVqyANm1SemoRkaYos6dsAMjKgs8+U88eEZGEzE/8mqFTRGQXSvwiIjGjxC8iEjOZn/g7dQoLrnfrFnUkIiJNQuZPy5ybC48/HnUUIiJNRuaX+KtVVUUdgYhIkxCPxH/CCTB6dNRRiIg0CfFI/K1aqXFXRCQhHok/L0+JX0QkQYlfRCRm4pP4y8ogsYCxiEicZX53ToBTToFt26CyEnJyoo5GRCRS8Uj8Z54ZNhERiUlVjzts3hxK/SIiMRePxD9vHhx2GMycGXUkIiKRi0fi10RtIiI7xSvxl5VFG4eISBMQj8TfujW0aKESv4gIcUn8ZhrEJSKSEI/unAATJ8JRR0UdhYhI5OKT+K+9NuoIRESahHhU9QCUl8OKFVFHISISufgk/ltugUGDoo5CRCRy8Un8eXmwYQNs3x51JCIikYpX4ncPyV9EJMbilfhBXTpFJPaU+EVEYiY+ib9fP3jwQejZM+pIREQiFUniN7PxZrbEzBab2TQza5H0kx5xBFxzDXTpkvRTiYg0ZSlP/GbWBRgH5Lt7f6A5cFHST+wOixbB6tVJP5WISFMWVVVPFtDSzLKAVsAnST+jGQwdClOmJP1UIiJNWcoTv7t/DPwC+AhYC5S7+8u7v8/MrjKzIjMrKikpaZyTa6I2EZFIqnraAaOAHsCRQGszu2T397n7w+6e7+75HTt2bJyT5+VpTn4Rib0oqnrOAFa6e4m7VwLPAkNTcuaOHVXiF5HYiyLxfwQMMbNWZmbA6cCylJxZVT0iIqmfltnd3zKzZ4D5wHZgAfBwSk5+7bUwZkxKTiUi0lRFMh+/u/8E+EnKTzxkSMpPKSLS1MRn5C7AunXwt7/B559HHYmISGTilfgLC2HECFi5MupIREQiE6/Er4naRERilvg7dAiPSvwiEmPxSvwq8YuIxCzxq8QvIhJNd87I5OTAzJnQp0/UkYiIRCZeiR9g+PCoIxARiVS8qnoA5swJfflFRGIqfiX+u+6C9evh7LOjjkREJBLxK/FramYRibl4Jn716hG
RGItn4q+ogK1bo45ERCQS8Uz8oOoeEYmt+CX+886Dd98Nq3GJiMRQ/Hr1dOyopC8isRa/Ev+mTfDAA7B4cdSRiIhEIn6Jf+vWsATjnDlRRyIiEon4Jf727cOjunSKSEzFL/FnZUHbtkr8IhJb8Uv8oEFcIhJrSvwiIjETv+6cANOnQ8uWUUchIhKJeCb+I4+MOgIRkcjEs6qnsBB+/GNwjzoSEZGUi2finzsXbr8dvvgi6khERFIunolfE7WJSIzFO/GrZ4+IxJASv4hIzCjxi4jETCTdOc2sLfB7oD/gwJXuPjdlARxzDGzeDK1bp+yUIiJNRVT9+O8HXnL3C8zsEKBVSs/evDnk5qb0lCIiTUXKq3rMrA1wCvAIgLt/6e4bUx0Ht90Gf/pTyk8rIhK1KOr4ewAlwB/MbIGZ/d7M9qhzMbOrzKzIzIpKSkoaP4onn4S//KXx/66ISBMXReLPAr4O/Mbdjwc+B27a/U3u/rC757t7fsdkLJXYoYMad0UklqJI/MVAsbu/lXj9DOFGkFqaoVNEYqpOid/MWptZs8Tzr5rZN80s+2BO6O6fAmvMrHdi1+nA0oP5Ww2ixC8iMVXXEn8h0MLMugAvA5cCf2zAea8DHjezd4GBwM8a8LcOTl4eVFZqojYRiZ26Jn5z9y+A/wQedPfRQL+DPam7L0zU3w9w93PdfcPB/q2DdscdUFICZik/tYhIlOqc+M3sRGAM8GJiX/PkhJQiSvgiElN1Tfw3AD8CZrj7EjPrCbyWvLBSYMkSuPhiWL486khERFKqTonf3ee4+zfd/a5EI2+pu49LcmzJtXlz6Mu/cmXUkYiIpFRde/U8YWaHJQZaLQaWmtmE5IaWZJqoTURiqq5VPX3dfRNwLvA3wujbS5MWVSoo8YtITNU18Wcn+u2fCzzv7pWEWTXTV5s2YbI2JX4RiZm6Jv7fAquA1kChmR0FbEpWUClhBr17h+QvIhIjdZqW2d2nAFNq7VptZsOSE1IKLVkSdQQiIilX18bdNmZ2b/VsmWZ2D6H0LyIiaaauVT1Tgc3AhYltE/CHZAWVMj//OXznO1FHISKSUnVdgetodz+/1uvbzGxhMgJKqQ8/hJkzo45CRCSl6lri32JmJ1e/MLOTgC3JCSmF8vKgrAx27Ig6EhGRlKlrif/7wJ8SyyYCbAAuS05IKZSXB1VVUF4O7dpFHY2ISErUdcqGRe7+NWAAMCCxctZpSY0sFTSIS0RiqF4rcLn7psQIXoAfJCGe1OrWDb72tTAvv4hITNS1qmdv0n9e44ICWJj+bdQiIvXRkDV303vKBhGRmNpv4jezzWa2aS/bZuDIFMWYPFu3wpAh8MgjUUciIpIy+63qcfdDUxVIJHJyYMEC+OCDqCMREUmZhlT1pD+z0LNHvXpEJEbinfhBiV9EYkeJX4lfRGKmId05M8OQIUr8IhIrSvyTJ0cdgYhISqmqR0QkZpT4p00LUzeoukdEYkKJ3x2Ki5X4RSQ2lPg1Q6eIxIwSvxK/iMRMZInfzJqb2QIz+0tUMQBK/CISO1GW+K8HlkV4/iAvD849F7p0iToSEZGUiKQfv5l1Bc4BJhP1gi6tWsGMGZGGICKSSlGV+H8J3Ajsc5VzM7vKzIrMrKikpCR1kYmIZLiUJ34zGwmsd/d5+3ufuz/s7vnunt+xY8fkBnXGGaG6R0QkBqIo8Z8EfNPMVgFPAqeZ2WMRxFGjWTNYty7SEEREUiXlid/df+TuXd29O3AR8Kq7X5LqOHahGTpFJEbUjx+U+EUkViKdndPdZwOzo4wBCIl/40aorITs7KijERFJKpX4IczJP3ZsSPwiIhlO8/EDDB8eNhGRGFCJv1plJWzdGnUUIiJJp8QPsH49tG0Ll14adSQiIkmnxA/QqRPcdBM88wy88krU0YiIJJUSf7UJE6BnT7juOvjyy6ijERFJGiX+ai1awP33w/Ll4VFEJEMp8dc2cmTYlkU/W7SISLKoO+funnkGcnKijkJEJGlU4t9dddJftgzm7XcCURGRtKQS/97s2AGjRoWbwPz5msZBRDKKSvx706wZ3H03LF4MDz4YdTQiIo1KiX9fRo2Cs86CW2+FTz+NOhoRkUajxL8vZjBlSpjGYeLEqKMREWk0quPfn1694Ec/CsnfPdwMRETSnBL/gUyaFHUEIiKNSlU9dfX3v8OTT0YdhYhIgynx14U73HUXXHMNlJREHY2ISIMo8deFGfzqV1BREWbxFBFJY0r8dXXssXDDDTB1Krz1VtTRiIgcNCX++rj1VjjySBgzBrZsiToaEZGDol499XHoofDss6HE37Jl1NGIiBwUlfjr64QTYNy48PzVV+Haa2HbtmhjEhGpByX+hpg7Fx54AE49FdasiToaEZE6UeJviFtuCfP3L10KX/86zJoVdUQiIgekxN9Q558P77wTFmwfPlw9fkSkyVPjbmPo3Tsk/KlTYfDgsE9z+4hIE6USf2PJzQ2NvmawYgUMGQLvvht1VCIie1DiT4aystDYO2QI3HgjrF0bdUQiIjulPPGbWTcze83MlprZEjO7PtUxJN2gQWHJxnPPhXvuge7dw6hfEZEmIIoS/3bg/7l7X2AIMNbM+kYQR3J17gxPPAH//CdccUVYzrHa++9HF5eIxF7KE7+7r3X3+Ynnm4FlQJdUx5EyRx8NDz0E994bXr/+OvTpAyNGQGFhaAQWEUmhSOv4zaw7cDywRx9IM7vKzIrMrKgkk6ZCPu44mDwZiorCwK+TT4YXXoAdO6KOTERiwjyiEqeZ5QJzgMnu/uz+3pufn+9FRUWpCSxVtmwJ3T9//vMw3fPq1dC6ddRRiUgGMbN57p6/+/5ISvxmlg1MBx4/UNLPWC1bwtix8MEHMHt2SPrbt4eF3TX9g4gkURS9egx4BFjm7vem+vxNTnY29O8fni9cCFOmhAFhP/2ppn4WkaSIosR/EnApcJqZLUxsIyKIo+nJz4fly2HkSPjJT8LiL888owZgEWlUkdXx10dG1vEfyOzZcP31UFkJixaFXwYiIvXQpOr4pQ4KCmDePHjppZD0N20Ko4DLyqKOTETSnBJ/U5aVBV/5Snj+2mthLECvXnDbbVBaGm1sIpK2lPjTxahRofH35JNh0qRwQ7juOqiqijoyEUkzSvzppH9/eP55WLIELroIPvoImjcPx1avjjY2EUkbSvzpqG/fMPhrxozwevXqMDXEGWfAzJnqBSQi+6XEn86qJ35r1w7uuAOWLYOzzoLjj4fHHw89gkREdqPEnwkOOwwmTAgLwEydGhL+5ZfDp5+G4+Xl+hUgIjsp8WeSnJwwBfR774V1gLt1C/vPOw/69YOf/QxWrYo0RBGJnhJ/JmrWDAYOrHn97W9DXh7ccgv06AH/9m/w3HPRxScikVLij4PvfS/M/b9yZZgSurS0phfQ5s3w1FNhsNjq1WGmUFULiWQ0TdkQR+6h/39WFjz2GFx66a7HDzkEXn0VTjopPD74IHToELZjjoFTTgm9iMyiiV9E6mRfUzZkRRGMRMwsJH0I4wF69QoNwWVl4ddAWVnNiO
ENG2Dp0rCvrKxmwNjSpWESufffD43JffvuurykiDRZSvxxl5UFJ5yw7+Pnnx82CKuELV8Ob74Zlo+EsJDMI49A+/ZhVPEpp4Q2hEGD9ItApIlSVY80zOrVYSbR118P7QgffBCqgT78MBy/995QtdS/f+hZ1KWLbggiKaKqHkmOo46Cyy4LG8DatVBcXHP8scdgwYKa123awHe+ExacAfi//wtVRm3apC5mkZhT4pfGdcQRYas2f35oN1iyBBYvDo/HHBOOVVaGqqFmzeA//gMuuQTOPjs0LotI0qiqR6JTWRmqiV58EaZNg/XrQ1vB1KlhNlIRaRBV9UjTk50NZ54Ztl/8Al55JVQN9e4djr/2WuhOOmZMTWPygWzfXtNjac0a+PLLXccltGwZ2hkgjGLOyoKuXRvtkkTSgfrfSdOQlRUmmHvssZokP3dumGbi2GNDL6E77oBHH635zIQJYaWyAQNC8m7VCk47reb42WeHaqVevWq2K6+sOX7qqWFai3//99DWIBITKvFL03XzzWHuoSefDDeEm28OPYaqG5I3bgxdTI8+OtwY2rev+bUA4aZRXh6eV/ckqt3+8MtfhjaH+++HE08MN4q77w49kA7Wtm1hwNthh8Hhh0PnzuHx8MPVdiFNhur4JX2UloZSfatWjft3Kyrg178OYxL+8pdwE6iqqlnk5kA++CBMif3Nb4Zqpd69w77aLr4YnngiPB8xItwYOneGr341jHvo108D4KTRqY5f0l9eXnL+bm4u3HQTXH99aAMAGDs2jGaeNGnXCe+qrVkT5jiaNi30XGrfHtatC1VW8+fDZ5+Fz69bFx6POip8rrIyzI/04Yeh62tFRdg/YUL4tVFZCW+/Dfn5YbZVkSRQ4hepVp30Abp3D1VMxx8fprWeNCm0JQDcdx/84Afh+aBBcM898K1v1TQq5+aGrXrai9qys8NgNwi/DlatCq/79Qv75s8PI6BzcmDw4PBr4OSTw2Nubqi6Ki6Gzz8P2xdfhMcRI8LxuXPhjTfCL4ra26BB4W9u2xZ+WWRnJ+EfUNKFqnpE9mXjxtAOcN99sGlT6GE0bFgYkPbii2Geo+oxCY1l0yaYNQv+8Y9wQ5g/P1Q7zZoVGq4ffhiuvnrPz1XPnVT7plTbJ5+E9o3bbgs3sZYtww2hU6fQ/jBjRrhx/OMfYRbX2u0TeXk11V6bN4ebT0VFzc3HPTSUQ/h3MQvTfx91VONXy0m97KuqR4lf5EA2bIAHHghVPiNHpvbcn38eehydeGJIov/6FxQVQevWu27HHBNK9FVVsGVLuIGUl4fHTZtC76fqXxtz5oR9GzaEsRMlJSHhN2sG//Vf8Pvf7xpDq1Yh0ZvBBRfA9Om7Hu/SpWa09tlnw0sv1Rzr1CkM0vvzn8PrF14IcfToEUZru4e427cPx9euDV1y3Wu2li3DDagxVFbCxx+Hf5s+fTK+Ok2JX0QObPPmkHzXratpn9iyJbRBALz8Mnz00a43nTZtQpsEhBvJhx+GKqzqrX17uPPOcLxXr5p5nKqNHBluCBB+lVQvGVpt9Gh4+unwvKAgnLNLFzjyyPCYnx+q5HbsCLGvWbPrNnIknH46vPtuuHlX57xDDgmfnTw5/F335M4jtX59+DfdsCFcw9FHQ9u2yTsfatwVkbo49NCwffWrez8+fPj+P9+pU9iGDt378dmzw81g5cqahu3qhm8IA/m2bAkJuHqr7qJbVRVi+/jj8Ktn/fqwf/z4kPhLS/ccjNeqVWivOf30cJ5bbw3vyc0Niw+98Qa0aBHe+8ILoZps6NCarV+/mmqu7dtD9d+GDTXb5s3hVxDA//xPGHT42Wfh2GefhV838+eH49/7Xs0Nrtpxx4UbUvXnv/wy3BCOPjrc1JLU00slfhFJT5WVoYSfnR1+KbjDb38bBuV17Roe27Wreym+sDC06bzxRs1N5dBDQym9ZUu44YYw5qM2s3BDaNYsHJ8+PfzCadcuPHbuHMZ1QLjplZaGY5s2hWq7Zs1q2mQGDAjrZVfLyYFf/SpUvx0kVfWIiNSFe/hF8sYboZfUHXeE6qzZs0PpvF27msTerl34RdIYJfPt20M12r/+BStWhMfzzgvtOwepSSV+MzsLuB9oDvze3e/c3/uV+EVE6m9fiT/lQwXNrDnwAHA20Be42Mz6pjoOEZG4imKM+GDgQ3df4e5fAk8CmoNXRCRFokj8XYA1tV4XJ/aJiEgKNNlZoczsKjMrMrOikpKSqMMREckYUST+j4FutV53Tezbhbs/7O757p7fsWPHlAUnIpLpokj87wC9zKyHmR0CXAQ8H0EcIiKxlPKRu+6+3cyuBWYSunNOdfclqY5DRCSuIpmywd3/Cvw1inOLiMRdWozcNbMSYPVuu/OA0gjCSZZMux7IvGvS9TR9mXZNDb2eo9x9j0bStEj8e2NmRXsbkZauMu16IPOuSdfT9GXaNSXreppsd04REUkOJX4RkZhJ58T/cNQBNLJMux7IvGvS9TR9mXZNSbmetK3jFxGRg5POJX4RETkISvwiIjGTdonfzM4ys/fN7EMzuynqeBqDma0ys/fMbKGZpd2KM2Y21czWm9niWvvam9nfzeyDxGO7KGOsr31c0yQz+zjxPS00sxFRxlgfZtbNzF4zs6VmtsTMrk/sT8vvaT/Xk87fUQsze9vMFiWu6bbE/h5m9lYi5z2VmOqmYedKpzr+xCIu/wTOJEzn/A5wsbsvjTSwBjKzVUC+u6flwBMzOwWoAP7k7v0T++4GPnP3OxM36HbuPjHKOOtjH9c0Cahw919EGdvBMLMjgCPcfb6ZHQrMA84FLicNv6f9XM+FpO93ZEBrd68ws2zgH8D1wA+AZ939STN7CFjk7r9pyLnSrcSvRVyaIHcvBD7bbfco4NHE80cJ/1OmjX1cU9py97XuPj/xfDOwjLAORlp+T/u5nrTlQUXiZXZic+A04JnE/kb5jtIt8WfqIi4OvGxm88zsqqiDaSSHu/vaxPNPgcOjDKYRXWtm7yaqgtKiWmR3ZtYdOB54iwz4nna7Hkjj78jMmpvZQmA98HfgX8BGd9+eeEuj5Lx0S/yZ6mR3/zphHeKxiWqGjOGhPjF96hT37TfA0cBAYC1wT7Th1J+Z5QLTgRvcfVPtY+n4Pe3letL6O3L3KncfSFinZDDQJxnnSbfEX6dFXNKNu3+ceFwPzCB84eluXaIetro+dn3E8TSYu69L/I+5A/gdafY9JeqNpwOPu/uzid1p+z3t7XrS/Tuq5u4bgdeAE4G2ZlY9k3Kj5Lx0S/wZt4iLmbVONE5hZq2B4cDi/X8qLTwPXJZ4fhnwvxHG0iiqE2TCeaTR95RoOHwEWObu99Y6lJbf076uJ82/o45m1jbxvCWhE8sywg3ggsTbGuU7SqtePQCJ7lm/pGYRl8kRh9QgZtaTUMqHsD7CE+l2TWY2DSggTCG7DvgJ8BzwNPAVwpTaF7p72jSW7uOaCghVCA6sAq6uVT/epJnZycDrwHvAjsTumwn14mn3Pe3nei4mfb+jAYTG2+aEQvnT7v7TRI54EmgPL
AAucfdtDTpXuiV+ERFpmHSr6hERkQZS4hcRiRklfhGRmFHiFxGJGSV+EZGYUeIXAcysqtaMjgsbc+ZXM+tee5ZPkahlHfgtIrGwJTFUXiTjqcQvsh+JtRLuTqyX8LaZHZPY393MXk1MBjbLzL6S2H+4mc1IzKm+yMyGJv5UczP7XWKe9ZcTIzNFIqHELxK03K2q51u1jpW7+3HArwmjxgF+BTzq7gOAx4Epif1TgDnu/jXg68CSxP5ewAPu3g/YCJyf5OsR2SeN3BUBzKzC3XP3sn8VcJq7r0hMCvapu3cws1LCQiCVif1r3T3PzEqArrWH1CemDf67u/dKvJ4IZLv77cm/MpE9qcQvcmC+j+f1UXtulSrUviYRUuIXObBv1Xqcm3j+JmF2WIAxhAnDAGYB18DORTXapCpIkbpSqUMkaJlY+ajaS+5e3aWznZm9Syi1X5zYdx3wBzObAJQAVzfiJpQAAABQSURBVCT2Xw88bGbfJZTsryEsCCLSZKiOX2Q/EnX8+e5eGnUsIo1FVT0iIjGjEr+ISMyoxC8iEjNK/CIiMaPELyISM0r8IiIxo8QvIhIz/x9On3wOiMO+jAAAAABJRU5ErkJggg==", 608 | "text/plain": [ 609 | "
" 610 | ] 611 | }, 612 | "metadata": { 613 | "needs_background": "light" 614 | }, 615 | "output_type": "display_data" 616 | } 617 | ], 618 | "source": [ 619 | "\n", 620 | "epoch_count = range(1, len(total_training_loss) + 1)\n", 621 | "\n", 622 | "# Visualize loss history\n", 623 | "plt.plot(epoch_count, total_training_loss, 'r--')\n", 624 | "plt.legend(['Training Loss', 'Test Loss'])\n", 625 | "plt.xlabel('Epoch')\n", 626 | "plt.ylabel('Loss')\n", 627 | "plt.show();" 628 | ] 629 | } 630 | ], 631 | "metadata": { 632 | "accelerator": "GPU", 633 | "colab": { 634 | "collapsed_sections": [], 635 | "machine_shape": "hm", 636 | "name": "LeNet_From_Scratch.ipynb", 637 | "provenance": [] 638 | }, 639 | "interpreter": { 640 | "hash": "36cf16204b8548560b1c020c4e8fb5b57f0e4c58016f52f2d4be01e192833930" 641 | }, 642 | "kernelspec": { 643 | "display_name": "Python 3.9.10 64-bit", 644 | "language": "python", 645 | "name": "python3" 646 | }, 647 | "language_info": { 648 | "codemirror_mode": { 649 | "name": "ipython", 650 | "version": 3 651 | }, 652 | "file_extension": ".py", 653 | "mimetype": "text/x-python", 654 | "name": "python", 655 | "nbconvert_exporter": "python", 656 | "pygments_lexer": "ipython3", 657 | "version": "3.9.10" 658 | }, 659 | "orig_nbformat": 4 660 | }, 661 | "nbformat": 4, 662 | "nbformat_minor": 0 663 | } 664 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## This Repo has all the Deep Learning Paper Implementations codes for my [YouTube Channel](https://www.youtube.com/channel/UC0_a8SNpTFkmVv5SLMs1CIA/featured) 2 | 3 | [yt_cover]: /assets/Youtube_Cover.jpg 4 | 5 | [![Youtube Link][yt_cover]](https://www.youtube.com/channel/UC0_a8SNpTFkmVv5SLMs1CIA/videos) 6 | 7 | ### Author: Rohan Paul 8 | 9 | #### Find me here.. 10 | 11 | - 🐦 TWITTER: https://twitter.com/rohanpaul_ai 12 | - ​👨‍🔧​ Kaggle: https://www.kaggle.com/paulrohan2020 13 | - 👨🏻‍💼 LINKEDIN: https://www.linkedin.com/in/rohan-paul-b27285129/ 14 | - 👨‍💻 GITHUB: https://github.com/rohan-paul 15 | - 🤖: My Website and Blog: https://rohan-paul-ai.netlify.app/ 16 | - 🧑‍🦰 Facebook Page: https://www.facebook.com/Computer-Vision-with-Rohan-Paul-109348958325690 17 | - 📸 Instagram: https://www.instagram.com/rohan_paul_2020/ 18 | 19 | --- 20 | 21 | [logo]: https://raw.githubusercontent.com/rohan-paul/MachineLearning-DeepLearning-Code-for-my-Youtube-Channel/master/assets/yt_logo.png 22 | 23 | -------------------------------------------------------------------------------- /ResNet56_PyTorch.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## ResNet Paper Implementation from Scratch with PyTorch on CIFAR-10 Dataset\n", 8 | "\n", 9 | "# [Link to my Youtube Video Explaining this whole Notebook](https://www.youtube.com/watch?v=P8U1VL93jzA&list=PLxqBkZuBynVRX6QExfPyzRGj5Ap_zmcAJ&index=6)\n", 10 | "\n", 11 | "[![Imgur](https://imgur.com/NhVhb4u.png)](https://www.youtube.com/watch?v=P8U1VL93jzA&list=PLxqBkZuBynVRX6QExfPyzRGj5Ap_zmcAJ&index=6)\n", 12 | "\n", 13 | "---\n", 14 | "\n", 15 | "The below comments are taken from the original paper from implementing ResNet on CIFAR-10 Dataset. 
I follow this structure in this implementation of ResNet on CIFAR-10.\n", 16 |     "\n", 17 |     "\"We conducted more studies on the CIFAR-10 dataset, which consists of 50k training images and 10k testing images in 10 classes.\n", 18 |     "\n", 19 |     "The network inputs are 32×32 images, with the per-pixel mean subtracted. The first layer is 3×3 convolutions. Then we use a stack of 6n layers with 3×3 convolutions on the feature maps of sizes {32, 16, 8} respectively, with 2n layers for each feature map size. The numbers of filters are {16, 32, 64} respectively.\n", 20 |     "\n", 21 |     "The subsampling is performed by convolutions with a stride of 2. The network ends with a global average pooling, a 10-way fully-connected layer, and softmax.\n", 22 |     "\n", 23 |     "There are in total 6n+2 stacked weighted layers.\n", 24 |     "\n", 25 |     "We compare n = {3, 5, 7, 9}, leading to 20-, 32-, 44-, and 56-layer networks.\n", 26 |     "\n", 27 |     "When shortcut connections are used, they are connected to the pairs of 3×3 layers (a total of 3n shortcuts).\n", 28 |     "\n", 29 |     "On this dataset we use identity shortcuts in all cases (i.e., option A), so our residual models have exactly the same depth, width, and number of parameters as the plain counterparts.\"\n", 30 |     "\n", 31 |     "---\n", 32 |     "\n", 33 |     "For this example here in this file, I have used n = 9, so my ResNet blocks are [9, 9, 9].\n", 34 |     "\n", 35 |     "That is why the total number of layers is 56 (i.e. 9 * 6 + 2).\n", 36 |     "\n", 37 |     "![Imgur](https://imgur.com/ifD8qbd.png)\n", 38 |     "\n" 39 |   ] 40 | }, 41 | { 42 |   "cell_type": "code", 43 |   "execution_count": 1, 44 |   "metadata": { 45 |     "id": "9g7Qx3Sb7-tD" 46 |   }, 47 |   "outputs": [], 48 |   "source": [ 49 |     "import os\n", 50 |     "import shutil\n", 51 |     "from collections import OrderedDict\n", 52 |     "\n", 53 |     "import torch\n", 54 |     "import torch.nn as nn\n", 55 |     "import torch.nn.functional as F\n", 56 |     "import torch.optim as optim\n", 57 |     "from torchvision import transforms, datasets\n", 58 |     "from torchsummary import summary\n", 59 |     "from torch.utils.data import Dataset, DataLoader, random_split" 60 |   ] 61 | }, 62 | { 63 |   "cell_type": "code", 64 |   "execution_count": 2, 65 |   "metadata": { 66 |     "colab": { 67 |       "base_uri": "https://localhost:8080/" 68 |     }, 69 |     "id": "9mPnawhC7-tF", 70 |     "outputId": "646b4228-0910-48d0-c56a-e0cf599b801b" 71 |   }, 72 |   "outputs": [ 73 |     { 74 |       "name": "stdout", 75 |       "output_type": "stream", 76 |       "text": [ 77 |         "Mounted at /content/drive\n" 78 |       ] 79 |     } 80 |   ], 81 |   "source": [ 82 |     "from google.colab import drive\n", 83 |     "drive.mount('/content/drive')" 84 |   ] 85 | }, 86 | { 87 |   "cell_type": "code", 88 |   "execution_count": 3, 89 |   "metadata": { 90 |     "colab": { 91 |       "base_uri": "https://localhost:8080/" 92 |     }, 93 |     "id": "5ZL8T3Do8J5s", 94 |     "outputId": "81edc139-34c1-4dbe-cb6e-06d1e48ef1a6" 95 |   }, 96 |   "outputs": [ 97 |     { 98 |       "name": "stdout", 99 |       "output_type": "stream", 100 |       "text": [ 101 |         "Sat Mar  5 13:11:50 2022       \n", 102 |         "+-----------------------------------------------------------------------------+\n", 103 |         "| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |\n", 104 |         "|-------------------------------+----------------------+----------------------+\n", 105 |         "| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |\n", 106 |         "| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |\n", 107 |         "|                               |                      |               MIG M. 
|\n", 115 | "|===============================+======================+======================|\n", 116 | "| 0 Tesla P100-PCIE... Off | 00000000:00:04.0 Off | 0 |\n", 117 | "| N/A 35C P0 26W / 250W | 0MiB / 16280MiB | 0% Default |\n", 118 | "| | | N/A |\n", 119 | "+-------------------------------+----------------------+----------------------+\n", 120 | " \n", 121 | "+-----------------------------------------------------------------------------+\n", 122 | "| Processes: |\n", 123 | "| GPU GI CI PID Type Process name GPU Memory |\n", 124 | "| ID ID Usage |\n", 125 | "|=============================================================================|\n", 126 | "| No running processes found |\n", 127 | "+-----------------------------------------------------------------------------+\n" 128 | ] 129 | } 130 | ], 131 | "source": [ 132 | "!nvidia-smi" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": 4, 138 | "metadata": { 139 | "id": "pwngtd6i7-tL" 140 | }, 141 | "outputs": [], 142 | "source": [ 143 | "class LambdaLayer(nn.Module):\n", 144 | " \n", 145 | " def __init__(self, lambd):\n", 146 | " super(LambdaLayer, self).__init__()\n", 147 | " self.lambd = lambd\n", 148 | " \n", 149 | " def forward(self, x):\n", 150 | " return self.lambd(x)\n", 151 | "\n", 152 | "class BasicConvBlock(nn.Module):\n", 153 | " \n", 154 | " ''' The BasicConvBlock takes an input with in_channels, applies some blocks of convolutional layers \n", 155 | " to reduce it to out_channels and sum it up to the original input. \n", 156 | " If their sizes mismatch, then the input goes into an identity. \n", 157 | " \n", 158 | " Basically The BasicConvBlock will implement the regular basic Conv Block + \n", 159 | " the shortcut block that does the dimension matching job (option A or B) when dimension changes between 2 blocks\n", 160 | " '''\n", 161 | " \n", 162 | " def __init__(self, in_channels, out_channels, stride=1, option='A'):\n", 163 | " super(BasicConvBlock, self).__init__()\n", 164 | " \n", 165 | " self.features = nn.Sequential(OrderedDict([\n", 166 | " ('conv1', nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)),\n", 167 | " ('bn1', nn.BatchNorm2d(out_channels)),\n", 168 | " ('act1', nn.ReLU()),\n", 169 | " ('conv2', nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)),\n", 170 | " ('bn2', nn.BatchNorm2d(out_channels))\n", 171 | " ]))\n", 172 | "\n", 173 | " self.shortcut = nn.Sequential()\n", 174 | " \n", 175 | " ''' When input and output spatial dimensions don't match, we have 2 options, with stride:\n", 176 | " - A) Use identity shortcuts with zero padding to increase channel dimension. 
\n", 177 | " - B) Use 1x1 convolution to increase channel dimension (projection shortcut).\n", 178 | " '''\n", 179 | " if stride != 1 or in_channels != out_channels:\n", 180 | " if option == 'A':\n", 181 | " # Use identity shortcuts with zero padding to increase channel dimension.\n", 182 | " pad_to_add = out_channels//4\n", 183 | " ''' ::2 is doing the job of stride = 2\n", 184 | " F.pad apply padding to (W,H,C,N).\n", 185 | " \n", 186 | " The padding lengths are specified in reverse order of the dimensions,\n", 187 | " F.pad(x[:, :, ::2, ::2], (0,0, 0,0, pad,pad, 0,0))\n", 188 | "\n", 189 | " [width_beginning, width_end, height_beginning, height_end, channel_beginning, channel_end, batchLength_beginning, batchLength_end ]\n", 190 | "\n", 191 | " '''\n", 192 | " self.shortcut = LambdaLayer(lambda x:\n", 193 | " F.pad(x[:, :, ::2, ::2], (0,0, 0,0, pad_to_add, pad_to_add, 0,0)))\n", 194 | " if option == 'B':\n", 195 | " self.shortcut = nn.Sequential(OrderedDict([\n", 196 | " ('s_conv1', nn.Conv2d(in_channels, 2*out_channels, kernel_size=1, stride=stride, padding=0, bias=False)),\n", 197 | " ('s_bn1', nn.BatchNorm2d(2*out_channels))\n", 198 | " ]))\n", 199 | " \n", 200 | " def forward(self, x):\n", 201 | " out = self.features(x)\n", 202 | " # sum it up with shortcut layer\n", 203 | " out += self.shortcut(x)\n", 204 | " out = F.relu(out)\n", 205 | " return out" 206 | ] 207 | }, 208 | { 209 | "cell_type": "markdown", 210 | "metadata": {}, 211 | "source": [ 212 | "\n", 213 | "\n", 214 | "### Explanations on using Option A and B in below code\n", 215 | "\n", 216 | "```py\n", 217 | "\n", 218 | "if stride != 1 or in_channels != out_channels:\n", 219 | " if option == 'A':\n", 220 | " pad = out_channels//4\n", 221 | " # ::2 replace the stride 2 + F.pad apply padding to (W,H,C,N).\n", 222 | " self.shortcut = LambdaLayer(lambda x:\n", 223 | " F.pad(x[:, :, ::2, ::2], (0,0, 0,0, pad,pad, 0,0)))\n", 224 | " if option == 'B':\n", 225 | " self.shortcut = nn.Sequential(OrderedDict([\n", 226 | " ('s_conv1', nn.Conv2d(in_channels, 2*out_channels, kernel_size=1, stride=stride, padding=0, bias=False)),\n", 227 | " ('s_bn1', nn.BatchNorm2d(2*out_channels))\n", 228 | " ]))\n", 229 | "\n", 230 | "```\n", 231 | "\n", 232 | "As per the original Paper\n", 233 | "\n", 234 | "#### We use identity shortcuts when input and output channel dimensions are the same.\n", 235 | "\n", 236 | "#### Otherwise, When input and output spatial dimensions don't match, we have 2 options, with stride:\n", 237 | "\n", 238 | " - A) Use identity shortcuts with zero padding to increase channel dimension.\n", 239 | "\n", 240 | " - B) Use 1x1 convolution to increase channel dimension (projection shortcut).\n", 241 | "\n", 242 | "-----------------------\n", 243 | "\n", 244 | "### Understanding `F.pad` on a 4-D Tensor and the following line\n", 245 | "\n", 246 | "### `F.pad(x[:, :, ::2, ::2], (0,0, 0,0, pad,pad, 0,0)))`\n", 247 | "\n", 248 | "https://stackoverflow.com/a/61945903/1902852\n", 249 | "\n", 250 | "The padding lengths are specified in reverse order of the dimensions, where every dimension has two values, one for the padding at the beginning and one for the padding at the end.\n", 251 | "\n", 252 | "For an image with the dimensions `[channels, height, width]` the padding is given as:\n", 253 | "\n", 254 | "`[width_beginning, width_end, height_beginning, height_end, channels_beginning, channels_end]`,\n", 255 | "\n", 256 | "which can be reworded to\n", 257 | "\n", 258 | "`[left, right, top, bottom]`\n", 259 | "\n", 260 | "Therefore the 
code above pads the images to the right and bottom. The channels are left out, because they are not being padded, which also means that the same padding could be directly applied to the masks.\n", 261 | "\n", 262 | "So the below line means\n", 263 | "\n", 264 | "`F.pad(x[:, :, ::2, ::2], (0,0, 0,0, pad,pad, 0,0))`\n", 265 | "\n", 266 | "\n", 267 | "`[width_beginning, width_end, height_beginning, height_end, channel_beginning, channel_end, batchLength_beginning, batchLength_end ]`" 268 | ] 269 | }, 270 | { 271 | "cell_type": "code", 272 | "execution_count": 5, 273 | "metadata": { 274 | "id": "6jvgyHXI7-tM" 275 | }, 276 | "outputs": [], 277 | "source": [ 278 | "\n", 279 | "class ResNet(nn.Module):\n", 280 | " \"\"\"\n", 281 | " ResNet-56 architecture for CIFAR-10 Dataset of shape 32*32*3\n", 282 | " \"\"\"\n", 283 | " def __init__(self, block_type, num_blocks):\n", 284 | " super(ResNet, self).__init__()\n", 285 | " \n", 286 | " self.in_channels = 16\n", 287 | " \n", 288 | " self.conv0 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False)\n", 289 | " self.bn0 = nn.BatchNorm2d(16)\n", 290 | " \n", 291 | " self.block1 = self.__build_layer(block_type, 16, num_blocks[0], starting_stride=1)\n", 292 | " \n", 293 | " self.block2 = self.__build_layer(block_type, 32, num_blocks[1], starting_stride=2)\n", 294 | " \n", 295 | " self.block3 = self.__build_layer(block_type, 64, num_blocks[2], starting_stride=2)\n", 296 | " \n", 297 | " self.avgpool = nn.AdaptiveAvgPool2d((1,1))\n", 298 | " self.linear = nn.Linear(64, 10)\n", 299 | " \n", 300 | " def __build_layer(self, block_type, out_channels, num_blocks, starting_stride):\n", 301 | " \n", 302 | " strides_list_for_current_block = [starting_stride] + [1]*(num_blocks-1)\n", 303 | " ''' Above line will generate an array whose first element is starting_stride\n", 304 | " And it will have (num_blocks-1) more elements each of value 1\n", 305 | " '''\n", 306 | " # print('strides_list_for_current_block ', strides_list_for_current_block)\n", 307 | " \n", 308 | " layers = []\n", 309 | " \n", 310 | " for stride in strides_list_for_current_block:\n", 311 | " layers.append(block_type(self.in_channels, out_channels, stride))\n", 312 | " self.in_channels = out_channels\n", 313 | " \n", 314 | " return nn.Sequential(*layers)\n", 315 | " \n", 316 | " def forward(self, x):\n", 317 | " out = F.relu(self.bn0(self.conv0(x)))\n", 318 | " out = self.block1(out)\n", 319 | " out = self.block2(out) \n", 320 | " out = self.block3(out)\n", 321 | " out = self.avgpool(out)\n", 322 | " out = torch.flatten(out, 1)\n", 323 | " out = self.linear(out)\n", 324 | " return out" 325 | ] 326 | }, 327 | { 328 | "cell_type": "markdown", 329 | "metadata": {}, 330 | "source": [ 331 | "### _build_layer() method\n", 332 | "\n", 333 | "In ResNet Every layer downsamples the input at the start using stride equals to 2 i.e for 1st convolutional layer in 1st block of a layer.\n", 334 | "\n", 335 | "If we look at the first operation of each layer, we see that the stride used at that first one is 2, instead of 1 like for the rest of them.\n", 336 | "\n", 337 | "This is because, here in ResNet, reduction between layers is achieved by an increase on the stride, from 1 to 2, at the first convolution of each layer; instead of by a pooling operation, which we are used to see as down samplers.\n", 338 | "\n", 339 | "Quoting from Paper\n", 340 | "\n", 341 | "\" For both options, when the shortcuts go across feature maps of two sizes, they are performed with a stride of 2.\"" 342 | ] 343 | }, 344 | { 
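To make the option-A shortcut concrete, the sketch below (not part of the notebook cells) traces the zero-padding identity shortcut on a toy tensor. Assumption: in_channels=16, out_channels=32, so `pad_to_add = 32 // 4 = 8` channels are added on each side of the channel dimension while `::2` halves the spatial dimensions.

```python
import torch
import torch.nn.functional as F

x = torch.randn(4, 16, 32, 32)          # [batch, channels, height, width]
pad_to_add = 32 // 4                     # 8 zero channels before and after -> 16 + 8 + 8 = 32
shortcut = F.pad(x[:, :, ::2, ::2],      # stride-2 spatial subsampling via slicing
                 (0, 0, 0, 0, pad_to_add, pad_to_add, 0, 0))
print(shortcut.shape)                    # torch.Size([4, 32, 16, 16]) -> matches the conv branch
```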
345 | "cell_type": "code", 346 | "execution_count": 6, 347 | "metadata": { 348 | "id": "pnvR0RuC7-tN" 349 | }, 350 | "outputs": [], 351 | "source": [ 352 | "def ResNet56():\n", 353 | " return ResNet(block_type=BasicConvBlock, num_blocks=[9,9,9])" 354 | ] 355 | }, 356 | { 357 | "cell_type": "code", 358 | "execution_count": 7, 359 | "metadata": { 360 | "colab": { 361 | "base_uri": "https://localhost:8080/" 362 | }, 363 | "id": "47k9cNnn7-tN", 364 | "outputId": "9832007a-4e1f-4c88-c4c0-931f0af67bf6" 365 | }, 366 | "outputs": [ 367 | { 368 | "name": "stdout", 369 | "output_type": "stream", 370 | "text": [ 371 | "----------------------------------------------------------------\n", 372 | " Layer (type) Output Shape Param #\n", 373 | "================================================================\n", 374 | " Conv2d-1 [-1, 16, 32, 32] 432\n", 375 | " BatchNorm2d-2 [-1, 16, 32, 32] 32\n", 376 | " Conv2d-3 [-1, 16, 32, 32] 2,304\n", 377 | " BatchNorm2d-4 [-1, 16, 32, 32] 32\n", 378 | " ReLU-5 [-1, 16, 32, 32] 0\n", 379 | " Conv2d-6 [-1, 16, 32, 32] 2,304\n", 380 | " BatchNorm2d-7 [-1, 16, 32, 32] 32\n", 381 | " BasicConvBlock-8 [-1, 16, 32, 32] 0\n", 382 | " Conv2d-9 [-1, 16, 32, 32] 2,304\n", 383 | " BatchNorm2d-10 [-1, 16, 32, 32] 32\n", 384 | " ReLU-11 [-1, 16, 32, 32] 0\n", 385 | " Conv2d-12 [-1, 16, 32, 32] 2,304\n", 386 | " BatchNorm2d-13 [-1, 16, 32, 32] 32\n", 387 | " BasicConvBlock-14 [-1, 16, 32, 32] 0\n", 388 | " Conv2d-15 [-1, 16, 32, 32] 2,304\n", 389 | " BatchNorm2d-16 [-1, 16, 32, 32] 32\n", 390 | " ReLU-17 [-1, 16, 32, 32] 0\n", 391 | " Conv2d-18 [-1, 16, 32, 32] 2,304\n", 392 | " BatchNorm2d-19 [-1, 16, 32, 32] 32\n", 393 | " BasicConvBlock-20 [-1, 16, 32, 32] 0\n", 394 | " Conv2d-21 [-1, 16, 32, 32] 2,304\n", 395 | " BatchNorm2d-22 [-1, 16, 32, 32] 32\n", 396 | " ReLU-23 [-1, 16, 32, 32] 0\n", 397 | " Conv2d-24 [-1, 16, 32, 32] 2,304\n", 398 | " BatchNorm2d-25 [-1, 16, 32, 32] 32\n", 399 | " BasicConvBlock-26 [-1, 16, 32, 32] 0\n", 400 | " Conv2d-27 [-1, 16, 32, 32] 2,304\n", 401 | " BatchNorm2d-28 [-1, 16, 32, 32] 32\n", 402 | " ReLU-29 [-1, 16, 32, 32] 0\n", 403 | " Conv2d-30 [-1, 16, 32, 32] 2,304\n", 404 | " BatchNorm2d-31 [-1, 16, 32, 32] 32\n", 405 | " BasicConvBlock-32 [-1, 16, 32, 32] 0\n", 406 | " Conv2d-33 [-1, 16, 32, 32] 2,304\n", 407 | " BatchNorm2d-34 [-1, 16, 32, 32] 32\n", 408 | " ReLU-35 [-1, 16, 32, 32] 0\n", 409 | " Conv2d-36 [-1, 16, 32, 32] 2,304\n", 410 | " BatchNorm2d-37 [-1, 16, 32, 32] 32\n", 411 | " BasicConvBlock-38 [-1, 16, 32, 32] 0\n", 412 | " Conv2d-39 [-1, 16, 32, 32] 2,304\n", 413 | " BatchNorm2d-40 [-1, 16, 32, 32] 32\n", 414 | " ReLU-41 [-1, 16, 32, 32] 0\n", 415 | " Conv2d-42 [-1, 16, 32, 32] 2,304\n", 416 | " BatchNorm2d-43 [-1, 16, 32, 32] 32\n", 417 | " BasicConvBlock-44 [-1, 16, 32, 32] 0\n", 418 | " Conv2d-45 [-1, 16, 32, 32] 2,304\n", 419 | " BatchNorm2d-46 [-1, 16, 32, 32] 32\n", 420 | " ReLU-47 [-1, 16, 32, 32] 0\n", 421 | " Conv2d-48 [-1, 16, 32, 32] 2,304\n", 422 | " BatchNorm2d-49 [-1, 16, 32, 32] 32\n", 423 | " BasicConvBlock-50 [-1, 16, 32, 32] 0\n", 424 | " Conv2d-51 [-1, 16, 32, 32] 2,304\n", 425 | " BatchNorm2d-52 [-1, 16, 32, 32] 32\n", 426 | " ReLU-53 [-1, 16, 32, 32] 0\n", 427 | " Conv2d-54 [-1, 16, 32, 32] 2,304\n", 428 | " BatchNorm2d-55 [-1, 16, 32, 32] 32\n", 429 | " BasicConvBlock-56 [-1, 16, 32, 32] 0\n", 430 | " Conv2d-57 [-1, 32, 16, 16] 4,608\n", 431 | " BatchNorm2d-58 [-1, 32, 16, 16] 64\n", 432 | " ReLU-59 [-1, 32, 16, 16] 0\n", 433 | " Conv2d-60 [-1, 32, 16, 16] 9,216\n", 434 | " BatchNorm2d-61 [-1, 32, 16, 16] 
64\n", 435 | " LambdaLayer-62 [-1, 32, 16, 16] 0\n", 436 | " BasicConvBlock-63 [-1, 32, 16, 16] 0\n", 437 | " Conv2d-64 [-1, 32, 16, 16] 9,216\n", 438 | " BatchNorm2d-65 [-1, 32, 16, 16] 64\n", 439 | " ReLU-66 [-1, 32, 16, 16] 0\n", 440 | " Conv2d-67 [-1, 32, 16, 16] 9,216\n", 441 | " BatchNorm2d-68 [-1, 32, 16, 16] 64\n", 442 | " BasicConvBlock-69 [-1, 32, 16, 16] 0\n", 443 | " Conv2d-70 [-1, 32, 16, 16] 9,216\n", 444 | " BatchNorm2d-71 [-1, 32, 16, 16] 64\n", 445 | " ReLU-72 [-1, 32, 16, 16] 0\n", 446 | " Conv2d-73 [-1, 32, 16, 16] 9,216\n", 447 | " BatchNorm2d-74 [-1, 32, 16, 16] 64\n", 448 | " BasicConvBlock-75 [-1, 32, 16, 16] 0\n", 449 | " Conv2d-76 [-1, 32, 16, 16] 9,216\n", 450 | " BatchNorm2d-77 [-1, 32, 16, 16] 64\n", 451 | " ReLU-78 [-1, 32, 16, 16] 0\n", 452 | " Conv2d-79 [-1, 32, 16, 16] 9,216\n", 453 | " BatchNorm2d-80 [-1, 32, 16, 16] 64\n", 454 | " BasicConvBlock-81 [-1, 32, 16, 16] 0\n", 455 | " Conv2d-82 [-1, 32, 16, 16] 9,216\n", 456 | " BatchNorm2d-83 [-1, 32, 16, 16] 64\n", 457 | " ReLU-84 [-1, 32, 16, 16] 0\n", 458 | " Conv2d-85 [-1, 32, 16, 16] 9,216\n", 459 | " BatchNorm2d-86 [-1, 32, 16, 16] 64\n", 460 | " BasicConvBlock-87 [-1, 32, 16, 16] 0\n", 461 | " Conv2d-88 [-1, 32, 16, 16] 9,216\n", 462 | " BatchNorm2d-89 [-1, 32, 16, 16] 64\n", 463 | " ReLU-90 [-1, 32, 16, 16] 0\n", 464 | " Conv2d-91 [-1, 32, 16, 16] 9,216\n", 465 | " BatchNorm2d-92 [-1, 32, 16, 16] 64\n", 466 | " BasicConvBlock-93 [-1, 32, 16, 16] 0\n", 467 | " Conv2d-94 [-1, 32, 16, 16] 9,216\n", 468 | " BatchNorm2d-95 [-1, 32, 16, 16] 64\n", 469 | " ReLU-96 [-1, 32, 16, 16] 0\n", 470 | " Conv2d-97 [-1, 32, 16, 16] 9,216\n", 471 | " BatchNorm2d-98 [-1, 32, 16, 16] 64\n", 472 | " BasicConvBlock-99 [-1, 32, 16, 16] 0\n", 473 | " Conv2d-100 [-1, 32, 16, 16] 9,216\n", 474 | " BatchNorm2d-101 [-1, 32, 16, 16] 64\n", 475 | " ReLU-102 [-1, 32, 16, 16] 0\n", 476 | " Conv2d-103 [-1, 32, 16, 16] 9,216\n", 477 | " BatchNorm2d-104 [-1, 32, 16, 16] 64\n", 478 | " BasicConvBlock-105 [-1, 32, 16, 16] 0\n", 479 | " Conv2d-106 [-1, 32, 16, 16] 9,216\n", 480 | " BatchNorm2d-107 [-1, 32, 16, 16] 64\n", 481 | " ReLU-108 [-1, 32, 16, 16] 0\n", 482 | " Conv2d-109 [-1, 32, 16, 16] 9,216\n", 483 | " BatchNorm2d-110 [-1, 32, 16, 16] 64\n", 484 | " BasicConvBlock-111 [-1, 32, 16, 16] 0\n", 485 | " Conv2d-112 [-1, 64, 8, 8] 18,432\n", 486 | " BatchNorm2d-113 [-1, 64, 8, 8] 128\n", 487 | " ReLU-114 [-1, 64, 8, 8] 0\n", 488 | " Conv2d-115 [-1, 64, 8, 8] 36,864\n", 489 | " BatchNorm2d-116 [-1, 64, 8, 8] 128\n", 490 | " LambdaLayer-117 [-1, 64, 8, 8] 0\n", 491 | " BasicConvBlock-118 [-1, 64, 8, 8] 0\n", 492 | " Conv2d-119 [-1, 64, 8, 8] 36,864\n", 493 | " BatchNorm2d-120 [-1, 64, 8, 8] 128\n", 494 | " ReLU-121 [-1, 64, 8, 8] 0\n", 495 | " Conv2d-122 [-1, 64, 8, 8] 36,864\n", 496 | " BatchNorm2d-123 [-1, 64, 8, 8] 128\n", 497 | " BasicConvBlock-124 [-1, 64, 8, 8] 0\n", 498 | " Conv2d-125 [-1, 64, 8, 8] 36,864\n", 499 | " BatchNorm2d-126 [-1, 64, 8, 8] 128\n", 500 | " ReLU-127 [-1, 64, 8, 8] 0\n", 501 | " Conv2d-128 [-1, 64, 8, 8] 36,864\n", 502 | " BatchNorm2d-129 [-1, 64, 8, 8] 128\n", 503 | " BasicConvBlock-130 [-1, 64, 8, 8] 0\n", 504 | " Conv2d-131 [-1, 64, 8, 8] 36,864\n", 505 | " BatchNorm2d-132 [-1, 64, 8, 8] 128\n", 506 | " ReLU-133 [-1, 64, 8, 8] 0\n", 507 | " Conv2d-134 [-1, 64, 8, 8] 36,864\n", 508 | " BatchNorm2d-135 [-1, 64, 8, 8] 128\n", 509 | " BasicConvBlock-136 [-1, 64, 8, 8] 0\n", 510 | " Conv2d-137 [-1, 64, 8, 8] 36,864\n", 511 | " BatchNorm2d-138 [-1, 64, 8, 8] 128\n", 512 | " ReLU-139 [-1, 64, 8, 8] 0\n", 513 | " 
Conv2d-140 [-1, 64, 8, 8] 36,864\n", 514 | " BatchNorm2d-141 [-1, 64, 8, 8] 128\n", 515 | " BasicConvBlock-142 [-1, 64, 8, 8] 0\n", 516 | " Conv2d-143 [-1, 64, 8, 8] 36,864\n", 517 | " BatchNorm2d-144 [-1, 64, 8, 8] 128\n", 518 | " ReLU-145 [-1, 64, 8, 8] 0\n", 519 | " Conv2d-146 [-1, 64, 8, 8] 36,864\n", 520 | " BatchNorm2d-147 [-1, 64, 8, 8] 128\n", 521 | " BasicConvBlock-148 [-1, 64, 8, 8] 0\n", 522 | " Conv2d-149 [-1, 64, 8, 8] 36,864\n", 523 | " BatchNorm2d-150 [-1, 64, 8, 8] 128\n", 524 | " ReLU-151 [-1, 64, 8, 8] 0\n", 525 | " Conv2d-152 [-1, 64, 8, 8] 36,864\n", 526 | " BatchNorm2d-153 [-1, 64, 8, 8] 128\n", 527 | " BasicConvBlock-154 [-1, 64, 8, 8] 0\n", 528 | " Conv2d-155 [-1, 64, 8, 8] 36,864\n", 529 | " BatchNorm2d-156 [-1, 64, 8, 8] 128\n", 530 | " ReLU-157 [-1, 64, 8, 8] 0\n", 531 | " Conv2d-158 [-1, 64, 8, 8] 36,864\n", 532 | " BatchNorm2d-159 [-1, 64, 8, 8] 128\n", 533 | " BasicConvBlock-160 [-1, 64, 8, 8] 0\n", 534 | " Conv2d-161 [-1, 64, 8, 8] 36,864\n", 535 | " BatchNorm2d-162 [-1, 64, 8, 8] 128\n", 536 | " ReLU-163 [-1, 64, 8, 8] 0\n", 537 | " Conv2d-164 [-1, 64, 8, 8] 36,864\n", 538 | " BatchNorm2d-165 [-1, 64, 8, 8] 128\n", 539 | " BasicConvBlock-166 [-1, 64, 8, 8] 0\n", 540 | "AdaptiveAvgPool2d-167 [-1, 64, 1, 1] 0\n", 541 | " Linear-168 [-1, 10] 650\n", 542 | "================================================================\n", 543 | "Total params: 853,018\n", 544 | "Trainable params: 853,018\n", 545 | "Non-trainable params: 0\n", 546 | "----------------------------------------------------------------\n", 547 | "Input size (MB): 0.01\n", 548 | "Forward/backward pass size (MB): 12.16\n", 549 | "Params size (MB): 3.25\n", 550 | "Estimated Total Size (MB): 15.42\n", 551 | "----------------------------------------------------------------\n" 552 | ] 553 | } 554 | ], 555 | "source": [ 556 | "model = ResNet56()\n", 557 | "device = torch.device(\"cuda\") if torch.cuda.is_available() else torch.device(\"cpu\")\n", 558 | "# device = 'cpu'\n", 559 | "model.to(device)\n", 560 | "summary(model, (3, 32, 32))" 561 | ] 562 | }, 563 | { 564 | "cell_type": "markdown", 565 | "metadata": { 566 | "id": "QjPLm6BENFbq" 567 | }, 568 | "source": [ 569 | "## Loading CIFAR-10 Dataset" 570 | ] 571 | }, 572 | { 573 | "cell_type": "code", 574 | "execution_count": 8, 575 | "metadata": { 576 | "id": "Us3eora2NFbs" 577 | }, 578 | "outputs": [], 579 | "source": [ 580 | "def dataloader_cifar():\n", 581 | " transform = transforms.Compose([transforms.ToTensor(),\n", 582 | " transforms.Normalize(mean=[0.5], std=[0.5])])\n", 583 | " \n", 584 | " # Input Data in Local Machine\n", 585 | " # train_dataset = datasets.CIFAR10('../input_data', train=True, download=True, transform=transform)\n", 586 | " # test_dataset = datasets.CIFAR10('../input_data', train=False, download=True, transform=transform)\n", 587 | " \n", 588 | " # Input Data in Google Drive\n", 589 | " train_dataset = datasets.CIFAR10('/content/drive/MyDrive/All_Datasets/CIFAR10', train=True, download=True, transform=transform)\n", 590 | " test_dataset = datasets.CIFAR10('/content/drive/MyDrive/All_Datasets/CIFAR10', train=False, download=True, transform=transform)\n", 591 | "\n", 592 | " # Split dataset into training set and validation set.\n", 593 | " train_dataset, val_dataset = random_split(train_dataset, (45000, 5000))\n", 594 | " \n", 595 | " print(\"Image shape of a random sample image : {}\".format(train_dataset[0][0].numpy().shape), end = '\\n\\n')\n", 596 | " \n", 597 | " print(\"Training Set: {} images\".format(len(train_dataset)))\n", 
598 | " print(\"Validation Set: {} images\".format(len(val_dataset)))\n", 599 | " print(\"Test Set: {} images\".format(len(test_dataset)))\n", 600 | " \n", 601 | " BATCH_SIZE = 32\n", 602 | "\n", 603 | " # Generate dataloader\n", 604 | " train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)\n", 605 | " val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=True)\n", 606 | " test_loader = DataLoader(test_dataset, batch_size=10000, shuffle=True)\n", 607 | " \n", 608 | " return train_loader, val_loader, test_loader" 609 | ] 610 | }, 611 | { 612 | "cell_type": "code", 613 | "execution_count": 9, 614 | "metadata": { 615 | "colab": { 616 | "base_uri": "https://localhost:8080/" 617 | }, 618 | "id": "4E--f56MNFbs", 619 | "outputId": "771fcf9c-e61d-4613-cab6-0e1a97844207" 620 | }, 621 | "outputs": [ 622 | { 623 | "name": "stdout", 624 | "output_type": "stream", 625 | "text": [ 626 | "Files already downloaded and verified\n", 627 | "Files already downloaded and verified\n", 628 | "Image shape of a random sample image : (3, 32, 32)\n", 629 | "\n", 630 | "Training Set: 45000 images\n", 631 | "Validation Set: 5000 images\n", 632 | "Test Set: 10000 images\n" 633 | ] 634 | } 635 | ], 636 | "source": [ 637 | "train_loader, val_loader, test_loader = dataloader_cifar()" 638 | ] 639 | }, 640 | { 641 | "cell_type": "markdown", 642 | "metadata": { 643 | "id": "tOv_8vpcNFbt" 644 | }, 645 | "source": [ 646 | "## Start Actual Training" 647 | ] 648 | }, 649 | { 650 | "cell_type": "code", 651 | "execution_count": 10, 652 | "metadata": { 653 | "id": "ICxcakbK8DRP" 654 | }, 655 | "outputs": [], 656 | "source": [ 657 | "criterion = nn.CrossEntropyLoss()\n", 658 | "optimizer = optim.Adam(model.parameters(), lr=0.01)" 659 | ] 660 | }, 661 | { 662 | "cell_type": "code", 663 | "execution_count": 15, 664 | "metadata": { 665 | "id": "sB_vMJgl8Ei5" 666 | }, 667 | "outputs": [], 668 | "source": [ 669 | "def train_model():\n", 670 | " EPOCHS = 15\n", 671 | " train_samples_num = 45000\n", 672 | " val_samples_num = 5000\n", 673 | " train_costs, val_costs = [], []\n", 674 | " \n", 675 | " #Training phase. \n", 676 | " for epoch in range(EPOCHS):\n", 677 | "\n", 678 | " train_running_loss = 0\n", 679 | " correct_train = 0\n", 680 | " \n", 681 | " model.train().cuda()\n", 682 | " \n", 683 | " for inputs, labels in train_loader:\n", 684 | " inputs, labels = inputs.to(device), labels.to(device)\n", 685 | " \n", 686 | " \"\"\" for every mini-batch during the training phase, we typically want to explicitly set the gradients \n", 687 | " to zero before starting to do backpropragation \"\"\"\n", 688 | " optimizer.zero_grad()\n", 689 | " \n", 690 | " # Start the forward pass\n", 691 | " prediction = model(inputs)\n", 692 | " \n", 693 | " loss = criterion(prediction, labels)\n", 694 | " \n", 695 | " # do backpropagation and update weights with step()\n", 696 | " loss.backward() \n", 697 | " optimizer.step()\n", 698 | " \n", 699 | " # print('outputs on which to apply torch.max ', prediction)\n", 700 | " # find the maximum along the rows, use dim=1 to torch.max()\n", 701 | " _, predicted_outputs = torch.max(prediction.data, 1)\n", 702 | " \n", 703 | " # Update the running corrects \n", 704 | " correct_train += (predicted_outputs == labels).float().sum().item()\n", 705 | " \n", 706 | " ''' Compute batch loss\n", 707 | " multiply each average batch loss with batch-length. \n", 708 | " The batch-length is inputs.size(0) which gives the number total images in each batch. 
\n", 709 | " Essentially I am un-averaging the previously calculated Loss '''\n", 710 | " train_running_loss += (loss.data.item() * inputs.shape[0])\n", 711 | "\n", 712 | "\n", 713 | " train_epoch_loss = train_running_loss / train_samples_num\n", 714 | " \n", 715 | " train_costs.append(train_epoch_loss)\n", 716 | " \n", 717 | " train_acc = correct_train / train_samples_num\n", 718 | "\n", 719 | " # Now check trained weights on the validation set\n", 720 | " val_running_loss = 0\n", 721 | " correct_val = 0\n", 722 | " \n", 723 | " model.eval().cuda()\n", 724 | " \n", 725 | " with torch.no_grad():\n", 726 | " for inputs, labels in val_loader:\n", 727 | " inputs, labels = inputs.to(device), labels.to(device)\n", 728 | "\n", 729 | " # Forward pass.\n", 730 | " prediction = model(inputs)\n", 731 | "\n", 732 | " # Compute the loss.\n", 733 | " loss = criterion(prediction, labels)\n", 734 | "\n", 735 | " # Compute validation accuracy.\n", 736 | " _, predicted_outputs = torch.max(prediction.data, 1)\n", 737 | " correct_val += (predicted_outputs == labels).float().sum().item()\n", 738 | "\n", 739 | " # Compute batch loss.\n", 740 | " val_running_loss += (loss.data.item() * inputs.shape[0])\n", 741 | "\n", 742 | " val_epoch_loss = val_running_loss / val_samples_num\n", 743 | " val_costs.append(val_epoch_loss)\n", 744 | " val_acc = correct_val / val_samples_num\n", 745 | " \n", 746 | " info = \"[Epoch {}/{}]: train-loss = {:0.6f} | train-acc = {:0.3f} | val-loss = {:0.6f} | val-acc = {:0.3f}\"\n", 747 | " \n", 748 | " print(info.format(epoch+1, EPOCHS, train_epoch_loss, train_acc, val_epoch_loss, val_acc))\n", 749 | " \n", 750 | " torch.save(model.state_dict(), '/content/checkpoint_gpu_{}'.format(epoch + 1)) \n", 751 | " \n", 752 | " torch.save(model.state_dict(), '/content/resnet-56_weights_gpu') \n", 753 | " \n", 754 | " return train_costs, val_costs\n", 755 | "\n", 756 | " " 757 | ] 758 | }, 759 | { 760 | "cell_type": "code", 761 | "execution_count": 16, 762 | "metadata": { 763 | "colab": { 764 | "base_uri": "https://localhost:8080/" 765 | }, 766 | "id": "JJaEBgbt9INy", 767 | "outputId": "56270746-ac9e-4ffd-b481-b3d0b901ca82" 768 | }, 769 | "outputs": [ 770 | { 771 | "name": "stdout", 772 | "output_type": "stream", 773 | "text": [ 774 | "[Epoch 1/15]: train-loss = 0.876177 | train-acc = 0.692 | val-loss = 0.001271 | val-acc = 0.734\n", 775 | "[Epoch 2/15]: train-loss = 0.694989 | train-acc = 0.759 | val-loss = 0.002828 | val-acc = 0.769\n", 776 | "[Epoch 3/15]: train-loss = 0.580110 | train-acc = 0.800 | val-loss = 0.000333 | val-acc = 0.778\n", 777 | "[Epoch 4/15]: train-loss = 0.492105 | train-acc = 0.829 | val-loss = 0.001201 | val-acc = 0.780\n", 778 | "[Epoch 5/15]: train-loss = 0.416747 | train-acc = 0.854 | val-loss = 0.001592 | val-acc = 0.810\n", 779 | "[Epoch 6/15]: train-loss = 0.351747 | train-acc = 0.877 | val-loss = 0.000720 | val-acc = 0.784\n", 780 | "[Epoch 7/15]: train-loss = 0.297035 | train-acc = 0.896 | val-loss = 0.001334 | val-acc = 0.794\n", 781 | "[Epoch 8/15]: train-loss = 0.248202 | train-acc = 0.912 | val-loss = 0.000497 | val-acc = 0.823\n", 782 | "[Epoch 9/15]: train-loss = 0.206233 | train-acc = 0.927 | val-loss = 0.001534 | val-acc = 0.814\n", 783 | "[Epoch 10/15]: train-loss = 0.169824 | train-acc = 0.940 | val-loss = 0.000273 | val-acc = 0.815\n", 784 | "[Epoch 11/15]: train-loss = 0.146284 | train-acc = 0.948 | val-loss = 0.000277 | val-acc = 0.818\n", 785 | "[Epoch 12/15]: train-loss = 0.126156 | train-acc = 0.955 | val-loss = 0.002989 | val-acc = 0.822\n", 786 
| "[Epoch 13/15]: train-loss = 0.111513 | train-acc = 0.961 | val-loss = 0.000481 | val-acc = 0.824\n", 787 | "[Epoch 14/15]: train-loss = 0.109781 | train-acc = 0.962 | val-loss = 0.000927 | val-acc = 0.824\n", 788 | "[Epoch 15/15]: train-loss = 0.091795 | train-acc = 0.968 | val-loss = 0.003517 | val-acc = 0.833\n" 789 | ] 790 | } 791 | ], 792 | "source": [ 793 | "# !pwd\n", 794 | "train_costs, val_costs = train_model()" 795 | ] 796 | }, 797 | { 798 | "cell_type": "code", 799 | "execution_count": 17, 800 | "metadata": { 801 | "colab": { 802 | "base_uri": "https://localhost:8080/" 803 | }, 804 | "id": "kg45TKgb8N0k", 805 | "outputId": "fb8457c3-aaf2-42d1-d456-454012473c58" 806 | }, 807 | "outputs": [ 808 | { 809 | "data": { 810 | "text/plain": [ 811 | "" 812 | ] 813 | }, 814 | "execution_count": 17, 815 | "metadata": {}, 816 | "output_type": "execute_result" 817 | } 818 | ], 819 | "source": [ 820 | "#Restore the model.\n", 821 | "model = ResNet56()\n", 822 | "model.load_state_dict(torch.load('/content/resnet-56_weights_gpu'))" 823 | ] 824 | }, 825 | { 826 | "cell_type": "markdown", 827 | "metadata": { 828 | "id": "AODk608HNFbv" 829 | }, 830 | "source": [ 831 | "## Test the trained model on Test dataset" 832 | ] 833 | }, 834 | { 835 | "cell_type": "code", 836 | "execution_count": 18, 837 | "metadata": { 838 | "colab": { 839 | "base_uri": "https://localhost:8080/" 840 | }, 841 | "id": "MPbkor1g8Q3r", 842 | "outputId": "66352854-1346-4280-d198-c31310685fc7" 843 | }, 844 | "outputs": [ 845 | { 846 | "name": "stdout", 847 | "output_type": "stream", 848 | "text": [ 849 | "Test accuracy: 0.8344\n" 850 | ] 851 | } 852 | ], 853 | "source": [ 854 | "test_samples_num = 10000\n", 855 | "correct = 0 \n", 856 | "\n", 857 | "model.eval().cuda()\n", 858 | "\n", 859 | "with torch.no_grad():\n", 860 | " for inputs, labels in test_loader:\n", 861 | " inputs, labels = inputs.to(device), labels.to(device)\n", 862 | " # Make predictions.\n", 863 | " prediction = model(inputs)\n", 864 | "\n", 865 | " # Retrieve predictions indexes.\n", 866 | " _, predicted_class = torch.max(prediction.data, 1)\n", 867 | "\n", 868 | " # Compute number of correct predictions.\n", 869 | " correct += (predicted_class == labels).float().sum().item()\n", 870 | "\n", 871 | "test_accuracy = correct / test_samples_num\n", 872 | "print('Test accuracy: {}'.format(test_accuracy))" 873 | ] 874 | } 875 | ], 876 | "metadata": { 877 | "accelerator": "GPU", 878 | "colab": { 879 | "collapsed_sections": [], 880 | "name": "ResNet_MyWork.ipynb", 881 | "provenance": [] 882 | }, 883 | "interpreter": { 884 | "hash": "36cf16204b8548560b1c020c4e8fb5b57f0e4c58016f52f2d4be01e192833930" 885 | }, 886 | "kernelspec": { 887 | "display_name": "Python 3.9.10 64-bit", 888 | "language": "python", 889 | "name": "python3" 890 | }, 891 | "language_info": { 892 | "codemirror_mode": { 893 | "name": "ipython", 894 | "version": 3 895 | }, 896 | "file_extension": ".py", 897 | "mimetype": "text/x-python", 898 | "name": "python", 899 | "nbconvert_exporter": "python", 900 | "pygments_lexer": "ipython3", 901 | "version": "3.9.10" 902 | }, 903 | "orig_nbformat": 4 904 | }, 905 | "nbformat": 4, 906 | "nbformat_minor": 0 907 | } 908 | -------------------------------------------------------------------------------- /Unet_Brain_segmentation_unet_with_keras/unet.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import pandas as pd 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | 7 | 
plt.style.use("ggplot") 8 | # %matplotlib inline 9 | 10 | import cv2 11 | from tqdm import tqdm_notebook, tnrange 12 | from glob import glob 13 | from itertools import chain 14 | from skimage.io import imread, imshow, concatenate_images 15 | from skimage.transform import resize 16 | from skimage.morphology import label 17 | from sklearn.model_selection import train_test_split 18 | 19 | import tensorflow as tf 20 | from skimage.color import rgb2gray 21 | from tensorflow.keras import Input 22 | from tensorflow.keras.models import Model, load_model, save_model 23 | from tensorflow.keras.layers import ( 24 | Input, 25 | Activation, 26 | BatchNormalization, 27 | Dropout, 28 | Lambda, 29 | Conv2D, 30 | Conv2DTranspose, 31 | MaxPooling2D, 32 | concatenate, 33 | ) 34 | from tensorflow.keras.optimizers import Adam 35 | from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint 36 | 37 | from tensorflow.keras import backend as K 38 | from tensorflow.keras.preprocessing.image import ImageDataGenerator 39 | from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint 40 | 41 | 42 | def unet(input_size=(256, 256, 3)): 43 | inputs = Input(input_size) 44 | 45 | # First DownConvolution / Encoder Leg will begin, so start with Conv2D 46 | conv1 = Conv2D(filters=64, kernel_size=(3, 3), padding="same")(inputs) 47 | bn1 = Activation("relu")(conv1) 48 | conv1 = Conv2D(filters=64, kernel_size=(3, 3), padding="same")(bn1) 49 | bn1 = BatchNormalization(axis=3)(conv1) 50 | bn1 = Activation("relu")(bn1) 51 | pool1 = MaxPooling2D(pool_size=(2, 2))(bn1) 52 | 53 | conv2 = Conv2D(filters=128, kernel_size=(3, 3), padding="same")(pool1) 54 | bn2 = Activation("relu")(conv2) 55 | conv2 = Conv2D(filters=128, kernel_size=(3, 3), padding="same")(bn2) 56 | bn2 = BatchNormalization(axis=3)(conv2) 57 | bn2 = Activation("relu")(bn2) 58 | pool2 = MaxPooling2D(pool_size=(2, 2))(bn2) 59 | 60 | conv3 = Conv2D(filters=256, kernel_size=(3, 3), padding="same")(pool2) 61 | bn3 = Activation("relu")(conv3) 62 | conv3 = Conv2D(filters=256, kernel_size=(3, 3), padding="same")(bn3) 63 | bn3 = BatchNormalization(axis=3)(conv3) 64 | bn3 = Activation("relu")(bn3) 65 | pool3 = MaxPooling2D(pool_size=(2, 2))(bn3) 66 | 67 | conv4 = Conv2D(filters=512, kernel_size=(3, 3), padding="same")(pool3) 68 | bn4 = Activation("relu")(conv4) 69 | conv4 = Conv2D(filters=512, kernel_size=(3, 3), padding="same")(bn4) 70 | bn4 = BatchNormalization(axis=3)(conv4) 71 | bn4 = Activation("relu")(bn4) 72 | pool4 = MaxPooling2D(pool_size=(2, 2))(bn4) 73 | 74 | conv5 = Conv2D(filters=1024, kernel_size=(3, 3), padding="same")(pool4) 75 | bn5 = Activation("relu")(conv5) 76 | conv5 = Conv2D(filters=1024, kernel_size=(3, 3), padding="same")(bn5) 77 | bn5 = BatchNormalization(axis=3)(conv5) 78 | bn5 = Activation("relu")(bn5) 79 | 80 | """ Now UpConvolution / Decoder Leg will begin, so start with Conv2DTranspose 81 | The gray arrows (in the above image) indicate the skip connections that concatenate the encoder feature map with the decoder, which helps the backward flow of gradients for improved training. 
""" 82 | up6 = concatenate( 83 | [ 84 | Conv2DTranspose(512, kernel_size=(2, 2), strides=(2, 2), padding="same")( 85 | bn5 86 | ), 87 | conv4, 88 | ], 89 | axis=3, 90 | ) 91 | """ After every concatenation we again apply two consecutive regular convolutions so that the model can learn to assemble a more precise output """ 92 | conv6 = Conv2D(filters=512, kernel_size=(3, 3), padding="same")(up6) 93 | bn6 = Activation("relu")(conv6) 94 | conv6 = Conv2D(filters=512, kernel_size=(3, 3), padding="same")(bn6) 95 | bn6 = BatchNormalization(axis=3)(conv6) 96 | bn6 = Activation("relu")(bn6) 97 | 98 | up7 = concatenate( 99 | [ 100 | Conv2DTranspose(256, kernel_size=(2, 2), strides=(2, 2), padding="same")( 101 | bn6 102 | ), 103 | conv3, 104 | ], 105 | axis=3, 106 | ) 107 | conv7 = Conv2D(filters=256, kernel_size=(3, 3), padding="same")(up7) 108 | bn7 = Activation("relu")(conv7) 109 | conv7 = Conv2D(filters=256, kernel_size=(3, 3), padding="same")(bn7) 110 | bn7 = BatchNormalization(axis=3)(conv7) 111 | bn7 = Activation("relu")(bn7) 112 | 113 | up8 = concatenate( 114 | [ 115 | Conv2DTranspose(128, kernel_size=(2, 2), strides=(2, 2), padding="same")( 116 | bn7 117 | ), 118 | conv2, 119 | ], 120 | axis=3, 121 | ) 122 | conv8 = Conv2D(filters=128, kernel_size=(3, 3), padding="same")(up8) 123 | bn8 = Activation("relu")(conv8) 124 | conv8 = Conv2D(filters=128, kernel_size=(3, 3), padding="same")(bn8) 125 | bn8 = BatchNormalization(axis=3)(conv8) 126 | bn8 = Activation("relu")(bn8) 127 | 128 | up9 = concatenate( 129 | [ 130 | Conv2DTranspose(64, kernel_size=(2, 2), strides=(2, 2), padding="same")( 131 | bn8 132 | ), 133 | conv1, 134 | ], 135 | axis=3, 136 | ) 137 | conv9 = Conv2D(filters=64, kernel_size=(3, 3), padding="same")(up9) 138 | bn9 = Activation("relu")(conv9) 139 | conv9 = Conv2D(filters=64, kernel_size=(3, 3), padding="same")(bn9) 140 | bn9 = BatchNormalization(axis=3)(conv9) 141 | bn9 = Activation("relu")(bn9) 142 | 143 | conv10 = Conv2D(filters=1, kernel_size=(1, 1), activation="sigmoid")(bn9) 144 | 145 | return Model(inputs=[inputs], outputs=[conv10]) 146 | -------------------------------------------------------------------------------- /Unet_Brain_segmentation_unet_with_keras/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import pandas as pd 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | import cv2 7 | from tensorflow.keras import backend as K 8 | 9 | plt.style.use("ggplot") 10 | 11 | 12 | def plot_from_img_path(rows, columns, list_img_path, list_mask_path): 13 | fig = plt.figure(figsize=(12, 12)) 14 | for i in range(1, rows * columns + 1): 15 | fig.add_subplot(rows, columns, i) 16 | img_path = list_img_path[i] 17 | mask_path = list_mask_path[i] 18 | image = cv2.imread(img_path) 19 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 20 | mask = cv2.imread(mask_path) 21 | plt.imshow(image) 22 | plt.imshow(mask, alpha=0.4) 23 | plt.show() 24 | 25 | 26 | def dice_coefficients(y_true, y_pred, smooth=100): 27 | y_true_flatten = K.flatten(y_true) 28 | y_pred_flatten = K.flatten(y_pred) 29 | 30 | intersection = K.sum(y_true_flatten * y_pred_flatten) 31 | union = K.sum(y_true_flatten) + K.sum(y_pred_flatten) 32 | return (2 * intersection + smooth) / (union + smooth) 33 | 34 | 35 | def dice_coefficients_loss(y_true, y_pred, smooth=100): 36 | return -dice_coefficients(y_true, y_pred, smooth) 37 | 38 | 39 | def iou(y_true, y_pred, smooth=100): 40 | intersection = K.sum(y_true * y_pred) 41 | sum = K.sum(y_true + 
y_pred) 42 | iou = (intersection + smooth) / (sum - intersection + smooth) 43 | return iou 44 | 45 | 46 | def jaccard_distance(y_true, y_pred): 47 | y_true_flatten = K.flatten(y_true) 48 | y_pred_flatten = K.flatten(y_pred) 49 | return -iou(y_true_flatten, y_pred_flatten) 50 | -------------------------------------------------------------------------------- /WGAN_Pytorch_From_Scratch_Full_Notebook.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## What Is a Wasserstein GAN?\n", 8 | "\n", 9 | "It is an extension of the GAN that seeks an alternate way of training the generator model to better approximate the distribution of data observed in a given training dataset.\n", 10 | "\n", 11 | "### Wasserstein GAN, or WGAN, is a type of generative adversarial network that minimizes an approximation of the Earth-Mover's distance (EM) rather than the Jensen-Shannon divergence as in the original GAN formulation.\n", 12 | "\n", 13 | "#### Here in WGAN, the discriminator does not actually classify instances. Rather here for each instance the Discriminator outputs a number. This number does not have to be less than one or greater than 0, so we can't use 0.5 as a threshold to decide whether an instance is real or fake. Discriminator training just tries to make the output bigger for real instances than for fake instances.\n", 14 | "\n", 15 | "Instead of using a discriminator to classify or predict the probability of generated images as being real or fake, the WGAN changes or replaces the discriminator model with a critic that scores the realness or fakeness of a given image.\n", 16 | "\n", 17 | "This change is motivated by a mathematical argument that training the generator should seek a minimization of the distance between the distribution of the data observed in the training dataset and the distribution observed in generated examples. The argument contrasts different distribution distance measures, such as Kullback-Leibler (KL) divergence, Jensen-Shannon (JS) divergence, and the Earth-Mover (EM) distance, referred to as Wasserstein distance.\n", 18 | "\n", 19 | "---\n", 20 | "\n", 21 | "\n", 22 | "The idea for the working of WGANs is to utilize two probability distributions. 
One is the probability distribution of the generator (Pg), which refers to the distribution from the output of the generator model.\n", 23 | "\n", 24 | "The other is the probability distribution from the real images (Pr).\n", 25 | "\n", 26 | "And the objective of WGAN is to ensure that both these probability distributions are close to each other so that the output generated is highly realistic and high-quality.\n", 27 | "\n", 28 | "For calculating the distance of these probability distributions, mathematical statistics in machine learning proposes three primary methods, namely\n", 29 | "\n", 30 | "- Kullback–Leibler divergence,\n", 31 | "- Jensen–Shannon divergence, and\n", 32 | "- Wasserstein distance.\n", 33 | "\n", 34 | "The Jensen–Shannon divergence (also a typical GAN loss) is the more utilized mechanism in simple GAN networks.\n", 35 | "\n", 36 | "#### But in WGAN, we use the Wasserstein distance (a.k.a Earth Mover’s Distance) instead of Jensen-Shannon Divergence to compare probability distributions.\n", 37 | "\n", 38 | "**The benefit of the WGAN is that the training process is more stable and less sensitive to model architecture and choice of hyperparameter configurations.**\n", 39 | "\n", 40 | "---\n", 41 | "\n", 42 | "## Compared to the original GAN algorithm, the WGAN undertakes the following changes:\n", 43 | "\n", 44 | "* After every gradient update on the critic function, clamp the weights to a small fixed range, [-c, c].\n", 45 | "\n", 46 | "* Use a new loss function derived from the Wasserstein distance, no logarithm anymore. The “discriminator” model does not play as a direct critic but a helper for estimating the Wasserstein metric between real and generated data distribution.\n", 47 | "\n", 48 | "* Empirically the authors recommended RMSProp optimizer on the critic, rather than a momentum based optimizer such as Adam which could cause instability in the model training.\n", 49 | "\n", 50 | "---\n", 51 | "\n", 52 | "\n", 53 | "## Key Points in WGAN\n", 54 | "\n", 55 | "![Imgur](https://imgur.com/cWROjs7.png)\n", 56 | "\n", 57 | "### 1. Critic Weight Clipping\n", 58 | "\n", 59 | "The critic F has to be a 1-Lipschitz function. To enforce the constraint, WGAN applies a very simple clipping to restrict the maximum weight value in F,\n", 60 | "\n", 61 | "i.e. the weights of the discriminator must be within a certain range controlled by the hyperparameters c\n", 62 | "\n", 63 | "### 2. Update Critic More Than Generator\n", 64 | "\n", 65 | "In the DCGAN, the generator and the discriminator model must be updated in equal amounts.\n", 66 | "\n", 67 | "Specifically, the discriminator is updated with a half batch of real and a half batch of fake samples each iteration, whereas the generator is updated with a single batch of generated samples.\n", 68 | "\n", 69 | "In the WGAN model, the critic model must be updated more than the generator model.\n", 70 | "\n", 71 | "Specifically, a new hyperparameter is defined to control the number of times that the critic is updated for each update to the generator model, called n_critic.\n", 72 | "\n", 73 | "### 3. 
Use RMSProp Stochastic Gradient Descent\n", 74 | "\n", 75 | "The DCGAN uses the Adam version of stochastic gradient descent with a small learning rate and modest momentum.\n", 76 | "\n", 77 | "The WGAN recommends the use of Root Mean Square Propagation or RMSProp instead (which is one of the Adaptive Learning Rate Gradient Descent), with a small learning rate of 0.00005.\n", 78 | "\n", 79 | "---\n", 80 | "\n", 81 | "## The loss function for WGAN\n", 82 | "\n", 83 | "#### First, for a Normal GAN (e.g. DCGAN) the Loss definition is ;\n", 84 | "\n", 85 | "Critic Loss: D(x) - D(G(z))\n", 86 | "\n", 87 | "Where,\n", 88 | "- D(x) is the discriminator's estimate of the probability that real data instance x is real.\n", 89 | "- G(z) is the generator's output when given noise z.\n", 90 | "- D(G(z)) is the discriminator's estimate of the probability that a fake instance is real.\n", 91 | "\n", 92 | "\n", 93 | "#### Now for WGAN the Loss is defined as:\n", 94 | "\n", 95 | "#### Critic Loss = [average critic score on real images] – [average critic score on fake images]\n", 96 | "\n", 97 | "Critic Loss: D(x) - D(G(z))\n", 98 | "\n", 99 | "In WGAN, the Discriminator, does not produce a Probability, rather it produces a pure score.\n", 100 | "\n", 101 | "Where,\n", 102 | " - D(x) is the critic's output for a real instance.\n", 103 | " - G(z) is the generator's output when given noise z.\n", 104 | " - D(G(z)) is the critic's output for a fake instance.\n", 105 | "\n", 106 | "The output of critic D does not have to be between 1 and 0.\n", 107 | "\n", 108 | "- The discriminator tries to maximize this function. In other words, it tries to maximize the difference between its output on real instances and its output on fake instances.\n", 109 | "\n", 110 | "- So, when compared to the Normal GAN's Discriminator, the Discriminator in WGAN, we do NOT classify or predict the probability of generated images as being real or fake. Instead, the WGAN replaces the discriminator model with a critic that scores the realness or fakeness of a given image.\n", 111 | "\n", 112 | "- It does this by removing the last Sigmoid() layer and have a linear layer at the end of the discriminator’s neural network.\n", 113 | "\n", 114 | "#### Generator Loss = -[average critic score on fake images]\n", 115 | "\n", 116 | "Generator Loss: D(G(z))\n", 117 | "\n", 118 | "The generator tries to maximize this function. In other words, It tries to maximize the discriminator's output for its fake instances. In these functions:\n", 119 | "\n", 120 | "\n", 121 | "\n", 122 | "---\n", 123 | "\n", 124 | "### Implementing Wasserstein Loss\n", 125 | "\n", 126 | "1. Use a linear activation function in the output layer of the critic model (instead of sigmoid).\n", 127 | "\n", 128 | "2. Use Wasserstein loss to train the critic and generator models that promote larger difference between scores for real and generated images.\n", 129 | "\n", 130 | "3. Constrain critic model weights to a limited range after each mini batch update (e.g. [-0.01,0.01]).\n", 131 | "\n", 132 | "4. 
In order to have parameters w lie in a compact space, something simple we can do is clamp the weights to a fixed box (say W = [−0.01, 0.01]l ) after each gradient update.\n", 133 | "\n", 134 | "\n", 135 | "We can summarize the function as it is described in the paper as follows:\n", 136 | "\n", 137 | "\n", 138 | "#### Critic Loss = [average critic score on real images] – [average critic score on fake images]\n", 139 | "\n", 140 | "#### Generator Loss = -[average critic score on fake images]\n", 141 | "\n", 142 | "Where the average scores are calculated across a mini-batch of samples.\n", 143 | "\n", 144 | "The calculations are straightforward to interpret once we recall that stochastic gradient descent seeks to minimize loss.\n", 145 | "\n", 146 | "#### In the case of the generator, a larger score from the critic will result in a smaller loss for the generator, encouraging the critic to output larger scores for fake images. For example, an average score of 10 becomes -10, an average score of 50 becomes -50, which is smaller, and so on.\n", 147 | "\n", 148 | "#### In the case of the critic, a larger score for real images results in a larger resulting loss for the critic, penalizing the model. This encourages the critic to output smaller scores for real images. For example, an average score of 20 for real images and 50 for fake images results in a loss of -30; an average score of 10 for real images and 50 for fake images results in a loss of -40, which is better, and so on.\n", 149 | "\n", 150 | "#### The sign of the loss does not matter in this case, as long as loss for real images is a small number and the loss for fake images is a large number. The Wasserstein loss encourages the critic to separate these numbers.\n", 151 | "\n", 152 | "#### We can also reverse the situation and encourage the critic to output a large score for real images and a small score for fake images and achieve the same result.\n", 153 | "\n", 154 | "---\n", 155 | "\n", 156 | "### Main Equation\n", 157 | "\n", 158 | "The network uses Earth Mover’s Distance instead of Jensen-Shannon Divergence to compare probability distributions.\n", 159 | "\n", 160 | "![Imgur](https://imgur.com/EJg4nHM.png)\n", 161 | "\n", 162 | "In the above equation, the max value represents the constraint on the discriminator. In the WGAN architecture, the discriminator is referred to as the critic. One of the reasons for this convention is that there is no sigmoid activation function to limit the values to 0 or 1, which means real or fake. So the discriminator in WGAN, outputs a scalar score rather than a probability.\n", 163 | "\n", 164 | "The first part of the equation represents the real data, while the second half represents the generator data. The discriminator (or the critic) in the above equation aims to maximize the distance between the real data and the generated data, because it wants to be able to successfully distinguish the data accordingly.\n", 165 | "\n", 166 | "The generator network aims to minimize the distance between the real data and generated data because it wants the generated data to be as real as possible.\n", 167 | "\n", 168 | "---\n", 169 | "\n", 170 | "## Jensen Shannon Divergence (JSD)\n", 171 | "\n", 172 | "\n", 173 | "The objective function of our original GAN is essentially the minimization of something called the Jensen Shannon Divergence (JSD). Specifically it is:\n", 174 | "\n", 175 | "![Imgur](https://imgur.com/kYc2Cfv.png)\n", 176 | "\n", 177 | "---\n", 178 | "\n", 179 | "Sadly, Wasserstein GAN is not perfect. 
Even the authors of the original WGAN paper mentioned that “Weight clipping is a clearly terrible way to enforce a Lipschitz constraint” (Oops!). WGAN still suffers from unstable training, slow convergence after weight clipping (when clipping window is too large), and vanishing gradients (when clipping window is too small).\n", 180 | "\n", 181 | "Some improvement, precisely replacing weight clipping with gradient penalty is one of the most prominent solution that has been proposed." 182 | ] 183 | }, 184 | { 185 | "cell_type": "markdown", 186 | "metadata": { 187 | "id": "7A1WwPqxmfrE" 188 | }, 189 | "source": [ 190 | "## Implementation from scratch" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": 6, 196 | "metadata": { 197 | "colab": { 198 | "base_uri": "https://localhost:8080/" 199 | }, 200 | "id": "lzolhsHfVzWO", 201 | "outputId": "0d3b2a3f-d1d4-45bc-c091-808931832607", 202 | "vscode": { 203 | "languageId": "python" 204 | } 205 | }, 206 | "outputs": [ 207 | { 208 | "name": "stdout", 209 | "output_type": "stream", 210 | "text": [ 211 | "Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n" 212 | ] 213 | } 214 | ], 215 | "source": [ 216 | "from google.colab import drive\n", 217 | "drive.mount('/content/drive')" 218 | ] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "execution_count": 7, 223 | "metadata": { 224 | "id": "7Aa7VNjLmaw9", 225 | "vscode": { 226 | "languageId": "python" 227 | } 228 | }, 229 | "outputs": [], 230 | "source": [ 231 | "import os\n", 232 | "\n", 233 | "import torchvision.transforms as transforms\n", 234 | "from torchvision.utils import make_grid\n", 235 | "\n", 236 | "from torch.utils.data import DataLoader\n", 237 | "from torchvision import datasets\n", 238 | "from torch.autograd import Variable\n", 239 | "\n", 240 | "import torch.nn as nn\n", 241 | "import torch.nn.functional as F\n", 242 | "import torch\n", 243 | "\n", 244 | "import numpy as np\n", 245 | "import matplotlib.pyplot as plt\n", 246 | "from matplotlib.pyplot import figure\n", 247 | "\n", 248 | "from tqdm import tqdm \n", 249 | "\n", 250 | "plt.ion()\n", 251 | "from IPython.display import clear_output" 252 | ] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "execution_count": 8, 257 | "metadata": { 258 | "colab": { 259 | "base_uri": "https://localhost:8080/" 260 | }, 261 | "id": "lQ9BWJV3WYUt", 262 | "outputId": "6018a674-99da-4eea-f989-9d4e5a753762", 263 | "vscode": { 264 | "languageId": "python" 265 | } 266 | }, 267 | "outputs": [ 268 | { 269 | "name": "stdout", 270 | "output_type": "stream", 271 | "text": [ 272 | "Tue Mar 22 20:37:53 2022 \n", 273 | "+-----------------------------------------------------------------------------+\n", 274 | "| NVIDIA-SMI 460.32.03 Driver Version: 460.32.03 CUDA Version: 11.2 |\n", 275 | "|-------------------------------+----------------------+----------------------+\n", 276 | "| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\n", 277 | "| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\n", 278 | "| | | MIG M. 
|\n", 279 | "|===============================+======================+======================|\n", 280 | "| 0 Tesla T4 Off | 00000000:00:04.0 Off | 0 |\n", 281 | "| N/A 33C P8 9W / 70W | 0MiB / 15109MiB | 0% Default |\n", 282 | "| | | N/A |\n", 283 | "+-------------------------------+----------------------+----------------------+\n", 284 | " \n", 285 | "+-----------------------------------------------------------------------------+\n", 286 | "| Processes: |\n", 287 | "| GPU GI CI PID Type Process name GPU Memory |\n", 288 | "| ID ID Usage |\n", 289 | "|=============================================================================|\n", 290 | "| No running processes found |\n", 291 | "+-----------------------------------------------------------------------------+\n" 292 | ] 293 | } 294 | ], 295 | "source": [ 296 | "!nvidia-smi" 297 | ] 298 | }, 299 | { 300 | "cell_type": "markdown", 301 | "metadata": {}, 302 | "source": [ 303 | "## HYPERPARAMETERS" 304 | ] 305 | }, 306 | { 307 | "cell_type": "code", 308 | "execution_count": null, 309 | "metadata": { 310 | "vscode": { 311 | "languageId": "python" 312 | } 313 | }, 314 | "outputs": [], 315 | "source": [ 316 | "class Hyperparameters(object):\n", 317 | " def __init__(self, **kwargs):\n", 318 | " self.__dict__.update(kwargs)\n", 319 | "\n", 320 | "hp = Hyperparameters(n_epochs=200,\n", 321 | " batch_size=64,\n", 322 | " lr=0.00005, \n", 323 | " n_cpu=8,\n", 324 | " latent_dim=100,\n", 325 | " img_size=32,\n", 326 | " channels=1,\n", 327 | " n_critic=25,\n", 328 | " clip_value=.005,\n", 329 | " sample_interval=400)\n", 330 | "\n", 331 | "print(hp.lr)" 332 | ] 333 | }, 334 | { 335 | "cell_type": "code", 336 | "execution_count": 10, 337 | "metadata": { 338 | "id": "wQcu6fi-WPCF", 339 | "vscode": { 340 | "languageId": "python" 341 | } 342 | }, 343 | "outputs": [], 344 | "source": [ 345 | "root_path = '/content/drive/MyDrive/All_Datasets/Fashion_MNIST'\n", 346 | "''' The Fashion-MNIST dataset contains 60,000 training images (and 10,000 test images) of fashion and clothing items, taken from 10 classes. Each image is a standardized 28×28 size in grayscale (784 total pixels). 
'''\n", 347 | "\n", 348 | "dataloader = torch.utils.data.DataLoader(\n", 349 | " datasets.FashionMNIST(\n", 350 | " root_path,\n", 351 | " train=True,\n", 352 | " download=True,\n", 353 | " transform=transforms.Compose(\n", 354 | " [transforms.Resize(hp.img_size), transforms.ToTensor(), transforms.Normalize([0.5], [0.5])]\n", 355 | " ),\n", 356 | " ),\n", 357 | " batch_size=hp.batch_size,\n", 358 | " shuffle=True,\n", 359 | ")" 360 | ] 361 | }, 362 | { 363 | "cell_type": "markdown", 364 | "metadata": { 365 | "id": "yQ954M71otK9" 366 | }, 367 | "source": [ 368 | "SETUP" 369 | ] 370 | }, 371 | { 372 | "cell_type": "code", 373 | "execution_count": 11, 374 | "metadata": { 375 | "id": "dbDFpxOdonQI", 376 | "vscode": { 377 | "languageId": "python" 378 | } 379 | }, 380 | "outputs": [], 381 | "source": [ 382 | "# os.makedirs(\"images\", exist_ok=True)\n", 383 | "img_shape = (hp.channels, hp.img_size, hp.img_size)\n", 384 | "\n", 385 | "cuda = True if torch.cuda.is_available() else False\n", 386 | "\n", 387 | "def weights_init_normal(m):\n", 388 | " classname = m.__class__.__name__\n", 389 | " if classname.find(\"Conv\") != -1:\n", 390 | " torch.nn.init.normal_(m.weight.data, 0.0, 0.02)\n", 391 | " elif classname.find(\"BatchNorm2d\") != -1:\n", 392 | " torch.nn.init.normal_(m.weight.data, 1.0, 0.02)\n", 393 | " torch.nn.init.constant_(m.bias.data, 0.0)\n", 394 | "\n", 395 | "def to_img(x):\n", 396 | " x = x.clamp(0, 1)\n", 397 | " return x\n", 398 | "\n", 399 | "def visualise_output(images, x, y):\n", 400 | " with torch.no_grad(): \n", 401 | " images = images.cpu()\n", 402 | " images = to_img(images)\n", 403 | " np_imagegrid = make_grid(images, x, y).numpy()\n", 404 | " figure(figsize=(20,20))\n", 405 | " plt.imshow(np.transpose(np_imagegrid, (1, 2, 0)))\n", 406 | " plt.show()" 407 | ] 408 | }, 409 | { 410 | "cell_type": "markdown", 411 | "metadata": { 412 | "id": "xIXAbyxIo9r9" 413 | }, 414 | "source": [ 415 | "## GENERATOR\n", 416 | "\n", 417 | "The generator model takes as input a point in the latent space and outputs a single 28×28 grayscale image.\n", 418 | "\n", 419 | "This is achieved by using a fully connected layer to interpret the point in the latent space. \n", 420 | "\n", 421 | "This is then upsampled couple of more times, doubling the size.\n", 422 | "\n", 423 | "### np.prod()\n", 424 | "\n", 425 | "Return the product of array elements over a given axis.\n", 426 | "\n", 427 | "If the input array is blank, then this method returns the neutral element: 1\n", 428 | "\n", 429 | "By default, the axis is set to None, thereby calculating the product of all the elements in the given array. 
" 430 | ] 431 | }, 432 | { 433 | "cell_type": "code", 434 | "execution_count": 12, 435 | "metadata": { 436 | "id": "bTfDyWs-o_cG", 437 | "vscode": { 438 | "languageId": "python" 439 | } 440 | }, 441 | "outputs": [], 442 | "source": [ 443 | "class Generator(nn.Module):\n", 444 | " def __init__(self, img_shape, latent_dim):\n", 445 | " super(Generator, self).__init__()\n", 446 | "\n", 447 | " def block(in_features, out_features, normalize=True):\n", 448 | " layers = [nn.Linear(in_features, out_features)]\n", 449 | " if normalize:\n", 450 | " layers.append(nn.BatchNorm1d(out_features, 0.8))\n", 451 | " layers.append(nn.LeakyReLU(0.2, inplace=True))\n", 452 | " return layers\n", 453 | "\n", 454 | " self.model = nn.Sequential(\n", 455 | " *block(\n", 456 | " in_features=latent_dim, out_features=128, normalize=False\n", 457 | " ), # Batch_size, 784 -> Batch_size, 128\n", 458 | " *block(\n", 459 | " in_features=128, out_features=256\n", 460 | " ), # Batch_size, 128 -> Batch_size, 256\n", 461 | " *block(\n", 462 | " in_features=256, out_features=512\n", 463 | " ), # Batch_size, 256 -> Batch_size, 512\n", 464 | " *block(\n", 465 | " in_features=512, out_features=1024\n", 466 | " ), # Batch_size, 512 -> Batch_size, 1024\n", 467 | " nn.Linear(\n", 468 | " in_features=1024, out_features=int(np.prod(img_shape))\n", 469 | " ), # Batch_size, 1024 -> Batch_size, np.prod(img_shape)\n", 470 | " nn.Tanh()\n", 471 | " )\n", 472 | "\n", 473 | " def forward(self, img_shape, z):\n", 474 | " img = self.model(z)\n", 475 | " img = img.view(img.shape[0], *img_shape)\n", 476 | " return img" 477 | ] 478 | }, 479 | { 480 | "cell_type": "markdown", 481 | "metadata": { 482 | "id": "P01xoE6FpL9-" 483 | }, 484 | "source": [ 485 | "## DISCRIMINATOR\n", 486 | "\n", 487 | "One of the reasons for this convention is that there is no sigmoid activation function to limit the values to 0 or 1, which means real or fake. So the discriminator in WGAN, outputs a scalar score rather than a probability." 
488 | ] 489 | }, 490 | { 491 | "cell_type": "code", 492 | "execution_count": 13, 493 | "metadata": { 494 | "id": "jYlPm9G8pN6u", 495 | "vscode": { 496 | "languageId": "python" 497 | } 498 | }, 499 | "outputs": [], 500 | "source": [ 501 | "class Critic(nn.Module):\n", 502 | " def __init__(self, img_shape):\n", 503 | " super(Critic, self).__init__()\n", 504 | "\n", 505 | " self.model = nn.Sequential(\n", 506 | " nn.Linear(\n", 507 | " in_features=int(np.prod(img_shape)), out_features=512\n", 508 | " ), # Batch_size, np.prod(img_shape) -> Batch_size, 512\n", 509 | " nn.LeakyReLU(0.2, inplace=True),\n", 510 | " nn.Linear(\n", 511 | " in_features=512, out_features=256\n", 512 | " ), # Batch_size, 512 -> Batch_size, 256\n", 513 | " nn.LeakyReLU(0.2, inplace=True),\n", 514 | " nn.Linear(\n", 515 | " in_features=256, out_features=1\n", 516 | " ), # Batch_size, 256 -> Batch_size, 1\n", 517 | " )\n", 518 | "\n", 519 | " def forward(self, img):\n", 520 | " img_flat = img.view(img.shape[0], -1)\n", 521 | " validity = self.model(img_flat)\n", 522 | " return validity" 523 | ] 524 | }, 525 | { 526 | "cell_type": "markdown", 527 | "metadata": { 528 | "id": "0y-0F1URpfQd" 529 | }, 530 | "source": [ 531 | "LOSS and MODELS" 532 | ] 533 | }, 534 | { 535 | "cell_type": "code", 536 | "execution_count": 14, 537 | "metadata": { 538 | "colab": { 539 | "base_uri": "https://localhost:8080/" 540 | }, 541 | "id": "7UPqaoCSphQ1", 542 | "outputId": "dc9deab8-1c97-49c7-c4bb-d9a05ac7b52a", 543 | "vscode": { 544 | "languageId": "python" 545 | } 546 | }, 547 | "outputs": [ 548 | { 549 | "data": { 550 | "text/plain": [ 551 | "Critic(\n", 552 | " (model): Sequential(\n", 553 | " (0): Linear(in_features=1024, out_features=512, bias=True)\n", 554 | " (1): LeakyReLU(negative_slope=0.2, inplace=True)\n", 555 | " (2): Linear(in_features=512, out_features=256, bias=True)\n", 556 | " (3): LeakyReLU(negative_slope=0.2, inplace=True)\n", 557 | " (4): Linear(in_features=256, out_features=1, bias=True)\n", 558 | " )\n", 559 | ")" 560 | ] 561 | }, 562 | "execution_count": 14, 563 | "metadata": {}, 564 | "output_type": "execute_result" 565 | } 566 | ], 567 | "source": [ 568 | "generator = Generator(img_shape, hp.latent_dim)\n", 569 | "critic = Critic(img_shape)\n", 570 | "\n", 571 | "if cuda:\n", 572 | " generator.cuda()\n", 573 | " critic.cuda() \n", 574 | "\n", 575 | "# Initialize weights\n", 576 | "generator.apply(weights_init_normal)\n", 577 | "critic.apply(weights_init_normal)" 578 | ] 579 | }, 580 | { 581 | "cell_type": "markdown", 582 | "metadata": { 583 | "id": "Y0X-PJWYp6W9" 584 | }, 585 | "source": [ 586 | "OPTIMIZERS and TENSOR SETUP" 587 | ] 588 | }, 589 | { 590 | "cell_type": "code", 591 | "execution_count": 15, 592 | "metadata": { 593 | "id": "ymTI34yKqA2u", 594 | "vscode": { 595 | "languageId": "python" 596 | } 597 | }, 598 | "outputs": [], 599 | "source": [ 600 | "optimizer_G = torch.optim.RMSprop(generator.parameters(), lr=hp.lr)\n", 601 | "optimizer_D = torch.optim.RMSprop(critic.parameters(), lr=hp.lr)\n", 602 | "\n", 603 | "Tensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor" 604 | ] 605 | }, 606 | { 607 | "cell_type": "markdown", 608 | "metadata": { 609 | "id": "Bf3lQ1-0qehW" 610 | }, 611 | "source": [ 612 | "## TRAINING STEPS\n", 613 | "\n", 614 | "1. The critic network is first trained on a real batch of data, then trained on a batch of data generated from a noise-prior via the generator. \n", 615 | "\n", 616 | "2. 
The critic's loss function is arranged such that it estimates the Wasserstein Distance (maximizes the distance between the two distributions) then clips its own weights to ensure it is 1-Lipschitz-Continuous. \n", 617 | "\n", 618 | "3. Then, the generator generates a new batch of images from a noise prior, passes these through to the critic who then \"informs\" the generator of the Wasserstein-1 distance between the true distribution and the distribution of the images the Generator just created. \n", 619 | "\n", 620 | "4. It does this via the loss function of the critic. The critic's weights are frozen and the error propagates all the way back through to the generator who then updates its parameters to minimize the Wasserstein distance. \n", 621 | "\n", 622 | "5. This repeats until the loss (hopefully) converges to near zero and the distributions are approximately equal.\n", 623 | "\n", 624 | "6. The discriminator loss is (an approximation of) the negative Wasserstein distance between the generator distribution and the data distribution." 625 | ] 626 | }, 627 | { 628 | "cell_type": "code", 629 | "execution_count": null, 630 | "metadata": { 631 | "colab": { 632 | "base_uri": "https://localhost:8080/", 633 | "height": 1000 634 | }, 635 | "id": "ipLMbwcxqgPm", 636 | "outputId": "25fcb2db-224b-4c8f-d39c-b523685f2593", 637 | "vscode": { 638 | "languageId": "python" 639 | } 640 | }, 641 | "outputs": [], 642 | "source": [ 643 | "for epoch in range(hp.n_epochs):\n", 644 | " for i, (imgs, _) in enumerate(dataloader):\n", 645 | "\n", 646 | " # Adversarial ground truths\n", 647 | " valid = Variable(Tensor(imgs.shape[0], 1).fill_(1.0), requires_grad=False)\n", 648 | " fake = Variable(Tensor(imgs.shape[0], 1).fill_(0.0), requires_grad=False)\n", 649 | "\n", 650 | " # Configure input\n", 651 | " real_imgs = Variable(imgs.type(Tensor))\n", 652 | "\n", 653 | " # -----------------\n", 654 | " # Train Critic\n", 655 | " # -----------------\n", 656 | "\n", 657 | " optimizer_G.zero_grad()\n", 658 | "\n", 659 | " # Sample noise as generator input\n", 660 | " # Draw random samples from a normal (Gaussian) distribution.\n", 661 | " # np.random.normal(mean, sd, Output shape)\n", 662 | " z = Variable(Tensor(np.random.normal(0, 1, (imgs.shape[0], hp.latent_dim))))\n", 663 | "\n", 664 | " # Generate a batch of images\n", 665 | " fake_imgs = generator(z).detach()\n", 666 | "\n", 667 | " ''' The math for the loss functions for the critic and generator is:\n", 668 | " Critic Loss: D(x) - D(G(z))\n", 669 | " Generator Loss: D(G(z))\n", 670 | " Now for the Critic Loss, as per the Paper, we have to maximize the expression.\n", 671 | " So, arithmetically, maximizing an expression, means minimizing the -ve of that expression\n", 672 | " i.e. -(D(x) - D(G(z))) which is -D(x) + D(G(z)) i.e. -D(real_imgs) + D(G(real_imgs))\n", 673 | " '''\n", 674 | " d_loss = -torch.mean(critic(real_imgs)) + torch.mean(critic(fake_imgs)) \n", 675 | "\n", 676 | " d_loss.backward()\n", 677 | " optimizer_D.step()\n", 678 | "\n", 679 | " \n", 680 | " ''' Clip weights of critic to avoid vanishing/exploding gradients in the \n", 681 | " critic/critic. \n", 682 | " In order to have parameters w lie in a compact space, something simple we can do is clamp the weights to a fixed box (say W = [-0.005, 0.005]l ) after each gradient update.\n", 683 | " \n", 684 | " torch.clamp() is used to clamp all the elements in an input into the range [min, max]. It takes three parameters: the input tensor, min, and max values. 
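(Here those three are p.data, -hp.clip_value and hp.clip_value, so every critic weight is clamped in place to the range [-0.005, 0.005] after each update.)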
The values less than the min are replaced by the min and the values greater than the max are replaced by the max. If min is not given, then there is no lower bound. If max is not given, then there is no upper bound. '''\n", 685 | " for p in critic.parameters():\n", 686 | " p.data.clamp_(-hp.clip_value, hp.clip_value)\n", 687 | "\n", 688 | "\n", 689 | " ''' Train the generator every n_critic iterations \n", 690 | " we need to increase training iterations of the critic so that it works to \n", 691 | " approximate the real distribution sooner.\n", 692 | " '''\n", 693 | " if i % hp.n_critic == 0:\n", 694 | " # ---------------------\n", 695 | " # Train Generator\n", 696 | " # ---------------------\n", 697 | " optimizer_G.zero_grad()\n", 698 | "\n", 699 | " # Generate a batch of images\n", 700 | " fake_images_from_generator = generator(z)\n", 701 | " # Adversarial loss\n", 702 | " g_loss = -torch.mean(critic(fake_images_from_generator))\n", 703 | "\n", 704 | " g_loss.backward()\n", 705 | " optimizer_G.step() \n", 706 | "\n", 707 | " batches_done = epoch * len(dataloader) + i\n", 708 | " if batches_done % hp.sample_interval == 0:\n", 709 | " clear_output()\n", 710 | " print(f\"Epoch:{epoch}:It{i}:DLoss{d_loss.item()}:GLoss{g_loss.item()}\") \n", 711 | " visualise_output(fake_images_from_generator.data[:50],10, 10)" 712 | ] 713 | } 714 | ], 715 | "metadata": { 716 | "accelerator": "GPU", 717 | "colab": { 718 | "collapsed_sections": [], 719 | "name": "GEN_4_WGAN.ipynb", 720 | "provenance": [] 721 | }, 722 | "kernelspec": { 723 | "display_name": "Python 3", 724 | "name": "python3" 725 | } 726 | }, 727 | "nbformat": 4, 728 | "nbformat_minor": 0 729 | } 730 | -------------------------------------------------------------------------------- /assets/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rohan-paul/Deep-Learning-Paper-Implementation/f606c82c32e4e79496f5739aa0ee9813e335a36e/assets/2.png -------------------------------------------------------------------------------- /assets/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rohan-paul/Deep-Learning-Paper-Implementation/f606c82c32e4e79496f5739aa0ee9813e335a36e/assets/3.png -------------------------------------------------------------------------------- /assets/Youtube_Cover.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rohan-paul/Deep-Learning-Paper-Implementation/f606c82c32e4e79496f5739aa0ee9813e335a36e/assets/Youtube_Cover.jpg -------------------------------------------------------------------------------- /assets/yt_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rohan-paul/Deep-Learning-Paper-Implementation/f606c82c32e4e79496f5739aa0ee9813e335a36e/assets/yt_logo.png --------------------------------------------------------------------------------