├── .gitignore ├── BiCycleGAN_Toward_Multimodal_Image-to-Image_Translation.ipynb ├── CycleGAN_From_Scratch_PyTorch_FINAL_Entire_NB.ipynb ├── DCGAN-from-Scratch-with-PyTorch ├── DCGAN_ONLY_train.ipynb ├── Readme.md ├── dcgan.py ├── train.py └── utils.py ├── DCGAN_Generator_Function_Understanding_Filter_Size_and_Input_Shape.ipynb ├── DCGAN_Tensorflow_Celeb_A_Dataset.ipynb ├── GoogLeNet_Incepton-v1-PyTorch.ipynb ├── LeNet5_PyTorch.ipynb ├── README.md ├── ResNet56_PyTorch.ipynb ├── Unet_Brain_segmentation_unet_with_keras ├── brain_segmentation_unet_with_keras.ipynb ├── unet.py └── utils.py ├── WGAN_Pytorch_From_Scratch_Full_Notebook.ipynb └── assets ├── 2.png ├── 3.png ├── Youtube_Cover.jpg └── yt_logo.png /.gitignore: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/ignore-files/ for more about ignoring files. 2 | 3 | # Below for all Kaggle and Other Dataset, as they will all be inside input 4 | # On 22-April, have put all the data inside input 5 | input/ 6 | *input 7 | /*input 8 | */*input 9 | **/*input 10 | /input 11 | /input/* 12 | */input/* 13 | 14 | 15 | .vscode/ 16 | *.vscode 17 | /*.vscode 18 | */*.vscode 19 | **/*.vscode 20 | /.vscode 21 | /.vscode/* 22 | */.vscode/* 23 | 24 | input_dataset/ 25 | *input_dataset 26 | /*input_dataset 27 | */*input_dataset 28 | **/*input_dataset 29 | /input_dataset 30 | /input_dataset/* 31 | */input_dataset/* 32 | 33 | 34 | *input_dataset_2 35 | /*input_dataset_2 36 | */*input_dataset_2 37 | **/*input_dataset_2 38 | /input_dataset_2 39 | /input_dataset_2/* 40 | */input_dataset_2/* 41 | 42 | # ABOVE ONLY FOR THIS REPO AS I KEPT ALL DATASET UNDER THE ABOVE FOLDER NAME 43 | 44 | node_modules/ 45 | Colab_Model_Download/ 46 | wandb/ 47 | mlruns 48 | # The above is the MOST EFFECTIVE ONE as per 49 | # https://stackoverflow.com/a/1470664/1902852 50 | # The way to ignore all directories called "node_modules" 51 | # anywhere below the current level in a directory tree 52 | 53 | # Further if I have already pushed a directory to remote then remove that with below 54 | # Execute a folder remove (rm) from index only (--cached) recursivelly (-r). Command line example for root bin folder: 55 | # `git rm -r --cached full_absolute_path` 56 | # e.g. I had to run the below kind of command to remove the ".next" folder that was pushed to remote github 57 | # passing the full path of the .next folder 58 | # git rm -r --cached /media/veracrypt2/014-agency-classic-next/.next 59 | 60 | 61 | # See https://help.github.com/ignore-files/ for more about ignoring files. 62 | 63 | HF_Models/ 64 | 65 | *HF_Models 66 | /*HF_Models 67 | */*HF_Models 68 | **/*HF_Models 69 | /HF_Models 70 | /HF_Models/* 71 | 72 | node_modules/ 73 | # The above is the MOST EFFECTIVE ONE as per 74 | # https://stackoverflow.com/a/1470664/1902852 75 | # The way to ignore all directories called "node_modules" 76 | # anywhere below the current level in a directory tree 77 | 78 | *node_modules 79 | /*node_modules 80 | */*node_modules 81 | **/*node_modules 82 | /node_modules 83 | /node_modules/* 84 | */node_modules/*in 85 | 86 | # If you have performed a task, such as adding a new line item to your .gitignore file, I MUST need to clear out your git repo's cache in order for the changes to take place. Here are the commands for doing that: 87 | 88 | # git rm -r --cached . && git add . 
&& git commit -am 'git cache cleared' && git push 89 | 90 | __MACOSX/ 91 | *__MACOSX 92 | /*__MACOSX 93 | */*__MACOSX 94 | **/*__MACOSXc 95 | /__MACOSX 96 | /__MACOSX/* 97 | */__MACOSX/* 98 | 99 | __pycache__/ 100 | *__pycache__ 101 | /*__pycache__ 102 | */*__pycache__ 103 | **/*__pycache__c 104 | /__pycache__ 105 | /__pycache__/* 106 | */__pycache__/* 107 | 108 | .ipynb_checkpoints/ 109 | *.ipynb_checkpoints 110 | /*.ipynb_checkpoints 111 | */*.ipynb_checkpoints 112 | **/*.ipynb_checkpointsc 113 | /.ipynb_checkpoints 114 | /.ipynb_checkpoints/* 115 | */.ipynb_checkpoints/* 116 | 117 | -checkpoint.ipynb/ 118 | *-checkpoint.ipynb 119 | /*-checkpoint.ipynb 120 | */*-checkpoint.ipynb 121 | **/*-checkpoint.ipynb 122 | /-checkpoint.ipynb 123 | 124 | .h5/ 125 | *.h5 126 | /*.h5 127 | */*.h5 128 | **/*.h5 129 | /.h5 130 | 131 | .pyc/ 132 | *.pyc 133 | /*.pyc 134 | */*.pyc 135 | **/*.pyc 136 | /.pyc 137 | 138 | 139 | .bin/ 140 | *.bin 141 | /*.bin 142 | */*.bin 143 | **/*.bin 144 | /.bin 145 | 146 | .json/ 147 | *.json 148 | /*.json 149 | */*.json 150 | **/*.json 151 | /.json 152 | 153 | .next/ 154 | *.next 155 | /*.next 156 | */*.next 157 | **/*.next 158 | /.next 159 | /.next/* 160 | */.next/* 161 | 162 | 163 | .npy/ 164 | *.npy 165 | /*.npy 166 | */*.npy 167 | **/*.npy 168 | /.npy 169 | 170 | # testing 171 | coverage/ 172 | *coverage 173 | /*coverage 174 | */*coverage 175 | **/*coverage 176 | /coverage 177 | /coverage/* 178 | */coverage/* 179 | 180 | # production 181 | build/ 182 | *build 183 | /*build 184 | */*build 185 | **/*build 186 | /buildgs 187 | /build/* 188 | */build/* 189 | 190 | 191 | .db 192 | .db/ 193 | *.db 194 | /*.db 195 | */*.db 196 | **/*.db 197 | /.db 198 | 199 | 200 | .pkl 201 | .pkl/ 202 | *.pkl 203 | /*.pkl 204 | */*.pkl 205 | **/*.pkl 206 | /.pkl 207 | 208 | .hdf5 209 | .hdf5/ 210 | *.hdf5 211 | /*.hdf5 212 | */*.hdf5 213 | **/*.hdf5 214 | /.hdf5 215 | 216 | .pt 217 | .pt/ 218 | *.pt 219 | /*.pt 220 | */*.pt 221 | **/*.pt 222 | /.pt 223 | 224 | 225 | .pyc 226 | .pyc/ 227 | *.pyc 228 | /*.pyc 229 | */*.pyc 230 | **/*.pyc 231 | /.pyc 232 | 233 | 234 | .txt 235 | *.txt 236 | /*.txt 237 | */*.txt 238 | **/*.txt 239 | /.txt 240 | 241 | 242 | .csv 243 | *.csv 244 | /*.csv 245 | */*.csv 246 | **/*.csv 247 | /.csv 248 | 249 | .index 250 | *.index 251 | /*.index 252 | */*.index 253 | **/*.index 254 | /.index 255 | 256 | .mp4/ 257 | *.mp4 258 | /*.mp4 259 | */*.mp4 260 | **/*.mp4 261 | /.mp4 262 | 263 | .srt/ 264 | *.srt 265 | /*.srt 266 | */*.srt 267 | **/*.srt 268 | /.srt 269 | 270 | glove_vectors 271 | *glove_vectors 272 | /glove_vectors 273 | */*glove_vectors 274 | **/*glove_vectors 275 | /glove_vectors 276 | 277 | 278 | 279 | .zip 280 | *.zip 281 | /*.zip 282 | */*.zip 283 | **/*.zip 284 | /.zip 285 | 286 | 287 | .gz 288 | *.gz 289 | /*.gz 290 | */*.gz 291 | **/*.gz 292 | /.gz 293 | 294 | 295 | # Commenting out all .png files as I would need them for image pasting in .md files / notes 296 | #.png/ 297 | #*.png 298 | #/*.png 299 | #*/*.png 300 | #**/*.png 301 | #/.png 302 | 303 | 304 | *.jpg 305 | /*.jpg 306 | */*.jpg 307 | **/*.jpg 308 | /.jpg 309 | 310 | *.jpeg 311 | /*.jpeg 312 | */*.jpeg 313 | **/*.jpeg 314 | /.jpeg 315 | 316 | *.rar 317 | /*.rar 318 | */*.rar 319 | **/*.rar 320 | /.rar 321 | 322 | 323 | .tgz/ 324 | *.tgz 325 | /*.tgz 326 | */*.tgz 327 | **/*.tgz 328 | /.tgz 329 | 330 | .tar/ 331 | *.tar 332 | /*.tar 333 | */*.tar 334 | **/*.tar 335 | /.tar 336 | 337 | *.7z 338 | /*.7z 339 | */*.7z 340 | **/*.7z 341 | /.7z 342 | 343 | .dcm/ 344 | *.dcm 345 | /*.dcm 346 | 
*/*.dcm 347 | **/*.dcm 348 | /.dcm 349 | 350 | .tiff/ 351 | *.tiff 352 | /*.tiff 353 | */*.tiff 354 | **/*.tiff 355 | /.tiff 356 | 357 | .nii/ 358 | *.nii 359 | /*.nii 360 | */*.nii 361 | **/*.nii 362 | /.nii 363 | 364 | LARGE_Datasets 365 | */LARGE_Datasets 366 | */LARGE_Datasets/** 367 | **/LARGE_Datasets/** 368 | 369 | /Others_Code_gitignore 370 | /Others_Code_gitignore/* 371 | */Others_Code_gitignore/* 372 | 373 | /YouTube_Experiments_Scripts 374 | /YouTube_Experiments_Scripts/* 375 | */YouTube_Experiments_Scripts/* 376 | 377 | # *** END OF FILES SPECIFIC TO ML Projects **** 378 | 379 | 380 | # misc 381 | DS_Store/ 382 | .DS_Store 383 | .env.local 384 | .env.development.local 385 | .env.test.local 386 | .env.production.local 387 | 388 | npm-debug.log* 389 | yarn-debug.log* 390 | yarn-error.log* 391 | 392 | # Ignore docs files 393 | _gh_pages 394 | .ruby-version 395 | 396 | # Numerous always-ignore extensions 397 | *.diff 398 | *.err 399 | *.orig 400 | *.log 401 | *.rej 402 | *.swo 403 | *.swp 404 | *.zip 405 | *.vi 406 | *~ 407 | *.~lock* 408 | .~lock* 409 | 410 | # OS or Editor folders 411 | .DS_Store 412 | ._* 413 | Thumbs.db 414 | .cache 415 | .project 416 | .settings 417 | .tmproj 418 | *.esproj 419 | nbproject 420 | *.sublime-project 421 | *.sublime-workspace 422 | .idea 423 | 424 | # Komodo 425 | *.komodoproject 426 | .komodotools 427 | 428 | # grunt-html-validation 429 | validation-status.json 430 | validation-report.json 431 | 432 | 433 | # Ignore all logfiles and tempfiles. 434 | !/log/.keep 435 | /tmp 436 | /.gems 437 | 438 | CountDownTimer-Note.odt 439 | random-code-1.js 440 | random-code-2.js 441 | random-code-3.js 442 | performance-1.js 443 | 444 | test.html 445 | test1.html 446 | test2.html 447 | test3.html 448 | 449 | #ignore file name ending in "-bkp.js" OR "-bkp.ts" OR "-bkp.py" or "-test.js" OR "-test.ts" in its name. So I will have to put "-test.js" at all files that is just for my development-time random testing code . 
450 | **/*-bkp.js 451 | **/*-bkp.ts 452 | **/*-bkp.py 453 | **/*-test.js 454 | **/*-test.ts 455 | **/*-test.py 456 | **/*-test.ipynb 457 | **/*-test.md 458 | **/*-test.json 459 | 460 | # OS or Editor folders 461 | .DS_Store 462 | ._* 463 | Thumbs.db 464 | .cache 465 | .project 466 | .settings 467 | .tmproj 468 | *.esproj 469 | nbproject 470 | *.sublime-project 471 | *.sublime-workspace 472 | .idea 473 | node_modules 474 | Others_Code_gitignore 475 | Project-Note-PAUL 476 | .vscode 477 | 478 | # Local Netlify folder 479 | .netlify 480 | -------------------------------------------------------------------------------- /BiCycleGAN_Toward_Multimodal_Image-to-Image_Translation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## BiCycleGAN - Paper - Toward Multimodal Image-to-Image Translation - PyTorch Implementation from Scratch\n", 8 | "\n", 9 | "# [Link to my Youtube Video Explaining this whole Notebook](https://www.youtube.com/watch?v=iCXruj3slIk&list=PLxqBkZuBynVRyOJs4RWmB_fKlOVe5S8CR&index=17)\n", 10 | "\n", 11 | "[![Imgur](https://imgur.com/n7xTxVm.png)](https://www.youtube.com/watch?v=iCXruj3slIk&list=PLxqBkZuBynVRyOJs4RWmB_fKlOVe5S8CR&index=17)\n" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "### 👉 The Maps - Dataset link - http://efrosgans.eecs.berkeley.edu/pix2pix/datasets/\n", 19 | "\n", 20 | "## About the Satellite to Map Image Translation Dataset\n", 21 | "\n", 22 | "This is a dataset comprised of satellite images of New York and their corresponding Google maps pages. The image translation problem involves converting satellite photos to Google maps format, or the reverse, Google maps images to Satellite photos.\n", 23 | "\n", 24 | "The dataset is provided on the pix2pix website and can be downloaded as a 255-megabyte zip file.\n", 25 | "\n", 26 | "maps\n", 27 | "├── train\n", 28 | "└── val\n", 29 | "\n", 30 | "The train folder contains 1,097 images, whereas the validation dataset contains 1,099 images.\n", 31 | "\n", 32 | "Images have a digit filename and are in JPEG format. Each image is 1,200 pixels wide and 600 pixels tall and contains both the satellite image on the left and the Google maps image on the right." 
33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": null, 38 | "metadata": { 39 | "colab": { 40 | "base_uri": "https://localhost:8080/" 41 | }, 42 | "id": "jjIvEuipGqxu", 43 | "outputId": "fa2c5913-6374-4a01-d292-e4c85d1ca3e9" 44 | }, 45 | "outputs": [], 46 | "source": [ 47 | "!pip install icecream\n", 48 | "import os\n", 49 | "import numpy as np\n", 50 | "import math\n", 51 | "import itertools\n", 52 | "import scipy\n", 53 | "import sys\n", 54 | "import time\n", 55 | "import datetime\n", 56 | "\n", 57 | "import torchvision.transforms as transforms\n", 58 | "from torchvision.utils import save_image\n", 59 | "\n", 60 | "from torch.utils.data import DataLoader\n", 61 | "from torchvision import datasets\n", 62 | "from torch.autograd import Variable\n", 63 | "import torch.autograd as autograd\n", 64 | "from torchvision.utils import make_grid\n", 65 | "\n", 66 | "import torch.nn as nn\n", 67 | "import torch.nn.functional as F\n", 68 | "import torch\n", 69 | "\n", 70 | "import matplotlib.pyplot as plt\n", 71 | "from matplotlib.pyplot import figure\n", 72 | "from IPython.display import clear_output\n", 73 | "from icecream import ic" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": 48, 79 | "metadata": { 80 | "colab": { 81 | "base_uri": "https://localhost:8080/" 82 | }, 83 | "id": "UokJxbOYHuAi", 84 | "outputId": "d98cf4ea-3cba-411d-a329-e89ef1f48bf0" 85 | }, 86 | "outputs": [ 87 | { 88 | "name": "stdout", 89 | "output_type": "stream", 90 | "text": [ 91 | "Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n" 92 | ] 93 | } 94 | ], 95 | "source": [ 96 | "from google.colab import drive\n", 97 | "drive.mount('/content/drive')" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 50, 103 | "metadata": { 104 | "id": "rVBTY_4wwfbq" 105 | }, 106 | "outputs": [], 107 | "source": [ 108 | "class Hyperparameters(object):\n", 109 | " def __init__(self, **kwargs):\n", 110 | " self.__dict__.update(kwargs)\n", 111 | "\n", 112 | "hp = Hyperparameters(\n", 113 | " epoch=0,\n", 114 | " n_epochs=200,\n", 115 | " batch_size=8, \n", 116 | " dataset_train_mode=\"train\",\n", 117 | " dataset_test_mode=\"val\", \n", 118 | " lr=.0002, \n", 119 | " b1=.5,\n", 120 | " b2=0.999,\n", 121 | " n_cpu=8,\n", 122 | " img_size=128,\n", 123 | " channels=3,\n", 124 | " latent_dim=8,\n", 125 | " n_critic=5,\n", 126 | " sample_interval=400,\n", 127 | " lambda_pixel=10,\n", 128 | " lambda_latent=.5,\n", 129 | " lambda_kl=.01)\n", 130 | "\n", 131 | "img_root_folder = '/content/drive/MyDrive/All_Datasets/Maps-UCBerkeley-CycleGAN/maps'" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": 53, 137 | "metadata": { 138 | "id": "XKrlGFNzgjyr" 139 | }, 140 | "outputs": [], 141 | "source": [ 142 | "# Just print to check that the full file paths of the images are printed indeed\n", 143 | "# sorted(glob.glob(os.path.join(img_root_folder, 'train') + \"/*.*\"))" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": 54, 149 | "metadata": { 150 | "cellView": "form", 151 | "id": "yZaSsdJa_xqH" 152 | }, 153 | "outputs": [], 154 | "source": [ 155 | "import glob\n", 156 | "import random\n", 157 | "import os\n", 158 | "from torch.utils.data import Dataset\n", 159 | "from PIL import Image\n", 160 | "\n", 161 | "class ImageDataset(Dataset):\n", 162 | " def __init__(self, root, transforms_=None, mode=\"train\"):\n", 163 | " self.transform = 
transforms.Compose(transforms_)\n", 164 | "\n", 165 | " # Using the glob and sorted functions to load all the images and sort them.\n", 166 | " self.files = sorted(glob.glob(os.path.join(root, mode) + \"/*.*\"))\n", 167 | " if mode == \"train\":\n", 168 | " self.files.extend(sorted(glob.glob(os.path.join(root, \"test\") + \"/*.*\")))\n", 169 | " # `extends()` method adds the specified list elements to the end of the current list.\n", 170 | "\n", 171 | " def __getitem__(self, index):\n", 172 | "\n", 173 | " img = Image.open(self.files[index % len(self.files)])\n", 174 | " ''' In above line, my target is to find index item in a list based on the length of a variable in the list\n", 175 | " So, in case such indexes do not exist, None is returned. \n", 176 | " a % b => a is divided by b, and the remainder of that division is returned.\n", 177 | " 5 % 100 => 5 '''\n", 178 | " w, h = img.size # Pillow.Image.open() returns width, height\n", 179 | " img_A = img.crop((0, 0, w / 2, h)) # (left, upper, right, lower)\n", 180 | " img_B = img.crop((w / 2, 0, w, h)) # (left, upper, right, lower) \n", 181 | "\n", 182 | " ''' Below code block implements Horizontal Flipping or Mirroring Image based on randomly generated probability '''\n", 183 | " if np.random.random() < 0.5:\n", 184 | " # Converting from numpy arrays to a RGB image\n", 185 | " # Call PIL.Image.fromarray(obj, mode) with obj as a 3-D array and mode as \"RGB\" to convert obj into an image.\n", 186 | " # Image.fromarray creates an image memory from an object exporting the array interface\n", 187 | " img_A = Image.fromarray(np.array(img_A)[:, ::-1, :], \"RGB\") # Mirror in x direction (flip horizontally)\n", 188 | " img_B = Image.fromarray(np.array(img_B)[:, ::-1, :], \"RGB\") # Mirror in x direction (flip horizontally)\n", 189 | " ''' a[::-1] # all items in the array, reversed. \n", 190 | " Pillow image returns tuple of (width, height) \n", 191 | " The data has 3 dimensions: height, width and color. Numpy shape of the image is a tuple of (row (height), column (width), color(3) )\n", 192 | " So ::-1 effectively reverses the order of the width. 
The height and color are not affected.\n", 193 | " '''\n", 194 | "\n", 195 | " img_A = self.transform(img_A)\n", 196 | " img_B = self.transform(img_B)\n", 197 | "\n", 198 | " return {\"A\": img_A, \"B\": img_B}\n", 199 | "\n", 200 | " def __len__(self):\n", 201 | " return len(self.files)" 202 | ] 203 | }, 204 | { 205 | "cell_type": "markdown", 206 | "metadata": {}, 207 | "source": [ 208 | "### Explanation of Horizontal Image Flipping with `Image.fromarray(np.array(img_A)[:, ::-1, :], \"RGB\")`\n", 209 | "\n", 210 | "Basically, its implementing, the official code's mechanism\n", 211 | "\n", 212 | "https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix/blob/master/data/base_dataset.py#L144\n", 213 | "\n", 214 | "```py\n", 215 | "def __flip(img, flip):\n", 216 | " if flip:\n", 217 | " return img.transpose(Image.FLIP_LEFT_RIGHT)\n", 218 | " return \n", 219 | "\n", 220 | "```" 221 | ] 222 | }, 223 | { 224 | "cell_type": "markdown", 225 | "metadata": {}, 226 | "source": [ 227 | "#### Note - numpy arrays and PIL images have different shape, in Numpy its (H,W) and in PIL and (W,H)\n", 228 | "\n", 229 | "===========================================================================\n", 230 | "\n", 231 | "#### Why `img = Image.open(self.files[index % len(self.files)])`\n", 232 | "\n", 233 | "Because, here my target is to find index item in a list based on the length of a variable in the list\n", 234 | "\n", 235 | "So, in case such indexes do not exist, None is returned. " 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": 56, 241 | "metadata": { 242 | "cellView": "form", 243 | "id": "VWcpYFFnWAbv" 244 | }, 245 | "outputs": [], 246 | "source": [ 247 | "# IMAGE VISUALIZER HELPERS\n", 248 | "def imshow(img,size=10):\n", 249 | " img = img / 2 + 0.5 # de-normalizing\n", 250 | " npimg = img.numpy()\n", 251 | " plt.figure(figsize=(size, size))\n", 252 | " plt.imshow(np.transpose(npimg, (1, 2, 0)))\n", 253 | " plt.show()\n", 254 | "\n", 255 | "\n", 256 | "import matplotlib.image as mpimg\n", 257 | "\n", 258 | "def visualise_output(path, x, y):\n", 259 | " img = mpimg.imread(path)\n", 260 | " plt.figure(figsize=(x,y))\n", 261 | " plt.imshow(img) \n", 262 | " plt.show()" 263 | ] 264 | }, 265 | { 266 | "cell_type": "markdown", 267 | "metadata": {}, 268 | "source": [ 269 | "## Explanation of line `img = img / 2 + 0.5` # de-normalizing\n", 270 | "\n", 271 | "Since the normalization process is actually z = (x - mean) / sigma \n", 272 | "\n", 273 | "Where both mean and sigma is 0.5\n", 274 | "\n", 275 | "The inverse normalization should be x = z*sigma + mean\n", 276 | "\n", 277 | "\n", 278 | "https://discuss.pytorch.org/t/simple-way-to-inverse-transform-normalization/4821/7" 279 | ] 280 | }, 281 | { 282 | "cell_type": "code", 283 | "execution_count": 57, 284 | "metadata": { 285 | "cellView": "form", 286 | "colab": { 287 | "base_uri": "https://localhost:8080/" 288 | }, 289 | "id": "pR5LjEFQARFm", 290 | "outputId": "3bedecbb-57bf-43ec-ad89-22337d77f320" 291 | }, 292 | "outputs": [ 293 | { 294 | "name": "stderr", 295 | "output_type": "stream", 296 | "text": [ 297 | "/usr/local/lib/python3.7/dist-packages/torchvision/transforms/transforms.py:288: UserWarning: Argument interpolation should be of type InterpolationMode instead of int. Please, use InterpolationMode enum.\n", 298 | " \"Argument interpolation should be of type InterpolationMode instead of int. 
\"\n" 299 | ] 300 | } 301 | ], 302 | "source": [ 303 | "# CONFIGURE DATALOADERS\n", 304 | "transforms_ = [\n", 305 | " transforms.Resize((hp.img_size, hp.img_size), Image.BICUBIC),\n", 306 | " transforms.ToTensor(),\n", 307 | " transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),\n", 308 | "]\n", 309 | "\n", 310 | "train_dataloader = DataLoader(\n", 311 | " ImageDataset(img_root_folder, mode=hp.dataset_train_mode, transforms_=transforms_),\n", 312 | " batch_size=hp.batch_size,\n", 313 | " shuffle=True,\n", 314 | " num_workers=1,\n", 315 | ")\n", 316 | "val_dataloader = DataLoader(\n", 317 | " ImageDataset(img_root_folder, mode=hp.dataset_test_mode, transforms_=transforms_),\n", 318 | " batch_size=16,\n", 319 | " shuffle=True,\n", 320 | " num_workers=1,\n", 321 | ")" 322 | ] 323 | }, 324 | { 325 | "cell_type": "code", 326 | "execution_count": null, 327 | "metadata": { 328 | "colab": { 329 | "base_uri": "https://localhost:8080/", 330 | "height": 1000 331 | }, 332 | "id": "7vp2cbB5okgD", 333 | "outputId": "1afdc064-3be4-4174-f895-503a44657601" 334 | }, 335 | "outputs": [], 336 | "source": [ 337 | "# VISUALING SAMPLE DATA { run: \"auto\" }\n", 338 | "pic_size = 16\n", 339 | "\n", 340 | "dataiter = iter(train_dataloader)\n", 341 | "images = dataiter.next()\n", 342 | "\n", 343 | "for i in range(len(images[\"A\"])):\n", 344 | " imshow(make_grid([images[\"A\"][i],images[\"B\"][i]]), size=pic_size)" 345 | ] 346 | }, 347 | { 348 | "cell_type": "markdown", 349 | "metadata": {}, 350 | "source": [ 351 | "## Unet\n", 352 | "\n", 353 | "Unet is an end-to-end fully convolutional network (FCN), i.e. it only contains Convolutional layers and does not contain any Dense layer because of which it can accept image of any size.\n", 354 | "\n", 355 | "The left hand side is the contraction path (Encoder) where we apply regular convolutions and max pooling layers.\n", 356 | "\n", 357 | "In the Encoder, the size of the image gradually reduces while the depth gradually increases. (e.g. Starting from 128x128x3 to 8x8x256 )\n", 358 | "\n", 359 | "\n", 360 | "The right hand side is the expansion path (Decoder) where we apply transposed convolutions along with regular convolutions\n", 361 | "\n", 362 | "In the decoder, the size of the image gradually increases and the depth gradually decreases." 363 | ] 364 | }, 365 | { 366 | "cell_type": "code", 367 | "execution_count": 60, 368 | "metadata": { 369 | "id": "EEb5TdBmIy7l" 370 | }, 371 | "outputs": [], 372 | "source": [ 373 | "from torchvision.models import resnet18\n", 374 | "\n", 375 | "########################################################\n", 376 | "# Initialize convolution layer weights to N(0,0.02)\n", 377 | "########################################################\n", 378 | "def weights_init_normal(m):\n", 379 | " classname = m.__class__.__name__\n", 380 | " if classname.find(\"Conv\") != -1:\n", 381 | " torch.nn.init.normal_(m.weight.data, 0.0, 0.02)\n", 382 | " elif classname.find(\"BatchNorm2d\") != -1:\n", 383 | " torch.nn.init.normal_(m.weight.data, 1.0, 0.02)\n", 384 | " torch.nn.init.constant_(m.bias.data, 0.0)\n", 385 | "\n", 386 | "\n", 387 | "##############################\n", 388 | "# U-NET\n", 389 | "##############################\n", 390 | "''' As per the Paper - \"For generator G, we use the U-Net, which contains an encoder-decoder\n", 391 | "architecture, with symmetric skip connections.\" \n", 392 | "In the Encoder, the size of the image gradually reduces while the depth gradually increases. 
\n", 393 | "'''\n", 394 | "\n", 395 | "class UNetDown(nn.Module):\n", 396 | " def __init__(self, in_size, out_size, normalize=True, dropout=0.0):\n", 397 | " super(UNetDown, self).__init__()\n", 398 | " layers = [nn.Conv2d(in_size, out_size, 3, stride=2, padding=1, bias=False)]\n", 399 | " if normalize:\n", 400 | " layers.append(nn.BatchNorm2d(out_size, 0.8))\n", 401 | " layers.append(nn.LeakyReLU(0.2))\n", 402 | " self.model = nn.Sequential(*layers)\n", 403 | "\n", 404 | " def forward(self, x):\n", 405 | " return self.model(x)\n", 406 | "\n", 407 | "''' In the decoder, the size of the image gradually increases and the depth gradually decreases. '''\n", 408 | "class UNetUp(nn.Module):\n", 409 | " def __init__(self, in_size, out_size):\n", 410 | " super(UNetUp, self).__init__()\n", 411 | " self.model = nn.Sequential(\n", 412 | " nn.Upsample(scale_factor=2), # Upsampling by a scaling factor of 2\n", 413 | " nn.Conv2d(in_size, out_size, 3, stride=1, padding=1, bias=False),\n", 414 | " nn.BatchNorm2d(out_size, 0.8),\n", 415 | " nn.ReLU(inplace=True),\n", 416 | " )\n", 417 | "\n", 418 | " def forward(self, x, skip_input):\n", 419 | " x = self.model(x)\n", 420 | " x = torch.cat((x, skip_input), 1)\n", 421 | " return x\n", 422 | "\n", 423 | "\n", 424 | "class Generator(nn.Module):\n", 425 | " def __init__(self, latent_dim, img_shape):\n", 426 | " super(Generator, self).__init__()\n", 427 | " channels, self.h, self.w = img_shape\n", 428 | "\n", 429 | " self.fc = nn.Linear(latent_dim, self.h * self.w)\n", 430 | "\n", 431 | " self.down1 = UNetDown(channels + 1, 64, normalize=False)\n", 432 | " self.down2 = UNetDown(64, 128)\n", 433 | " self.down3 = UNetDown(128, 256)\n", 434 | " self.down4 = UNetDown(256, 512)\n", 435 | " self.down5 = UNetDown(512, 512)\n", 436 | " self.down6 = UNetDown(512, 512)\n", 437 | " self.down7 = UNetDown(512, 512, normalize=False)\n", 438 | " self.up1 = UNetUp(512, 512)\n", 439 | " self.up2 = UNetUp(1024, 512)\n", 440 | " self.up3 = UNetUp(1024, 512)\n", 441 | " self.up4 = UNetUp(1024, 256)\n", 442 | " self.up5 = UNetUp(512, 128)\n", 443 | " self.up6 = UNetUp(256, 64)\n", 444 | "\n", 445 | " self.final = nn.Sequential(\n", 446 | " nn.Upsample(scale_factor=2), \n", 447 | " nn.Conv2d(128, channels, 3, stride=1, padding=1), nn.Tanh()\n", 448 | " )\n", 449 | "\n", 450 | " def forward(self, x, z):\n", 451 | " # Propagate noise through fc layer and reshape to img shape\n", 452 | " z = self.fc(z).view(z.size(0), 1, self.h, self.w)\n", 453 | " d1 = self.down1(torch.cat((x, z), 1))\n", 454 | " d2 = self.down2(d1)\n", 455 | " d3 = self.down3(d2)\n", 456 | " d4 = self.down4(d3)\n", 457 | " d5 = self.down5(d4)\n", 458 | " d6 = self.down6(d5)\n", 459 | " d7 = self.down7(d6)\n", 460 | " u1 = self.up1(d7, d6)\n", 461 | " u2 = self.up2(u1, d5)\n", 462 | " u3 = self.up3(u2, d4)\n", 463 | " u4 = self.up4(u3, d3)\n", 464 | " u5 = self.up5(u4, d2)\n", 465 | " u6 = self.up6(u5, d1)\n", 466 | "\n", 467 | " return self.final(u6)\n", 468 | "\n", 469 | "\n", 470 | "##############################\n", 471 | "# Encoder\n", 472 | "##############################\n", 473 | "\n", 474 | "class Encoder(nn.Module):\n", 475 | " # 1. Use this encoder and get mu and log_var\n", 476 | " # 2. std = exp(log_var / 2)\n", 477 | " # 3. random_z = N(0, 1)\n", 478 | " # 4. 
encoded_z = random_z * std + mu (Reparameterization trick)\n", 479 | " def __init__(self, latent_dim, input_shape):\n", 480 | " super(Encoder, self).__init__()\n", 481 | " resnet18_model = resnet18(pretrained=False)\n", 482 | " self.feature_extractor = nn.Sequential(*list(resnet18_model.children())[:-3])\n", 483 | " # [:-3] => Everything except the last 3 items\n", 484 | " self.pooling = nn.AvgPool2d(kernel_size=8, stride=8, padding=0)\n", 485 | " # Output is mu and log(var) for reparameterization trick used in VAEs\n", 486 | " # mu and logvar assigned the same value (the encoder’s last layer output)\n", 487 | " self.fc_mu = nn.Linear(256, latent_dim)\n", 488 | " self.fc_logvar = nn.Linear(256, latent_dim)\n", 489 | "\n", 490 | " def forward(self, img):\n", 491 | " out = self.feature_extractor(img)\n", 492 | " out = self.pooling(out)\n", 493 | " out = out.view(out.size(0), -1)\n", 494 | " mu = self.fc_mu(out)\n", 495 | " logvar = self.fc_logvar(out)\n", 496 | " return mu, logvar\n", 497 | "\n", 498 | "\n", 499 | "##############################\n", 500 | "# Discriminator\n", 501 | "##############################\n", 502 | "\n", 503 | "class MultiDiscriminator(nn.Module):\n", 504 | " def __init__(self, input_shape):\n", 505 | " super(MultiDiscriminator, self).__init__()\n", 506 | "\n", 507 | " def discriminator_block(in_filters, out_filters, normalize=True):\n", 508 | " \"\"\"Returns downsampling layers of each discriminator block\"\"\"\n", 509 | " layers = [nn.Conv2d(in_filters, out_filters, 4, stride=2, padding=1)]\n", 510 | " if normalize:\n", 511 | " layers.append(nn.BatchNorm2d(out_filters, 0.8))\n", 512 | " layers.append(nn.LeakyReLU(0.2))\n", 513 | " return layers\n", 514 | "\n", 515 | " channels, _, _ = input_shape\n", 516 | " # Extracts discriminator models\n", 517 | " self.models = nn.ModuleList()\n", 518 | " for i in range(3):\n", 519 | " self.models.add_module(\n", 520 | " \"disc_%d\" % i,\n", 521 | " nn.Sequential(\n", 522 | " *discriminator_block(channels, 64, normalize=False),\n", 523 | " *discriminator_block(64, 128),\n", 524 | " *discriminator_block(128, 256),\n", 525 | " *discriminator_block(256, 512),\n", 526 | " nn.Conv2d(512, 1, 3, padding=1)\n", 527 | " ),\n", 528 | " )\n", 529 | "\n", 530 | " self.downsample = nn.AvgPool2d(channels, stride=2, padding=[1, 1], count_include_pad=False)\n", 531 | "\n", 532 | " def compute_loss(self, x, ground_truth):\n", 533 | " \"\"\"Computes the MSE between model output and scalar ground_truth\"\"\"\n", 534 | " loss = sum([torch.mean((out - ground_truth) ** 2) for out in self.forward(x)])\n", 535 | " return loss\n", 536 | "\n", 537 | " def forward(self, x):\n", 538 | " outputs = []\n", 539 | " for m in self.models:\n", 540 | " outputs.append(m(x))\n", 541 | " x = self.downsample(x)\n", 542 | " return outputs" 543 | ] 544 | }, 545 | { 546 | "cell_type": "code", 547 | "execution_count": 61, 548 | "metadata": { 549 | "colab": { 550 | "base_uri": "https://localhost:8080/" 551 | }, 552 | "id": "YxFuX3PCKOVW", 553 | "outputId": "c0a5bccd-572c-444d-d805-226d29a71b99" 554 | }, 555 | "outputs": [ 556 | { 557 | "name": "stdout", 558 | "output_type": "stream", 559 | "text": [ 560 | "Using CUDA\n" 561 | ] 562 | }, 563 | { 564 | "data": { 565 | "text/plain": [ 566 | "MultiDiscriminator(\n", 567 | " (models): ModuleList(\n", 568 | " (disc_0): Sequential(\n", 569 | " (0): Conv2d(3, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", 570 | " (1): LeakyReLU(negative_slope=0.2)\n", 571 | " (2): Conv2d(64, 128, kernel_size=(4, 4), stride=(2, 2), 
padding=(1, 1))\n", 572 | " (3): BatchNorm2d(128, eps=0.8, momentum=0.1, affine=True, track_running_stats=True)\n", 573 | " (4): LeakyReLU(negative_slope=0.2)\n", 574 | " (5): Conv2d(128, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", 575 | " (6): BatchNorm2d(256, eps=0.8, momentum=0.1, affine=True, track_running_stats=True)\n", 576 | " (7): LeakyReLU(negative_slope=0.2)\n", 577 | " (8): Conv2d(256, 512, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", 578 | " (9): BatchNorm2d(512, eps=0.8, momentum=0.1, affine=True, track_running_stats=True)\n", 579 | " (10): LeakyReLU(negative_slope=0.2)\n", 580 | " (11): Conv2d(512, 1, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", 581 | " )\n", 582 | " (disc_1): Sequential(\n", 583 | " (0): Conv2d(3, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", 584 | " (1): LeakyReLU(negative_slope=0.2)\n", 585 | " (2): Conv2d(64, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", 586 | " (3): BatchNorm2d(128, eps=0.8, momentum=0.1, affine=True, track_running_stats=True)\n", 587 | " (4): LeakyReLU(negative_slope=0.2)\n", 588 | " (5): Conv2d(128, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", 589 | " (6): BatchNorm2d(256, eps=0.8, momentum=0.1, affine=True, track_running_stats=True)\n", 590 | " (7): LeakyReLU(negative_slope=0.2)\n", 591 | " (8): Conv2d(256, 512, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", 592 | " (9): BatchNorm2d(512, eps=0.8, momentum=0.1, affine=True, track_running_stats=True)\n", 593 | " (10): LeakyReLU(negative_slope=0.2)\n", 594 | " (11): Conv2d(512, 1, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", 595 | " )\n", 596 | " (disc_2): Sequential(\n", 597 | " (0): Conv2d(3, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", 598 | " (1): LeakyReLU(negative_slope=0.2)\n", 599 | " (2): Conv2d(64, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", 600 | " (3): BatchNorm2d(128, eps=0.8, momentum=0.1, affine=True, track_running_stats=True)\n", 601 | " (4): LeakyReLU(negative_slope=0.2)\n", 602 | " (5): Conv2d(128, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", 603 | " (6): BatchNorm2d(256, eps=0.8, momentum=0.1, affine=True, track_running_stats=True)\n", 604 | " (7): LeakyReLU(negative_slope=0.2)\n", 605 | " (8): Conv2d(256, 512, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", 606 | " (9): BatchNorm2d(512, eps=0.8, momentum=0.1, affine=True, track_running_stats=True)\n", 607 | " (10): LeakyReLU(negative_slope=0.2)\n", 608 | " (11): Conv2d(512, 1, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", 609 | " )\n", 610 | " )\n", 611 | " (downsample): AvgPool2d(kernel_size=3, stride=2, padding=[1, 1])\n", 612 | ")" 613 | ] 614 | }, 615 | "execution_count": 61, 616 | "metadata": {}, 617 | "output_type": "execute_result" 618 | } 619 | ], 620 | "source": [ 621 | "##############################################\n", 622 | "# SETUP, LOSS, INITIALIZE MODELS and BUFFERS\n", 623 | "##############################################\n", 624 | "cuda = True if torch.cuda.is_available() else False\n", 625 | "print(\"Using CUDA\" if cuda else \"Not using CUDA\")\n", 626 | "\n", 627 | "# Loss functions\n", 628 | "mae_loss = torch.nn.L1Loss()\n", 629 | "input_shape = (hp.channels, hp.img_size, hp.img_size)\n", 630 | "\n", 631 | "# Initialize generator, encoder and discriminators\n", 632 | "generator = Generator(hp.latent_dim, input_shape)\n", 633 | "encoder = Encoder(hp.latent_dim, input_shape)\n", 634 | "\n", 635 | "D_VAE = MultiDiscriminator(input_shape)\n", 636 
| "D_LR = MultiDiscriminator(input_shape)\n", 637 | "\n", 638 | "if cuda:\n", 639 | " generator = generator.cuda()\n", 640 | " encoder.cuda()\n", 641 | " D_VAE = D_VAE.cuda()\n", 642 | " D_LR = D_LR.cuda()\n", 643 | " mae_loss.cuda()\n", 644 | "\n", 645 | "# Initialize weights\n", 646 | "generator.apply(weights_init_normal)\n", 647 | "D_VAE.apply(weights_init_normal)\n", 648 | "D_LR.apply(weights_init_normal)" 649 | ] 650 | }, 651 | { 652 | "cell_type": "code", 653 | "execution_count": 70, 654 | "metadata": { 655 | "id": "TpEIFH7BIRwA" 656 | }, 657 | "outputs": [], 658 | "source": [ 659 | "# SAMPLING IMAGES\n", 660 | "def sample_images(batches_done):\n", 661 | " \"\"\"From the validation set this method will create images and \n", 662 | " save those Generated samples in a path \"\"\"\n", 663 | " generator.eval()\n", 664 | " imgs = next(iter(val_dataloader))\n", 665 | " # next() will supply each subsequent element from the iterable\n", 666 | " # So in this case each subsequent set of images from val_dataloader\n", 667 | " img_samples = None\n", 668 | " # For below line to work, I need to create a folder named 'maps' in the root_path\n", 669 | " path = \"/content/%s/%s.png\" % ('maps', batches_done)\n", 670 | " for img_A, img_B in zip(imgs[\"A\"], imgs[\"B\"]):\n", 671 | " # Repeat input image by number of desired columns\n", 672 | " real_A = img_A.view(1, *img_A.shape).repeat(hp.latent_dim, 1, 1, 1)\n", 673 | " real_A = Variable(real_A.type(Tensor))\n", 674 | " # Sample latent representations\n", 675 | " sampled_z = Variable(Tensor(np.random.normal(0, 1, (hp.latent_dim, hp.latent_dim))))\n", 676 | " # Generate samples\n", 677 | " fake_B = generator(real_A, sampled_z)\n", 678 | " # Concatenate samples horizontally\n", 679 | " fake_B = torch.cat([x for x in fake_B.data.cpu()], -1)\n", 680 | " img_sample = torch.cat((img_A, fake_B), -1)\n", 681 | " img_sample = img_sample.view(1, *img_sample.shape)\n", 682 | " # Concatenate with previous samples vertically\n", 683 | " img_samples = img_sample if img_samples is None else torch.cat((img_samples, img_sample), -2)\n", 684 | " save_image(img_samples, path, nrow=8, normalize=True)\n", 685 | " generator.train() \n", 686 | " return path\n", 687 | " " 688 | ] 689 | }, 690 | { 691 | "cell_type": "code", 692 | "execution_count": 71, 693 | "metadata": { 694 | "id": "fgRoX2DbHSwk" 695 | }, 696 | "outputs": [], 697 | "source": [ 698 | "# OPTIMIZERS\n", 699 | "optimizer_E = torch.optim.Adam(encoder.parameters(), lr=hp.lr, betas=(hp.b1, hp.b2))\n", 700 | "optimizer_G = torch.optim.Adam(generator.parameters(), lr=hp.lr, betas=(hp.b1, hp.b2))\n", 701 | "\n", 702 | "optimizer_D_VAE = torch.optim.Adam(D_VAE.parameters(), lr=hp.lr, betas=(hp.b1, hp.b2))\n", 703 | "optimizer_D_LR = torch.optim.Adam(D_LR.parameters(), lr=hp.lr, betas=(hp.b1, hp.b2))\n", 704 | "\n", 705 | "Tensor = torch.cuda.FloatTensor if cuda else torch.Tensor" 706 | ] 707 | }, 708 | { 709 | "cell_type": "markdown", 710 | "metadata": {}, 711 | "source": [ 712 | "## The reparameterization trick\n", 713 | "\n", 714 | "#### The reparameterization trick is to learn two vectors σ and μ, \n", 715 | "\n", 716 | "First, sample ϵ from N(0,1) and then your latent vector Z would be (where ⊙ symbol or notation is the element-wise product.):\n", 717 | "\n", 718 | "## Z = μ + ϵ ⊙ σ\n", 719 | "\n", 720 | "\n", 721 | "So, if an input data point is to be mapped into a latent variable `z` via sampling (after getting passed through a neural network), it has to follow the following equation:\n", 722 | "\n", 723 | "## 
z = z_mean + epsilon * std \n", 724 | "\n", 725 | "where \n", 726 | "\n", 727 | "### std = torch.exp(z_log_var / 2)\n", 728 | "\n", 729 | "----\n" 730 | ] 731 | }, 732 | { 733 | "cell_type": "markdown", 734 | "metadata": {}, 735 | "source": [ 736 | "## Reconciliation between 2 mathematical expression for z\n", 737 | "\n", 738 | "## 1st -> z = z_mean + epsilon * std\n", 739 | "\n", 740 | "## 2nd (where the std is expressed as below )\n", 741 | "\n", 742 | "![Imgur](https://imgur.com/1G3iT3m.png)\n", 743 | "\n", 744 | "\n", 745 | "The σ in the first equation is the standard deviation which as you know is the square root of the variance. Then you can see that the multiplication of 0.5 outside the log equates to raising the variance inside of the log to the power of 0.5:\n", 746 | "\n", 747 | "\n", 748 | "![Imgur](https://imgur.com/CQxP66m.png)\n", 749 | "\n", 750 | "\n", 751 | "So they are the same." 752 | ] 753 | }, 754 | { 755 | "cell_type": "code", 756 | "execution_count": 72, 757 | "metadata": { 758 | "id": "45hmQrQZLoe4" 759 | }, 760 | "outputs": [], 761 | "source": [ 762 | "''' REPARAMETERIZE \n", 763 | "\n", 764 | "The reparameterization trick is to learn two vectors σ and μ, sample ϵ from N(0,1) and \n", 765 | "then your latent vector Z would be as below (where ⊙ is the element-wise product.):\n", 766 | "\n", 767 | "z = μ + ϵ ⊙ σ\n", 768 | "\n", 769 | "z = z_mean + epsilon * sigma\n", 770 | "\n", 771 | "'''\n", 772 | "\n", 773 | "def reparameterization(z_mean, z_log_var):\n", 774 | " std = torch.exp(z_log_var / 2)\n", 775 | " sampled_z = Variable(Tensor(np.random.normal(0, 1, (z_mean.size(0), hp.latent_dim))))\n", 776 | " z = z_mean + sampled_z * std\n", 777 | " return z" 778 | ] 779 | }, 780 | { 781 | "cell_type": "markdown", 782 | "metadata": {}, 783 | "source": [ 784 | "So in above implementation the `sampled_z` is replacing 'epsilon'\n", 785 | "\n" 786 | ] 787 | }, 788 | { 789 | "cell_type": "markdown", 790 | "metadata": {}, 791 | "source": [ 792 | "### `np.random.normal()`\n", 793 | "\n", 794 | "The `random.normal` method has the following syntax:\n", 795 | "\n", 796 | "`numpy.random.normal(m,s,n)`\n", 797 | "\n", 798 | "The random.normal function takes in three parameters:\n", 799 | "\n", 800 | "* m: the mean of the normal distribution.\n", 801 | "* s: the standard deviation of the distribution.\n", 802 | "* n: the total number of samples to be drawn." 803 | ] 804 | }, 805 | { 806 | "cell_type": "code", 807 | "execution_count": null, 808 | "metadata": { 809 | "colab": { 810 | "base_uri": "https://localhost:8080/", 811 | "height": 975 812 | }, 813 | "id": "GiWH4SwoI7jA", 814 | "outputId": "a9c5a587-ce74-407b-b1ab-af378ba0d570" 815 | }, 816 | "outputs": [], 817 | "source": [ 818 | "# TRAINING\n", 819 | "# Adversarial loss\n", 820 | "valid = 1\n", 821 | "fake = 0\n", 822 | "\n", 823 | "prev_time = time.time()\n", 824 | "for epoch in range(hp.epoch, hp.n_epochs):\n", 825 | " for i, batch in enumerate(train_dataloader):\n", 826 | "\n", 827 | " # Set model input\n", 828 | " real_A = Variable(batch[\"A\"].type(Tensor))\n", 829 | " real_B = Variable(batch[\"B\"].type(Tensor))\n", 830 | "\n", 831 | " ################################\n", 832 | " # Train Generator and Encoder\n", 833 | " #################################\n", 834 | " ''' The first component of Bicycle-GAN is cVAE-GAN. It first encodes the ground truth image(B) into the latent space using encoder E.\n", 835 | "\n", 836 | " ### Then input image(A) and encoded ground truth image(i.e. 
latent vector - Z) are passed into the Generator G which produces the output image(B^). That is, The generator attempts to map the input image A along with a sampled z back into the original image B.\n", 837 | " the flow of cVAE-GAN => B -> Z -> B^\n", 838 | " '''\n", 839 | "\n", 840 | " optimizer_E.zero_grad()\n", 841 | " optimizer_G.zero_grad()\n", 842 | "\n", 843 | " #################\n", 844 | " # cVAE-GAN\n", 845 | " #################\n", 846 | "\n", 847 | " # Produce output using encoding of B (cVAE-GAN)\n", 848 | " mu, logvar = encoder(real_B)\n", 849 | " # reparameterize so backprogation can be done on the\n", 850 | " # stochastically generated z variable\n", 851 | " encoded_z = reparameterization(mu, logvar)\n", 852 | " fake_B = generator(real_A, encoded_z)\n", 853 | "\n", 854 | " # Pixelwise loss of translated image by VAE\n", 855 | " loss_pixel_L1_vae = mae_loss(fake_B, real_B)\n", 856 | " \n", 857 | " # Kullback-Leibler divergence of encoded B\n", 858 | " # Refer - https://stackoverflow.com/questions/61597340/how-is-kl-divergence-in-pytorch-code-related-to-the-formula\n", 859 | " loss_kl = 0.5 * torch.sum(torch.exp(logvar) + mu ** 2 - logvar - 1)\n", 860 | " # Adversarial loss\n", 861 | " # compute_loss() - Computes the MSE between model output and scalar ground_truth\n", 862 | " loss_VAE_GAN = D_VAE.compute_loss(fake_B, valid)\n", 863 | "\n", 864 | " ####################################\n", 865 | " # cLR-GAN\n", 866 | " # Conditional Latent Regressor GAN\n", 867 | " ####################################\n", 868 | " \n", 869 | " ''' This is the second component of the Bicycle-GAN. Here a randomly drawn latent vector (sampled_z below) along with the input image(A) is provided to the generator. The generated output(B^) may not look like ground truth image(B), but it should look realistic.\n", 870 | " Then the generated output is passed through the encoder, encoder tries to regain the latent vector from the output image. \n", 871 | " \n", 872 | " the flow of cLR-GAN is Z -> B^ -> Z^\n", 873 | " '''\n", 874 | "\n", 875 | " # real_A need to be a 4-D Tensor of Batch_size, Channel, Height, Width\n", 876 | " # ic(real_A.size()) # torch.Size([8, 3, 128, 128])\n", 877 | " # Produce output using sampled z (cLR-GAN)\n", 878 | " # sampled_z need to be a 2-D Tensor of Batch_size (i.e. 
8) and Latent_dim (i.e 8)\n", 879 | " sampled_z = Variable(Tensor(np.random.normal(0, 1, (real_A.size(0), hp.latent_dim))))\n", 880 | " # Draw random samples from a normal (Gaussian) distribution.\n", 881 | " # ic(sampled_z.size()) # torch.Size([8, 8])\n", 882 | " ''' ic(sampled_z) will output below\n", 883 | " tensor([[ 0.2202, 0.3848, -1.0489, -0.5884, -0.0094, 0.1678, -1.5106, -0.2802],\n", 884 | " [ 1.9893, -0.8738, -2.4284, 1.0219, 0.4162, 0.3345, -1.7501, 0.0511],\n", 885 | " [-0.5878, 0.0200, -0.9107, 0.8697, -1.8777, 0.3819, 0.4788, -3.0111],\n", 886 | " [ 0.9784, 0.7068, -0.2902, 0.9084, 0.2643, -0.0090, 0.6117, -1.4820],\n", 887 | " [-0.1651, -0.5930, 0.1434, 1.6912, -0.6974, -1.7704, -3.2449, -0.4491],\n", 888 | " [-1.1474, -1.6918, 1.6326, -0.7691, 0.4847, 2.0244, -0.3476, -1.1350],\n", 889 | " [ 0.3965, 0.3789, 0.3177, -1.5840, 0.6150, -1.4996, 0.2707, -0.7499],\n", 890 | " [-0.1608, -0.8812, -0.0048, -0.6618, -0.5037, -0.8252, 0.7017, 0.4248]],\n", 891 | " device='cuda:0') '''\n", 892 | " \n", 893 | " _fake_B = generator(real_A, sampled_z)\n", 894 | " # cLR Loss: Adversarial loss\n", 895 | " loss_CLR_GAN = D_LR.compute_loss(_fake_B, valid)\n", 896 | "\n", 897 | " #########################################\n", 898 | " # Total Loss (Generator + Encoder)\n", 899 | " #########################################\n", 900 | "\n", 901 | " loss_total_gen_encoder = loss_VAE_GAN + loss_CLR_GAN + hp.lambda_pixel * loss_pixel_L1_vae + hp.lambda_kl * loss_kl\n", 902 | "\n", 903 | " loss_total_gen_encoder.backward(retain_graph=True)\n", 904 | " optimizer_E.step()\n", 905 | "\n", 906 | " ######################\n", 907 | " # Generator Only Loss\n", 908 | " ######################\n", 909 | " '''Under CLR-GAN - Then the generated output (_fake_B) is passed through the encoder, encoder tries to regain the latent vector from the output image. 
'''\n", 910 | "\n", 911 | " # Latent L1 loss\n", 912 | " _mu, _ = encoder(_fake_B)\n", 913 | " loss_latent = hp.lambda_latent * mae_loss(_mu, sampled_z)\n", 914 | "\n", 915 | " loss_latent.backward()\n", 916 | " optimizer_G.step()\n", 917 | "\n", 918 | " #######################################\n", 919 | " # Train Discriminator (cVAE-GAN)\n", 920 | " #######################################\n", 921 | "\n", 922 | " optimizer_D_VAE.zero_grad()\n", 923 | "\n", 924 | " loss_D_VAE = D_VAE.compute_loss(real_B, valid) + D_VAE.compute_loss(fake_B.detach(), fake)\n", 925 | "\n", 926 | " loss_D_VAE.backward()\n", 927 | " optimizer_D_VAE.step()\n", 928 | "\n", 929 | " ####################################\n", 930 | " # Train Discriminator (cLR-GAN)\n", 931 | " # Conditional Latent Regressor GAN\n", 932 | " ####################################\n", 933 | "\n", 934 | " optimizer_D_LR.zero_grad()\n", 935 | "\n", 936 | " loss_D_LR = D_LR.compute_loss(real_B, valid) + D_LR.compute_loss(_fake_B.detach(), fake)\n", 937 | "\n", 938 | " loss_D_LR.backward()\n", 939 | " optimizer_D_LR.step()\n", 940 | "\n", 941 | " #################\n", 942 | " # Log Progress\n", 943 | " #################\n", 944 | "\n", 945 | " # Determine approximate time left\n", 946 | " batches_done = epoch * len(train_dataloader) + i\n", 947 | " batches_left = hp.n_epochs * len(train_dataloader) - batches_done\n", 948 | " time_left = datetime.timedelta(seconds=batches_left * (time.time() - prev_time))\n", 949 | " prev_time = time.time()\n", 950 | "\n", 951 | " # Print log\n", 952 | " sys.stdout.write(\n", 953 | " \"\\r[Epoch %d/%d] [Batch %d/%d] [D VAE_loss: %f, LR_loss: %f] [G loss: %f, pixel: %f, kl: %f, latent: %f] ETA: %s\"\n", 954 | " % (\n", 955 | " epoch,\n", 956 | " hp.n_epochs,\n", 957 | " i,\n", 958 | " len(train_dataloader),\n", 959 | " loss_D_VAE.item(),\n", 960 | " loss_D_LR.item(),\n", 961 | " loss_total_gen_encoder.item(),\n", 962 | " loss_pixel_L1_vae.item(),\n", 963 | " loss_kl.item(),\n", 964 | " loss_latent.item(),\n", 965 | " time_left,\n", 966 | " )\n", 967 | " )\n", 968 | " \n", 969 | " # If at sample interval save image\n", 970 | " if batches_done % hp.sample_interval == 0:\n", 971 | " clear_output()\n", 972 | " visualise_output(sample_images(batches_done), 30, 10) " 973 | ] 974 | } 975 | ], 976 | "metadata": { 977 | "accelerator": "GPU", 978 | "colab": { 979 | "collapsed_sections": [], 980 | "name": "GEN_5_BiCycleGAN.ipynb", 981 | "provenance": [] 982 | }, 983 | "kernelspec": { 984 | "display_name": "Python 3", 985 | "name": "python3" 986 | }, 987 | "language_info": { 988 | "codemirror_mode": { 989 | "name": "ipython", 990 | "version": 3 991 | }, 992 | "file_extension": ".py", 993 | "mimetype": "text/x-python", 994 | "name": "python", 995 | "nbconvert_exporter": "python", 996 | "pygments_lexer": "ipython3", 997 | "version": "3.9.12" 998 | } 999 | }, 1000 | "nbformat": 4, 1001 | "nbformat_minor": 0 1002 | } 1003 | -------------------------------------------------------------------------------- /DCGAN-from-Scratch-with-PyTorch/DCGAN_ONLY_train.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "pycharm": { 7 | "name": "#%% md\n" 8 | } 9 | }, 10 | "source": [ 11 | "# [Link to my Youtube Video Explaining this whole Notebook](https://www.youtube.com/watch?v=yOtdN6W4O_0&list=PLxqBkZuBynVRyOJs4RWmB_fKlOVe5S8CR&index=13)\n", 12 | "\n", 13 | 
"[![Imgur](https://imgur.com/kL41U2b.png)](https://www.youtube.com/watch?v=yOtdN6W4O_0&list=PLxqBkZuBynVRyOJs4RWmB_fKlOVe5S8CR&index=13)\n", 14 | "\n" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": { 21 | "pycharm": { 22 | "name": "#%%\n" 23 | } 24 | }, 25 | "outputs": [], 26 | "source": [ 27 | "from google.colab import drive\n", 28 | "drive.mount(\"/content/drive\")" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": { 35 | "pycharm": { 36 | "name": "#%%\n" 37 | } 38 | }, 39 | "outputs": [], 40 | "source": [ 41 | "# !python3 \"/content/train.py\"\n", 42 | "!python train.py" 43 | ] 44 | } 45 | ], 46 | "metadata": { 47 | "interpreter": { 48 | "hash": "36cf16204b8548560b1c020c4e8fb5b57f0e4c58016f52f2d4be01e192833930" 49 | }, 50 | "kernelspec": { 51 | "display_name": "Python 3.9.10 64-bit", 52 | "language": "python", 53 | "name": "python3" 54 | }, 55 | "language_info": { 56 | "name": "python", 57 | "version": "3.9.10" 58 | }, 59 | "orig_nbformat": 4 60 | }, 61 | "nbformat": 4, 62 | "nbformat_minor": 2 63 | } -------------------------------------------------------------------------------- /DCGAN-from-Scratch-with-PyTorch/Readme.md: -------------------------------------------------------------------------------- 1 | ### To run this notebook, 2 | 3 | - you can just run the Jupyter Notebook, as this notebook imports the train.py file and which in turn imports all the other modules from the other files (i.e. dcgan.py and utils.py) 4 | 5 | For this notebook the directory where the MNIST dataset was downloaded is in Google Drive, so you only have to change the path of the directory where you want your MNIST to get downloaded. 6 | 7 | And also the variable 'output_path' where the generated fake images will be saved. 8 | 9 | # [Link to my Youtube Video Explaining this whole Notebook](https://www.youtube.com/watch?v=yOtdN6W4O_0&list=PLxqBkZuBynVRyOJs4RWmB_fKlOVe5S8CR&index=13) 10 | 11 | [![Imgur](https://imgur.com/kL41U2b.png)](https://www.youtube.com/watch?v=yOtdN6W4O_0&list=PLxqBkZuBynVRyOJs4RWmB_fKlOVe5S8CR&index=13) -------------------------------------------------------------------------------- /DCGAN-from-Scratch-with-PyTorch/dcgan.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import torch.optim as optim 5 | import numpy as np 6 | from torch.autograd import Variable 7 | from torchvision.utils import save_image 8 | 9 | import matplotlib.pyplot as plt 10 | from torchvision import datasets, transforms 11 | import math 12 | import itertools 13 | from glob import glob 14 | 15 | """ 16 | torch.nn.ConvTranspose2d(in_channels, out_channels, kernel_size, stride=1, padding=0, output_padding=0, groups=1, bias=True, dilation=1, padding_mode='zeros', device=None, dtype=None) 17 | 18 | """ 19 | 20 | 21 | class Generator(nn.Module): 22 | """ 23 | noise_vector: is the length of the z input vector. 24 | 25 | num_gen_filter: size of the feature maps that are propagated through the generator, 26 | 27 | num_ch: The number of channels in the output image (set to 1 for Grayscale images). 28 | 29 | Here, the height and width dimension of the image does not change, only the channel dimension decreases. 
30 | 31 | For the Conv and ConvTranspose layers: 32 | * in_channels (int) – Number of channels/filters in the input image 33 | * out_channels (int) – Number of channels/filters produced by the convolution 34 | 35 | """ 36 | 37 | def __init__(self, num_ch, noise_vector, num_gen_filter): 38 | super(Generator, self).__init__() 39 | self.network = nn.Sequential( 40 | nn.ConvTranspose2d( 41 | in_channels=noise_vector, 42 | out_channels=num_gen_filter * 4, 43 | kernel_size=4, 44 | stride=1, 45 | padding=0, 46 | bias=False, 47 | ), 48 | nn.BatchNorm2d(num_gen_filter * 4), 49 | nn.ReLU(True), 50 | nn.ConvTranspose2d( 51 | in_channels=num_gen_filter * 4, 52 | out_channels=num_gen_filter * 2, 53 | kernel_size=3, 54 | stride=2, 55 | padding=1, 56 | bias=False, 57 | ), 58 | nn.BatchNorm2d(num_gen_filter * 2), 59 | nn.ReLU(True), 60 | nn.ConvTranspose2d( 61 | in_channels=num_gen_filter * 2, 62 | out_channels=num_gen_filter, 63 | kernel_size=4, 64 | stride=2, 65 | padding=1, 66 | bias=False, 67 | ), 68 | nn.BatchNorm2d(num_gen_filter), 69 | nn.ReLU(True), 70 | nn.ConvTranspose2d( 71 | in_channels=num_gen_filter, 72 | out_channels=num_ch, 73 | kernel_size=4, 74 | stride=2, 75 | padding=1, 76 | bias=False, 77 | ), 78 | nn.Tanh(), 79 | ) 80 | 81 | def forward(self, input): 82 | output = self.network(input) 83 | return output 84 | 85 | 86 | class Discriminator(nn.Module): 87 | """ 88 | torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros', device=None, dtype=None) 89 | Here, the height and width dimension of the image does not change, only the channel dimension increases. 90 | """ 91 | 92 | def __init__(self, num_ch, num_disc_filter): 93 | super(Discriminator, self).__init__() 94 | self.network = nn.Sequential( 95 | nn.Conv2d( 96 | in_channels=num_ch, 97 | out_channels=num_disc_filter, 98 | kernel_size=4, 99 | stride=2, 100 | padding=1, 101 | bias=False, 102 | ), 103 | nn.LeakyReLU(0.2, inplace=True), 104 | nn.Conv2d( 105 | in_channels=num_disc_filter, 106 | out_channels=num_disc_filter * 2, 107 | kernel_size=4, 108 | stride=2, 109 | padding=1, 110 | bias=False, 111 | ), 112 | nn.BatchNorm2d(num_disc_filter * 2), 113 | nn.LeakyReLU(0.2, inplace=True), 114 | nn.Conv2d( 115 | in_channels=num_disc_filter * 2, 116 | out_channels=num_disc_filter * 4, 117 | kernel_size=3, 118 | stride=2, 119 | padding=1, 120 | bias=False, 121 | ), 122 | nn.BatchNorm2d(num_disc_filter * 4), 123 | nn.LeakyReLU(0.2, inplace=True), 124 | nn.Conv2d( 125 | in_channels=num_disc_filter * 4, 126 | out_channels=1, 127 | kernel_size=4, 128 | stride=1, 129 | padding=0, 130 | bias=False, 131 | ), 132 | nn.Sigmoid(), 133 | ) 134 | 135 | # The Discriminator outputs a scalar probability to classify the input image as real or fake. 
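    # With 28x28 MNIST inputs, the strided convolutions above shrink the feature map 28 -> 14 -> 7 -> 4 -> 1,
    # so forward() gets a (batch, 1, 1, 1) tensor from self.network and flattens it with
    # view(-1, 1).squeeze(1) into a 1-D vector of per-image real/fake probabilities of shape (batch,).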
136 | def forward(self, input): 137 | output = self.network(input) 138 | return output.view(-1, 1).squeeze(1) 139 | -------------------------------------------------------------------------------- /DCGAN-from-Scratch-with-PyTorch/train.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import torch.optim as optim 5 | import numpy as np 6 | from torch.autograd import Variable 7 | from torchvision.utils import save_image 8 | from torchsummary import summary 9 | 10 | import matplotlib.pyplot as plt 11 | from torchvision import datasets, transforms 12 | 13 | # The below 2 lines should be in a separate Jupyter NB file 14 | # for train.py to run in Google Colab 15 | # Keeping this line inside train.py will give error while 16 | # running in Colab with just `python train.py` 17 | # from google.colab import drive 18 | # drive.mount("/content/drive") 19 | 20 | # Device configuration 21 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 22 | print("Using", device) 23 | 24 | from utils import * 25 | from dcgan import * 26 | 27 | 28 | def train( 29 | num_epochs, 30 | discriminator_net, 31 | generator_net, 32 | optimizerD, 33 | optimizerG, 34 | train_loader, 35 | fake_label, 36 | real_label, 37 | criterion, 38 | output_path, 39 | num_test_samples, 40 | device, 41 | noise_vector, 42 | ): 43 | num_batches = len(train_loader) 44 | 45 | for epoch in range(num_epochs): 46 | for i, (real_images, _) in enumerate(train_loader): 47 | batch_size_real_imgs = real_images.shape[0] 48 | 49 | ############################ 50 | # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z))) 51 | ########################### 52 | """ The standard process to train a DCGAN network is to first train 53 | the discriminator on the batch of samples. 54 | """ 55 | discriminator_net.zero_grad() 56 | 57 | real_images = real_images.to(device) 58 | 59 | # First training on real image, hence fill it with 1 60 | # Create Labels 61 | label = torch.full((batch_size_real_imgs,), real_label, device=device) 62 | 63 | """ The discriminator is used to classify real images (drawn from the training set) 64 | and fake images (produced by the generator). 65 | So, next, train the discriminator network on real images and real labels: 66 | """ 67 | output = discriminator_net(real_images) 68 | 69 | loss_disc_real = criterion(output, label) 70 | 71 | loss_disc_real.backward() 72 | 73 | D_x = output.mean().item() 74 | 75 | # Creating noise variables for the input to whole adversarial network 76 | noise = torch.randn(batch_size_real_imgs, noise_vector, 1, 1, device=device) 77 | 78 | # Generate a batch of fake images using the generator network 79 | fake_images = generator_net(noise) 80 | 81 | # As now training on fake image, fill label with 0's 82 | label.fill_(fake_label) 83 | 84 | # Now train Discriminator on fake images 85 | output = discriminator_net(fake_images.detach()) 86 | 87 | loss_disc_fake = criterion(output, label) 88 | loss_disc_fake.backward() 89 | 90 | D_G_z1 = output.mean().item() 91 | 92 | # Total Discriminator Loss 93 | loss_disc_total = loss_disc_real + loss_disc_fake 94 | 95 | optimizerD.step() 96 | 97 | ############################ 98 | # (2) Update Generator network: maximize log(D(G(z))) 99 | ########################### 100 | 101 | """ When we train the generator network we have to 102 | freeze the discriminator network, as we have already trained it. 
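        In this implementation, "freezing" simply means that only optimizerG.step() is called during
        this phase: gradients still flow back through the discriminator into the generator (note that
        fake_images is NOT detached here, unlike in the discriminator update above), but the
        discriminator's weights are not updated, and any gradients it accumulates are cleared by
        discriminator_net.zero_grad() at the start of the next iteration.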
""" 103 | 104 | generator_net.zero_grad() 105 | 106 | # Now, set Image Label vector values equal to 1 107 | # To fool the Discriminator Network 108 | label.fill_(real_label) 109 | 110 | # After filling all labels with 1 (representing real labels), run discriminator network with fake images to fool it 111 | # To classify real images (drawn from the training set) and fakes images (produced by the generator). 112 | output = discriminator_net(fake_images) 113 | 114 | # And now after I tried to fool discriminator, check how much it was fooled. 115 | # so to the extent above output does not match with "labels" variable (which were all filed up with 1) 116 | # That will be the failure of Generator Network i.e. Generator Loss 117 | loss_generator = criterion(output, label) 118 | 119 | loss_generator.backward() 120 | 121 | D_G_z2 = output.mean().item() 122 | 123 | optimizerG.step() 124 | 125 | if (i + 1) % 100 == 0: 126 | print( 127 | "Epoch [{}/{}], step [{}/{}], d_loss: {:.4f}, g_loss: {:.4f}, D(x): {:.2f}, Discriminator - D(G(x)): {:.2f}, Generator - D(G(x)): {:.2f}".format( 128 | epoch + 1, 129 | num_epochs, 130 | i + 1, 131 | num_batches, 132 | loss_disc_total.item(), 133 | loss_generator.item(), 134 | D_x, 135 | D_G_z1, 136 | D_G_z2, 137 | ) 138 | ) 139 | generator_net.eval() 140 | plot_images( 141 | epoch, 142 | output_path, 143 | num_test_samples, 144 | generator_net, 145 | device, 146 | ) 147 | generator_net.train() 148 | 149 | 150 | ########################################## 151 | # Initialize all the necessary variables 152 | ######################################### 153 | 154 | batch_size = 256 155 | 156 | output_path = "/content/sample_data/" 157 | 158 | # Gather MNIST Dataset 159 | train_loader = get_data_loader(batch_size) 160 | 161 | # Create the Discriminator and Generator network 162 | discriminator_net = Discriminator(num_ch=1, num_disc_filter=32).to(device) 163 | 164 | generator_net = Generator(num_ch=1, noise_vector=100, num_gen_filter=32).to(device) 165 | 166 | # loss function 167 | criterion = nn.BCELoss() 168 | 169 | # optimizers 170 | optimizerD = optim.Adam(discriminator_net.parameters(), lr=0.001) 171 | optimizerG = optim.Adam(generator_net.parameters(), lr=0.001) 172 | 173 | # initialize variables required for training 174 | real_label = 1.0 175 | fake_label = 0.0 176 | # num_batches = len(train_loader) 177 | 178 | num_test_samples = 16 179 | 180 | fixed_noise = torch.randn(num_test_samples, 100, 1, 1, device=device) 181 | 182 | num_epochs = 70 183 | 184 | noise_vector = 100 185 | 186 | ########################################## 187 | # Execute the train Function 188 | ######################################### 189 | 190 | train( 191 | num_epochs=num_epochs, 192 | discriminator_net=discriminator_net, 193 | generator_net=generator_net, 194 | optimizerD=optimizerD, 195 | optimizerG=optimizerG, 196 | train_loader=train_loader, 197 | fake_label=fake_label, 198 | real_label=real_label, 199 | criterion=criterion, 200 | output_path=output_path, 201 | num_test_samples=num_test_samples, 202 | device=device, 203 | noise_vector=noise_vector, 204 | ) 205 | -------------------------------------------------------------------------------- /DCGAN-from-Scratch-with-PyTorch/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import torch.optim as optim 5 | import numpy as np 6 | from torch.autograd import Variable 7 | from torchvision.utils import save_image 8 | 9 | 
import matplotlib.pyplot as plt 10 | from torchvision import datasets, transforms 11 | import math 12 | import itertools 13 | from glob import glob 14 | 15 | 16 | def get_data_loader(batch_size): 17 | # MNIST Dataset 18 | transform = transforms.Compose( 19 | [transforms.ToTensor(), transforms.Normalize(mean=(0.1307,), std=(0.3081,))] 20 | ) 21 | 22 | train_dataset = datasets.MNIST( 23 | root="/content/drive/MyDrive/All_Datasets/MNIST", 24 | train=True, 25 | transform=transform, 26 | download=True, 27 | ) 28 | 29 | # Data Loader 30 | train_loader = torch.utils.data.DataLoader( 31 | dataset=train_dataset, batch_size=batch_size, shuffle=True 32 | ) 33 | return train_loader 34 | 35 | 36 | def plot_images(epoch, path, num_test_samples, generator, device): 37 | z = torch.randn(num_test_samples, 100, 1, 1, device=device) 38 | 39 | plot_grid_size = int(math.sqrt(num_test_samples)) 40 | 41 | title = None 42 | 43 | generated_fake_images = generator(z) 44 | 45 | path += "variable_noise/" 46 | 47 | title = "Variable Noise" 48 | 49 | fig, ax = plt.subplots(plot_grid_size, plot_grid_size, figsize=(6, 6)) 50 | 51 | for i, j in itertools.product(range(plot_grid_size), range(plot_grid_size)): 52 | ax[i, j].get_xaxis().set_visible(False) 53 | ax[i, j].get_yaxis().set_visible(False) 54 | 55 | for sample in range(num_test_samples): 56 | i = sample // 4 57 | j = sample % 4 58 | ax[i, j].cla() 59 | ax[i, j].imshow( 60 | generated_fake_images[sample].data.cpu().numpy().reshape(28, 28), 61 | cmap="Greys", 62 | ) 63 | 64 | label = "Epoch_{}".format(epoch + 1) 65 | fig.text(0.5, 0.04, label, ha="center") 66 | fig.suptitle(title) 67 | -------------------------------------------------------------------------------- /LeNet5_PyTorch.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Build the architecture of LeNet5 from Scratch\n", 8 | "\n", 9 | "# [Link to my Youtube Video Explaining this whole Notebook](https://www.youtube.com/watch?v=Uq5sQUoLXpA&list=PLxqBkZuBynVRyOJs4RWmB_fKlOVe5S8CR&index=9)\n", 10 | "\n", 11 | "[![Imgur](https://imgur.com/yEmSfK0.png)](https://www.youtube.com/watch?v=Uq5sQUoLXpA&list=PLxqBkZuBynVRyOJs4RWmB_fKlOVe5S8CR&index=9)\n", 12 | "\n", 13 | "There are two main steps after that. \n", 14 | "\n", 15 | "First is initializing the layers that we are going to use in our CNN inside __init__ , and \n", 16 | "\n", 17 | "Then the other is to define the sequence in which those layers will process the image. This is defined inside the forward function." 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "![Imgur](https://imgur.com/yrIrojL.png)\n", 25 | "\n", 26 | "The above diagram shows a description of the LeNet-5 architecture as shown in the original document.\n", 27 | "\n", 28 | " \n", 29 | "**Layer 1**- The first layer is the input layer; It is generally not considered a layer of the network as nothing is learned on that layer. The input layer supports 32x32, and these are the dimensions of the images that will be passed to the next layer.\n", 30 | "\n", 31 | "The grayscale images used in the research paper had their pixel values normalized from 0 to 255, to values between -0.1 and 1.175. The reason for normalization is to ensure that the batch of images have a mean of 0 and a standard deviation of 1, the benefits of this is seen in the reduction in the amount of training time. 
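The normalization described above can be expressed directly as a torchvision transform. This is a hedged sketch, not the exact preprocessing of the original 1998 paper: it standardizes MNIST digits with the commonly used dataset mean and std (0.1307, 0.3081), which is also what the data-loading cell later in this notebook does.

```python
import torchvision.transforms as transforms

# Resize to the 32x32 input LeNet-5 expects, convert to [0, 1] tensors,
# then standardize to roughly zero mean / unit std using the MNIST statistics.
lenet_transform = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.ToTensor(),                       # scales pixels to [0, 1]
    transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
])
```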
In the image-classification example with LeNet-5 below, we will normalize the pixel values of the images to values between 0 and 1.\n", 32 |     "\n", 33 |     " \n", 34 |     "**Layer 2** - Layer C1 is a convolutional layer with six 5×5 convolution kernels, producing feature maps of size 28×28; the slight shrinkage at the borders prevents input image information from falling off the edge of the feature maps.\n", 35 |     "\n", 36 |     " \n", 37 |     "**Layer 3** - Layer S2 is the subsampling/pooling layer, which outputs 6 feature maps of size 14×14. Each unit in every feature map is connected to a 2×2 neighborhood in the corresponding feature map in C1. \n", 38 |     " \n", 39 |     "**Layer 4** - Convolutional layer C3 comprises sixteen 5×5 convolution kernels. The input of the first six feature maps of C3 is every contiguous subset of three feature maps in S2; the next six feature maps take their input from contiguous subsets of four feature maps; the next three feature maps take their input from discontiguous subsets of four feature maps. Finally, the input for the last feature map comes from all of the S2 feature maps. \n", 40 |     " \n", 41 |     "**Layer 5** - Layer S4 is similar to S2, with a pooling size of 2×2, and outputs sixteen 5×5 feature maps. \n", 42 |     "\n", 43 |     "**Layer 6** - Layer C5 is a convolutional layer with one hundred twenty convolution kernels of size 5×5. Each unit is connected to the 5×5 neighborhood on all sixteen S4 feature maps. Since the feature map size of S4 is also 5×5, the output size of C5 is 1×1, so S4 and C5 are fully connected.\n", 44 |     "\n", 45 |     "It is referred to as a convolutional layer rather than a fully connected layer because, if the input of LeNet-5 were larger while its architecture stayed unchanged, its output size would be bigger than 1×1, i.e. no longer a fully connected layer.\n", 46 |     " \n", 47 |     "\n", 48 |     "**Layer 7** - The F6 layer is connected to C5 and produces 84 feature maps. In the grayscale images used in the research, the pixel values from 0 to 255 were normalized to values between -0.1 and 1.175. The reason for normalization is to ensure the image batch has a mean of 0 and a standard deviation of 1.\n", 49 |     "\n", 50 |     "The advantage of this is a reduction in training time. In the following example we will normalize the pixel values of the images to take values between 0 and 1."
51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 27, 56 | "metadata": { 57 | "colab": { 58 | "base_uri": "https://localhost:8080/" 59 | }, 60 | "id": "Vx23hoSrw87A", 61 | "outputId": "cfd18dde-d1a9-48ca-8107-a93b87d70c9f" 62 | }, 63 | "outputs": [ 64 | { 65 | "data": { 66 | "text/plain": [ 67 | "device(type='cuda')" 68 | ] 69 | }, 70 | "execution_count": 27, 71 | "metadata": {}, 72 | "output_type": "execute_result" 73 | } 74 | ], 75 | "source": [ 76 | "import torch\n", 77 | "import torch.nn as nn\n", 78 | "import torchvision\n", 79 | "import torchvision.transforms as transforms\n", 80 | "import matplotlib.pyplot as plt\n", 81 | "\n", 82 | "import warnings\n", 83 | "warnings.filterwarnings('ignore')\n", 84 | "\n", 85 | "# Device will determine whether to run the training on GPU or CPU.\n", 86 | "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n", 87 | "# device = 'cpu'\n", 88 | "device" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 17, 94 | "metadata": { 95 | "colab": { 96 | "base_uri": "https://localhost:8080/" 97 | }, 98 | "id": "SBNQhORMxEpf", 99 | "outputId": "ab8eca2e-2d95-4535-8de0-c465fedf4577" 100 | }, 101 | "outputs": [ 102 | { 103 | "name": "stdout", 104 | "output_type": "stream", 105 | "text": [ 106 | "Wed Feb 16 15:15:37 2022 \n", 107 | "+-----------------------------------------------------------------------------+\n", 108 | "| NVIDIA-SMI 460.32.03 Driver Version: 460.32.03 CUDA Version: 11.2 |\n", 109 | "|-------------------------------+----------------------+----------------------+\n", 110 | "| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\n", 111 | "| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\n", 112 | "| | | MIG M. |\n", 113 | "|===============================+======================+======================|\n", 114 | "| 0 Tesla P100-PCIE... 
Off | 00000000:00:04.0 Off | 0 |\n", 115 | "| N/A 48C P0 35W / 250W | 1065MiB / 16280MiB | 0% Default |\n", 116 | "| | | N/A |\n", 117 | "+-------------------------------+----------------------+----------------------+\n", 118 | " \n", 119 | "+-----------------------------------------------------------------------------+\n", 120 | "| Processes: |\n", 121 | "| GPU GI CI PID Type Process name GPU Memory |\n", 122 | "| ID ID Usage |\n", 123 | "|=============================================================================|\n", 124 | "+-----------------------------------------------------------------------------+\n" 125 | ] 126 | } 127 | ], 128 | "source": [ 129 | "!nvidia-smi" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": 18, 135 | "metadata": { 136 | "id": "Dt8ZQ7JeGcVL" 137 | }, 138 | "outputs": [], 139 | "source": [ 140 | "batch_size = 64\n", 141 | "num_classes = 10" 142 | ] 143 | }, 144 | { 145 | "cell_type": "markdown", 146 | "metadata": {}, 147 | "source": [ 148 | "![Imgur](https://imgur.com/yrIrojL.png)" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": null, 154 | "metadata": {}, 155 | "outputs": [], 156 | "source": [ 157 | "#Defining the convolutional neural network\n", 158 | "class LeNet5(nn.Module):\n", 159 | " def __init__(self, num_classes):\n", 160 | " super(LeNet5, self).__init__()\n", 161 | " self.layer1 = nn.Sequential(\n", 162 | " nn.Conv2d(1, 6, kernel_size=5, stride=1, padding=0),\n", 163 | " # Above is Layer-1 - The input for LeNet-5 is a 32×32 grayscale image which passes through the first convolutional layer with 6 feature maps or filters having size 5×5 and a stride of one. The image dimensions changes from 32x32x1 to 28x28x6.\n", 164 | " \n", 165 | " nn.BatchNorm2d(6),\n", 166 | " nn.ReLU(),\n", 167 | " # Layer-2 - Then average pooling layer or sub-sampling layer with a filter size 2×2 and a stride of two. The resulting image dimensions will be reduced to 14x14x6. \n", 168 | " nn.MaxPool2d(kernel_size = 2, stride = 2)\n", 169 | " ) \n", 170 | " \n", 171 | " self.layer2 = nn.Sequential(\n", 172 | " nn.Conv2d(6, 16, kernel_size=5, stride=1, padding=0),\n", 173 | " # Layer-3 Next, there is a second convolutional layer with 16 feature maps having size 5×5 and a stride of 1. In this layer, only 10 out of 16 feature maps are connected to 6 feature maps of the previous layer\n", 174 | " nn.BatchNorm2d(16),\n", 175 | " nn.ReLU(),\n", 176 | " # Layer-4 The fourth layer (S4) is again an average pooling layer with filter size 2×2 and a stride of 2. This layer is the same as the second layer (S2) except it has 16 feature maps so the output will be reduced to 5x5x16.\n", 177 | " nn.MaxPool2d(kernel_size = 2, stride = 2)\n", 178 | " )\n", 179 | " \n", 180 | " self.fc = nn.Linear(400, 120)\n", 181 | " ''' Layer-5 The fifth layer (C5) is a fully connected convolutional layer with 120 feature maps each of size 1×1. Each of the 120 units in C5 is connected to all the 400 nodes (5x5x16) in the fourth layer S4. '''\n", 182 | " self.relu = nn.ReLU()\n", 183 | " '''Layer-6 The sixth layer is a fully connected layer (F6) with 84 units. '''\n", 184 | " self.fc1 = nn.Linear(120, 84)\n", 185 | " self.relu1 = nn.ReLU()\n", 186 | " ''' Layer-7 - Finally, a fully connected layer ŷ with 10 possible values corresponding to the digits from 0 to 9. Since the MNIST data has 10 classes for each of the 10 numerical digits. 
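The dimension bookkeeping in the class above can be verified numerically with the standard convolution output formula, out = floor((in + 2*padding - kernel) / stride) + 1. The small helper below is a sketch (not part of the notebook) that traces a 32x32 input through the LeNet-5 layers defined here.

```python
def conv_out(size, kernel, stride=1, padding=0):
    """Spatial output size of a Conv2d/MaxPool2d layer."""
    return (size + 2 * padding - kernel) // stride + 1

s = 32
s = conv_out(s, kernel=5)              # C1: 32 -> 28
s = conv_out(s, kernel=2, stride=2)    # S2: 28 -> 14
s = conv_out(s, kernel=5)              # C3: 14 -> 10
s = conv_out(s, kernel=2, stride=2)    # S4: 10 -> 5
print(s, 16 * s * s)                   # 5, 400  -> matches nn.Linear(400, 120)
```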
'''\n", 187 | " self.fc2 = nn.Linear(84, num_classes)\n", 188 | " \n", 189 | " def forward(self, x):\n", 190 | " output = self.layer1(x)\n", 191 | " output = self.layer2(output)\n", 192 | " print('output after layer2', output.size()) # torch.Size([32, 16, 5, 5]\n", 193 | " # inside forward method image dimension are : [batch_size, channels, height, width]\n", 194 | " output = output.reshape(output.size(0), -1)\n", 195 | " # print('output after resize', output.size()) # torch.Size([32, 400])\n", 196 | " output = self.fc(output)\n", 197 | " output = self.relu(output)\n", 198 | " output = self.fc1(output)\n", 199 | " output = self.relu1(output)\n", 200 | " output = self.fc2(output)\n", 201 | " return output" 202 | ] 203 | }, 204 | { 205 | "cell_type": "markdown", 206 | "metadata": {}, 207 | "source": [ 208 | "\n", 209 | "## Reason to reshape after `output = self.layer2(output)`\n", 210 | "\n", 211 | "In PyTorch, images are represented as [channels, height, width], so a color image would be [3, 256, 256].\n", 212 | "\n", 213 | "During the training you will get batches of images, so your shape in the forward method will get an additional batch dimension at dim0: [batch_size, channels, height, width].\n", 214 | "\n", 215 | "So before applying the fully connected layer with output-shape of 400, I have to convert the above 4-D Tensor to a 2-D Tensor. \n", 216 | "And I definitely have to keep the first dimension, which is the batch-size, hence to the `.reshape()` function I am passing the unchanged first dimension of `output.size(0)`\n", 217 | "\n", 218 | "But for the second dimension, I am letting PyTorch to decide based on the Matrix calculation. i.e. the second dimension will be inferred given the first dimension.\n", 219 | "\n", 220 | "\n", 221 | "-----------------\n", 222 | "\n", 223 | "## where is softmax in above ? \n", 224 | "\n", 225 | "If you thought that the last layer in a Neural Network should be some sort of activation function like sigmoid() or softmax(), and we dont see that happening here in the above function.\n", 226 | "\n", 227 | "So, where is softmax? And its right here:\n", 228 | "\n", 229 | "```py\n", 230 | "criterion = nn.CrossEntropyLoss()\n", 231 | "\n", 232 | "```\n", 233 | "\n", 234 | "Inside `nn.CrossEntropyLoss()` function is handled the softmax computation which, of course, works with the raw output of your last layer\n", 235 | "\n", 236 | "---\n", 237 | "\n", 238 | "## Setting Hyperparameters\n" 239 | ] 240 | }, 241 | { 242 | "cell_type": "markdown", 243 | "metadata": { 244 | "id": "XHOaF28qw87C" 245 | }, 246 | "source": [ 247 | "### Loading the Dataset\n", 248 | "Using torchvision , we will load the dataset as this will allow us to perform any pre-processing steps easily." 
249 | ] 250 | }, 251 | { 252 | "cell_type": "code", 253 | "execution_count": 19, 254 | "metadata": { 255 | "id": "kqUweCDjGe2R" 256 | }, 257 | "outputs": [], 258 | "source": [ 259 | "class LeNet5(nn.Module):\n", 260 | " def __init__(self,num_classes):\n", 261 | " super(LeNet5,self).__init__()\n", 262 | " self.layer1 = nn.Sequential(\n", 263 | " nn.Conv2d(in_channels=1, out_channels=6, kernel_size=(5,5), stride=(1,1), padding=(0,0)), #Layer 1\n", 264 | " nn.BatchNorm2d(6),\n", 265 | " nn.ReLU(),\n", 266 | " nn.MaxPool2d(kernel_size=(2, 2), stride=(2)) #Layer-2\n", 267 | " ) \n", 268 | " \n", 269 | " self.layer2 = nn.Sequential(\n", 270 | " nn.Conv2d(in_channels=6, out_channels=16, kernel_size=(5,5), stride=(1,1), padding=(0,0)), #Layer 3\n", 271 | " nn.BatchNorm2d(16),\n", 272 | " nn.ReLU(),\n", 273 | " nn.MaxPool2d(kernel_size=(2, 2), stride=(2,2)) #Layer 4\n", 274 | " )\n", 275 | " self.fc = nn.Linear(400, 120) # Layer 5\n", 276 | " self.relu = nn.ReLU()\n", 277 | " self.fc1 = nn.Linear(120, 84) # Layer 6\n", 278 | " self.relu1 = nn.ReLU()\n", 279 | " self.fc2 = nn.Linear(84, num_classes) # Final Layer\n", 280 | " \n", 281 | " def forward(self, x):\n", 282 | " output = self.layer1(x)\n", 283 | " output = self.layer2(output)\n", 284 | " # print('output after layer2', output.size()) # torch.Size([32, 16, 5, 5]\n", 285 | " output = output.reshape(output.size(0), -1) # See note below for this line\n", 286 | " # print('output after resize', output.size()) # torch.Size([32, 400])\n", 287 | " output = self.fc(output)\n", 288 | " output = self.relu(output)\n", 289 | " output = self.fc1(output)\n", 290 | " output = self.relu1(output)\n", 291 | " output = self.fc2(output)\n", 292 | " return output\n", 293 | " " 294 | ] 295 | }, 296 | { 297 | "cell_type": "code", 298 | "execution_count": 20, 299 | "metadata": { 300 | "id": "hg3ntm7uw87E" 301 | }, 302 | "outputs": [], 303 | "source": [ 304 | "# Define relevant variables for the ML task\n", 305 | "\n", 306 | "''' Keeping num_classes at 10, as this will be the output shape from the final Layer of the LeNet5 Neural Network model, because, the output layer will have 10 output neurons, since the MNIST data has 10 classes for each of the 10 numerical digits. 
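The reshape-before-fully-connected step discussed above can also be checked in isolation. A minimal sketch (assuming a batch of 32 MNIST-sized inputs) showing why `output.size(0)` must be preserved while the remaining dimensions are flattened to 400:

```python
import torch

x = torch.randn(32, 16, 5, 5)          # [batch, channels, height, width] after layer2
flat = x.reshape(x.size(0), -1)        # keep batch dim, infer the rest (16*5*5 = 400)
print(flat.shape)                      # torch.Size([32, 400]) -> feeds nn.Linear(400, 120)
```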
'''\n", 307 | "\n", 308 | "train_dataset = torchvision.datasets.MNIST(root = './data',\n", 309 | " train = True,\n", 310 | " transform = transforms.Compose([\n", 311 | " transforms.Resize((32,32)),\n", 312 | " transforms.ToTensor(),\n", 313 | " transforms.Normalize(mean = (0.1307,), std = (0.3081,))]),\n", 314 | " download = True)\n", 315 | "\n", 316 | "\n", 317 | "test_dataset = torchvision.datasets.MNIST(root = './data',\n", 318 | " train = False,\n", 319 | " transform = transforms.Compose([\n", 320 | " transforms.Resize((32,32)),\n", 321 | " transforms.ToTensor(),\n", 322 | " transforms.Normalize(mean = (0.1325,), std = (0.3105,))]),\n", 323 | " download=True)\n", 324 | "\n", 325 | "\n", 326 | "train_dataloader = torch.utils.data.DataLoader(dataset = train_dataset,\n", 327 | " batch_size = batch_size,\n", 328 | " shuffle = True)\n", 329 | "\n", 330 | "\n", 331 | "test_dataloader = torch.utils.data.DataLoader(dataset = test_dataset,\n", 332 | " batch_size = batch_size,\n", 333 | " shuffle = True)" 334 | ] 335 | }, 336 | { 337 | "cell_type": "markdown", 338 | "metadata": { 339 | "id": "PJSNQLNAw87G" 340 | }, 341 | "source": [ 342 | "## Build the architecture of LeNet5 from Scratch" 343 | ] 344 | }, 345 | { 346 | "cell_type": "code", 347 | "execution_count": 21, 348 | "metadata": { 349 | "id": "QuJAdibTw87G" 350 | }, 351 | "outputs": [], 352 | "source": [ 353 | "learning_rate = 0.001\n", 354 | "\n", 355 | "model = LeNet5(num_classes).to(device)\n", 356 | "\n", 357 | "#Setting the loss function\n", 358 | "criterion = nn.CrossEntropyLoss()\n", 359 | "\n", 360 | "#Setting the optimizer with the model parameters and learning rate\n", 361 | "optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)\n", 362 | "\n", 363 | "#this is defined to print how many steps are remaining when training\n", 364 | "total_step = len(train_dataloader)" 365 | ] 366 | }, 367 | { 368 | "cell_type": "code", 369 | "execution_count": 22, 370 | "metadata": { 371 | "colab": { 372 | "base_uri": "https://localhost:8080/" 373 | }, 374 | "id": "cLjcifcvGj1H", 375 | "outputId": "22c12c91-675b-4536-c44b-267fa911dcf1" 376 | }, 377 | "outputs": [ 378 | { 379 | "name": "stdout", 380 | "output_type": "stream", 381 | "text": [ 382 | "Length of train_dataloader 938\n", 383 | "Length of test_dataloader 157\n" 384 | ] 385 | } 386 | ], 387 | "source": [ 388 | "print('Length of train_dataloader ', len(train_dataloader))\n", 389 | "print('Length of test_dataloader ', len(test_dataloader))" 390 | ] 391 | }, 392 | { 393 | "cell_type": "markdown", 394 | "metadata": { 395 | "id": "ZbZJsUIvw87H" 396 | }, 397 | "source": [ 398 | "## Setting Hyperparameters\n" 399 | ] 400 | }, 401 | { 402 | "cell_type": "code", 403 | "execution_count": 23, 404 | "metadata": { 405 | "id": "Q3f-rT9_w87J" 406 | }, 407 | "outputs": [], 408 | "source": [ 409 | "def train(model, criterion, optimizer, num_epochs=10):\n", 410 | " total_training_loss = []\n", 411 | " total_step = len(train_dataloader)\n", 412 | " \n", 413 | " for epoch in range(num_epochs):\n", 414 | " running_loss = 0.0\n", 415 | " \n", 416 | " for i, (images, labels) in enumerate(train_dataloader): \n", 417 | " images = images.to(device)\n", 418 | " labels = labels.to(device)\n", 419 | " \n", 420 | " #Forward pass\n", 421 | " outputs = model(images)\n", 422 | " loss = criterion(outputs, labels)\n", 423 | " \n", 424 | " # Backward and optimize\n", 425 | " optimizer.zero_grad() # Clear the past gradient by set the gradients to zero before every update\n", 426 | " loss.backward() # calculate 
the new gradients\n", 427 | " # print('images.size ', images.size(0))\n", 428 | " ''' \"\"\" Update the running loss \n", 429 | " we need to use, loss.item() instead of loss alone in running_loss calculation and averaging. \n", 430 | " Because loss gives you a grad_function, not a float value. \n", 431 | " The item() method extracts the loss’s value as a Python float.\n", 432 | " \"\"\" '''\n", 433 | " running_loss += loss.item() * images.size(0)\n", 434 | " optimizer.step() # we update the weights \n", 435 | " \n", 436 | " \n", 437 | " if (i+1) % 400 == 0:\n", 438 | " print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' \n", 439 | " .format(epoch+1, num_epochs, i+1, total_step, loss.item()))\n", 440 | " epoch_loss = running_loss / len(train_dataloader)\n", 441 | " total_training_loss.append(epoch_loss)\n", 442 | " \"\"\" To summarize, in the above code what I did is\n", 443 | " \n", 444 | " 1. => multiply each average batch loss with batch-length. \n", 445 | " The batch-length is inputs.size(0) which gives the number total images in each batch. \n", 446 | " Essentially I am un-averaging the Batch-Loss\n", 447 | "\n", 448 | " Do this all the batches inside the batch-running loop.\n", 449 | " And then after that loop, i.e. outside the batch-loop and coming back to my epoch-loop\n", 450 | " \n", 451 | " 2. => Divide this accumulated un-averaged Batch-loss from all batches, \n", 452 | " by the number of samples (len(train_dataloader)) to get the exact train loss average for the epoch \"\"\"\n", 453 | " return total_training_loss" 454 | ] 455 | }, 456 | { 457 | "cell_type": "code", 458 | "execution_count": 24, 459 | "metadata": { 460 | "colab": { 461 | "base_uri": "https://localhost:8080/" 462 | }, 463 | "id": "KHSSd1-gC9n4", 464 | "outputId": "49b9194c-9e42-471e-a3e4-f2b449d018bc" 465 | }, 466 | "outputs": [ 467 | { 468 | "name": "stdout", 469 | "output_type": "stream", 470 | "text": [ 471 | "Epoch [1/30], Step [400/938], Loss: 0.0270\n", 472 | "Epoch [1/30], Step [800/938], Loss: 0.1064\n", 473 | "Epoch [2/30], Step [400/938], Loss: 0.0322\n", 474 | "Epoch [2/30], Step [800/938], Loss: 0.0453\n", 475 | "Epoch [3/30], Step [400/938], Loss: 0.0066\n", 476 | "Epoch [3/30], Step [800/938], Loss: 0.0386\n", 477 | "Epoch [4/30], Step [400/938], Loss: 0.0346\n", 478 | "Epoch [4/30], Step [800/938], Loss: 0.0306\n", 479 | "Epoch [5/30], Step [400/938], Loss: 0.0016\n", 480 | "Epoch [5/30], Step [800/938], Loss: 0.0295\n", 481 | "Epoch [6/30], Step [400/938], Loss: 0.0009\n", 482 | "Epoch [6/30], Step [800/938], Loss: 0.0145\n", 483 | "Epoch [7/30], Step [400/938], Loss: 0.0020\n", 484 | "Epoch [7/30], Step [800/938], Loss: 0.0363\n", 485 | "Epoch [8/30], Step [400/938], Loss: 0.0061\n", 486 | "Epoch [8/30], Step [800/938], Loss: 0.0026\n", 487 | "Epoch [9/30], Step [400/938], Loss: 0.0335\n", 488 | "Epoch [9/30], Step [800/938], Loss: 0.0120\n", 489 | "Epoch [10/30], Step [400/938], Loss: 0.0395\n", 490 | "Epoch [10/30], Step [800/938], Loss: 0.0007\n", 491 | "Epoch [11/30], Step [400/938], Loss: 0.0007\n", 492 | "Epoch [11/30], Step [800/938], Loss: 0.0060\n", 493 | "Epoch [12/30], Step [400/938], Loss: 0.0012\n", 494 | "Epoch [12/30], Step [800/938], Loss: 0.0011\n", 495 | "Epoch [13/30], Step [400/938], Loss: 0.0221\n", 496 | "Epoch [13/30], Step [800/938], Loss: 0.0421\n", 497 | "Epoch [14/30], Step [400/938], Loss: 0.0032\n", 498 | "Epoch [14/30], Step [800/938], Loss: 0.0059\n", 499 | "Epoch [15/30], Step [400/938], Loss: 0.0036\n", 500 | "Epoch [15/30], Step [800/938], Loss: 0.0000\n", 501 | 
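The epoch-loss averaging commented inside the training loop above is a common pattern; note that dividing by `len(train_dataloader)` averages over the number of batches. If you instead want the average loss per sample, divide by `len(train_dataloader.dataset)`. A minimal sketch of the per-sample variant (assumption: same variable names as the notebook):

```python
# running_loss accumulates loss.item() * images.size(0), i.e. un-averaged batch losses
epoch_loss_per_sample = running_loss / len(train_dataloader.dataset)  # 60,000 for MNIST train
epoch_loss_per_batch = running_loss / len(train_dataloader)           # 938 batches at batch_size=64
```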
"Epoch [16/30], Step [400/938], Loss: 0.0000\n", 502 | "Epoch [16/30], Step [800/938], Loss: 0.0059\n", 503 | "Epoch [17/30], Step [400/938], Loss: 0.0016\n", 504 | "Epoch [17/30], Step [800/938], Loss: 0.0001\n", 505 | "Epoch [18/30], Step [400/938], Loss: 0.0006\n", 506 | "Epoch [18/30], Step [800/938], Loss: 0.0001\n", 507 | "Epoch [19/30], Step [400/938], Loss: 0.0001\n", 508 | "Epoch [19/30], Step [800/938], Loss: 0.0032\n", 509 | "Epoch [20/30], Step [400/938], Loss: 0.0004\n", 510 | "Epoch [20/30], Step [800/938], Loss: 0.0001\n", 511 | "Epoch [21/30], Step [400/938], Loss: 0.0021\n", 512 | "Epoch [21/30], Step [800/938], Loss: 0.0004\n", 513 | "Epoch [22/30], Step [400/938], Loss: 0.0050\n", 514 | "Epoch [22/30], Step [800/938], Loss: 0.0082\n", 515 | "Epoch [23/30], Step [400/938], Loss: 0.0019\n", 516 | "Epoch [23/30], Step [800/938], Loss: 0.0000\n", 517 | "Epoch [24/30], Step [400/938], Loss: 0.0072\n", 518 | "Epoch [24/30], Step [800/938], Loss: 0.0000\n", 519 | "Epoch [25/30], Step [400/938], Loss: 0.0002\n", 520 | "Epoch [25/30], Step [800/938], Loss: 0.0003\n", 521 | "Epoch [26/30], Step [400/938], Loss: 0.0723\n", 522 | "Epoch [26/30], Step [800/938], Loss: 0.0000\n", 523 | "Epoch [27/30], Step [400/938], Loss: 0.0012\n", 524 | "Epoch [27/30], Step [800/938], Loss: 0.0442\n", 525 | "Epoch [28/30], Step [400/938], Loss: 0.0257\n", 526 | "Epoch [28/30], Step [800/938], Loss: 0.0037\n", 527 | "Epoch [29/30], Step [400/938], Loss: 0.0000\n", 528 | "Epoch [29/30], Step [800/938], Loss: 0.0000\n", 529 | "Epoch [30/30], Step [400/938], Loss: 0.0001\n", 530 | "Epoch [30/30], Step [800/938], Loss: 0.0021\n" 531 | ] 532 | } 533 | ], 534 | "source": [ 535 | "total_training_loss = train(model, criterion, optimizer, num_epochs=30)" 536 | ] 537 | }, 538 | { 539 | "cell_type": "code", 540 | "execution_count": 25, 541 | "metadata": { 542 | "colab": { 543 | "base_uri": "https://localhost:8080/" 544 | }, 545 | "id": "KrAEczZ6DAWx", 546 | "outputId": "9e683e9f-e5e3-4d80-99cb-ad55338f0657" 547 | }, 548 | "outputs": [ 549 | { 550 | "data": { 551 | "text/plain": [ 552 | "[10.568266467943882,\n", 553 | " 3.527540081909407,\n", 554 | " 2.6088559687145545,\n", 555 | " 2.1823721174309565,\n", 556 | " 1.924547830021688,\n", 557 | " 1.6057489041481285,\n", 558 | " 1.3537017107074425,\n", 559 | " 1.1566425667095708,\n", 560 | " 1.1153176984784983,\n", 561 | " 0.983036599652807,\n", 562 | " 0.8899051300895366,\n", 563 | " 0.6929740591073604,\n", 564 | " 0.7318501428482301,\n", 565 | " 0.7322920497585915,\n", 566 | " 0.4553440813554328,\n", 567 | " 0.6452689993842532,\n", 568 | " 0.5951177723426396,\n", 569 | " 0.46549444908607823,\n", 570 | " 0.48881708133601715,\n", 571 | " 0.4589629755210131,\n", 572 | " 0.40898956688392407,\n", 573 | " 0.433147037635138,\n", 574 | " 0.3640874308061105,\n", 575 | " 0.3657881085964735,\n", 576 | " 0.3087785066473603,\n", 577 | " 0.3707788242852709,\n", 578 | " 0.24622227986580741,\n", 579 | " 0.282929215015277,\n", 580 | " 0.3211896866044501,\n", 581 | " 0.24679545621306762]" 582 | ] 583 | }, 584 | "execution_count": 25, 585 | "metadata": {}, 586 | "output_type": "execute_result" 587 | } 588 | ], 589 | "source": [ 590 | "total_training_loss" 591 | ] 592 | }, 593 | { 594 | "cell_type": "code", 595 | "execution_count": 28, 596 | "metadata": { 597 | "colab": { 598 | "base_uri": "https://localhost:8080/", 599 | "height": 278 600 | }, 601 | "id": "gVWgnhLgDACb", 602 | "outputId": "48899b3d-1325-437f-ea99-d7b4e7a2e617" 603 | }, 604 | "outputs": [ 605 | { 606 | "data": { 607 
| "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX4AAAEGCAYAAABiq/5QAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO3de3RU9bn/8fcDieES5RYQBSqgFASk2AZE9GjwwlHkFD2K1aL10h6tB0Xpr4jVVYtdUi+tWmm11lZae1TUini0tmJFIVY5argpN6vlIlGEJEIgChjC8/vjOyHhnpDM7Mzsz2utvWZm75nsZzP67O98r+buiIhIfDSLOgAREUktJX4RkZhR4hcRiRklfhGRmFHiFxGJmayoA6iLvLw87969e9RhiIiklXnz5pW6e8fd96dF4u/evTtFRUVRhyEiklbMbPXe9quqR0QkZpT4RURiRolfRCRm0qKOX0SalsrKSoqLi9m6dWvUoQjQokULunbtSnZ2dp3er8QvIvVWXFzMoYceSvfu3TGzqMOJNXenrKyM4uJievToUafPqKpHROpt69atdOjQQUm/CTAzOnToUK9fX0r8InJQlPSbjvp+F0r8IiIxk9mJ/7bb4NvfjjoKEWlkZWVlDBw4kIEDB9K5c2e6dOmy8/WXX365388WFRUxbty4A55j6NChjRLr7NmzGTlyZKP8rcaS2Y27H30EhYVRRyEijaxDhw4sXLgQgEmTJpGbm8sPf/jDnce3b99OVtbe01t+fj75+fkHPMebb77ZOME2QZld4s/Lg9JS0CpjIhnv8ssv5/vf/z4nnHACN954I2+//TYnnngixx9/PEOHDuX9998Hdi2BT5o0iSuvvJKCggJ69uzJlClTdv693Nzcne8vKCjgggsuoE+fPowZM4bqlQv/+te/0qdPH77xjW8wbty4epXsp02bxnHHHUf//v2ZOHEiAFVVVVx++eX079+f4447jvvuuw+AKVOm0LdvXwYMGMBFF13U4H+rzC7x5+XBtm3w+eeQ+BJFJAkKCvbcd+GF8N//DV98ASNG7Hn88svDVloKF1yw67HZsw8qjOLiYt58802aN2/Opk2beP3118nKyuKVV17h5ptvZvr06Xt8Zvny5bz22mts3ryZ3r17c8011+zRH37BggUsWbKEI488kpNOOok33niD/Px8rr76agoLC+nRowcXX3xxneP85JNPmDhxIvPmzaNdu3YMHz6c5557jm7duvHxxx+zePFiADZu3AjAnXfeycqVK8nJydm5ryEyv8QP4T8sEcl4o0ePpnnz5gCUl5czevRo+vfvz/jx41myZMleP3POOeeQk5NDXl4enTp1Yt26dXu8Z/DgwXTt2pVmzZoxcOBAVq1axfLly+nZs+fOvvP1SfzvvPMOBQUFdOzYkaysLMaMGUNhYSE9e/ZkxYoVXHfddbz00kscdthhAAwYMIAxY8bw2GOP7bMKqz4yu8TfvTuceCJUVkYdiUhm218JvVWr/R/PyzvoEv7uWrduvfP5j3/8Y4YNG8aMGTNYtWoVBXv7VQLk5OTsfN68eXO2b99+UO9pDO3atWPRokXMnDmThx56iKeffpqpU6fy4osvUlhYyAsvvMDkyZN57733GnQDyOwS/7Bh8Oab0KtX1JGISIqVl5fTpUsXAP74xz82+t/v3bs3K1asYNWqVQA89dRTdf7s4MGDmTNnDqWlpVRVVTFt2jROPfVUSktL2bFjB+effz6333478+fPZ8eOHaxZs4Zhw4Zx1113UV5eTkVFRYNiz+wSv4jE1o033shll13G7bffzjnnnNPof79ly5Y8+OCDnHXWWbRu3ZpBgwbt872zZs2ia9euO1//+c9/5s4772TYsGG4O+eccw6jRo1i0aJFXHHFFezYsQOAO+64g6qqKi655BLKy8txd8aNG0fbtm0bFLt5knq8mNlUYCSw3t37J/a1B54CugOrgAvdfcOB/lZ+fr4f1EIsFRUwdChcfz1897v1/7yI7NWyZcs49thjow4jchUVFeTm5uLujB07ll69ejF+/PhIYtnbd2Jm89x9j76ryazq+SNw1m77bgJmuXsvYFbidfK0agVLl0Lip5iISGP63e9+x8CBA+nXrx/l5eVcffXVUYdUJ0mr6nH3QjPrvtvuUUBB4vmjwGxgYrJioFkz6NBBvXpEJCnGjx8fWQm/IVLduHu4u69NPP8UOHxfbzSzq8ysyMyKSkpKDv6MSvwiSZGsamKpv/p+F5H16vEQ6T6jdfeH3T3f3fM7dtxjkfi6y8uDhtw4RGQPLVq0oKysTMm/Caiej79FixZ1/kyqe/WsM7Mj3H2tmR0BrE/6GQsKQiOviDSarl27UlxcTIN+jUujqV6Bq65SnfifBy4D7kw8/m/Sz/jTnyb9FCJxk52dXefVnqTpSVpVj5lNA+YCvc2s2My+S0j4Z5rZB8AZidciIpJCyezVs6+JK05P1jn3aupUmDABVqyANm1SemoRkaYos6dsAMjKgs8+U88eEZGEzE/8mqFTRGQXSvwiIjGjxC8iEjOZn/g7dQoLrnfrFnUkIiJNQuZPy5ybC48/HnUUIiJNRuaX+KtVVUUdgYhIkxCPxH/CCTB6dNRRiIg0CfFI/K1aqXFXRCQhHok/L0+JX0QkQYlfRCRm4pP4y8ogsYCxiEicZX53ToBTToFt26CyEnJyoo5GRCRS8Uj8Z54ZNhERiUlVjzts3hxK/SIiMRePxD9vHhx2GMycGXUkIiKRi0fi10RtIiI7xSvxl5VFG4eISBMQj8TfujW0aKESv4gIcUn8ZhrEJSKSEI/unAATJ8JRR0UdhYhI5OKT+K+9NuoIRESahHhU9QCUl8OKFVFHISISufgk/ltugUGDoo5CRCRy8Un8eXmwYQNs3x51JCIikYpX4ncPyV9EJMbilfhBXTpFJPaU+EVEYiY+ib9fP3jwQejZM+pIREQiFUniN7PxZrbEzBab2TQza5H0kx5xBFxzDXTpkvRTiYg0ZSlP/GbWBRgH5Lt7f6A5cFHST+wOixbB6tVJP5WISFMWVVVPFtDSzLKAVsAnST+jGQwdClOmJP1UIiJNWcoTv7t/DPwC+AhYC5S7+8u7v8/MrjKzIjMrKikpaZyTa6I2EZFIqnraAaOAHsCRQGszu2T397n7w+6e7+75HTt2bJyT5+VpTn4Rib0oqnrOAFa6e4m7VwLPAkNTcuaOHVXiF5HYiyLxfwQMMbNWZmbA6cCylJxZVT0iIqmfltnd3zKzZ4D5wHZgAfBwSk5+7bUwZkxKTiUi0lRFMh+/u/8E+EnKTzxkSMpPKSLS1MRn5C7AunXwt7/B559HHYmISGTilfgLC2HECFi5MupIREQiE6/Er4naRERilvg7dAiPSvwiEmPxSvwq8YuIxCzxq8QvIhJNd87I5OTAzJnQp0/UkYiIRCZeiR9g+PCoIxARiVS8qnoA5swJfflFRGIqfiX+u+6C9evh7LOjjkREJBLxK/FramYRibl4Jn716hG
RGItn4q+ogK1bo45ERCQS8Uz8oOoeEYmt+CX+886Dd98Nq3GJiMRQ/Hr1dOyopC8isRa/Ev+mTfDAA7B4cdSRiIhEIn6Jf+vWsATjnDlRRyIiEon4Jf727cOjunSKSEzFL/FnZUHbtkr8IhJb8Uv8oEFcIhJrSvwiIjETv+6cANOnQ8uWUUchIhKJeCb+I4+MOgIRkcjEs6qnsBB+/GNwjzoSEZGUi2finzsXbr8dvvgi6khERFIunolfE7WJSIzFO/GrZ4+IxJASv4hIzCjxi4jETCTdOc2sLfB7oD/gwJXuPjdlARxzDGzeDK1bp+yUIiJNRVT9+O8HXnL3C8zsEKBVSs/evDnk5qb0lCIiTUXKq3rMrA1wCvAIgLt/6e4bUx0Ht90Gf/pTyk8rIhK1KOr4ewAlwB/MbIGZ/d7M9qhzMbOrzKzIzIpKSkoaP4onn4S//KXx/66ISBMXReLPAr4O/Mbdjwc+B27a/U3u/rC757t7fsdkLJXYoYMad0UklqJI/MVAsbu/lXj9DOFGkFqaoVNEYqpOid/MWptZs8Tzr5rZN80s+2BO6O6fAmvMrHdi1+nA0oP5Ww2ixC8iMVXXEn8h0MLMugAvA5cCf2zAea8DHjezd4GBwM8a8LcOTl4eVFZqojYRiZ26Jn5z9y+A/wQedPfRQL+DPam7L0zU3w9w93PdfcPB/q2DdscdUFICZik/tYhIlOqc+M3sRGAM8GJiX/PkhJQiSvgiElN1Tfw3AD8CZrj7EjPrCbyWvLBSYMkSuPhiWL486khERFKqTonf3ee4+zfd/a5EI2+pu49LcmzJtXlz6Mu/cmXUkYiIpFRde/U8YWaHJQZaLQaWmtmE5IaWZJqoTURiqq5VPX3dfRNwLvA3wujbS5MWVSoo8YtITNU18Wcn+u2fCzzv7pWEWTXTV5s2YbI2JX4RiZm6Jv7fAquA1kChmR0FbEpWUClhBr17h+QvIhIjdZqW2d2nAFNq7VptZsOSE1IKLVkSdQQiIilX18bdNmZ2b/VsmWZ2D6H0LyIiaaauVT1Tgc3AhYltE/CHZAWVMj//OXznO1FHISKSUnVdgetodz+/1uvbzGxhMgJKqQ8/hJkzo45CRCSl6lri32JmJ1e/MLOTgC3JCSmF8vKgrAx27Ig6EhGRlKlrif/7wJ8SyyYCbAAuS05IKZSXB1VVUF4O7dpFHY2ISErUdcqGRe7+NWAAMCCxctZpSY0sFTSIS0RiqF4rcLn7psQIXoAfJCGe1OrWDb72tTAvv4hITNS1qmdv0n9e44ICWJj+bdQiIvXRkDV303vKBhGRmNpv4jezzWa2aS/bZuDIFMWYPFu3wpAh8MgjUUciIpIy+63qcfdDUxVIJHJyYMEC+OCDqCMREUmZhlT1pD+z0LNHvXpEJEbinfhBiV9EYkeJX4lfRGKmId05M8OQIUr8IhIrSvyTJ0cdgYhISqmqR0QkZpT4p00LUzeoukdEYkKJ3x2Ki5X4RSQ2lPg1Q6eIxIwSvxK/iMRMZInfzJqb2QIz+0tUMQBK/CISO1GW+K8HlkV4/iAvD849F7p0iToSEZGUiKQfv5l1Bc4BJhP1gi6tWsGMGZGGICKSSlGV+H8J3Ajsc5VzM7vKzIrMrKikpCR1kYmIZLiUJ34zGwmsd/d5+3ufuz/s7vnunt+xY8fkBnXGGaG6R0QkBqIo8Z8EfNPMVgFPAqeZ2WMRxFGjWTNYty7SEEREUiXlid/df+TuXd29O3AR8Kq7X5LqOHahGTpFJEbUjx+U+EUkViKdndPdZwOzo4wBCIl/40aorITs7KijERFJKpX4IczJP3ZsSPwiIhlO8/EDDB8eNhGRGFCJv1plJWzdGnUUIiJJp8QPsH49tG0Ll14adSQiIkmnxA/QqRPcdBM88wy88krU0YiIJJUSf7UJE6BnT7juOvjyy6ijERFJGiX+ai1awP33w/Ll4VFEJEMp8dc2cmTYlkU/W7SISLKoO+funnkGcnKijkJEJGlU4t9dddJftgzm7XcCURGRtKQS/97s2AGjRoWbwPz5msZBRDKKSvx706wZ3H03LF4MDz4YdTQiIo1KiX9fRo2Cs86CW2+FTz+NOhoRkUajxL8vZjBlSpjGYeLEqKMREWk0quPfn1694Ec/CsnfPdwMRETSnBL/gUyaFHUEIiKNSlU9dfX3v8OTT0YdhYhIgynx14U73HUXXHMNlJREHY2ISIMo8deFGfzqV1BREWbxFBFJY0r8dXXssXDDDTB1Krz1VtTRiIgcNCX++rj1VjjySBgzBrZsiToaEZGDol499XHoofDss6HE37Jl1NGIiBwUlfjr64QTYNy48PzVV+Haa2HbtmhjEhGpByX+hpg7Fx54AE49FdasiToaEZE6UeJviFtuCfP3L10KX/86zJoVdUQiIgekxN9Q558P77wTFmwfPlw9fkSkyVPjbmPo3Tsk/KlTYfDgsE9z+4hIE6USf2PJzQ2NvmawYgUMGQLvvht1VCIie1DiT4aystDYO2QI3HgjrF0bdUQiIjulPPGbWTcze83MlprZEjO7PtUxJN2gQWHJxnPPhXvuge7dw6hfEZEmIIoS/3bg/7l7X2AIMNbM+kYQR3J17gxPPAH//CdccUVYzrHa++9HF5eIxF7KE7+7r3X3+Ynnm4FlQJdUx5EyRx8NDz0E994bXr/+OvTpAyNGQGFhaAQWEUmhSOv4zaw7cDywRx9IM7vKzIrMrKgkk6ZCPu44mDwZiorCwK+TT4YXXoAdO6KOTERiwjyiEqeZ5QJzgMnu/uz+3pufn+9FRUWpCSxVtmwJ3T9//vMw3fPq1dC6ddRRiUgGMbN57p6/+/5ISvxmlg1MBx4/UNLPWC1bwtix8MEHMHt2SPrbt4eF3TX9g4gkURS9egx4BFjm7vem+vxNTnY29O8fni9cCFOmhAFhP/2ppn4WkaSIosR/EnApcJqZLUxsIyKIo+nJz4fly2HkSPjJT8LiL888owZgEWlUkdXx10dG1vEfyOzZcP31UFkJixaFXwYiIvXQpOr4pQ4KCmDePHjppZD0N20Ko4DLyqKOTETSnBJ/U5aVBV/5Snj+2mthLECvXnDbbVBaGm1sIpK2lPjTxahRofH35JNh0qRwQ7juOqiqijoyEUkzSvzppH9/eP55WLIELroIPvoImjcPx1avjjY2EUkbSvzpqG/fMPhrxozwevXqMDXEGWfAzJnqBSQi+6XEn86qJ35r1w7uuAOWLYOzzoLjj4fHHw89gkREdqPEnwkOOwwmTAgLwEydGhL+5ZfDp5+G4+Xl+hUgIjsp8WeSnJwwBfR774V1gLt1C/vPOw/69YOf/QxWrYo0RBGJnhJ/JmrWDAYOrHn97W9DXh7ccgv06AH/9m/w3HPRxScikVLij4PvfS/M/b9yZZgSurS0phfQ5s3w1FNhsNjq1WGmUFULiWQ0TdkQR+6h/39WFjz2GFx66a7HDzkEXn0VTjopPD74IHToELZjjoFTTgm9iMyiiV9E6mRfUzZkRRGMRMwsJH0I4wF69QoNwWVl4ddAWVnNiO
ENG2Dp0rCvrKxmwNjSpWESufffD43JffvuurykiDRZSvxxl5UFJ5yw7+Pnnx82CKuELV8Ob74Zlo+EsJDMI49A+/ZhVPEpp4Q2hEGD9ItApIlSVY80zOrVYSbR118P7QgffBCqgT78MBy/995QtdS/f+hZ1KWLbggiKaKqHkmOo46Cyy4LG8DatVBcXHP8scdgwYKa123awHe+ExacAfi//wtVRm3apC5mkZhT4pfGdcQRYas2f35oN1iyBBYvDo/HHBOOVVaGqqFmzeA//gMuuQTOPjs0LotI0qiqR6JTWRmqiV58EaZNg/XrQ1vB1KlhNlIRaRBV9UjTk50NZ54Ztl/8Al55JVQN9e4djr/2WuhOOmZMTWPygWzfXtNjac0a+PLLXccltGwZ2hkgjGLOyoKuXRvtkkTSgfrfSdOQlRUmmHvssZokP3dumGbi2GNDL6E77oBHH635zIQJYaWyAQNC8m7VCk47reb42WeHaqVevWq2K6+sOX7qqWFai3//99DWIBITKvFL03XzzWHuoSefDDeEm28OPYaqG5I3bgxdTI8+OtwY2rev+bUA4aZRXh6eV/ckqt3+8MtfhjaH+++HE08MN4q77w49kA7Wtm1hwNthh8Hhh0PnzuHx8MPVdiFNhur4JX2UloZSfatWjft3Kyrg178OYxL+8pdwE6iqqlnk5kA++CBMif3Nb4Zqpd69w77aLr4YnngiPB8xItwYOneGr341jHvo108D4KTRqY5f0l9eXnL+bm4u3HQTXH99aAMAGDs2jGaeNGnXCe+qrVkT5jiaNi30XGrfHtatC1VW8+fDZ5+Fz69bFx6POip8rrIyzI/04Yeh62tFRdg/YUL4tVFZCW+/Dfn5YbZVkSRQ4hepVp30Abp3D1VMxx8fprWeNCm0JQDcdx/84Afh+aBBcM898K1v1TQq5+aGrXrai9qys8NgNwi/DlatCq/79Qv75s8PI6BzcmDw4PBr4OSTw2Nubqi6Ki6Gzz8P2xdfhMcRI8LxuXPhjTfCL4ra26BB4W9u2xZ+WWRnJ+EfUNKFqnpE9mXjxtAOcN99sGlT6GE0bFgYkPbii2Geo+oxCY1l0yaYNQv+8Y9wQ5g/P1Q7zZoVGq4ffhiuvnrPz1XPnVT7plTbJ5+E9o3bbgs3sZYtww2hU6fQ/jBjRrhx/OMfYRbX2u0TeXk11V6bN4ebT0VFzc3HPTSUQ/h3MQvTfx91VONXy0m97KuqR4lf5EA2bIAHHghVPiNHpvbcn38eehydeGJIov/6FxQVQevWu27HHBNK9FVVsGVLuIGUl4fHTZtC76fqXxtz5oR9GzaEsRMlJSHhN2sG//Vf8Pvf7xpDq1Yh0ZvBBRfA9Om7Hu/SpWa09tlnw0sv1Rzr1CkM0vvzn8PrF14IcfToEUZru4e427cPx9euDV1y3Wu2li3DDagxVFbCxx+Hf5s+fTK+Ok2JX0QObPPmkHzXratpn9iyJbRBALz8Mnz00a43nTZtQpsEhBvJhx+GKqzqrX17uPPOcLxXr5p5nKqNHBluCBB+lVQvGVpt9Gh4+unwvKAgnLNLFzjyyPCYnx+q5HbsCLGvWbPrNnIknH46vPtuuHlX57xDDgmfnTw5/F335M4jtX59+DfdsCFcw9FHQ9u2yTsfatwVkbo49NCwffWrez8+fPj+P9+pU9iGDt378dmzw81g5cqahu3qhm8IA/m2bAkJuHqr7qJbVRVi+/jj8Ktn/fqwf/z4kPhLS/ccjNeqVWivOf30cJ5bbw3vyc0Niw+98Qa0aBHe+8ILoZps6NCarV+/mmqu7dtD9d+GDTXb5s3hVxDA//xPGHT42Wfh2GefhV838+eH49/7Xs0Nrtpxx4UbUvXnv/wy3BCOPjrc1JLU00slfhFJT5WVoYSfnR1+KbjDb38bBuV17Roe27Wreym+sDC06bzxRs1N5dBDQym9ZUu44YYw5qM2s3BDaNYsHJ8+PfzCadcuPHbuHMZ1QLjplZaGY5s2hWq7Zs1q2mQGDAjrZVfLyYFf/SpUvx0kVfWIiNSFe/hF8sYboZfUHXeE6qzZs0PpvF27msTerl34RdIYJfPt20M12r/+BStWhMfzzgvtOwepSSV+MzsLuB9oDvze3e/c3/uV+EVE6m9fiT/lQwXNrDnwAHA20Be42Mz6pjoOEZG4imKM+GDgQ3df4e5fAk8CmoNXRCRFokj8XYA1tV4XJ/aJiEgKNNlZoczsKjMrMrOikpKSqMMREckYUST+j4FutV53Tezbhbs/7O757p7fsWPHlAUnIpLpokj87wC9zKyHmR0CXAQ8H0EcIiKxlPKRu+6+3cyuBWYSunNOdfclqY5DRCSuIpmywd3/Cvw1inOLiMRdWozcNbMSYPVuu/OA0gjCSZZMux7IvGvS9TR9mXZNDb2eo9x9j0bStEj8e2NmRXsbkZauMu16IPOuSdfT9GXaNSXreppsd04REUkOJX4RkZhJ58T/cNQBNLJMux7IvGvS9TR9mXZNSbmetK3jFxGRg5POJX4RETkISvwiIjGTdonfzM4ys/fN7EMzuynqeBqDma0ys/fMbKGZpd2KM2Y21czWm9niWvvam9nfzeyDxGO7KGOsr31c0yQz+zjxPS00sxFRxlgfZtbNzF4zs6VmtsTMrk/sT8vvaT/Xk87fUQsze9vMFiWu6bbE/h5m9lYi5z2VmOqmYedKpzr+xCIu/wTOJEzn/A5wsbsvjTSwBjKzVUC+u6flwBMzOwWoAP7k7v0T++4GPnP3OxM36HbuPjHKOOtjH9c0Cahw919EGdvBMLMjgCPcfb6ZHQrMA84FLicNv6f9XM+FpO93ZEBrd68ws2zgH8D1wA+AZ939STN7CFjk7r9pyLnSrcSvRVyaIHcvBD7bbfco4NHE80cJ/1OmjX1cU9py97XuPj/xfDOwjLAORlp+T/u5nrTlQUXiZXZic+A04JnE/kb5jtIt8WfqIi4OvGxm88zsqqiDaSSHu/vaxPNPgcOjDKYRXWtm7yaqgtKiWmR3ZtYdOB54iwz4nna7Hkjj78jMmpvZQmA98HfgX8BGd9+eeEuj5Lx0S/yZ6mR3/zphHeKxiWqGjOGhPjF96hT37TfA0cBAYC1wT7Th1J+Z5QLTgRvcfVPtY+n4Pe3letL6O3L3KncfSFinZDDQJxnnSbfEX6dFXNKNu3+ceFwPzCB84eluXaIetro+dn3E8TSYu69L/I+5A/gdafY9JeqNpwOPu/uzid1p+z3t7XrS/Tuq5u4bgdeAE4G2ZlY9k3Kj5Lx0S/wZt4iLmbVONE5hZq2B4cDi/X8qLTwPXJZ4fhnwvxHG0iiqE2TCeaTR95RoOHwEWObu99Y6lJbf076uJ82/o45m1jbxvCWhE8sywg3ggsTbGuU7SqtePQCJ7lm/pGYRl8kRh9QgZtaTUMqHsD7CE+l2TWY2DSggTCG7DvgJ8BzwNPAVwpTaF7p72jSW7uOaCghVCA6sAq6uVT/epJnZycDrwHvAjsTumwn14mn3Pe3nei4mfb+jAYTG2+aEQvnT7v7TRI54EmgPL
AAucfdtDTpXuiV+ERFpmHSr6hERkQZS4hcRiRklfhGRmFHiFxGJGSV+EZGYUeIXAcysqtaMjgsbc+ZXM+tee5ZPkahlHfgtIrGwJTFUXiTjqcQvsh+JtRLuTqyX8LaZHZPY393MXk1MBjbLzL6S2H+4mc1IzKm+yMyGJv5UczP7XWKe9ZcTIzNFIqHELxK03K2q51u1jpW7+3HArwmjxgF+BTzq7gOAx4Epif1TgDnu/jXg68CSxP5ewAPu3g/YCJyf5OsR2SeN3BUBzKzC3XP3sn8VcJq7r0hMCvapu3cws1LCQiCVif1r3T3PzEqArrWH1CemDf67u/dKvJ4IZLv77cm/MpE9qcQvcmC+j+f1UXtulSrUviYRUuIXObBv1Xqcm3j+JmF2WIAxhAnDAGYB18DORTXapCpIkbpSqUMkaJlY+ajaS+5e3aWznZm9Syi1X5zYdx3wBzObAJQAVzfiJpQAAABQSURBVCT2Xw88bGbfJZTsryEsCCLSZKiOX2Q/EnX8+e5eGnUsIo1FVT0iIjGjEr+ISMyoxC8iEjNK/CIiMaPELyISM0r8IiIxo8QvIhIz/x9On3wOiMO+jAAAAABJRU5ErkJggg==", 608 | "text/plain": [ 609 | "
" 610 | ] 611 | }, 612 | "metadata": { 613 | "needs_background": "light" 614 | }, 615 | "output_type": "display_data" 616 | } 617 | ], 618 | "source": [ 619 | "\n", 620 | "epoch_count = range(1, len(total_training_loss) + 1)\n", 621 | "\n", 622 | "# Visualize loss history\n", 623 | "plt.plot(epoch_count, total_training_loss, 'r--')\n", 624 | "plt.legend(['Training Loss', 'Test Loss'])\n", 625 | "plt.xlabel('Epoch')\n", 626 | "plt.ylabel('Loss')\n", 627 | "plt.show();" 628 | ] 629 | } 630 | ], 631 | "metadata": { 632 | "accelerator": "GPU", 633 | "colab": { 634 | "collapsed_sections": [], 635 | "machine_shape": "hm", 636 | "name": "LeNet_From_Scratch.ipynb", 637 | "provenance": [] 638 | }, 639 | "interpreter": { 640 | "hash": "36cf16204b8548560b1c020c4e8fb5b57f0e4c58016f52f2d4be01e192833930" 641 | }, 642 | "kernelspec": { 643 | "display_name": "Python 3.9.10 64-bit", 644 | "language": "python", 645 | "name": "python3" 646 | }, 647 | "language_info": { 648 | "codemirror_mode": { 649 | "name": "ipython", 650 | "version": 3 651 | }, 652 | "file_extension": ".py", 653 | "mimetype": "text/x-python", 654 | "name": "python", 655 | "nbconvert_exporter": "python", 656 | "pygments_lexer": "ipython3", 657 | "version": "3.9.10" 658 | }, 659 | "orig_nbformat": 4 660 | }, 661 | "nbformat": 4, 662 | "nbformat_minor": 0 663 | } 664 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## This Repo has all the Deep Learning Paper Implementations codes for my [YouTube Channel](https://www.youtube.com/channel/UC0_a8SNpTFkmVv5SLMs1CIA/featured) 2 | 3 | [yt_cover]: /assets/Youtube_Cover.jpg 4 | 5 | [![Youtube Link][yt_cover]](https://www.youtube.com/channel/UC0_a8SNpTFkmVv5SLMs1CIA/videos) 6 | 7 | ### Author: Rohan Paul 8 | 9 | #### Find me here.. 10 | 11 | - 🐦 TWITTER: https://twitter.com/rohanpaul_ai 12 | - ​👨‍🔧​ Kaggle: https://www.kaggle.com/paulrohan2020 13 | - 👨🏻‍💼 LINKEDIN: https://www.linkedin.com/in/rohan-paul-b27285129/ 14 | - 👨‍💻 GITHUB: https://github.com/rohan-paul 15 | - 🤖: My Website and Blog: https://rohan-paul-ai.netlify.app/ 16 | - 🧑‍🦰 Facebook Page: https://www.facebook.com/Computer-Vision-with-Rohan-Paul-109348958325690 17 | - 📸 Instagram: https://www.instagram.com/rohan_paul_2020/ 18 | 19 | --- 20 | 21 | [logo]: https://raw.githubusercontent.com/rohan-paul/MachineLearning-DeepLearning-Code-for-my-Youtube-Channel/master/assets/yt_logo.png 22 | 23 | -------------------------------------------------------------------------------- /ResNet56_PyTorch.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## ResNet Paper Implementation from Scratch with PyTorch on CIFAR-10 Dataset\n", 8 | "\n", 9 | "# [Link to my Youtube Video Explaining this whole Notebook](https://www.youtube.com/watch?v=P8U1VL93jzA&list=PLxqBkZuBynVRX6QExfPyzRGj5Ap_zmcAJ&index=6)\n", 10 | "\n", 11 | "[![Imgur](https://imgur.com/NhVhb4u.png)](https://www.youtube.com/watch?v=P8U1VL93jzA&list=PLxqBkZuBynVRX6QExfPyzRGj5Ap_zmcAJ&index=6)\n", 12 | "\n", 13 | "---\n", 14 | "\n", 15 | "The below comments are taken from the original paper from implementing ResNet on CIFAR-10 Dataset. 
I follow this structure in this implementation of ResNet on CIFAR-10.\n", 16 |     "\n", 17 |     "\"We conducted more studies on the CIFAR-10 dataset, which consists of 50k training images and 10k testing images in 10 classes.\n", 18 |     "\n", 19 |     "The network inputs are 32×32 images, with the per-pixel mean subtracted. The first layer is 3×3 convolutions. Then we use a stack of 6n layers with 3×3 convolutions on the feature maps of sizes {32, 16, 8} respectively, with 2n layers for each feature map size. The numbers of filters are {16, 32, 64} respectively.\n", 20 |     "\n", 21 |     "The subsampling is performed by convolutions with a stride of 2. The network ends with a global average pooling, a 10-way fully-connected layer, and softmax.\n", 22 |     "\n", 23 |     "There are in total 6n+2 stacked weighted layers.\n", 24 |     "\n", 25 |     "We compare n = {3, 5, 7, 9}, leading to 20-, 32-, 44-, and 56-layer networks.\n", 26 |     "\n", 27 |     "When shortcut connections are used, they are connected to the pairs of 3×3 layers (a total of 3n shortcuts).\n", 28 |     "\n", 29 |     "On this dataset we use identity shortcuts in all cases (i.e., option A), so our residual models have exactly the same depth, width, and number of parameters as the plain counterparts.\"\n", 30 |     "\n", 31 |     "---\n", 32 |     "\n", 33 |     "For this example here in this file, I have used n = 9, so my ResNet blocks are [9, 9, 9].\n", 34 |     "\n", 35 |     "That is why the total number of layers is 56 (i.e. 9 * 6 + 2).\n", 36 |     "\n", 37 |     "![Imgur](https://imgur.com/ifD8qbd.png)\n", 38 |     "\n" 39 |   ] 40 | }, 41 | { 42 |   "cell_type": "code", 43 |   "execution_count": 1, 44 |   "metadata": { 45 |     "id": "9g7Qx3Sb7-tD" 46 |   }, 47 |   "outputs": [], 48 |   "source": [ 49 |     "import os\n", 50 |     "import shutil\n", 51 |     "from collections import OrderedDict\n", 52 |     "\n", 53 |     "import torch\n", 54 |     "import torch.nn as nn\n", 55 |     "import torch.nn.functional as F\n", 56 |     "import torch.optim as optim\n", 57 |     "from torchvision import transforms, datasets\n", 58 |     "from torchsummary import summary\n", 59 |     "from torch.utils.data import Dataset, DataLoader, random_split" 60 |   ] 61 | }, 62 | { 63 |   "cell_type": "code", 64 |   "execution_count": 2, 65 |   "metadata": { 66 |     "colab": { 67 |       "base_uri": "https://localhost:8080/" 68 |     }, 69 |     "id": "9mPnawhC7-tF", 70 |     "outputId": "646b4228-0910-48d0-c56a-e0cf599b801b" 71 |   }, 72 |   "outputs": [ 73 |     { 74 |       "name": "stdout", 75 |       "output_type": "stream", 76 |       "text": [ 77 |         "Mounted at /content/drive\n" 78 |       ] 79 |     } 80 |   ], 81 |   "source": [ 82 |     "from google.colab import drive\n", 83 |     "drive.mount('/content/drive')" 84 |   ] 85 | }, 86 | { 87 |   "cell_type": "code", 88 |   "execution_count": 3, 89 |   "metadata": { 90 |     "colab": { 91 |       "base_uri": "https://localhost:8080/" 92 |     }, 93 |     "id": "5ZL8T3Do8J5s", 94 |     "outputId": "81edc139-34c1-4dbe-cb6e-06d1e48ef1a6" 95 |   }, 96 |   "outputs": [ 97 |     { 98 |       "name": "stdout", 99 |       "output_type": "stream", 100 |       "text": [ 101 |         "Sat Mar  5 13:11:50 2022       \n", 102 |         "+-----------------------------------------------------------------------------+\n", 103 |         "| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |\n", 104 |         "|-------------------------------+----------------------+----------------------+\n", 105 |         "| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |\n", 106 |         "| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |\n", 107 |         "|                               |                      |               MIG M. 
|\n", 115 | "|===============================+======================+======================|\n", 116 | "| 0 Tesla P100-PCIE... Off | 00000000:00:04.0 Off | 0 |\n", 117 | "| N/A 35C P0 26W / 250W | 0MiB / 16280MiB | 0% Default |\n", 118 | "| | | N/A |\n", 119 | "+-------------------------------+----------------------+----------------------+\n", 120 | " \n", 121 | "+-----------------------------------------------------------------------------+\n", 122 | "| Processes: |\n", 123 | "| GPU GI CI PID Type Process name GPU Memory |\n", 124 | "| ID ID Usage |\n", 125 | "|=============================================================================|\n", 126 | "| No running processes found |\n", 127 | "+-----------------------------------------------------------------------------+\n" 128 | ] 129 | } 130 | ], 131 | "source": [ 132 | "!nvidia-smi" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": 4, 138 | "metadata": { 139 | "id": "pwngtd6i7-tL" 140 | }, 141 | "outputs": [], 142 | "source": [ 143 | "class LambdaLayer(nn.Module):\n", 144 | " \n", 145 | " def __init__(self, lambd):\n", 146 | " super(LambdaLayer, self).__init__()\n", 147 | " self.lambd = lambd\n", 148 | " \n", 149 | " def forward(self, x):\n", 150 | " return self.lambd(x)\n", 151 | "\n", 152 | "class BasicConvBlock(nn.Module):\n", 153 | " \n", 154 | " ''' The BasicConvBlock takes an input with in_channels, applies some blocks of convolutional layers \n", 155 | " to reduce it to out_channels and sum it up to the original input. \n", 156 | " If their sizes mismatch, then the input goes into an identity. \n", 157 | " \n", 158 | " Basically The BasicConvBlock will implement the regular basic Conv Block + \n", 159 | " the shortcut block that does the dimension matching job (option A or B) when dimension changes between 2 blocks\n", 160 | " '''\n", 161 | " \n", 162 | " def __init__(self, in_channels, out_channels, stride=1, option='A'):\n", 163 | " super(BasicConvBlock, self).__init__()\n", 164 | " \n", 165 | " self.features = nn.Sequential(OrderedDict([\n", 166 | " ('conv1', nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)),\n", 167 | " ('bn1', nn.BatchNorm2d(out_channels)),\n", 168 | " ('act1', nn.ReLU()),\n", 169 | " ('conv2', nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)),\n", 170 | " ('bn2', nn.BatchNorm2d(out_channels))\n", 171 | " ]))\n", 172 | "\n", 173 | " self.shortcut = nn.Sequential()\n", 174 | " \n", 175 | " ''' When input and output spatial dimensions don't match, we have 2 options, with stride:\n", 176 | " - A) Use identity shortcuts with zero padding to increase channel dimension. 
\n", 177 | " - B) Use 1x1 convolution to increase channel dimension (projection shortcut).\n", 178 | " '''\n", 179 | " if stride != 1 or in_channels != out_channels:\n", 180 | " if option == 'A':\n", 181 | " # Use identity shortcuts with zero padding to increase channel dimension.\n", 182 | " pad_to_add = out_channels//4\n", 183 | " ''' ::2 is doing the job of stride = 2\n", 184 | " F.pad apply padding to (W,H,C,N).\n", 185 | " \n", 186 | " The padding lengths are specified in reverse order of the dimensions,\n", 187 | " F.pad(x[:, :, ::2, ::2], (0,0, 0,0, pad,pad, 0,0))\n", 188 | "\n", 189 | " [width_beginning, width_end, height_beginning, height_end, channel_beginning, channel_end, batchLength_beginning, batchLength_end ]\n", 190 | "\n", 191 | " '''\n", 192 | " self.shortcut = LambdaLayer(lambda x:\n", 193 | " F.pad(x[:, :, ::2, ::2], (0,0, 0,0, pad_to_add, pad_to_add, 0,0)))\n", 194 | " if option == 'B':\n", 195 | " self.shortcut = nn.Sequential(OrderedDict([\n", 196 | " ('s_conv1', nn.Conv2d(in_channels, 2*out_channels, kernel_size=1, stride=stride, padding=0, bias=False)),\n", 197 | " ('s_bn1', nn.BatchNorm2d(2*out_channels))\n", 198 | " ]))\n", 199 | " \n", 200 | " def forward(self, x):\n", 201 | " out = self.features(x)\n", 202 | " # sum it up with shortcut layer\n", 203 | " out += self.shortcut(x)\n", 204 | " out = F.relu(out)\n", 205 | " return out" 206 | ] 207 | }, 208 | { 209 | "cell_type": "markdown", 210 | "metadata": {}, 211 | "source": [ 212 | "\n", 213 | "\n", 214 | "### Explanations on using Option A and B in below code\n", 215 | "\n", 216 | "```py\n", 217 | "\n", 218 | "if stride != 1 or in_channels != out_channels:\n", 219 | " if option == 'A':\n", 220 | " pad = out_channels//4\n", 221 | " # ::2 replace the stride 2 + F.pad apply padding to (W,H,C,N).\n", 222 | " self.shortcut = LambdaLayer(lambda x:\n", 223 | " F.pad(x[:, :, ::2, ::2], (0,0, 0,0, pad,pad, 0,0)))\n", 224 | " if option == 'B':\n", 225 | " self.shortcut = nn.Sequential(OrderedDict([\n", 226 | " ('s_conv1', nn.Conv2d(in_channels, 2*out_channels, kernel_size=1, stride=stride, padding=0, bias=False)),\n", 227 | " ('s_bn1', nn.BatchNorm2d(2*out_channels))\n", 228 | " ]))\n", 229 | "\n", 230 | "```\n", 231 | "\n", 232 | "As per the original Paper\n", 233 | "\n", 234 | "#### We use identity shortcuts when input and output channel dimensions are the same.\n", 235 | "\n", 236 | "#### Otherwise, When input and output spatial dimensions don't match, we have 2 options, with stride:\n", 237 | "\n", 238 | " - A) Use identity shortcuts with zero padding to increase channel dimension.\n", 239 | "\n", 240 | " - B) Use 1x1 convolution to increase channel dimension (projection shortcut).\n", 241 | "\n", 242 | "-----------------------\n", 243 | "\n", 244 | "### Understanding `F.pad` on a 4-D Tensor and the following line\n", 245 | "\n", 246 | "### `F.pad(x[:, :, ::2, ::2], (0,0, 0,0, pad,pad, 0,0)))`\n", 247 | "\n", 248 | "https://stackoverflow.com/a/61945903/1902852\n", 249 | "\n", 250 | "The padding lengths are specified in reverse order of the dimensions, where every dimension has two values, one for the padding at the beginning and one for the padding at the end.\n", 251 | "\n", 252 | "For an image with the dimensions `[channels, height, width]` the padding is given as:\n", 253 | "\n", 254 | "`[width_beginning, width_end, height_beginning, height_end, channels_beginning, channels_end]`,\n", 255 | "\n", 256 | "which can be reworded to\n", 257 | "\n", 258 | "`[left, right, top, bottom]`\n", 259 | "\n", 260 | "Therefore the 
code above pads the images to the right and bottom. The channels are left out, because they are not being padded, which also means that the same padding could be directly applied to the masks.\n", 261 | "\n", 262 | "So the below line means\n", 263 | "\n", 264 | "`F.pad(x[:, :, ::2, ::2], (0,0, 0,0, pad,pad, 0,0))`\n", 265 | "\n", 266 | "\n", 267 | "`[width_beginning, width_end, height_beginning, height_end, channel_beginning, channel_end, batchLength_beginning, batchLength_end ]`" 268 | ] 269 | }, 270 | { 271 | "cell_type": "code", 272 | "execution_count": 5, 273 | "metadata": { 274 | "id": "6jvgyHXI7-tM" 275 | }, 276 | "outputs": [], 277 | "source": [ 278 | "\n", 279 | "class ResNet(nn.Module):\n", 280 | " \"\"\"\n", 281 | " ResNet-56 architecture for CIFAR-10 Dataset of shape 32*32*3\n", 282 | " \"\"\"\n", 283 | " def __init__(self, block_type, num_blocks):\n", 284 | " super(ResNet, self).__init__()\n", 285 | " \n", 286 | " self.in_channels = 16\n", 287 | " \n", 288 | " self.conv0 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False)\n", 289 | " self.bn0 = nn.BatchNorm2d(16)\n", 290 | " \n", 291 | " self.block1 = self.__build_layer(block_type, 16, num_blocks[0], starting_stride=1)\n", 292 | " \n", 293 | " self.block2 = self.__build_layer(block_type, 32, num_blocks[1], starting_stride=2)\n", 294 | " \n", 295 | " self.block3 = self.__build_layer(block_type, 64, num_blocks[2], starting_stride=2)\n", 296 | " \n", 297 | " self.avgpool = nn.AdaptiveAvgPool2d((1,1))\n", 298 | " self.linear = nn.Linear(64, 10)\n", 299 | " \n", 300 | " def __build_layer(self, block_type, out_channels, num_blocks, starting_stride):\n", 301 | " \n", 302 | " strides_list_for_current_block = [starting_stride] + [1]*(num_blocks-1)\n", 303 | " ''' Above line will generate an array whose first element is starting_stride\n", 304 | " And it will have (num_blocks-1) more elements each of value 1\n", 305 | " '''\n", 306 | " # print('strides_list_for_current_block ', strides_list_for_current_block)\n", 307 | " \n", 308 | " layers = []\n", 309 | " \n", 310 | " for stride in strides_list_for_current_block:\n", 311 | " layers.append(block_type(self.in_channels, out_channels, stride))\n", 312 | " self.in_channels = out_channels\n", 313 | " \n", 314 | " return nn.Sequential(*layers)\n", 315 | " \n", 316 | " def forward(self, x):\n", 317 | " out = F.relu(self.bn0(self.conv0(x)))\n", 318 | " out = self.block1(out)\n", 319 | " out = self.block2(out) \n", 320 | " out = self.block3(out)\n", 321 | " out = self.avgpool(out)\n", 322 | " out = torch.flatten(out, 1)\n", 323 | " out = self.linear(out)\n", 324 | " return out" 325 | ] 326 | }, 327 | { 328 | "cell_type": "markdown", 329 | "metadata": {}, 330 | "source": [ 331 | "### _build_layer() method\n", 332 | "\n", 333 | "In ResNet Every layer downsamples the input at the start using stride equals to 2 i.e for 1st convolutional layer in 1st block of a layer.\n", 334 | "\n", 335 | "If we look at the first operation of each layer, we see that the stride used at that first one is 2, instead of 1 like for the rest of them.\n", 336 | "\n", 337 | "This is because, here in ResNet, reduction between layers is achieved by an increase on the stride, from 1 to 2, at the first convolution of each layer; instead of by a pooling operation, which we are used to see as down samplers.\n", 338 | "\n", 339 | "Quoting from Paper\n", 340 | "\n", 341 | "\" For both options, when the shortcuts go across feature maps of two sizes, they are performed with a stride of 2.\"" 342 | ] 343 | }, 344 | { 
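To make the option-A shortcut concrete, the sketch below (not part of the notebook cells) traces the zero-padding identity shortcut on a toy tensor. Assumption: in_channels=16, out_channels=32, so `pad_to_add = 32 // 4 = 8` channels are added on each side of the channel dimension while `::2` halves the spatial dimensions.

```python
import torch
import torch.nn.functional as F

x = torch.randn(4, 16, 32, 32)          # [batch, channels, height, width]
pad_to_add = 32 // 4                     # 8 zero channels before and after -> 16 + 8 + 8 = 32
shortcut = F.pad(x[:, :, ::2, ::2],      # stride-2 spatial subsampling via slicing
                 (0, 0, 0, 0, pad_to_add, pad_to_add, 0, 0))
print(shortcut.shape)                    # torch.Size([4, 32, 16, 16]) -> matches the conv branch
```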
345 | "cell_type": "code", 346 | "execution_count": 6, 347 | "metadata": { 348 | "id": "pnvR0RuC7-tN" 349 | }, 350 | "outputs": [], 351 | "source": [ 352 | "def ResNet56():\n", 353 | " return ResNet(block_type=BasicConvBlock, num_blocks=[9,9,9])" 354 | ] 355 | }, 356 | { 357 | "cell_type": "code", 358 | "execution_count": 7, 359 | "metadata": { 360 | "colab": { 361 | "base_uri": "https://localhost:8080/" 362 | }, 363 | "id": "47k9cNnn7-tN", 364 | "outputId": "9832007a-4e1f-4c88-c4c0-931f0af67bf6" 365 | }, 366 | "outputs": [ 367 | { 368 | "name": "stdout", 369 | "output_type": "stream", 370 | "text": [ 371 | "----------------------------------------------------------------\n", 372 | " Layer (type) Output Shape Param #\n", 373 | "================================================================\n", 374 | " Conv2d-1 [-1, 16, 32, 32] 432\n", 375 | " BatchNorm2d-2 [-1, 16, 32, 32] 32\n", 376 | " Conv2d-3 [-1, 16, 32, 32] 2,304\n", 377 | " BatchNorm2d-4 [-1, 16, 32, 32] 32\n", 378 | " ReLU-5 [-1, 16, 32, 32] 0\n", 379 | " Conv2d-6 [-1, 16, 32, 32] 2,304\n", 380 | " BatchNorm2d-7 [-1, 16, 32, 32] 32\n", 381 | " BasicConvBlock-8 [-1, 16, 32, 32] 0\n", 382 | " Conv2d-9 [-1, 16, 32, 32] 2,304\n", 383 | " BatchNorm2d-10 [-1, 16, 32, 32] 32\n", 384 | " ReLU-11 [-1, 16, 32, 32] 0\n", 385 | " Conv2d-12 [-1, 16, 32, 32] 2,304\n", 386 | " BatchNorm2d-13 [-1, 16, 32, 32] 32\n", 387 | " BasicConvBlock-14 [-1, 16, 32, 32] 0\n", 388 | " Conv2d-15 [-1, 16, 32, 32] 2,304\n", 389 | " BatchNorm2d-16 [-1, 16, 32, 32] 32\n", 390 | " ReLU-17 [-1, 16, 32, 32] 0\n", 391 | " Conv2d-18 [-1, 16, 32, 32] 2,304\n", 392 | " BatchNorm2d-19 [-1, 16, 32, 32] 32\n", 393 | " BasicConvBlock-20 [-1, 16, 32, 32] 0\n", 394 | " Conv2d-21 [-1, 16, 32, 32] 2,304\n", 395 | " BatchNorm2d-22 [-1, 16, 32, 32] 32\n", 396 | " ReLU-23 [-1, 16, 32, 32] 0\n", 397 | " Conv2d-24 [-1, 16, 32, 32] 2,304\n", 398 | " BatchNorm2d-25 [-1, 16, 32, 32] 32\n", 399 | " BasicConvBlock-26 [-1, 16, 32, 32] 0\n", 400 | " Conv2d-27 [-1, 16, 32, 32] 2,304\n", 401 | " BatchNorm2d-28 [-1, 16, 32, 32] 32\n", 402 | " ReLU-29 [-1, 16, 32, 32] 0\n", 403 | " Conv2d-30 [-1, 16, 32, 32] 2,304\n", 404 | " BatchNorm2d-31 [-1, 16, 32, 32] 32\n", 405 | " BasicConvBlock-32 [-1, 16, 32, 32] 0\n", 406 | " Conv2d-33 [-1, 16, 32, 32] 2,304\n", 407 | " BatchNorm2d-34 [-1, 16, 32, 32] 32\n", 408 | " ReLU-35 [-1, 16, 32, 32] 0\n", 409 | " Conv2d-36 [-1, 16, 32, 32] 2,304\n", 410 | " BatchNorm2d-37 [-1, 16, 32, 32] 32\n", 411 | " BasicConvBlock-38 [-1, 16, 32, 32] 0\n", 412 | " Conv2d-39 [-1, 16, 32, 32] 2,304\n", 413 | " BatchNorm2d-40 [-1, 16, 32, 32] 32\n", 414 | " ReLU-41 [-1, 16, 32, 32] 0\n", 415 | " Conv2d-42 [-1, 16, 32, 32] 2,304\n", 416 | " BatchNorm2d-43 [-1, 16, 32, 32] 32\n", 417 | " BasicConvBlock-44 [-1, 16, 32, 32] 0\n", 418 | " Conv2d-45 [-1, 16, 32, 32] 2,304\n", 419 | " BatchNorm2d-46 [-1, 16, 32, 32] 32\n", 420 | " ReLU-47 [-1, 16, 32, 32] 0\n", 421 | " Conv2d-48 [-1, 16, 32, 32] 2,304\n", 422 | " BatchNorm2d-49 [-1, 16, 32, 32] 32\n", 423 | " BasicConvBlock-50 [-1, 16, 32, 32] 0\n", 424 | " Conv2d-51 [-1, 16, 32, 32] 2,304\n", 425 | " BatchNorm2d-52 [-1, 16, 32, 32] 32\n", 426 | " ReLU-53 [-1, 16, 32, 32] 0\n", 427 | " Conv2d-54 [-1, 16, 32, 32] 2,304\n", 428 | " BatchNorm2d-55 [-1, 16, 32, 32] 32\n", 429 | " BasicConvBlock-56 [-1, 16, 32, 32] 0\n", 430 | " Conv2d-57 [-1, 32, 16, 16] 4,608\n", 431 | " BatchNorm2d-58 [-1, 32, 16, 16] 64\n", 432 | " ReLU-59 [-1, 32, 16, 16] 0\n", 433 | " Conv2d-60 [-1, 32, 16, 16] 9,216\n", 434 | " BatchNorm2d-61 [-1, 32, 16, 16] 
64\n", 435 | " LambdaLayer-62 [-1, 32, 16, 16] 0\n", 436 | " BasicConvBlock-63 [-1, 32, 16, 16] 0\n", 437 | " Conv2d-64 [-1, 32, 16, 16] 9,216\n", 438 | " BatchNorm2d-65 [-1, 32, 16, 16] 64\n", 439 | " ReLU-66 [-1, 32, 16, 16] 0\n", 440 | " Conv2d-67 [-1, 32, 16, 16] 9,216\n", 441 | " BatchNorm2d-68 [-1, 32, 16, 16] 64\n", 442 | " BasicConvBlock-69 [-1, 32, 16, 16] 0\n", 443 | " Conv2d-70 [-1, 32, 16, 16] 9,216\n", 444 | " BatchNorm2d-71 [-1, 32, 16, 16] 64\n", 445 | " ReLU-72 [-1, 32, 16, 16] 0\n", 446 | " Conv2d-73 [-1, 32, 16, 16] 9,216\n", 447 | " BatchNorm2d-74 [-1, 32, 16, 16] 64\n", 448 | " BasicConvBlock-75 [-1, 32, 16, 16] 0\n", 449 | " Conv2d-76 [-1, 32, 16, 16] 9,216\n", 450 | " BatchNorm2d-77 [-1, 32, 16, 16] 64\n", 451 | " ReLU-78 [-1, 32, 16, 16] 0\n", 452 | " Conv2d-79 [-1, 32, 16, 16] 9,216\n", 453 | " BatchNorm2d-80 [-1, 32, 16, 16] 64\n", 454 | " BasicConvBlock-81 [-1, 32, 16, 16] 0\n", 455 | " Conv2d-82 [-1, 32, 16, 16] 9,216\n", 456 | " BatchNorm2d-83 [-1, 32, 16, 16] 64\n", 457 | " ReLU-84 [-1, 32, 16, 16] 0\n", 458 | " Conv2d-85 [-1, 32, 16, 16] 9,216\n", 459 | " BatchNorm2d-86 [-1, 32, 16, 16] 64\n", 460 | " BasicConvBlock-87 [-1, 32, 16, 16] 0\n", 461 | " Conv2d-88 [-1, 32, 16, 16] 9,216\n", 462 | " BatchNorm2d-89 [-1, 32, 16, 16] 64\n", 463 | " ReLU-90 [-1, 32, 16, 16] 0\n", 464 | " Conv2d-91 [-1, 32, 16, 16] 9,216\n", 465 | " BatchNorm2d-92 [-1, 32, 16, 16] 64\n", 466 | " BasicConvBlock-93 [-1, 32, 16, 16] 0\n", 467 | " Conv2d-94 [-1, 32, 16, 16] 9,216\n", 468 | " BatchNorm2d-95 [-1, 32, 16, 16] 64\n", 469 | " ReLU-96 [-1, 32, 16, 16] 0\n", 470 | " Conv2d-97 [-1, 32, 16, 16] 9,216\n", 471 | " BatchNorm2d-98 [-1, 32, 16, 16] 64\n", 472 | " BasicConvBlock-99 [-1, 32, 16, 16] 0\n", 473 | " Conv2d-100 [-1, 32, 16, 16] 9,216\n", 474 | " BatchNorm2d-101 [-1, 32, 16, 16] 64\n", 475 | " ReLU-102 [-1, 32, 16, 16] 0\n", 476 | " Conv2d-103 [-1, 32, 16, 16] 9,216\n", 477 | " BatchNorm2d-104 [-1, 32, 16, 16] 64\n", 478 | " BasicConvBlock-105 [-1, 32, 16, 16] 0\n", 479 | " Conv2d-106 [-1, 32, 16, 16] 9,216\n", 480 | " BatchNorm2d-107 [-1, 32, 16, 16] 64\n", 481 | " ReLU-108 [-1, 32, 16, 16] 0\n", 482 | " Conv2d-109 [-1, 32, 16, 16] 9,216\n", 483 | " BatchNorm2d-110 [-1, 32, 16, 16] 64\n", 484 | " BasicConvBlock-111 [-1, 32, 16, 16] 0\n", 485 | " Conv2d-112 [-1, 64, 8, 8] 18,432\n", 486 | " BatchNorm2d-113 [-1, 64, 8, 8] 128\n", 487 | " ReLU-114 [-1, 64, 8, 8] 0\n", 488 | " Conv2d-115 [-1, 64, 8, 8] 36,864\n", 489 | " BatchNorm2d-116 [-1, 64, 8, 8] 128\n", 490 | " LambdaLayer-117 [-1, 64, 8, 8] 0\n", 491 | " BasicConvBlock-118 [-1, 64, 8, 8] 0\n", 492 | " Conv2d-119 [-1, 64, 8, 8] 36,864\n", 493 | " BatchNorm2d-120 [-1, 64, 8, 8] 128\n", 494 | " ReLU-121 [-1, 64, 8, 8] 0\n", 495 | " Conv2d-122 [-1, 64, 8, 8] 36,864\n", 496 | " BatchNorm2d-123 [-1, 64, 8, 8] 128\n", 497 | " BasicConvBlock-124 [-1, 64, 8, 8] 0\n", 498 | " Conv2d-125 [-1, 64, 8, 8] 36,864\n", 499 | " BatchNorm2d-126 [-1, 64, 8, 8] 128\n", 500 | " ReLU-127 [-1, 64, 8, 8] 0\n", 501 | " Conv2d-128 [-1, 64, 8, 8] 36,864\n", 502 | " BatchNorm2d-129 [-1, 64, 8, 8] 128\n", 503 | " BasicConvBlock-130 [-1, 64, 8, 8] 0\n", 504 | " Conv2d-131 [-1, 64, 8, 8] 36,864\n", 505 | " BatchNorm2d-132 [-1, 64, 8, 8] 128\n", 506 | " ReLU-133 [-1, 64, 8, 8] 0\n", 507 | " Conv2d-134 [-1, 64, 8, 8] 36,864\n", 508 | " BatchNorm2d-135 [-1, 64, 8, 8] 128\n", 509 | " BasicConvBlock-136 [-1, 64, 8, 8] 0\n", 510 | " Conv2d-137 [-1, 64, 8, 8] 36,864\n", 511 | " BatchNorm2d-138 [-1, 64, 8, 8] 128\n", 512 | " ReLU-139 [-1, 64, 8, 8] 0\n", 513 | " 
Conv2d-140 [-1, 64, 8, 8] 36,864\n", 514 | " BatchNorm2d-141 [-1, 64, 8, 8] 128\n", 515 | " BasicConvBlock-142 [-1, 64, 8, 8] 0\n", 516 | " Conv2d-143 [-1, 64, 8, 8] 36,864\n", 517 | " BatchNorm2d-144 [-1, 64, 8, 8] 128\n", 518 | " ReLU-145 [-1, 64, 8, 8] 0\n", 519 | " Conv2d-146 [-1, 64, 8, 8] 36,864\n", 520 | " BatchNorm2d-147 [-1, 64, 8, 8] 128\n", 521 | " BasicConvBlock-148 [-1, 64, 8, 8] 0\n", 522 | " Conv2d-149 [-1, 64, 8, 8] 36,864\n", 523 | " BatchNorm2d-150 [-1, 64, 8, 8] 128\n", 524 | " ReLU-151 [-1, 64, 8, 8] 0\n", 525 | " Conv2d-152 [-1, 64, 8, 8] 36,864\n", 526 | " BatchNorm2d-153 [-1, 64, 8, 8] 128\n", 527 | " BasicConvBlock-154 [-1, 64, 8, 8] 0\n", 528 | " Conv2d-155 [-1, 64, 8, 8] 36,864\n", 529 | " BatchNorm2d-156 [-1, 64, 8, 8] 128\n", 530 | " ReLU-157 [-1, 64, 8, 8] 0\n", 531 | " Conv2d-158 [-1, 64, 8, 8] 36,864\n", 532 | " BatchNorm2d-159 [-1, 64, 8, 8] 128\n", 533 | " BasicConvBlock-160 [-1, 64, 8, 8] 0\n", 534 | " Conv2d-161 [-1, 64, 8, 8] 36,864\n", 535 | " BatchNorm2d-162 [-1, 64, 8, 8] 128\n", 536 | " ReLU-163 [-1, 64, 8, 8] 0\n", 537 | " Conv2d-164 [-1, 64, 8, 8] 36,864\n", 538 | " BatchNorm2d-165 [-1, 64, 8, 8] 128\n", 539 | " BasicConvBlock-166 [-1, 64, 8, 8] 0\n", 540 | "AdaptiveAvgPool2d-167 [-1, 64, 1, 1] 0\n", 541 | " Linear-168 [-1, 10] 650\n", 542 | "================================================================\n", 543 | "Total params: 853,018\n", 544 | "Trainable params: 853,018\n", 545 | "Non-trainable params: 0\n", 546 | "----------------------------------------------------------------\n", 547 | "Input size (MB): 0.01\n", 548 | "Forward/backward pass size (MB): 12.16\n", 549 | "Params size (MB): 3.25\n", 550 | "Estimated Total Size (MB): 15.42\n", 551 | "----------------------------------------------------------------\n" 552 | ] 553 | } 554 | ], 555 | "source": [ 556 | "model = ResNet56()\n", 557 | "device = torch.device(\"cuda\") if torch.cuda.is_available() else torch.device(\"cpu\")\n", 558 | "# device = 'cpu'\n", 559 | "model.to(device)\n", 560 | "summary(model, (3, 32, 32))" 561 | ] 562 | }, 563 | { 564 | "cell_type": "markdown", 565 | "metadata": { 566 | "id": "QjPLm6BENFbq" 567 | }, 568 | "source": [ 569 | "## Loading CIFAR-10 Dataset" 570 | ] 571 | }, 572 | { 573 | "cell_type": "code", 574 | "execution_count": 8, 575 | "metadata": { 576 | "id": "Us3eora2NFbs" 577 | }, 578 | "outputs": [], 579 | "source": [ 580 | "def dataloader_cifar():\n", 581 | " transform = transforms.Compose([transforms.ToTensor(),\n", 582 | " transforms.Normalize(mean=[0.5], std=[0.5])])\n", 583 | " \n", 584 | " # Input Data in Local Machine\n", 585 | " # train_dataset = datasets.CIFAR10('../input_data', train=True, download=True, transform=transform)\n", 586 | " # test_dataset = datasets.CIFAR10('../input_data', train=False, download=True, transform=transform)\n", 587 | " \n", 588 | " # Input Data in Google Drive\n", 589 | " train_dataset = datasets.CIFAR10('/content/drive/MyDrive/All_Datasets/CIFAR10', train=True, download=True, transform=transform)\n", 590 | " test_dataset = datasets.CIFAR10('/content/drive/MyDrive/All_Datasets/CIFAR10', train=False, download=True, transform=transform)\n", 591 | "\n", 592 | " # Split dataset into training set and validation set.\n", 593 | " train_dataset, val_dataset = random_split(train_dataset, (45000, 5000))\n", 594 | " \n", 595 | " print(\"Image shape of a random sample image : {}\".format(train_dataset[0][0].numpy().shape), end = '\\n\\n')\n", 596 | " \n", 597 | " print(\"Training Set: {} images\".format(len(train_dataset)))\n", 
598 | " print(\"Validation Set: {} images\".format(len(val_dataset)))\n", 599 | " print(\"Test Set: {} images\".format(len(test_dataset)))\n", 600 | " \n", 601 | " BATCH_SIZE = 32\n", 602 | "\n", 603 | " # Generate dataloader\n", 604 | " train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)\n", 605 | " val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=True)\n", 606 | " test_loader = DataLoader(test_dataset, batch_size=10000, shuffle=True)\n", 607 | " \n", 608 | " return train_loader, val_loader, test_loader" 609 | ] 610 | }, 611 | { 612 | "cell_type": "code", 613 | "execution_count": 9, 614 | "metadata": { 615 | "colab": { 616 | "base_uri": "https://localhost:8080/" 617 | }, 618 | "id": "4E--f56MNFbs", 619 | "outputId": "771fcf9c-e61d-4613-cab6-0e1a97844207" 620 | }, 621 | "outputs": [ 622 | { 623 | "name": "stdout", 624 | "output_type": "stream", 625 | "text": [ 626 | "Files already downloaded and verified\n", 627 | "Files already downloaded and verified\n", 628 | "Image shape of a random sample image : (3, 32, 32)\n", 629 | "\n", 630 | "Training Set: 45000 images\n", 631 | "Validation Set: 5000 images\n", 632 | "Test Set: 10000 images\n" 633 | ] 634 | } 635 | ], 636 | "source": [ 637 | "train_loader, val_loader, test_loader = dataloader_cifar()" 638 | ] 639 | }, 640 | { 641 | "cell_type": "markdown", 642 | "metadata": { 643 | "id": "tOv_8vpcNFbt" 644 | }, 645 | "source": [ 646 | "## Start Actual Training" 647 | ] 648 | }, 649 | { 650 | "cell_type": "code", 651 | "execution_count": 10, 652 | "metadata": { 653 | "id": "ICxcakbK8DRP" 654 | }, 655 | "outputs": [], 656 | "source": [ 657 | "criterion = nn.CrossEntropyLoss()\n", 658 | "optimizer = optim.Adam(model.parameters(), lr=0.01)" 659 | ] 660 | }, 661 | { 662 | "cell_type": "code", 663 | "execution_count": 15, 664 | "metadata": { 665 | "id": "sB_vMJgl8Ei5" 666 | }, 667 | "outputs": [], 668 | "source": [ 669 | "def train_model():\n", 670 | " EPOCHS = 15\n", 671 | " train_samples_num = 45000\n", 672 | " val_samples_num = 5000\n", 673 | " train_costs, val_costs = [], []\n", 674 | " \n", 675 | " #Training phase. \n", 676 | " for epoch in range(EPOCHS):\n", 677 | "\n", 678 | " train_running_loss = 0\n", 679 | " correct_train = 0\n", 680 | " \n", 681 | " model.train().cuda()\n", 682 | " \n", 683 | " for inputs, labels in train_loader:\n", 684 | " inputs, labels = inputs.to(device), labels.to(device)\n", 685 | " \n", 686 | " \"\"\" for every mini-batch during the training phase, we typically want to explicitly set the gradients \n", 687 | " to zero before starting to do backpropragation \"\"\"\n", 688 | " optimizer.zero_grad()\n", 689 | " \n", 690 | " # Start the forward pass\n", 691 | " prediction = model(inputs)\n", 692 | " \n", 693 | " loss = criterion(prediction, labels)\n", 694 | " \n", 695 | " # do backpropagation and update weights with step()\n", 696 | " loss.backward() \n", 697 | " optimizer.step()\n", 698 | " \n", 699 | " # print('outputs on which to apply torch.max ', prediction)\n", 700 | " # find the maximum along the rows, use dim=1 to torch.max()\n", 701 | " _, predicted_outputs = torch.max(prediction.data, 1)\n", 702 | " \n", 703 | " # Update the running corrects \n", 704 | " correct_train += (predicted_outputs == labels).float().sum().item()\n", 705 | " \n", 706 | " ''' Compute batch loss\n", 707 | " multiply each average batch loss with batch-length. \n", 708 | " The batch-length is inputs.size(0) which gives the number total images in each batch. 
\n", 709 | " Essentially I am un-averaging the previously calculated Loss '''\n", 710 | " train_running_loss += (loss.data.item() * inputs.shape[0])\n", 711 | "\n", 712 | "\n", 713 | " train_epoch_loss = train_running_loss / train_samples_num\n", 714 | " \n", 715 | " train_costs.append(train_epoch_loss)\n", 716 | " \n", 717 | " train_acc = correct_train / train_samples_num\n", 718 | "\n", 719 | " # Now check trained weights on the validation set\n", 720 | " val_running_loss = 0\n", 721 | " correct_val = 0\n", 722 | " \n", 723 | " model.eval().cuda()\n", 724 | " \n", 725 | " with torch.no_grad():\n", 726 | " for inputs, labels in val_loader:\n", 727 | " inputs, labels = inputs.to(device), labels.to(device)\n", 728 | "\n", 729 | " # Forward pass.\n", 730 | " prediction = model(inputs)\n", 731 | "\n", 732 | " # Compute the loss.\n", 733 | " loss = criterion(prediction, labels)\n", 734 | "\n", 735 | " # Compute validation accuracy.\n", 736 | " _, predicted_outputs = torch.max(prediction.data, 1)\n", 737 | " correct_val += (predicted_outputs == labels).float().sum().item()\n", 738 | "\n", 739 | " # Compute batch loss.\n", 740 | " val_running_loss += (loss.data.item() * inputs.shape[0])\n", 741 | "\n", 742 | " val_epoch_loss = val_running_loss / val_samples_num\n", 743 | " val_costs.append(val_epoch_loss)\n", 744 | " val_acc = correct_val / val_samples_num\n", 745 | " \n", 746 | " info = \"[Epoch {}/{}]: train-loss = {:0.6f} | train-acc = {:0.3f} | val-loss = {:0.6f} | val-acc = {:0.3f}\"\n", 747 | " \n", 748 | " print(info.format(epoch+1, EPOCHS, train_epoch_loss, train_acc, val_epoch_loss, val_acc))\n", 749 | " \n", 750 | " torch.save(model.state_dict(), '/content/checkpoint_gpu_{}'.format(epoch + 1)) \n", 751 | " \n", 752 | " torch.save(model.state_dict(), '/content/resnet-56_weights_gpu') \n", 753 | " \n", 754 | " return train_costs, val_costs\n", 755 | "\n", 756 | " " 757 | ] 758 | }, 759 | { 760 | "cell_type": "code", 761 | "execution_count": 16, 762 | "metadata": { 763 | "colab": { 764 | "base_uri": "https://localhost:8080/" 765 | }, 766 | "id": "JJaEBgbt9INy", 767 | "outputId": "56270746-ac9e-4ffd-b481-b3d0b901ca82" 768 | }, 769 | "outputs": [ 770 | { 771 | "name": "stdout", 772 | "output_type": "stream", 773 | "text": [ 774 | "[Epoch 1/15]: train-loss = 0.876177 | train-acc = 0.692 | val-loss = 0.001271 | val-acc = 0.734\n", 775 | "[Epoch 2/15]: train-loss = 0.694989 | train-acc = 0.759 | val-loss = 0.002828 | val-acc = 0.769\n", 776 | "[Epoch 3/15]: train-loss = 0.580110 | train-acc = 0.800 | val-loss = 0.000333 | val-acc = 0.778\n", 777 | "[Epoch 4/15]: train-loss = 0.492105 | train-acc = 0.829 | val-loss = 0.001201 | val-acc = 0.780\n", 778 | "[Epoch 5/15]: train-loss = 0.416747 | train-acc = 0.854 | val-loss = 0.001592 | val-acc = 0.810\n", 779 | "[Epoch 6/15]: train-loss = 0.351747 | train-acc = 0.877 | val-loss = 0.000720 | val-acc = 0.784\n", 780 | "[Epoch 7/15]: train-loss = 0.297035 | train-acc = 0.896 | val-loss = 0.001334 | val-acc = 0.794\n", 781 | "[Epoch 8/15]: train-loss = 0.248202 | train-acc = 0.912 | val-loss = 0.000497 | val-acc = 0.823\n", 782 | "[Epoch 9/15]: train-loss = 0.206233 | train-acc = 0.927 | val-loss = 0.001534 | val-acc = 0.814\n", 783 | "[Epoch 10/15]: train-loss = 0.169824 | train-acc = 0.940 | val-loss = 0.000273 | val-acc = 0.815\n", 784 | "[Epoch 11/15]: train-loss = 0.146284 | train-acc = 0.948 | val-loss = 0.000277 | val-acc = 0.818\n", 785 | "[Epoch 12/15]: train-loss = 0.126156 | train-acc = 0.955 | val-loss = 0.002989 | val-acc = 0.822\n", 786 
| "[Epoch 13/15]: train-loss = 0.111513 | train-acc = 0.961 | val-loss = 0.000481 | val-acc = 0.824\n", 787 | "[Epoch 14/15]: train-loss = 0.109781 | train-acc = 0.962 | val-loss = 0.000927 | val-acc = 0.824\n", 788 | "[Epoch 15/15]: train-loss = 0.091795 | train-acc = 0.968 | val-loss = 0.003517 | val-acc = 0.833\n" 789 | ] 790 | } 791 | ], 792 | "source": [ 793 | "# !pwd\n", 794 | "train_costs, val_costs = train_model()" 795 | ] 796 | }, 797 | { 798 | "cell_type": "code", 799 | "execution_count": 17, 800 | "metadata": { 801 | "colab": { 802 | "base_uri": "https://localhost:8080/" 803 | }, 804 | "id": "kg45TKgb8N0k", 805 | "outputId": "fb8457c3-aaf2-42d1-d456-454012473c58" 806 | }, 807 | "outputs": [ 808 | { 809 | "data": { 810 | "text/plain": [ 811 | "" 812 | ] 813 | }, 814 | "execution_count": 17, 815 | "metadata": {}, 816 | "output_type": "execute_result" 817 | } 818 | ], 819 | "source": [ 820 | "#Restore the model.\n", 821 | "model = ResNet56()\n", 822 | "model.load_state_dict(torch.load('/content/resnet-56_weights_gpu'))" 823 | ] 824 | }, 825 | { 826 | "cell_type": "markdown", 827 | "metadata": { 828 | "id": "AODk608HNFbv" 829 | }, 830 | "source": [ 831 | "## Test the trained model on Test dataset" 832 | ] 833 | }, 834 | { 835 | "cell_type": "code", 836 | "execution_count": 18, 837 | "metadata": { 838 | "colab": { 839 | "base_uri": "https://localhost:8080/" 840 | }, 841 | "id": "MPbkor1g8Q3r", 842 | "outputId": "66352854-1346-4280-d198-c31310685fc7" 843 | }, 844 | "outputs": [ 845 | { 846 | "name": "stdout", 847 | "output_type": "stream", 848 | "text": [ 849 | "Test accuracy: 0.8344\n" 850 | ] 851 | } 852 | ], 853 | "source": [ 854 | "test_samples_num = 10000\n", 855 | "correct = 0 \n", 856 | "\n", 857 | "model.eval().cuda()\n", 858 | "\n", 859 | "with torch.no_grad():\n", 860 | " for inputs, labels in test_loader:\n", 861 | " inputs, labels = inputs.to(device), labels.to(device)\n", 862 | " # Make predictions.\n", 863 | " prediction = model(inputs)\n", 864 | "\n", 865 | " # Retrieve predictions indexes.\n", 866 | " _, predicted_class = torch.max(prediction.data, 1)\n", 867 | "\n", 868 | " # Compute number of correct predictions.\n", 869 | " correct += (predicted_class == labels).float().sum().item()\n", 870 | "\n", 871 | "test_accuracy = correct / test_samples_num\n", 872 | "print('Test accuracy: {}'.format(test_accuracy))" 873 | ] 874 | } 875 | ], 876 | "metadata": { 877 | "accelerator": "GPU", 878 | "colab": { 879 | "collapsed_sections": [], 880 | "name": "ResNet_MyWork.ipynb", 881 | "provenance": [] 882 | }, 883 | "interpreter": { 884 | "hash": "36cf16204b8548560b1c020c4e8fb5b57f0e4c58016f52f2d4be01e192833930" 885 | }, 886 | "kernelspec": { 887 | "display_name": "Python 3.9.10 64-bit", 888 | "language": "python", 889 | "name": "python3" 890 | }, 891 | "language_info": { 892 | "codemirror_mode": { 893 | "name": "ipython", 894 | "version": 3 895 | }, 896 | "file_extension": ".py", 897 | "mimetype": "text/x-python", 898 | "name": "python", 899 | "nbconvert_exporter": "python", 900 | "pygments_lexer": "ipython3", 901 | "version": "3.9.10" 902 | }, 903 | "orig_nbformat": 4 904 | }, 905 | "nbformat": 4, 906 | "nbformat_minor": 0 907 | } 908 | -------------------------------------------------------------------------------- /Unet_Brain_segmentation_unet_with_keras/unet.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import pandas as pd 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | 7 | 
plt.style.use("ggplot") 8 | # %matplotlib inline 9 | 10 | import cv2 11 | from tqdm import tqdm_notebook, tnrange 12 | from glob import glob 13 | from itertools import chain 14 | from skimage.io import imread, imshow, concatenate_images 15 | from skimage.transform import resize 16 | from skimage.morphology import label 17 | from sklearn.model_selection import train_test_split 18 | 19 | import tensorflow as tf 20 | from skimage.color import rgb2gray 21 | from tensorflow.keras import Input 22 | from tensorflow.keras.models import Model, load_model, save_model 23 | from tensorflow.keras.layers import ( 24 | Input, 25 | Activation, 26 | BatchNormalization, 27 | Dropout, 28 | Lambda, 29 | Conv2D, 30 | Conv2DTranspose, 31 | MaxPooling2D, 32 | concatenate, 33 | ) 34 | from tensorflow.keras.optimizers import Adam 35 | from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint 36 | 37 | from tensorflow.keras import backend as K 38 | from tensorflow.keras.preprocessing.image import ImageDataGenerator 39 | from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint 40 | 41 | 42 | def unet(input_size=(256, 256, 3)): 43 | inputs = Input(input_size) 44 | 45 | # First DownConvolution / Encoder Leg will begin, so start with Conv2D 46 | conv1 = Conv2D(filters=64, kernel_size=(3, 3), padding="same")(inputs) 47 | bn1 = Activation("relu")(conv1) 48 | conv1 = Conv2D(filters=64, kernel_size=(3, 3), padding="same")(bn1) 49 | bn1 = BatchNormalization(axis=3)(conv1) 50 | bn1 = Activation("relu")(bn1) 51 | pool1 = MaxPooling2D(pool_size=(2, 2))(bn1) 52 | 53 | conv2 = Conv2D(filters=128, kernel_size=(3, 3), padding="same")(pool1) 54 | bn2 = Activation("relu")(conv2) 55 | conv2 = Conv2D(filters=128, kernel_size=(3, 3), padding="same")(bn2) 56 | bn2 = BatchNormalization(axis=3)(conv2) 57 | bn2 = Activation("relu")(bn2) 58 | pool2 = MaxPooling2D(pool_size=(2, 2))(bn2) 59 | 60 | conv3 = Conv2D(filters=256, kernel_size=(3, 3), padding="same")(pool2) 61 | bn3 = Activation("relu")(conv3) 62 | conv3 = Conv2D(filters=256, kernel_size=(3, 3), padding="same")(bn3) 63 | bn3 = BatchNormalization(axis=3)(conv3) 64 | bn3 = Activation("relu")(bn3) 65 | pool3 = MaxPooling2D(pool_size=(2, 2))(bn3) 66 | 67 | conv4 = Conv2D(filters=512, kernel_size=(3, 3), padding="same")(pool3) 68 | bn4 = Activation("relu")(conv4) 69 | conv4 = Conv2D(filters=512, kernel_size=(3, 3), padding="same")(bn4) 70 | bn4 = BatchNormalization(axis=3)(conv4) 71 | bn4 = Activation("relu")(bn4) 72 | pool4 = MaxPooling2D(pool_size=(2, 2))(bn4) 73 | 74 | conv5 = Conv2D(filters=1024, kernel_size=(3, 3), padding="same")(pool4) 75 | bn5 = Activation("relu")(conv5) 76 | conv5 = Conv2D(filters=1024, kernel_size=(3, 3), padding="same")(bn5) 77 | bn5 = BatchNormalization(axis=3)(conv5) 78 | bn5 = Activation("relu")(bn5) 79 | 80 | """ Now UpConvolution / Decoder Leg will begin, so start with Conv2DTranspose 81 | The gray arrows (in the above image) indicate the skip connections that concatenate the encoder feature map with the decoder, which helps the backward flow of gradients for improved training. 
""" 82 | up6 = concatenate( 83 | [ 84 | Conv2DTranspose(512, kernel_size=(2, 2), strides=(2, 2), padding="same")( 85 | bn5 86 | ), 87 | conv4, 88 | ], 89 | axis=3, 90 | ) 91 | """ After every concatenation we again apply two consecutive regular convolutions so that the model can learn to assemble a more precise output """ 92 | conv6 = Conv2D(filters=512, kernel_size=(3, 3), padding="same")(up6) 93 | bn6 = Activation("relu")(conv6) 94 | conv6 = Conv2D(filters=512, kernel_size=(3, 3), padding="same")(bn6) 95 | bn6 = BatchNormalization(axis=3)(conv6) 96 | bn6 = Activation("relu")(bn6) 97 | 98 | up7 = concatenate( 99 | [ 100 | Conv2DTranspose(256, kernel_size=(2, 2), strides=(2, 2), padding="same")( 101 | bn6 102 | ), 103 | conv3, 104 | ], 105 | axis=3, 106 | ) 107 | conv7 = Conv2D(filters=256, kernel_size=(3, 3), padding="same")(up7) 108 | bn7 = Activation("relu")(conv7) 109 | conv7 = Conv2D(filters=256, kernel_size=(3, 3), padding="same")(bn7) 110 | bn7 = BatchNormalization(axis=3)(conv7) 111 | bn7 = Activation("relu")(bn7) 112 | 113 | up8 = concatenate( 114 | [ 115 | Conv2DTranspose(128, kernel_size=(2, 2), strides=(2, 2), padding="same")( 116 | bn7 117 | ), 118 | conv2, 119 | ], 120 | axis=3, 121 | ) 122 | conv8 = Conv2D(filters=128, kernel_size=(3, 3), padding="same")(up8) 123 | bn8 = Activation("relu")(conv8) 124 | conv8 = Conv2D(filters=128, kernel_size=(3, 3), padding="same")(bn8) 125 | bn8 = BatchNormalization(axis=3)(conv8) 126 | bn8 = Activation("relu")(bn8) 127 | 128 | up9 = concatenate( 129 | [ 130 | Conv2DTranspose(64, kernel_size=(2, 2), strides=(2, 2), padding="same")( 131 | bn8 132 | ), 133 | conv1, 134 | ], 135 | axis=3, 136 | ) 137 | conv9 = Conv2D(filters=64, kernel_size=(3, 3), padding="same")(up9) 138 | bn9 = Activation("relu")(conv9) 139 | conv9 = Conv2D(filters=64, kernel_size=(3, 3), padding="same")(bn9) 140 | bn9 = BatchNormalization(axis=3)(conv9) 141 | bn9 = Activation("relu")(bn9) 142 | 143 | conv10 = Conv2D(filters=1, kernel_size=(1, 1), activation="sigmoid")(bn9) 144 | 145 | return Model(inputs=[inputs], outputs=[conv10]) 146 | -------------------------------------------------------------------------------- /Unet_Brain_segmentation_unet_with_keras/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import pandas as pd 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | import cv2 7 | from tensorflow.keras import backend as K 8 | 9 | plt.style.use("ggplot") 10 | 11 | 12 | def plot_from_img_path(rows, columns, list_img_path, list_mask_path): 13 | fig = plt.figure(figsize=(12, 12)) 14 | for i in range(1, rows * columns + 1): 15 | fig.add_subplot(rows, columns, i) 16 | img_path = list_img_path[i] 17 | mask_path = list_mask_path[i] 18 | image = cv2.imread(img_path) 19 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 20 | mask = cv2.imread(mask_path) 21 | plt.imshow(image) 22 | plt.imshow(mask, alpha=0.4) 23 | plt.show() 24 | 25 | 26 | def dice_coefficients(y_true, y_pred, smooth=100): 27 | y_true_flatten = K.flatten(y_true) 28 | y_pred_flatten = K.flatten(y_pred) 29 | 30 | intersection = K.sum(y_true_flatten * y_pred_flatten) 31 | union = K.sum(y_true_flatten) + K.sum(y_pred_flatten) 32 | return (2 * intersection + smooth) / (union + smooth) 33 | 34 | 35 | def dice_coefficients_loss(y_true, y_pred, smooth=100): 36 | return -dice_coefficients(y_true, y_pred, smooth) 37 | 38 | 39 | def iou(y_true, y_pred, smooth=100): 40 | intersection = K.sum(y_true * y_pred) 41 | sum = K.sum(y_true + 
y_pred) 42 | iou = (intersection + smooth) / (sum - intersection + smooth) 43 | return iou 44 | 45 | 46 | def jaccard_distance(y_true, y_pred): 47 | y_true_flatten = K.flatten(y_true) 48 | y_pred_flatten = K.flatten(y_pred) 49 | return -iou(y_true_flatten, y_pred_flatten) 50 | -------------------------------------------------------------------------------- /WGAN_Pytorch_From_Scratch_Full_Notebook.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## What Is a Wasserstein GAN?\n", 8 | "\n", 9 | "It is an extension of the GAN that seeks an alternate way of training the generator model to better approximate the distribution of data observed in a given training dataset.\n", 10 | "\n", 11 | "### Wasserstein GAN, or WGAN, is a type of generative adversarial network that minimizes an approximation of the Earth-Mover's distance (EM) rather than the Jensen-Shannon divergence as in the original GAN formulation.\n", 12 | "\n", 13 | "#### Here in WGAN, the discriminator does not actually classify instances. Rather here for each instance the Discriminator outputs a number. This number does not have to be less than one or greater than 0, so we can't use 0.5 as a threshold to decide whether an instance is real or fake. Discriminator training just tries to make the output bigger for real instances than for fake instances.\n", 14 | "\n", 15 | "Instead of using a discriminator to classify or predict the probability of generated images as being real or fake, the WGAN changes or replaces the discriminator model with a critic that scores the realness or fakeness of a given image.\n", 16 | "\n", 17 | "This change is motivated by a mathematical argument that training the generator should seek a minimization of the distance between the distribution of the data observed in the training dataset and the distribution observed in generated examples. The argument contrasts different distribution distance measures, such as Kullback-Leibler (KL) divergence, Jensen-Shannon (JS) divergence, and the Earth-Mover (EM) distance, referred to as Wasserstein distance.\n", 18 | "\n", 19 | "---\n", 20 | "\n", 21 | "\n", 22 | "The idea for the working of WGANs is to utilize two probability distributions. 
One is the probability distribution of the generator (Pg), which refers to the distribution from the output of the generator model.\n", 23 | "\n", 24 | "The other is the probability distribution from the real images (Pr).\n", 25 | "\n", 26 | "And the objective of WGAN is to ensure that both these probability distributions are close to each other so that the output generated is highly realistic and high-quality.\n", 27 | "\n", 28 | "For calculating the distance of these probability distributions, mathematical statistics in machine learning proposes three primary methods, namely\n", 29 | "\n", 30 | "- Kullback–Leibler divergence,\n", 31 | "- Jensen–Shannon divergence, and\n", 32 | "- Wasserstein distance.\n", 33 | "\n", 34 | "The Jensen–Shannon divergence (also a typical GAN loss) is the more utilized mechanism in simple GAN networks.\n", 35 | "\n", 36 | "#### But in WGAN, we use the Wasserstein distance (a.k.a Earth Mover’s Distance) instead of Jensen-Shannon Divergence to compare probability distributions.\n", 37 | "\n", 38 | "**The benefit of the WGAN is that the training process is more stable and less sensitive to model architecture and choice of hyperparameter configurations.**\n", 39 | "\n", 40 | "---\n", 41 | "\n", 42 | "## Compared to the original GAN algorithm, the WGAN undertakes the following changes:\n", 43 | "\n", 44 | "* After every gradient update on the critic function, clamp the weights to a small fixed range, [-c, c].\n", 45 | "\n", 46 | "* Use a new loss function derived from the Wasserstein distance, no logarithm anymore. The “discriminator” model does not play as a direct critic but a helper for estimating the Wasserstein metric between real and generated data distribution.\n", 47 | "\n", 48 | "* Empirically the authors recommended RMSProp optimizer on the critic, rather than a momentum based optimizer such as Adam which could cause instability in the model training.\n", 49 | "\n", 50 | "---\n", 51 | "\n", 52 | "\n", 53 | "## Key Points in WGAN\n", 54 | "\n", 55 | "![Imgur](https://imgur.com/cWROjs7.png)\n", 56 | "\n", 57 | "### 1. Critic Weight Clipping\n", 58 | "\n", 59 | "The critic F has to be a 1-Lipschitz function. To enforce the constraint, WGAN applies a very simple clipping to restrict the maximum weight value in F,\n", 60 | "\n", 61 | "i.e. the weights of the discriminator must be within a certain range controlled by the hyperparameters c\n", 62 | "\n", 63 | "### 2. Update Critic More Than Generator\n", 64 | "\n", 65 | "In the DCGAN, the generator and the discriminator model must be updated in equal amounts.\n", 66 | "\n", 67 | "Specifically, the discriminator is updated with a half batch of real and a half batch of fake samples each iteration, whereas the generator is updated with a single batch of generated samples.\n", 68 | "\n", 69 | "In the WGAN model, the critic model must be updated more than the generator model.\n", 70 | "\n", 71 | "Specifically, a new hyperparameter is defined to control the number of times that the critic is updated for each update to the generator model, called n_critic.\n", 72 | "\n", 73 | "### 3. 
Use RMSProp Stochastic Gradient Descent\n", 74 | "\n", 75 | "The DCGAN uses the Adam version of stochastic gradient descent with a small learning rate and modest momentum.\n", 76 | "\n", 77 | "The WGAN recommends the use of Root Mean Square Propagation or RMSProp instead (which is one of the Adaptive Learning Rate Gradient Descent), with a small learning rate of 0.00005.\n", 78 | "\n", 79 | "---\n", 80 | "\n", 81 | "## The loss function for WGAN\n", 82 | "\n", 83 | "#### First, for a Normal GAN (e.g. DCGAN) the Loss definition is ;\n", 84 | "\n", 85 | "Critic Loss: D(x) - D(G(z))\n", 86 | "\n", 87 | "Where,\n", 88 | "- D(x) is the discriminator's estimate of the probability that real data instance x is real.\n", 89 | "- G(z) is the generator's output when given noise z.\n", 90 | "- D(G(z)) is the discriminator's estimate of the probability that a fake instance is real.\n", 91 | "\n", 92 | "\n", 93 | "#### Now for WGAN the Loss is defined as:\n", 94 | "\n", 95 | "#### Critic Loss = [average critic score on real images] – [average critic score on fake images]\n", 96 | "\n", 97 | "Critic Loss: D(x) - D(G(z))\n", 98 | "\n", 99 | "In WGAN, the Discriminator, does not produce a Probability, rather it produces a pure score.\n", 100 | "\n", 101 | "Where,\n", 102 | " - D(x) is the critic's output for a real instance.\n", 103 | " - G(z) is the generator's output when given noise z.\n", 104 | " - D(G(z)) is the critic's output for a fake instance.\n", 105 | "\n", 106 | "The output of critic D does not have to be between 1 and 0.\n", 107 | "\n", 108 | "- The discriminator tries to maximize this function. In other words, it tries to maximize the difference between its output on real instances and its output on fake instances.\n", 109 | "\n", 110 | "- So, when compared to the Normal GAN's Discriminator, the Discriminator in WGAN, we do NOT classify or predict the probability of generated images as being real or fake. Instead, the WGAN replaces the discriminator model with a critic that scores the realness or fakeness of a given image.\n", 111 | "\n", 112 | "- It does this by removing the last Sigmoid() layer and have a linear layer at the end of the discriminator’s neural network.\n", 113 | "\n", 114 | "#### Generator Loss = -[average critic score on fake images]\n", 115 | "\n", 116 | "Generator Loss: D(G(z))\n", 117 | "\n", 118 | "The generator tries to maximize this function. In other words, It tries to maximize the discriminator's output for its fake instances. In these functions:\n", 119 | "\n", 120 | "\n", 121 | "\n", 122 | "---\n", 123 | "\n", 124 | "### Implementing Wasserstein Loss\n", 125 | "\n", 126 | "1. Use a linear activation function in the output layer of the critic model (instead of sigmoid).\n", 127 | "\n", 128 | "2. Use Wasserstein loss to train the critic and generator models that promote larger difference between scores for real and generated images.\n", 129 | "\n", 130 | "3. Constrain critic model weights to a limited range after each mini batch update (e.g. [-0.01,0.01]).\n", 131 | "\n", 132 | "4. 
In order to have parameters w lie in a compact space, something simple we can do is clamp the weights to a fixed box (say W = [−0.01, 0.01]l ) after each gradient update.\n", 133 | "\n", 134 | "\n", 135 | "We can summarize the function as it is described in the paper as follows:\n", 136 | "\n", 137 | "\n", 138 | "#### Critic Loss = [average critic score on real images] – [average critic score on fake images]\n", 139 | "\n", 140 | "#### Generator Loss = -[average critic score on fake images]\n", 141 | "\n", 142 | "Where the average scores are calculated across a mini-batch of samples.\n", 143 | "\n", 144 | "The calculations are straightforward to interpret once we recall that stochastic gradient descent seeks to minimize loss.\n", 145 | "\n", 146 | "#### In the case of the generator, a larger score from the critic will result in a smaller loss for the generator, encouraging the critic to output larger scores for fake images. For example, an average score of 10 becomes -10, an average score of 50 becomes -50, which is smaller, and so on.\n", 147 | "\n", 148 | "#### In the case of the critic, a larger score for real images results in a larger resulting loss for the critic, penalizing the model. This encourages the critic to output smaller scores for real images. For example, an average score of 20 for real images and 50 for fake images results in a loss of -30; an average score of 10 for real images and 50 for fake images results in a loss of -40, which is better, and so on.\n", 149 | "\n", 150 | "#### The sign of the loss does not matter in this case, as long as loss for real images is a small number and the loss for fake images is a large number. The Wasserstein loss encourages the critic to separate these numbers.\n", 151 | "\n", 152 | "#### We can also reverse the situation and encourage the critic to output a large score for real images and a small score for fake images and achieve the same result.\n", 153 | "\n", 154 | "---\n", 155 | "\n", 156 | "### Main Equation\n", 157 | "\n", 158 | "The network uses Earth Mover’s Distance instead of Jensen-Shannon Divergence to compare probability distributions.\n", 159 | "\n", 160 | "![Imgur](https://imgur.com/EJg4nHM.png)\n", 161 | "\n", 162 | "In the above equation, the max value represents the constraint on the discriminator. In the WGAN architecture, the discriminator is referred to as the critic. One of the reasons for this convention is that there is no sigmoid activation function to limit the values to 0 or 1, which means real or fake. So the discriminator in WGAN, outputs a scalar score rather than a probability.\n", 163 | "\n", 164 | "The first part of the equation represents the real data, while the second half represents the generator data. The discriminator (or the critic) in the above equation aims to maximize the distance between the real data and the generated data, because it wants to be able to successfully distinguish the data accordingly.\n", 165 | "\n", 166 | "The generator network aims to minimize the distance between the real data and generated data because it wants the generated data to be as real as possible.\n", 167 | "\n", 168 | "---\n", 169 | "\n", 170 | "## Jensen Shannon Divergence (JSD)\n", 171 | "\n", 172 | "\n", 173 | "The objective function of our original GAN is essentially the minimization of something called the Jensen Shannon Divergence (JSD). Specifically it is:\n", 174 | "\n", 175 | "![Imgur](https://imgur.com/kYc2Cfv.png)\n", 176 | "\n", 177 | "---\n", 178 | "\n", 179 | "Sadly, Wasserstein GAN is not perfect. 
Even the authors of the original WGAN paper mentioned that “Weight clipping is a clearly terrible way to enforce a Lipschitz constraint” (Oops!). WGAN still suffers from unstable training, slow convergence after weight clipping (when clipping window is too large), and vanishing gradients (when clipping window is too small).\n", 180 | "\n", 181 | "Some improvement, precisely replacing weight clipping with gradient penalty is one of the most prominent solution that has been proposed." 182 | ] 183 | }, 184 | { 185 | "cell_type": "markdown", 186 | "metadata": { 187 | "id": "7A1WwPqxmfrE" 188 | }, 189 | "source": [ 190 | "## Implementation from scratch" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": 6, 196 | "metadata": { 197 | "colab": { 198 | "base_uri": "https://localhost:8080/" 199 | }, 200 | "id": "lzolhsHfVzWO", 201 | "outputId": "0d3b2a3f-d1d4-45bc-c091-808931832607", 202 | "vscode": { 203 | "languageId": "python" 204 | } 205 | }, 206 | "outputs": [ 207 | { 208 | "name": "stdout", 209 | "output_type": "stream", 210 | "text": [ 211 | "Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n" 212 | ] 213 | } 214 | ], 215 | "source": [ 216 | "from google.colab import drive\n", 217 | "drive.mount('/content/drive')" 218 | ] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "execution_count": 7, 223 | "metadata": { 224 | "id": "7Aa7VNjLmaw9", 225 | "vscode": { 226 | "languageId": "python" 227 | } 228 | }, 229 | "outputs": [], 230 | "source": [ 231 | "import os\n", 232 | "\n", 233 | "import torchvision.transforms as transforms\n", 234 | "from torchvision.utils import make_grid\n", 235 | "\n", 236 | "from torch.utils.data import DataLoader\n", 237 | "from torchvision import datasets\n", 238 | "from torch.autograd import Variable\n", 239 | "\n", 240 | "import torch.nn as nn\n", 241 | "import torch.nn.functional as F\n", 242 | "import torch\n", 243 | "\n", 244 | "import numpy as np\n", 245 | "import matplotlib.pyplot as plt\n", 246 | "from matplotlib.pyplot import figure\n", 247 | "\n", 248 | "from tqdm import tqdm \n", 249 | "\n", 250 | "plt.ion()\n", 251 | "from IPython.display import clear_output" 252 | ] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "execution_count": 8, 257 | "metadata": { 258 | "colab": { 259 | "base_uri": "https://localhost:8080/" 260 | }, 261 | "id": "lQ9BWJV3WYUt", 262 | "outputId": "6018a674-99da-4eea-f989-9d4e5a753762", 263 | "vscode": { 264 | "languageId": "python" 265 | } 266 | }, 267 | "outputs": [ 268 | { 269 | "name": "stdout", 270 | "output_type": "stream", 271 | "text": [ 272 | "Tue Mar 22 20:37:53 2022 \n", 273 | "+-----------------------------------------------------------------------------+\n", 274 | "| NVIDIA-SMI 460.32.03 Driver Version: 460.32.03 CUDA Version: 11.2 |\n", 275 | "|-------------------------------+----------------------+----------------------+\n", 276 | "| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\n", 277 | "| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\n", 278 | "| | | MIG M. 
|\n", 279 | "|===============================+======================+======================|\n", 280 | "| 0 Tesla T4 Off | 00000000:00:04.0 Off | 0 |\n", 281 | "| N/A 33C P8 9W / 70W | 0MiB / 15109MiB | 0% Default |\n", 282 | "| | | N/A |\n", 283 | "+-------------------------------+----------------------+----------------------+\n", 284 | " \n", 285 | "+-----------------------------------------------------------------------------+\n", 286 | "| Processes: |\n", 287 | "| GPU GI CI PID Type Process name GPU Memory |\n", 288 | "| ID ID Usage |\n", 289 | "|=============================================================================|\n", 290 | "| No running processes found |\n", 291 | "+-----------------------------------------------------------------------------+\n" 292 | ] 293 | } 294 | ], 295 | "source": [ 296 | "!nvidia-smi" 297 | ] 298 | }, 299 | { 300 | "cell_type": "markdown", 301 | "metadata": {}, 302 | "source": [ 303 | "## HYPERPARAMETERS" 304 | ] 305 | }, 306 | { 307 | "cell_type": "code", 308 | "execution_count": null, 309 | "metadata": { 310 | "vscode": { 311 | "languageId": "python" 312 | } 313 | }, 314 | "outputs": [], 315 | "source": [ 316 | "class Hyperparameters(object):\n", 317 | " def __init__(self, **kwargs):\n", 318 | " self.__dict__.update(kwargs)\n", 319 | "\n", 320 | "hp = Hyperparameters(n_epochs=200,\n", 321 | " batch_size=64,\n", 322 | " lr=0.00005, \n", 323 | " n_cpu=8,\n", 324 | " latent_dim=100,\n", 325 | " img_size=32,\n", 326 | " channels=1,\n", 327 | " n_critic=25,\n", 328 | " clip_value=.005,\n", 329 | " sample_interval=400)\n", 330 | "\n", 331 | "print(hp.lr)" 332 | ] 333 | }, 334 | { 335 | "cell_type": "code", 336 | "execution_count": 10, 337 | "metadata": { 338 | "id": "wQcu6fi-WPCF", 339 | "vscode": { 340 | "languageId": "python" 341 | } 342 | }, 343 | "outputs": [], 344 | "source": [ 345 | "root_path = '/content/drive/MyDrive/All_Datasets/Fashion_MNIST'\n", 346 | "''' The Fashion-MNIST dataset contains 60,000 training images (and 10,000 test images) of fashion and clothing items, taken from 10 classes. Each image is a standardized 28×28 size in grayscale (784 total pixels). 
'''\n", 347 | "\n", 348 | "dataloader = torch.utils.data.DataLoader(\n", 349 | " datasets.FashionMNIST(\n", 350 | " root_path,\n", 351 | " train=True,\n", 352 | " download=True,\n", 353 | " transform=transforms.Compose(\n", 354 | " [transforms.Resize(hp.img_size), transforms.ToTensor(), transforms.Normalize([0.5], [0.5])]\n", 355 | " ),\n", 356 | " ),\n", 357 | " batch_size=hp.batch_size,\n", 358 | " shuffle=True,\n", 359 | ")" 360 | ] 361 | }, 362 | { 363 | "cell_type": "markdown", 364 | "metadata": { 365 | "id": "yQ954M71otK9" 366 | }, 367 | "source": [ 368 | "SETUP" 369 | ] 370 | }, 371 | { 372 | "cell_type": "code", 373 | "execution_count": 11, 374 | "metadata": { 375 | "id": "dbDFpxOdonQI", 376 | "vscode": { 377 | "languageId": "python" 378 | } 379 | }, 380 | "outputs": [], 381 | "source": [ 382 | "# os.makedirs(\"images\", exist_ok=True)\n", 383 | "img_shape = (hp.channels, hp.img_size, hp.img_size)\n", 384 | "\n", 385 | "cuda = True if torch.cuda.is_available() else False\n", 386 | "\n", 387 | "def weights_init_normal(m):\n", 388 | " classname = m.__class__.__name__\n", 389 | " if classname.find(\"Conv\") != -1:\n", 390 | " torch.nn.init.normal_(m.weight.data, 0.0, 0.02)\n", 391 | " elif classname.find(\"BatchNorm2d\") != -1:\n", 392 | " torch.nn.init.normal_(m.weight.data, 1.0, 0.02)\n", 393 | " torch.nn.init.constant_(m.bias.data, 0.0)\n", 394 | "\n", 395 | "def to_img(x):\n", 396 | " x = x.clamp(0, 1)\n", 397 | " return x\n", 398 | "\n", 399 | "def visualise_output(images, x, y):\n", 400 | " with torch.no_grad(): \n", 401 | " images = images.cpu()\n", 402 | " images = to_img(images)\n", 403 | " np_imagegrid = make_grid(images, x, y).numpy()\n", 404 | " figure(figsize=(20,20))\n", 405 | " plt.imshow(np.transpose(np_imagegrid, (1, 2, 0)))\n", 406 | " plt.show()" 407 | ] 408 | }, 409 | { 410 | "cell_type": "markdown", 411 | "metadata": { 412 | "id": "xIXAbyxIo9r9" 413 | }, 414 | "source": [ 415 | "## GENERATOR\n", 416 | "\n", 417 | "The generator model takes as input a point in the latent space and outputs a single 28×28 grayscale image.\n", 418 | "\n", 419 | "This is achieved by using a fully connected layer to interpret the point in the latent space. \n", 420 | "\n", 421 | "This is then upsampled couple of more times, doubling the size.\n", 422 | "\n", 423 | "### np.prod()\n", 424 | "\n", 425 | "Return the product of array elements over a given axis.\n", 426 | "\n", 427 | "If the input array is blank, then this method returns the neutral element: 1\n", 428 | "\n", 429 | "By default, the axis is set to None, thereby calculating the product of all the elements in the given array. 
" 430 | ] 431 | }, 432 | { 433 | "cell_type": "code", 434 | "execution_count": 12, 435 | "metadata": { 436 | "id": "bTfDyWs-o_cG", 437 | "vscode": { 438 | "languageId": "python" 439 | } 440 | }, 441 | "outputs": [], 442 | "source": [ 443 | "class Generator(nn.Module):\n", 444 | " def __init__(self, img_shape, latent_dim):\n", 445 | " super(Generator, self).__init__()\n", 446 | "\n", 447 | " def block(in_features, out_features, normalize=True):\n", 448 | " layers = [nn.Linear(in_features, out_features)]\n", 449 | " if normalize:\n", 450 | " layers.append(nn.BatchNorm1d(out_features, 0.8))\n", 451 | " layers.append(nn.LeakyReLU(0.2, inplace=True))\n", 452 | " return layers\n", 453 | "\n", 454 | " self.model = nn.Sequential(\n", 455 | " *block(\n", 456 | " in_features=latent_dim, out_features=128, normalize=False\n", 457 | " ), # Batch_size, 784 -> Batch_size, 128\n", 458 | " *block(\n", 459 | " in_features=128, out_features=256\n", 460 | " ), # Batch_size, 128 -> Batch_size, 256\n", 461 | " *block(\n", 462 | " in_features=256, out_features=512\n", 463 | " ), # Batch_size, 256 -> Batch_size, 512\n", 464 | " *block(\n", 465 | " in_features=512, out_features=1024\n", 466 | " ), # Batch_size, 512 -> Batch_size, 1024\n", 467 | " nn.Linear(\n", 468 | " in_features=1024, out_features=int(np.prod(img_shape))\n", 469 | " ), # Batch_size, 1024 -> Batch_size, np.prod(img_shape)\n", 470 | " nn.Tanh()\n", 471 | " )\n", 472 | "\n", 473 | " def forward(self, img_shape, z):\n", 474 | " img = self.model(z)\n", 475 | " img = img.view(img.shape[0], *img_shape)\n", 476 | " return img" 477 | ] 478 | }, 479 | { 480 | "cell_type": "markdown", 481 | "metadata": { 482 | "id": "P01xoE6FpL9-" 483 | }, 484 | "source": [ 485 | "## DISCRIMINATOR\n", 486 | "\n", 487 | "One of the reasons for this convention is that there is no sigmoid activation function to limit the values to 0 or 1, which means real or fake. So the discriminator in WGAN, outputs a scalar score rather than a probability." 
488 | ] 489 | }, 490 | { 491 | "cell_type": "code", 492 | "execution_count": 13, 493 | "metadata": { 494 | "id": "jYlPm9G8pN6u", 495 | "vscode": { 496 | "languageId": "python" 497 | } 498 | }, 499 | "outputs": [], 500 | "source": [ 501 | "class Critic(nn.Module):\n", 502 | " def __init__(self, img_shape):\n", 503 | " super(Critic, self).__init__()\n", 504 | "\n", 505 | " self.model = nn.Sequential(\n", 506 | " nn.Linear(\n", 507 | " in_features=int(np.prod(img_shape)), out_features=512\n", 508 | " ), # Batch_size, np.prod(img_shape) -> Batch_size, 512\n", 509 | " nn.LeakyReLU(0.2, inplace=True),\n", 510 | " nn.Linear(\n", 511 | " in_features=512, out_features=256\n", 512 | " ), # Batch_size, 512 -> Batch_size, 256\n", 513 | " nn.LeakyReLU(0.2, inplace=True),\n", 514 | " nn.Linear(\n", 515 | " in_features=256, out_features=1\n", 516 | " ), # Batch_size, 256 -> Batch_size, 1\n", 517 | " )\n", 518 | "\n", 519 | " def forward(self, img):\n", 520 | " img_flat = img.view(img.shape[0], -1)\n", 521 | " validity = self.model(img_flat)\n", 522 | " return validity" 523 | ] 524 | }, 525 | { 526 | "cell_type": "markdown", 527 | "metadata": { 528 | "id": "0y-0F1URpfQd" 529 | }, 530 | "source": [ 531 | "LOSS and MODELS" 532 | ] 533 | }, 534 | { 535 | "cell_type": "code", 536 | "execution_count": 14, 537 | "metadata": { 538 | "colab": { 539 | "base_uri": "https://localhost:8080/" 540 | }, 541 | "id": "7UPqaoCSphQ1", 542 | "outputId": "dc9deab8-1c97-49c7-c4bb-d9a05ac7b52a", 543 | "vscode": { 544 | "languageId": "python" 545 | } 546 | }, 547 | "outputs": [ 548 | { 549 | "data": { 550 | "text/plain": [ 551 | "Critic(\n", 552 | " (model): Sequential(\n", 553 | " (0): Linear(in_features=1024, out_features=512, bias=True)\n", 554 | " (1): LeakyReLU(negative_slope=0.2, inplace=True)\n", 555 | " (2): Linear(in_features=512, out_features=256, bias=True)\n", 556 | " (3): LeakyReLU(negative_slope=0.2, inplace=True)\n", 557 | " (4): Linear(in_features=256, out_features=1, bias=True)\n", 558 | " )\n", 559 | ")" 560 | ] 561 | }, 562 | "execution_count": 14, 563 | "metadata": {}, 564 | "output_type": "execute_result" 565 | } 566 | ], 567 | "source": [ 568 | "generator = Generator(img_shape, hp.latent_dim)\n", 569 | "critic = Critic(img_shape)\n", 570 | "\n", 571 | "if cuda:\n", 572 | " generator.cuda()\n", 573 | " critic.cuda() \n", 574 | "\n", 575 | "# Initialize weights\n", 576 | "generator.apply(weights_init_normal)\n", 577 | "critic.apply(weights_init_normal)" 578 | ] 579 | }, 580 | { 581 | "cell_type": "markdown", 582 | "metadata": { 583 | "id": "Y0X-PJWYp6W9" 584 | }, 585 | "source": [ 586 | "OPTIMIZERS and TENSOR SETUP" 587 | ] 588 | }, 589 | { 590 | "cell_type": "code", 591 | "execution_count": 15, 592 | "metadata": { 593 | "id": "ymTI34yKqA2u", 594 | "vscode": { 595 | "languageId": "python" 596 | } 597 | }, 598 | "outputs": [], 599 | "source": [ 600 | "optimizer_G = torch.optim.RMSprop(generator.parameters(), lr=hp.lr)\n", 601 | "optimizer_D = torch.optim.RMSprop(critic.parameters(), lr=hp.lr)\n", 602 | "\n", 603 | "Tensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor" 604 | ] 605 | }, 606 | { 607 | "cell_type": "markdown", 608 | "metadata": { 609 | "id": "Bf3lQ1-0qehW" 610 | }, 611 | "source": [ 612 | "## TRAINING STEPS\n", 613 | "\n", 614 | "1. The critic network is first trained on a real batch of data, then trained on a batch of data generated from a noise-prior via the generator. \n", 615 | "\n", 616 | "2. 
The critic's loss function is arranged such that it estimates the Wasserstein Distance (maximizes the distance between the two distributions) then clips its own weights to ensure it is 1-Lipschitz-Continuous. \n", 617 | "\n", 618 | "3. Then, the generator generates a new batch of images from a noise prior, passes these through to the critic who then \"informs\" the generator of the Wasserstein-1 distance between the true distribution and the distribution of the images the Generator just created. \n", 619 | "\n", 620 | "4. It does this via the loss function of the critic. The critic's weights are frozen and the error propagates all the way back through to the generator who then updates its parameters to minimize the Wasserstein distance. \n", 621 | "\n", 622 | "5. This repeats until the loss (hopefully) converges to near zero and the distributions are approximately equal.\n", 623 | "\n", 624 | "6. The discriminator loss is (an approximation of) the negative Wasserstein distance between the generator distribution and the data distribution." 625 | ] 626 | }, 627 | { 628 | "cell_type": "code", 629 | "execution_count": null, 630 | "metadata": { 631 | "colab": { 632 | "base_uri": "https://localhost:8080/", 633 | "height": 1000 634 | }, 635 | "id": "ipLMbwcxqgPm", 636 | "outputId": "25fcb2db-224b-4c8f-d39c-b523685f2593", 637 | "vscode": { 638 | "languageId": "python" 639 | } 640 | }, 641 | "outputs": [], 642 | "source": [ 643 | "for epoch in range(hp.n_epochs):\n", 644 | " for i, (imgs, _) in enumerate(dataloader):\n", 645 | "\n", 646 | " # Adversarial ground truths\n", 647 | " valid = Variable(Tensor(imgs.shape[0], 1).fill_(1.0), requires_grad=False)\n", 648 | " fake = Variable(Tensor(imgs.shape[0], 1).fill_(0.0), requires_grad=False)\n", 649 | "\n", 650 | " # Configure input\n", 651 | " real_imgs = Variable(imgs.type(Tensor))\n", 652 | "\n", 653 | " # -----------------\n", 654 | " # Train Critic\n", 655 | " # -----------------\n", 656 | "\n", 657 | " optimizer_G.zero_grad()\n", 658 | "\n", 659 | " # Sample noise as generator input\n", 660 | " # Draw random samples from a normal (Gaussian) distribution.\n", 661 | " # np.random.normal(mean, sd, Output shape)\n", 662 | " z = Variable(Tensor(np.random.normal(0, 1, (imgs.shape[0], hp.latent_dim))))\n", 663 | "\n", 664 | " # Generate a batch of images\n", 665 | " fake_imgs = generator(z).detach()\n", 666 | "\n", 667 | " ''' The math for the loss functions for the critic and generator is:\n", 668 | " Critic Loss: D(x) - D(G(z))\n", 669 | " Generator Loss: D(G(z))\n", 670 | " Now for the Critic Loss, as per the Paper, we have to maximize the expression.\n", 671 | " So, arithmetically, maximizing an expression, means minimizing the -ve of that expression\n", 672 | " i.e. -(D(x) - D(G(z))) which is -D(x) + D(G(z)) i.e. -D(real_imgs) + D(G(real_imgs))\n", 673 | " '''\n", 674 | " d_loss = -torch.mean(critic(real_imgs)) + torch.mean(critic(fake_imgs)) \n", 675 | "\n", 676 | " d_loss.backward()\n", 677 | " optimizer_D.step()\n", 678 | "\n", 679 | " \n", 680 | " ''' Clip weights of critic to avoid vanishing/exploding gradients in the \n", 681 | " critic/critic. \n", 682 | " In order to have parameters w lie in a compact space, something simple we can do is clamp the weights to a fixed box (say W = [-0.005, 0.005]l ) after each gradient update.\n", 683 | " \n", 684 | " torch.clamp() is used to clamp all the elements in an input into the range [min, max]. It takes three parameters: the input tensor, min, and max values. 
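(Here those three are p.data, -hp.clip_value and hp.clip_value, so every critic weight is clamped in place to the range [-0.005, 0.005] after each update.)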
The values less than the min are replaced by the min and the values greater than the max are replaced by the max. If min is not given, then there is no lower bound. If max is not given, then there is no upper bound. '''\n", 685 | " for p in critic.parameters():\n", 686 | " p.data.clamp_(-hp.clip_value, hp.clip_value)\n", 687 | "\n", 688 | "\n", 689 | " ''' Train the generator every n_critic iterations \n", 690 | " we need to increase training iterations of the critic so that it works to \n", 691 | " approximate the real distribution sooner.\n", 692 | " '''\n", 693 | " if i % hp.n_critic == 0:\n", 694 | " # ---------------------\n", 695 | " # Train Generator\n", 696 | " # ---------------------\n", 697 | " optimizer_G.zero_grad()\n", 698 | "\n", 699 | " # Generate a batch of images\n", 700 | " fake_images_from_generator = generator(z)\n", 701 | " # Adversarial loss\n", 702 | " g_loss = -torch.mean(critic(fake_images_from_generator))\n", 703 | "\n", 704 | " g_loss.backward()\n", 705 | " optimizer_G.step() \n", 706 | "\n", 707 | " batches_done = epoch * len(dataloader) + i\n", 708 | " if batches_done % hp.sample_interval == 0:\n", 709 | " clear_output()\n", 710 | " print(f\"Epoch:{epoch}:It{i}:DLoss{d_loss.item()}:GLoss{g_loss.item()}\") \n", 711 | " visualise_output(fake_images_from_generator.data[:50],10, 10)" 712 | ] 713 | } 714 | ], 715 | "metadata": { 716 | "accelerator": "GPU", 717 | "colab": { 718 | "collapsed_sections": [], 719 | "name": "GEN_4_WGAN.ipynb", 720 | "provenance": [] 721 | }, 722 | "kernelspec": { 723 | "display_name": "Python 3", 724 | "name": "python3" 725 | } 726 | }, 727 | "nbformat": 4, 728 | "nbformat_minor": 0 729 | } 730 | -------------------------------------------------------------------------------- /assets/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rohan-paul/Deep-Learning-Paper-Implementation/f606c82c32e4e79496f5739aa0ee9813e335a36e/assets/2.png -------------------------------------------------------------------------------- /assets/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rohan-paul/Deep-Learning-Paper-Implementation/f606c82c32e4e79496f5739aa0ee9813e335a36e/assets/3.png -------------------------------------------------------------------------------- /assets/Youtube_Cover.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rohan-paul/Deep-Learning-Paper-Implementation/f606c82c32e4e79496f5739aa0ee9813e335a36e/assets/Youtube_Cover.jpg -------------------------------------------------------------------------------- /assets/yt_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rohan-paul/Deep-Learning-Paper-Implementation/f606c82c32e4e79496f5739aa0ee9813e335a36e/assets/yt_logo.png --------------------------------------------------------------------------------