├── PyTorch_Image_Inference.ipynb ├── PyTorch_Image_Training.ipynb └── README.md /PyTorch_Image_Training.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "%matplotlib inline\n", 10 | "%config InlineBackend.figure_format = 'retina'\n", 11 | "\n", 12 | "import matplotlib.pyplot as plt\n", 13 | "\n", 14 | "import numpy as np\n", 15 | "import torch\n", 16 | "from torch import nn\n", 17 | "from torch import optim\n", 18 | "import torch.nn.functional as F\n", 19 | "from torchvision import datasets, transforms, models" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 2, 25 | "metadata": {}, 26 | "outputs": [ 27 | { 28 | "name": "stdout", 29 | "output_type": "stream", 30 | "text": [ 31 | "['brush', 'commercial', 'field', 'grass', 'parking', 'play', 'residential', 'road', 'trees', 'water']\n" 32 | ] 33 | } 34 | ], 35 | "source": [ 36 | "data_dir = '/data/train'\n", 37 | "\n", 38 | "def load_split_train_test(datadir, valid_size = .2):\n", 39 | " train_transforms = transforms.Compose([#transforms.RandomRotation(30), # data augmentations are great\n", 40 | " #transforms.RandomResizedCrop(224), # but not in this case of map tiles\n", 41 | " #transforms.RandomHorizontalFlip(),\n", 42 | " transforms.Resize(224),\n", 43 | " transforms.ToTensor(),\n", 44 | " #transforms.Normalize([0.485, 0.456, 0.406], # PyTorch recommends these but in this\n", 45 | " # [0.229, 0.224, 0.225]) # case I didn't get good results\n", 46 | " ])\n", 47 | "\n", 48 | " test_transforms = transforms.Compose([transforms.Resize(224),\n", 49 | " transforms.ToTensor(),\n", 50 | " #transforms.Normalize([0.485, 0.456, 0.406],\n", 51 | " # [0.229, 0.224, 0.225])\n", 52 | " ])\n", 53 | "\n", 54 | " train_data = datasets.ImageFolder(datadir, transform=train_transforms)\n", 55 | " test_data = datasets.ImageFolder(datadir, transform=test_transforms)\n", 56 | "\n", 57 | " num_train = len(train_data)\n", 58 | " indices = list(range(num_train))\n", 59 | " split = int(np.floor(valid_size * num_train))\n", 60 | " np.random.shuffle(indices)\n", 61 | " from torch.utils.data.sampler import SubsetRandomSampler\n", 62 | " train_idx, test_idx = indices[split:], indices[:split]\n", 63 | " train_sampler = SubsetRandomSampler(train_idx)\n", 64 | " test_sampler = SubsetRandomSampler(test_idx)\n", 65 | " trainloader = torch.utils.data.DataLoader(train_data, sampler=train_sampler, batch_size=64)\n", 66 | " testloader = torch.utils.data.DataLoader(test_data, sampler=test_sampler, batch_size=64)\n", 67 | " return trainloader, testloader\n", 68 | "\n", 69 | "trainloader, testloader = load_split_train_test(data_dir, .2)\n", 70 | "print(trainloader.dataset.classes)\n" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 3, 76 | "metadata": { 77 | "scrolled": true 78 | }, 79 | "outputs": [ 80 | { 81 | "data": { 82 | "text/plain": [ 83 | "ResNet(\n", 84 | " (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)\n", 85 | " (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 86 | " (relu): ReLU(inplace)\n", 87 | " (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)\n", 88 | " (layer1): Sequential(\n", 89 | " (0): Bottleneck(\n", 90 | " (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 91 | " (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 92 | " (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", 93 | " (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 94 | " (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 95 | " (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 96 | " (relu): ReLU(inplace)\n", 97 | " (downsample): Sequential(\n", 98 | " (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 99 | " (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 100 | " )\n", 101 | " )\n", 102 | " (1): Bottleneck(\n", 103 | " (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 104 | " (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 105 | " (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", 106 | " (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 107 | " (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 108 | " (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 109 | " (relu): ReLU(inplace)\n", 110 | " )\n", 111 | " (2): Bottleneck(\n", 112 | " (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 113 | " (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 114 | " (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", 115 | " (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 116 | " (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 117 | " (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 118 | " (relu): ReLU(inplace)\n", 119 | " )\n", 120 | " )\n", 121 | " (layer2): Sequential(\n", 122 | " (0): Bottleneck(\n", 123 | " (conv1): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 124 | " (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 125 | " (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n", 126 | " (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 127 | " (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 128 | " (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 129 | " (relu): ReLU(inplace)\n", 130 | " (downsample): Sequential(\n", 131 | " (0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)\n", 132 | " (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 133 | " )\n", 134 | " )\n", 135 | " (1): Bottleneck(\n", 136 | " (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 137 | " (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 138 | " (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", 139 | " (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 140 | " (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 141 | " (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 142 | " (relu): ReLU(inplace)\n", 143 | " )\n", 144 | " (2): Bottleneck(\n", 145 | " (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 146 | " (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 147 | " (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", 148 | " (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 149 | " (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 150 | " (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 151 | " (relu): ReLU(inplace)\n", 152 | " )\n", 153 | " (3): Bottleneck(\n", 154 | " (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 155 | " (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 156 | " (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", 157 | " (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 158 | " (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 159 | " (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 160 | " (relu): ReLU(inplace)\n", 161 | " )\n", 162 | " )\n", 163 | " (layer3): Sequential(\n", 164 | " (0): Bottleneck(\n", 165 | " (conv1): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 166 | " (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 167 | " (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n", 168 | " (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 169 | " (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 170 | " (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 171 | " (relu): ReLU(inplace)\n", 172 | " (downsample): Sequential(\n", 173 | " (0): Conv2d(512, 1024, kernel_size=(1, 1), stride=(2, 2), bias=False)\n", 174 | " (1): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 175 | " )\n", 176 | " )\n", 177 | " (1): Bottleneck(\n", 178 | " (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 179 | " (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 180 | " (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", 181 | " (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 182 | " (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 183 | " (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 184 | " (relu): ReLU(inplace)\n", 185 | " )\n", 186 | " (2): Bottleneck(\n", 187 | " (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 188 | " (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 189 | " (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", 190 | " (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 191 | " (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 192 | " (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 193 | " (relu): ReLU(inplace)\n", 194 | " )\n", 195 | " (3): Bottleneck(\n", 196 | " (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 197 | " (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 198 | " (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", 199 | " (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 200 | " (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 201 | " (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 202 | " (relu): ReLU(inplace)\n", 203 | " )\n", 204 | " (4): Bottleneck(\n", 205 | " (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 206 | " (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 207 | " (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", 208 | " (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 209 | " (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 210 | " (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 211 | " (relu): ReLU(inplace)\n", 212 | " )\n", 213 | " (5): Bottleneck(\n", 214 | " (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 215 | " (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 216 | " (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", 217 | " (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 218 | " (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 219 | " (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 220 | " (relu): ReLU(inplace)\n", 221 | " )\n", 222 | " )\n", 223 | " (layer4): Sequential(\n", 224 | " (0): Bottleneck(\n", 225 | " (conv1): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 226 | " (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 227 | " (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n", 228 | " (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 229 | " (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 230 | " (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 231 | " (relu): ReLU(inplace)\n", 232 | " (downsample): Sequential(\n", 233 | " (0): Conv2d(1024, 2048, kernel_size=(1, 1), stride=(2, 2), bias=False)\n", 234 | " (1): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 235 | " )\n", 236 | " )\n", 237 | " (1): Bottleneck(\n", 238 | " (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 239 | " (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 240 | " (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", 241 | " (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 242 | " (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 243 | " (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 244 | " (relu): ReLU(inplace)\n", 245 | " )\n", 246 | " (2): Bottleneck(\n", 247 | " (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 248 | " (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 249 | " (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", 250 | " (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 251 | " (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 252 | " (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 253 | " (relu): ReLU(inplace)\n", 254 | " )\n", 255 | " )\n", 256 | " (avgpool): AvgPool2d(kernel_size=7, stride=1, padding=0)\n", 257 | " (fc): Linear(in_features=2048, out_features=1000, bias=True)\n", 258 | ")" 259 | ] 260 | }, 261 | "execution_count": 3, 262 | "metadata": {}, 263 | "output_type": "execute_result" 264 | } 265 | ], 266 | "source": [ 267 | "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", 268 | "model = models.resnet50(pretrained=True)\n", 269 | "model" 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": 4, 275 | "metadata": {}, 276 | "outputs": [ 277 | { 278 | "data": { 279 | "text/plain": [ 280 | "ResNet(\n", 281 | " (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)\n", 282 | " (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 283 | " (relu): ReLU(inplace)\n", 284 | " (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)\n", 285 | " (layer1): Sequential(\n", 286 | " (0): Bottleneck(\n", 287 | " (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 288 | " (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 289 | " (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", 290 | " (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 291 | " (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 292 | " (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 293 | " (relu): ReLU(inplace)\n", 294 | " (downsample): Sequential(\n", 295 | " (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 296 | " (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 297 | " )\n", 298 | " )\n", 299 | " (1): Bottleneck(\n", 300 | " (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 301 | " (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 302 | " (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", 303 | " (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 304 | " (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 305 | " (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 306 | " (relu): ReLU(inplace)\n", 307 | " )\n", 308 | " (2): Bottleneck(\n", 309 | " (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 310 | " (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 311 | " (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", 312 | " (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 313 | " (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 314 | " (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 315 | " (relu): ReLU(inplace)\n", 316 | " )\n", 317 | " )\n", 318 | " (layer2): Sequential(\n", 319 | " (0): Bottleneck(\n", 320 | " (conv1): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 321 | " (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 322 | " (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n", 323 | " (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 324 | " (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 325 | " (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 326 | " (relu): ReLU(inplace)\n", 327 | " (downsample): Sequential(\n", 328 | " (0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)\n", 329 | " (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 330 | " )\n", 331 | " )\n", 332 | " (1): Bottleneck(\n", 333 | " (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 334 | " (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 335 | " (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", 336 | " (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 337 | " (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 338 | " (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 339 | " (relu): ReLU(inplace)\n", 340 | " )\n", 341 | " (2): Bottleneck(\n", 342 | " (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 343 | " (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 344 | " (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", 345 | " (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 346 | " (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 347 | " (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 348 | " (relu): ReLU(inplace)\n", 349 | " )\n", 350 | " (3): Bottleneck(\n", 351 | " (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 352 | " (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 353 | " (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", 354 | " (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 355 | " (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 356 | " (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 357 | " (relu): ReLU(inplace)\n", 358 | " )\n", 359 | " )\n", 360 | " (layer3): Sequential(\n", 361 | " (0): Bottleneck(\n", 362 | " (conv1): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 363 | " (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 364 | " (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n", 365 | " (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 366 | " (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 367 | " (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 368 | " (relu): ReLU(inplace)\n", 369 | " (downsample): Sequential(\n", 370 | " (0): Conv2d(512, 1024, kernel_size=(1, 1), stride=(2, 2), bias=False)\n", 371 | " (1): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 372 | " )\n", 373 | " )\n", 374 | " (1): Bottleneck(\n", 375 | " (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 376 | " (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 377 | " (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", 378 | " (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 379 | " (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 380 | " (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 381 | " (relu): ReLU(inplace)\n", 382 | " )\n", 383 | " (2): Bottleneck(\n", 384 | " (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 385 | " (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 386 | " (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", 387 | " (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 388 | " (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 389 | " (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 390 | " (relu): ReLU(inplace)\n", 391 | " )\n", 392 | " (3): Bottleneck(\n", 393 | " (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 394 | " (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 395 | " (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", 396 | " (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 397 | " (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 398 | " (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 399 | " (relu): ReLU(inplace)\n", 400 | " )\n", 401 | " (4): Bottleneck(\n", 402 | " (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 403 | " (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 404 | " (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", 405 | " (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 406 | " (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 407 | " (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 408 | " (relu): ReLU(inplace)\n", 409 | " )\n", 410 | " (5): Bottleneck(\n", 411 | " (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 412 | " (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 413 | " (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", 414 | " (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 415 | " (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 416 | " (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 417 | " (relu): ReLU(inplace)\n", 418 | " )\n", 419 | " )\n", 420 | " (layer4): Sequential(\n", 421 | " (0): Bottleneck(\n", 422 | " (conv1): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 423 | " (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 424 | " (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n", 425 | " (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 426 | " (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 427 | " (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 428 | " (relu): ReLU(inplace)\n", 429 | " (downsample): Sequential(\n", 430 | " (0): Conv2d(1024, 2048, kernel_size=(1, 1), stride=(2, 2), bias=False)\n", 431 | " (1): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 432 | " )\n", 433 | " )\n", 434 | " (1): Bottleneck(\n", 435 | " (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 436 | " (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 437 | " (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", 438 | " (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 439 | " (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 440 | " (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 441 | " (relu): ReLU(inplace)\n", 442 | " )\n", 443 | " (2): Bottleneck(\n", 444 | " (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 445 | " (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 446 | " (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", 447 | " (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 448 | " (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", 449 | " (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 450 | " (relu): ReLU(inplace)\n", 451 | " )\n", 452 | " )\n", 453 | " (avgpool): AvgPool2d(kernel_size=7, stride=1, padding=0)\n", 454 | " (fc): Sequential(\n", 455 | " (0): Linear(in_features=2048, out_features=512, bias=True)\n", 456 | " (1): ReLU()\n", 457 | " (2): Dropout(p=0.2)\n", 458 | " (3): Linear(in_features=512, out_features=10, bias=True)\n", 459 | " (4): LogSoftmax()\n", 460 | " )\n", 461 | ")" 462 | ] 463 | }, 464 | "execution_count": 4, 465 | "metadata": {}, 466 | "output_type": "execute_result" 467 | } 468 | ], 469 | "source": [ 470 | "# Freeze parameters so we don't backprop through them\n", 471 | "for param in model.parameters():\n", 472 | " param.requires_grad = False\n", 473 | " \n", 474 | "model.fc = nn.Sequential(nn.Linear(2048, 512),\n", 475 | " nn.ReLU(),\n", 476 | " nn.Dropout(0.2),\n", 477 | " nn.Linear(512, 10),\n", 478 | " nn.LogSoftmax(dim=1))\n", 479 | "criterion = nn.NLLLoss()\n", 480 | "optimizer = optim.Adam(model.fc.parameters(), lr=0.003)\n", 481 | "model.to(device)" 482 | ] 483 | }, 484 | { 485 | "cell_type": "code", 486 | "execution_count": 5, 487 | "metadata": {}, 488 | "outputs": [ 489 | { 490 | "name": "stdout", 491 | "output_type": "stream", 492 | "text": [ 493 | "Epoch 1/1.. Train loss: 2.624.. Test loss: 1.574.. Test accuracy: 0.538\n", 494 | "Epoch 1/1.. Train loss: 0.946.. Test loss: 1.245.. Test accuracy: 0.649\n", 495 | "Epoch 1/1.. Train loss: 0.710.. Test loss: 1.130.. Test accuracy: 0.686\n", 496 | "Epoch 1/1.. Train loss: 0.553.. Test loss: 0.542.. Test accuracy: 0.838\n", 497 | "Epoch 1/1.. Train loss: 0.521.. Test loss: 0.447.. Test accuracy: 0.865\n", 498 | "Epoch 1/1.. Train loss: 0.513.. Test loss: 0.404.. Test accuracy: 0.877\n", 499 | "Epoch 1/1.. Train loss: 0.431.. Test loss: 0.399.. Test accuracy: 0.872\n", 500 | "Epoch 1/1.. Train loss: 0.582.. Test loss: 0.370.. Test accuracy: 0.879\n", 501 | "Epoch 1/1.. Train loss: 0.463.. Test loss: 0.419.. Test accuracy: 0.879\n", 502 | "Epoch 1/1.. Train loss: 0.452.. Test loss: 0.367.. Test accuracy: 0.887\n", 503 | "Epoch 1/1.. Train loss: 0.426.. Test loss: 0.377.. Test accuracy: 0.878\n", 504 | "Epoch 1/1.. Train loss: 0.396.. Test loss: 0.414.. Test accuracy: 0.864\n", 505 | "Epoch 1/1.. Train loss: 0.432.. Test loss: 0.335.. Test accuracy: 0.889\n", 506 | "Epoch 1/1.. Train loss: 0.460.. Test loss: 0.357.. Test accuracy: 0.885\n", 507 | "Epoch 1/1.. Train loss: 0.421.. Test loss: 0.349.. Test accuracy: 0.881\n", 508 | "Epoch 1/1.. Train loss: 0.352.. Test loss: 0.320.. Test accuracy: 0.892\n", 509 | "Epoch 1/1.. Train loss: 0.405.. Test loss: 0.310.. Test accuracy: 0.899\n", 510 | "Epoch 1/1.. Train loss: 0.339.. Test loss: 0.325.. Test accuracy: 0.897\n", 511 | "Epoch 1/1.. Train loss: 0.392.. Test loss: 0.353.. Test accuracy: 0.891\n", 512 | "Epoch 1/1.. Train loss: 0.419.. Test loss: 0.338.. Test accuracy: 0.884\n", 513 | "Epoch 1/1.. Train loss: 0.381.. Test loss: 0.456.. Test accuracy: 0.871\n", 514 | "Epoch 1/1.. Train loss: 0.378.. Test loss: 0.301.. Test accuracy: 0.897\n", 515 | "Epoch 1/1.. Train loss: 0.316.. Test loss: 0.331.. Test accuracy: 0.899\n", 516 | "Epoch 1/1.. Train loss: 0.351.. Test loss: 0.327.. Test accuracy: 0.890\n", 517 | "Epoch 1/1.. Train loss: 0.319.. Test loss: 0.319.. Test accuracy: 0.892\n", 518 | "Epoch 1/1.. Train loss: 0.423.. Test loss: 0.341.. Test accuracy: 0.890\n" 519 | ] 520 | } 521 | ], 522 | "source": [ 523 | "epochs = 1\n", 524 | "steps = 0\n", 525 | "running_loss = 0\n", 526 | "print_every = 10\n", 527 | "train_losses, test_losses = [], []\n", 528 | "\n", 529 | "for epoch in range(epochs):\n", 530 | " for inputs, labels in trainloader:\n", 531 | " steps += 1\n", 532 | " inputs, labels = inputs.to(device), labels.to(device)\n", 533 | " optimizer.zero_grad()\n", 534 | " logps = model.forward(inputs)\n", 535 | " loss = criterion(logps, labels)\n", 536 | " loss.backward()\n", 537 | " optimizer.step()\n", 538 | " running_loss += loss.item()\n", 539 | " \n", 540 | " if steps % print_every == 0:\n", 541 | " test_loss = 0\n", 542 | " accuracy = 0\n", 543 | " model.eval()\n", 544 | " with torch.no_grad():\n", 545 | " for inputs, labels in testloader:\n", 546 | " inputs, labels = inputs.to(device), labels.to(device)\n", 547 | " logps = model.forward(inputs)\n", 548 | " batch_loss = criterion(logps, labels)\n", 549 | " test_loss += batch_loss.item()\n", 550 | " \n", 551 | " ps = torch.exp(logps)\n", 552 | " top_p, top_class = ps.topk(1, dim=1)\n", 553 | " equals = top_class == labels.view(*top_class.shape)\n", 554 | " accuracy += torch.mean(equals.type(torch.FloatTensor)).item()\n", 555 | "\n", 556 | " train_losses.append(running_loss/len(trainloader))\n", 557 | " test_losses.append(test_loss/len(testloader)) \n", 558 | " print(f\"Epoch {epoch+1}/{epochs}.. \"\n", 559 | " f\"Train loss: {running_loss/print_every:.3f}.. \"\n", 560 | " f\"Test loss: {test_loss/len(testloader):.3f}.. \"\n", 561 | " f\"Test accuracy: {accuracy/len(testloader):.3f}\")\n", 562 | " running_loss = 0\n", 563 | " model.train()" 564 | ] 565 | }, 566 | { 567 | "cell_type": "code", 568 | "execution_count": 7, 569 | "metadata": {}, 570 | "outputs": [ 571 | { 572 | "data": { 573 | "image/png": "\n", 574 | "text/plain": [ 575 | "
" 576 | ] 577 | }, 578 | "metadata": { 579 | "image/png": { 580 | "height": 250, 581 | "width": 373 582 | }, 583 | "needs_background": "light" 584 | }, 585 | "output_type": "display_data" 586 | } 587 | ], 588 | "source": [ 589 | "plt.plot(train_losses, label='Training loss')\n", 590 | "plt.plot(test_losses, label='Validation loss')\n", 591 | "plt.legend(frameon=False)\n", 592 | "plt.show()" 593 | ] 594 | }, 595 | { 596 | "cell_type": "code", 597 | "execution_count": null, 598 | "metadata": {}, 599 | "outputs": [], 600 | "source": [] 601 | } 602 | ], 603 | "metadata": { 604 | "kernelspec": { 605 | "display_name": "Python 3", 606 | "language": "python", 607 | "name": "python3" 608 | }, 609 | "language_info": { 610 | "codemirror_mode": { 611 | "name": "ipython", 612 | "version": 3 613 | }, 614 | "file_extension": ".py", 615 | "mimetype": "text/x-python", 616 | "name": "python", 617 | "nbconvert_exporter": "python", 618 | "pygments_lexer": "ipython3", 619 | "version": "3.6.5" 620 | } 621 | }, 622 | "nbformat": 4, 623 | "nbformat_minor": 2 624 | } 625 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Full story: 2 | https://towardsdatascience.com/how-to-train-an-image-classifier-in-pytorch-and-use-it-to-perform-basic-inference-on-single-images-99465a1e9bf5 3 | 4 | 5 | --------------------------------------------------------------------------------