├── 01-download-and-prepare-mapAI.ipynb ├── 02-mapai-unet-regularization.ipynb ├── 03-mapai-unet-former-regularization.ipynb ├── 04-mapai-ft-unet-former-regularization.ipynb ├── 05-mapai-dcswin-regularization.ipynb ├── 06-evaluate.ipynb ├── 07-vectorize-building-footprint.ipynb ├── DATASET └── info.txt ├── README.md ├── models ├── DCSwin_model.py ├── FTUNetFormer_model.py ├── UNetFormer_model.py └── __pycache__ │ ├── DCSwin_model.cpython-310.pyc │ ├── FTUNetFormer_model.cpython-310.pyc │ └── UNetFormer_model.cpython-310.pyc ├── plots ├── compare-1.png ├── compare-10.png ├── compare-100.png ├── compare-1000.png ├── compare-1100.png ├── compare-1200.png ├── compare-2.png ├── compare-22.png ├── compare-250.png ├── compare-3.png ├── compare-33.png ├── compare-4.png ├── compare-5.png ├── compare-500.png ├── compare-600.png ├── compare-750.png ├── compare-800.png ├── compare-900.png ├── dcswin-25-epochs.png ├── ft-unet-former-25-epochs.png ├── unet-25-epochs.png └── unet-former-25-epochs.png ├── predictions ├── bergen_-5943_1104.tif ├── bergen_-5944_1104.tif ├── bergen_-5948_1107.tif ├── kristiansand_-4712_-1562.tif ├── kristiansand_-4712_-1563.tif ├── kristiansand_-4712_-1568.tif ├── oslo_-3133_244.tif ├── tromso_923_11083.tif ├── tromso_923_11084.tif └── tromso_923_11086.tif ├── projectRegularization ├── INFO.txt ├── LICENSE ├── README.md ├── README.png ├── __pycache__ │ ├── crf_loss.cpython-310.pyc │ ├── data_loader_gan.cpython-310.pyc │ ├── models.cpython-310.pyc │ ├── training_utils.cpython-310.pyc │ └── variables.cpython-310.pyc ├── crf_loss.py ├── data_loader_gan.py ├── gdal ├── models.py ├── regularize.py ├── train_gan_net.py ├── training_utils.py └── variables.py ├── regularizations ├── bergen_-5943_1104.tif ├── bergen_-5944_1104.tif ├── bergen_-5948_1107.tif ├── kristiansand_-4712_-1562.tif ├── kristiansand_-4712_-1563.tif ├── kristiansand_-4712_-1568.tif ├── oslo_-3133_244.tif ├── tromso_923_11083.tif ├── tromso_923_11084.tif └── tromso_923_11086.tif ├── requirements.txt └── trained_models └── Link_to_download_trained_models.txt /03-mapai-unet-former-regularization.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": { 7 | "id": "1KJnP7zSGQg5" 8 | }, 9 | "source": [ 10 | "## Binary semantic segmentation example using U-Net-Former\n", 11 | "Preparation of dataset and model training code from here:\n", 12 | "\n", 13 | "https://pyimagesearch.com/2021/11/08/u-net-training-image-segmentation-models-in-pytorch/" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": null, 19 | "metadata": { 20 | "colab": { 21 | "base_uri": "https://localhost:8080/" 22 | }, 23 | "id": "cFxHJWmXlcZk", 24 | "outputId": "b755d1a8-3650-42d9-8718-401b4458f049" 25 | }, 26 | "outputs": [], 27 | "source": [ 28 | "import os\n", 29 | "import glob\n", 30 | "import matplotlib.pyplot as plt\n", 31 | "\n", 32 | "import torch\n", 33 | "import torchvision\n", 34 | "from tqdm import tqdm\n", 35 | "\n", 36 | "print(torch.__version__)\n", 37 | "print(torchvision.__version__)\n", 38 | "\n", 39 | "DEVICE = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n", 40 | "print(DEVICE)\n", 41 | "\n", 42 | "# determine if we will be pinning memory during data loading\n", 43 | "PIN_MEMORY = True if DEVICE == \"cuda\" else False" 44 | ] 45 | }, 46 | { 47 | "attachments": {}, 48 | "cell_type": "markdown", 49 | "metadata": { 50 | "id": "KVfaGZrWG63Q" 51 | }, 52 | "source": [ 53 | "#### CONFIGURE 
PATHS AND HYPERPARAMETERS FOR TRAINING BELOW." 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "metadata": { 60 | "colab": { 61 | "base_uri": "https://localhost:8080/", 62 | "height": 235 63 | }, 64 | "id": "OjlBC-raVM2K", 65 | "outputId": "b6950142-9c43-40bd-9edb-e10b3973f745" 66 | }, 67 | "outputs": [], 68 | "source": [ 69 | "GD_PATH = os.getcwd() # get current working directory for the repo\n", 70 | "print(GD_PATH)\n", 71 | "\n", 72 | "# PROVIDE PATH TO DOWNLOADED MAPAI DATASET\n", 73 | "DATASET_PATH = \"/home/shymon/datasets/\"\n", 74 | "\n", 75 | "DATASET_PATH = os.path.join(DATASET_PATH, \"mapai_full\") # create dataset path\n", 76 | "\n", 77 | "print(DATASET_PATH)\n", 78 | "\n", 79 | "TRAIN_IMG_DIR = os.path.join(DATASET_PATH, \"train\", \"images\")\n", 80 | "TRAIN_MASK_DIR = os.path.join(DATASET_PATH, \"train\", \"masks\")\n", 81 | "\n", 82 | "print(TRAIN_IMG_DIR)\n", 83 | "print(TRAIN_MASK_DIR)\n", 84 | "\n", 85 | "VAL_IMG_DIR = os.path.join(DATASET_PATH, \"validation\", \"images\")\n", 86 | "VAL_MASK_DIR = os.path.join(DATASET_PATH, \"validation\", \"masks\")\n", 87 | "\n", 88 | "print(VAL_IMG_DIR)\n", 89 | "print(VAL_MASK_DIR)\n", 90 | "\n", 91 | "TEST_IMG_DIR = os.path.join(DATASET_PATH, \"task1_test\", \"images\")\n", 92 | "TEST_MASK_DIR = os.path.join(DATASET_PATH, \"task1_test\", \"masks\")\n", 93 | "\n", 94 | "print(TEST_IMG_DIR)\n", 95 | "print(TEST_MASK_DIR)\n", 96 | "\n", 97 | "# CONFIGURE MapAI DATASET\n", 98 | "NUM_CHANNELS = 3\n", 99 | "NUM_LEVELS = 3\n", 100 | "NUM_CLASSES = 1\n", 101 | "\n", 102 | "# IMAGE SHAPE\n", 103 | "IMG_WIDTH = 512\n", 104 | "IMG_HEIGHT = 512\n", 105 | "\n", 106 | "#---------------------------------------------------------------------------------------------------#\n", 107 | "\n", 108 | "# CONFIGURE parameters for training\n", 109 | "EPOCHS = 25\n", 110 | "init_lr = 1e-4 # learning rate\n", 111 | "BATCH_SIZE = 2\n", 112 | "\n", 113 | "THRESHOLD = 0.5\n", 114 | "base_output = \"out\"\n", 115 | "\n", 116 | "model_name = \"unet-former-25-epochs.pth\" # provide name for model\n", 117 | "training_plot_name = \"unet-former-25-epochs.png\"\n", 118 | "\n", 119 | "#---------------------------------------------------------------------------------------------------#\n", 120 | "\n", 121 | "# OUTPUT PATHS\n", 122 | "\n", 123 | "# Trained model path\n", 124 | "MODEL_PATH = os.path.join(GD_PATH, \"trained_models\", model_name) # change depending on the number of epochs\n", 125 | "print(MODEL_PATH)\n", 126 | "PLOT_PATH = os.path.join(GD_PATH, \"plots\", training_plot_name) # the folder to save future plots\n", 127 | "print(PLOT_PATH)" 128 | ] 129 | }, 130 | { 131 | "cell_type": "markdown", 132 | "metadata": { 133 | "id": "IfSMUZbWWdJn" 134 | }, 135 | "source": [ 136 | "### Load and read the MapAI dataset" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": null, 142 | "metadata": { 143 | "id": "TPiACQ_6VyQP" 144 | }, 145 | "outputs": [], 146 | "source": [ 147 | "import tifffile\n", 148 | "from torch.utils.data import Dataset\n", 149 | "import cv2\n", 150 | "\n", 151 | "\n", 152 | "class mapAIdataset(Dataset):\n", 153 | " def __init__(self, imagePaths, maskPaths, transforms):\n", 154 | " # store the image and mask filepaths, and augmentation\n", 155 | " # transforms\n", 156 | " self.imagePaths = imagePaths\n", 157 | " self.maskPaths = maskPaths\n", 158 | " self.transforms = transforms\n", 159 | " \n", 160 | " def __len__(self):\n", 161 | " # return the number of total samples contained in the 
dataset\n", 162 | " return len(self.imagePaths)\n", 163 | " \n", 164 | " def __getitem__(self, idx):\n", 165 | " # grab the image path from the current index\n", 166 | " imagePath = self.imagePaths[idx]\n", 167 | " # load the image from disk, swap its channels from BGR to RGB,\n", 168 | " # and read the associated mask from disk\n", 169 | " image = cv2.imread(imagePath)\n", 170 | " image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)\n", 171 | " mask = tifffile.imread(self.maskPaths[idx])\n", 172 | " # convert the mask to a float32 tensor with values in the range [0, 1]\n", 173 | " mask = mask.astype('float32')\n", 174 | " # check to see if we are applying any transformations\n", 175 | " if self.transforms is not None:\n", 176 | " # apply the transformations to both image and its mask\n", 177 | " image = self.transforms(image)\n", 178 | " mask = self.transforms(mask)\n", 179 | " \n", 180 | " # return a tuple of the image and its mask\n", 181 | " return (image, mask)" 182 | ] 183 | }, 184 | { 185 | "attachments": {}, 186 | "cell_type": "markdown", 187 | "metadata": { 188 | "id": "AKXL9bO8WnNg" 189 | }, 190 | "source": [ 191 | "### U-Net Former architecture\n", 192 | "\n", 193 | "Downloaded from: https://github.com/WangLibo1995/GeoSeg/blob/main/geoseg/models/UNetFormer.py\n", 194 | "saved into UNetFormer.py file, from where we import the model." 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": null, 200 | "metadata": { 201 | "id": "9urE3W1iWp7v" 202 | }, 203 | "outputs": [], 204 | "source": [ 205 | "import sys\n", 206 | "subfolder = os.path.join(GD_PATH, \"models\")\n", 207 | "sys.path.insert(0, subfolder)\n", 208 | "\n", 209 | "import UNetFormer_model" 210 | ] 211 | }, 212 | { 213 | "cell_type": "markdown", 214 | "metadata": { 215 | "id": "22hbANvfWxmX" 216 | }, 217 | "source": [ 218 | "### Training the segmentation model\n", 219 | "Below we append the paths for TRAIN/VAL/TEST sets - images/masks." 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": null, 225 | "metadata": { 226 | "colab": { 227 | "base_uri": "https://localhost:8080/" 228 | }, 229 | "id": "G2Jha-LCW0ir", 230 | "outputId": "fcad4c67-0851-42e6-ea68-6b6dd88d26c2" 231 | }, 232 | "outputs": [], 233 | "source": [ 234 | "from torch.nn import BCEWithLogitsLoss\n", 235 | "from torch.optim import Adam\n", 236 | "from torch.utils.data import DataLoader\n", 237 | "from imutils import paths\n", 238 | "import time\n", 239 | "\n", 240 | "# TRAINING\n", 241 | "train_images = sorted(list(paths.list_images(TRAIN_IMG_DIR)))\n", 242 | "train_masks = sorted(list(paths.list_images(TRAIN_MASK_DIR)))\n", 243 | "\n", 244 | "# VALIDATION\n", 245 | "val_images = sorted(list(paths.list_images(VAL_IMG_DIR)))\n", 246 | "val_masks = sorted(list(paths.list_images(VAL_MASK_DIR)))\n", 247 | "\n", 248 | "\n", 249 | "# TEST\n", 250 | "test_images = sorted(list(paths.list_images(TEST_IMG_DIR)))\n", 251 | "test_masks = sorted(list(paths.list_images(TEST_MASK_DIR)))" 252 | ] 253 | }, 254 | { 255 | "cell_type": "markdown", 256 | "metadata": { 257 | "id": "gtqUNGR1XCa5" 258 | }, 259 | "source": [ 260 | "### Define transformations\n", 261 | "\n", 262 | "I tried out different data augmentation techniques, including Horizontal Flip, Vertical Flip, Contrast, Brightness. They did not improve my results much, the validation and training loss were actually worse than without data augmentation techniques." 
263 | ] 264 | }, 265 | { 266 | "attachments": {}, 267 | "cell_type": "markdown", 268 | "metadata": { 269 | "id": "ghW7Nj0OEQMc" 270 | }, 271 | "source": [ 272 | "https://pytorch.org/vision/stable/auto_examples/plot_transforms.html#sphx-glr-auto-examples-plot-transforms-py\n", 273 | "\n", 274 | "https://albumentations.ai/docs/getting_started/mask_augmentation/" 275 | ] 276 | }, 277 | { 278 | "cell_type": "code", 279 | "execution_count": null, 280 | "metadata": { 281 | "colab": { 282 | "base_uri": "https://localhost:8080/" 283 | }, 284 | "id": "WR_dzdpCXCHY", 285 | "outputId": "4e9b1681-2846-489f-edf6-a6240af65563" 286 | }, 287 | "outputs": [], 288 | "source": [ 289 | "import torchvision.transforms as T\n", 290 | "\n", 291 | "# T.RandomHorizontalFlip(p=0.5),\n", 292 | "# T.RandomVerticalFlip(p=0.1),\n", 293 | "\n", 294 | "# Image augmentations applied\n", 295 | "transforms = T.Compose([T.ToPILImage(),\n", 296 | " T.Resize((IMG_HEIGHT,IMG_WIDTH)),\n", 297 | " T.ToTensor()])\n", 298 | "\n", 299 | "# create the train and test datasets\n", 300 | "trainDS = mapAIdataset(imagePaths=train_images,\n", 301 | " maskPaths=train_masks,\n", 302 | " transforms=transforms)\n", 303 | "\n", 304 | "valDS = mapAIdataset(imagePaths=val_images,\n", 305 | " maskPaths=val_masks,\n", 306 | " transforms=transforms)\n", 307 | "\n", 308 | "testDS = mapAIdataset(imagePaths=test_images,\n", 309 | " maskPaths=test_masks,\n", 310 | " transforms=transforms)\n", 311 | "\n", 312 | "print(f\"[INFO] found {len(trainDS)} examples in the TRAINING set...\")\n", 313 | "print(f\"[INFO] found {len(valDS)} examples in the VALIDATION set...\")\n", 314 | "print(f\"[INFO] found {len(testDS)} examples in the TEST set...\")\n", 315 | "\n", 316 | "# create the training and test data loaders\n", 317 | "trainLoader = DataLoader(trainDS,\n", 318 | " shuffle=True,\n", 319 | " batch_size=BATCH_SIZE,\n", 320 | " pin_memory=PIN_MEMORY,\n", 321 | " num_workers=os.cpu_count())\n", 322 | "\n", 323 | "valLoader = DataLoader(valDS,\n", 324 | " shuffle=False,\n", 325 | " batch_size=BATCH_SIZE,\n", 326 | " pin_memory=PIN_MEMORY,\n", 327 | " num_workers=os.cpu_count())\n", 328 | "\n", 329 | "testLoader = DataLoader(testDS,\n", 330 | " shuffle=False,\n", 331 | " batch_size=BATCH_SIZE,\n", 332 | " pin_memory=PIN_MEMORY,\n", 333 | " num_workers=os.cpu_count())" 334 | ] 335 | }, 336 | { 337 | "attachments": {}, 338 | "cell_type": "markdown", 339 | "metadata": { 340 | "id": "tAO9M_R4XG6q" 341 | }, 342 | "source": [ 343 | "### Initialize UNET-FORMER model for training" 344 | ] 345 | }, 346 | { 347 | "cell_type": "code", 348 | "execution_count": null, 349 | "metadata": { 350 | "colab": { 351 | "base_uri": "https://localhost:8080/" 352 | }, 353 | "id": "2IMsYzUaXJW7", 354 | "outputId": "5328f1e2-4a71-4c05-fa9e-23b768971315" 355 | }, 356 | "outputs": [], 357 | "source": [ 358 | "model = UNetFormer_model.UNetFormer().to(DEVICE)\n", 359 | "\n", 360 | "# loss / optimizer\n", 361 | "lossFunction = BCEWithLogitsLoss()\n", 362 | "opt = Adam(model.parameters(), lr=init_lr, weight_decay=0.001)\n", 363 | "\n", 364 | "# calculate steps per epoch for train/val/test\n", 365 | "trainSteps = len(trainDS) // BATCH_SIZE \n", 366 | "valSteps = len(valDS) // BATCH_SIZE\n", 367 | "testSteps = len(testDS) // BATCH_SIZE\n", 368 | "\n", 369 | "print(trainSteps, valSteps, testSteps)\n", 370 | "\n", 371 | "# initialize a dictionary to store training history\n", 372 | "H = {\"train_loss\": [], \"val_loss\": [], \"test_loss\": []}\n", 373 | "H" 374 | ] 375 | }, 376 | { 377 | "cell_type": 
"code", 378 | "execution_count": null, 379 | "metadata": { 380 | "id": "WEP-IVokbWQg" 381 | }, 382 | "outputs": [], 383 | "source": [ 384 | "torch.cuda.empty_cache()" 385 | ] 386 | }, 387 | { 388 | "attachments": {}, 389 | "cell_type": "markdown", 390 | "metadata": { 391 | "id": "xcjuKhMeXLU-" 392 | }, 393 | "source": [ 394 | "### TRAINING THE MODEL\n", 395 | "\n", 396 | "Run this piece of code only if you want to train the model from scratch.\n", 397 | "\n", 398 | "Training locally: BATCH_SIZE = 2 takes 5035 MB of GPU memory.\n", 399 | "\n" 400 | ] 401 | }, 402 | { 403 | "cell_type": "code", 404 | "execution_count": null, 405 | "metadata": { 406 | "colab": { 407 | "base_uri": "https://localhost:8080/" 408 | }, 409 | "id": "vWuUyLUgXPNf", 410 | "outputId": "34c6485b-838b-45cf-99da-601f7cfbc4d1" 411 | }, 412 | "outputs": [], 413 | "source": [ 414 | "# loop over epochs\n", 415 | "print(\"[INFO] training UNET-FORMER ...\")\n", 416 | "startTime = time.time()\n", 417 | "\n", 418 | "for epoch in tqdm(range(EPOCHS)):\n", 419 | " model.train()\n", 420 | "\n", 421 | " # initialize total training and validation loss\n", 422 | " totalTrainLoss = 0\n", 423 | " totalValLoss = 0\n", 424 | " totalTrainAcc = 0\n", 425 | " totalValAcc = 0\n", 426 | "\n", 427 | " # loop over the training set\n", 428 | " for (i, (x, y)) in enumerate(trainLoader):\n", 429 | " # send output to device\n", 430 | " (x, y) = (x.to(DEVICE), y.to(DEVICE))\n", 431 | "\n", 432 | " # perform a forward pass and calculate the training loss\n", 433 | " pred = model(x)\n", 434 | " loss = lossFunction(pred, y)\n", 435 | "\n", 436 | " # calculate the accuracy\n", 437 | " acc = ((pred > 0.5) == y).float().mean()\n", 438 | "\n", 439 | " # kill previously accumulated gradients then\n", 440 | " # perform backpropagation and update model parameters\n", 441 | " opt.zero_grad()\n", 442 | " loss.backward()\n", 443 | " opt.step()\n", 444 | "\n", 445 | " # add the loss and accuracy to the total training loss and accuracy\n", 446 | " totalTrainLoss += loss\n", 447 | " totalTrainAcc += acc\n", 448 | "\n", 449 | " # switch of autograd\n", 450 | " with torch.no_grad():\n", 451 | " # set the model in evaluation mode\n", 452 | " model.eval()\n", 453 | "\n", 454 | " # loop over the validation set\n", 455 | " for (x, y) in valLoader:\n", 456 | " # send the input to the device\n", 457 | " (x, y) = (x.to(DEVICE), y.to(DEVICE))\n", 458 | "\n", 459 | " # make the predictions and calculate the validation loss\n", 460 | " pred = model(x)\n", 461 | " loss = lossFunction(pred, y)\n", 462 | "\n", 463 | " # calculate the accuracy\n", 464 | " acc = ((pred > 0.5) == y).float().mean()\n", 465 | "\n", 466 | " # add the loss and accuracy to the total validation loss and accuracy\n", 467 | " totalValLoss += loss\n", 468 | " totalValAcc += acc\n", 469 | "\n", 470 | " # calculate the average training and validation loss and accuracy\n", 471 | " avgTrainLoss = totalTrainLoss / trainSteps\n", 472 | " avgValLoss = totalValLoss / valSteps\n", 473 | " avgTrainAcc = totalTrainAcc / trainSteps\n", 474 | " avgValAcc = totalValAcc / valSteps\n", 475 | " \n", 476 | " # update our training history\n", 477 | " H[\"train_loss\"].append(avgTrainLoss.cpu().detach().numpy())\n", 478 | " H[\"val_loss\"].append(avgValLoss.cpu().detach().numpy())\n", 479 | "\n", 480 | " # print the model training and validation information\n", 481 | " print(\"[INFO] EPOCH: {}/{}\".format(epoch + 1, EPOCHS))\n", 482 | " print(\"Train loss: {:.6f}, Train acc: {:.6f}, Val loss: {:.4f}, Val acc: {:.4f}\".format(\n", 
483 | " avgTrainLoss, avgTrainAcc, avgValLoss, avgValAcc))\n", 484 | " \n", 485 | "# display the total time needed to perform the training\n", 486 | "endTime = time.time()\n", 487 | "print(\"[INFO] total time taken to train the model: {:.2f}s\".format(endTime - startTime))\n", 488 | " " 489 | ] 490 | }, 491 | { 492 | "cell_type": "code", 493 | "execution_count": null, 494 | "metadata": { 495 | "colab": { 496 | "base_uri": "https://localhost:8080/" 497 | }, 498 | "id": "CsJoOVn11rs9", 499 | "outputId": "c9a36460-f773-4771-cfd7-fec78711d8cc" 500 | }, 501 | "outputs": [], 502 | "source": [ 503 | "H # show traning/val loss history" 504 | ] 505 | }, 506 | { 507 | "cell_type": "markdown", 508 | "metadata": { 509 | "id": "U6ChLXHuXZHA" 510 | }, 511 | "source": [ 512 | "### Plot the training and validation loss" 513 | ] 514 | }, 515 | { 516 | "cell_type": "code", 517 | "execution_count": null, 518 | "metadata": { 519 | "colab": { 520 | "base_uri": "https://localhost:8080/", 521 | "height": 316 522 | }, 523 | "id": "j04HfubrXYvX", 524 | "outputId": "5416f4a7-2647-40e9-ce25-cac7909dea50" 525 | }, 526 | "outputs": [], 527 | "source": [ 528 | "# plot the training loss\n", 529 | "print(MODEL_PATH)\n", 530 | "print(PLOT_PATH)\n", 531 | "\n", 532 | "plt.style.use(\"ggplot\")\n", 533 | "plt.figure()\n", 534 | "plt.plot(H[\"train_loss\"], label=\"train_loss\")\n", 535 | "plt.plot(H[\"val_loss\"], label=\"val_loss\")\n", 536 | "plt.title(\"Training Loss on Dataset\")\n", 537 | "plt.xlabel(\"Epoch #\")\n", 538 | "plt.ylabel(\"Loss\")\n", 539 | "plt.legend(loc=\"lower left\")\n", 540 | "plt.savefig(PLOT_PATH)\n", 541 | "# serialize the model to disk\n", 542 | "torch.save(model, MODEL_PATH) # saves the model" 543 | ] 544 | }, 545 | { 546 | "cell_type": "markdown", 547 | "metadata": { 548 | "id": "5Y6Fx2oaWr0q" 549 | }, 550 | "source": [ 551 | "### Prediction part\n", 552 | "\n", 553 | "Here the trained model is loaded and use for prediction on test images." 554 | ] 555 | }, 556 | { 557 | "cell_type": "code", 558 | "execution_count": null, 559 | "metadata": { 560 | "colab": { 561 | "base_uri": "https://localhost:8080/" 562 | }, 563 | "id": "qYh4flMu7O-m", 564 | "outputId": "94909220-5b1f-43ad-b52f-d37bbaa270fd" 565 | }, 566 | "outputs": [], 567 | "source": [ 568 | "# Load saved model for prediction\n", 569 | "\n", 570 | "print(MODEL_PATH)\n", 571 | "\n", 572 | "model = torch.load(MODEL_PATH) # add MODEL_PATH after training\n", 573 | "print(\"model loaded for prediction\")\n", 574 | "\n", 575 | "model" 576 | ] 577 | }, 578 | { 579 | "attachments": {}, 580 | "cell_type": "markdown", 581 | "metadata": {}, 582 | "source": [ 583 | "#### Provide test images for MapAI Dataset" 584 | ] 585 | }, 586 | { 587 | "cell_type": "code", 588 | "execution_count": null, 589 | "metadata": {}, 590 | "outputs": [], 591 | "source": [ 592 | "PREDICTIONS_DIR = os.path.join(GD_PATH, \"predictions\")\n", 593 | "PREDICTIONS_DIR" 594 | ] 595 | }, 596 | { 597 | "attachments": {}, 598 | "cell_type": "markdown", 599 | "metadata": {}, 600 | "source": [ 601 | "#### Make predictions on the entire MapAI dataset\n", 602 | "\n", 603 | "Make predictions on test images and save them to the folder named predictions." 
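# Side note on thresholds (a sketch, not a change to the cells below): the model outputs
# raw logits because BCEWithLogitsLoss applies the sigmoid internally. The training loop
# above measures accuracy with (pred > 0.5) on logits, while the prediction loop below
# applies torch.sigmoid first and then THRESHOLD = 0.5; thresholding logits at 0.5
# corresponds to a probability of about 0.62, so the two criteria differ slightly.
# Equivalent, consistent options:
import torch

logits = torch.tensor([[-1.2, 0.3], [0.8, -0.1]])  # toy values
probs = torch.sigmoid(logits)
mask_from_probs = probs > 0.5     # probability threshold
mask_from_logits = logits > 0.0   # same decision boundary, since sigmoid(0) == 0.5
assert torch.equal(mask_from_probs, mask_from_logits)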
604 | ] 605 | }, 606 | { 607 | "cell_type": "code", 608 | "execution_count": null, 609 | "metadata": { 610 | "colab": { 611 | "base_uri": "https://localhost:8080/" 612 | }, 613 | "id": "bq7BlbdrcgPB", 614 | "outputId": "6860afd9-da51-4911-d975-5a3ed78e01e1" 615 | }, 616 | "outputs": [], 617 | "source": [ 618 | "import random\n", 619 | "import gc\n", 620 | "from pathlib import Path\n", 621 | "import numpy as np\n", 622 | "from PIL import Image\n", 623 | "\n", 624 | "# PLOTTING PREDICTIONS AS SINGLE IMAGES\n", 625 | "\n", 626 | "# Output folder for the predictions\n", 627 | "output_folder = PREDICTIONS_DIR + \"/\" # check for Windows to save predictions inside the folder\n", 628 | "\n", 629 | "# PLOT TEST IMAGES as RGB\n", 630 | "for n in range(len(test_images)):\n", 631 | " gc.collect()\n", 632 | " # Test image number\n", 633 | " testImgName = str(Path(test_images[n]).stem) + '.tif'\n", 634 | " #print('#', testImgName)\n", 635 | "\n", 636 | " # Make predicton on a test image specified with counter n\n", 637 | " test_img = test_images[n]\n", 638 | " test_img_input = np.expand_dims(test_img, 0)\n", 639 | " #print('#', test_img_input[0])\n", 640 | "\n", 641 | " # PyTorch --> works\n", 642 | " model.eval()\n", 643 | " with torch.no_grad():\n", 644 | " image = cv2.imread(test_img_input[0])\n", 645 | " image = cv2.resize(image, dsize = (IMG_WIDTH, IMG_HEIGHT), interpolation=cv2.INTER_CUBIC)\n", 646 | " image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)\n", 647 | " image = image.astype(\"float32\") / 255\n", 648 | " \n", 649 | " # print('SIZE: ', image.shape)\n", 650 | "\n", 651 | " # make the channel axis to be the leading one, add batch dimension\n", 652 | " image = np.transpose(image, (2, 0, 1))\n", 653 | " # create a PyTorch tensor\n", 654 | " image = np.expand_dims(image, 0)\n", 655 | " # flash the tensor to the device\n", 656 | " image = torch.from_numpy(image).to(DEVICE)\n", 657 | "\n", 658 | " # make the prediction\n", 659 | " predMask = model(image).squeeze()\n", 660 | " # pass result through sigmoid\n", 661 | " predMask = torch.sigmoid(predMask)\n", 662 | "\n", 663 | " # convert result to numpy array\n", 664 | " predMask = predMask.cpu().numpy()\n", 665 | "\n", 666 | " # filter out the weak predictions and convert them to integers\n", 667 | " predMask = (predMask > THRESHOLD) * 255\n", 668 | " predMask = predMask.astype(np.uint8)\n", 669 | "\n", 670 | " # generate image from array\n", 671 | " pIMG = Image.fromarray(predMask)\n", 672 | " pIMG.save(str(output_folder + testImgName))\n", 673 | "\n", 674 | " print('Prediction:', testImgName, 'saved to:', output_folder)" 675 | ] 676 | }, 677 | { 678 | "attachments": {}, 679 | "cell_type": "markdown", 680 | "metadata": {}, 681 | "source": [ 682 | "#### Make predictions on single images by choice\n", 683 | "\n", 684 | "Change the parameter n to choose which image to plot." 685 | ] 686 | }, 687 | { 688 | "cell_type": "code", 689 | "execution_count": null, 690 | "metadata": {}, 691 | "outputs": [], 692 | "source": [ 693 | "# ----------------------------------------------------------------------\n", 694 | "\n", 695 | "predictions = glob.glob(output_folder + \"*.tif\")\n", 696 | "predictions.sort()\n", 697 | "print(\"# IMAGES for prediction: \", len(predictions))\n", 698 | "print(\"Choosen n can be from 0 o 1367! 
\")\n", 699 | "\n", 700 | "# ----------------------------------------------------------------------\n", 701 | "\n", 702 | "n = 900 # change this number depending on which image you want to test\n", 703 | "\n", 704 | "fig = plt.figure(figsize=(18,12))\n", 705 | "ax1 = fig.add_subplot(131)\n", 706 | "\n", 707 | "ax1.set_title('RGB image: ')\n", 708 | "image = cv2.imread(test_images[n])[:,:,::-1]\n", 709 | "ax1.imshow(image)\n", 710 | "ax1.set_axis_off()\n", 711 | "\n", 712 | "ax2 = fig.add_subplot(132)\n", 713 | "ax2.set_title('Ground truth: ')\n", 714 | "image = cv2.imread(test_masks[n])[:,:,::-1]\n", 715 | "image *= 255\n", 716 | "ax2.imshow(image)\n", 717 | "ax2.set_axis_off()\n", 718 | "\n", 719 | "ax3 = fig.add_subplot(133)\n", 720 | "ax3.set_title('Prediction: ')\n", 721 | "image = cv2.imread(predictions[n])[:,:,::-1]\n", 722 | "ax3.imshow(image)\n", 723 | "ax3.set_axis_off()" 724 | ] 725 | }, 726 | { 727 | "attachments": {}, 728 | "cell_type": "markdown", 729 | "metadata": { 730 | "id": "Tg_0qxbcjzfw" 731 | }, 732 | "source": [ 733 | "### BUILDING FOOTPRINT REGULARIZATION\n", 734 | "\n", 735 | "Used repo: https://github.com/zorzi-s/projectRegularization\n", 736 | "\n", 737 | "git clone the repo to the folder where your notebook is stored. To get curent working directory use os.getcwd().\n", 738 | "\n", 739 | "The pretrained weights need to be downloaded from the provided link and saved into the folder pretrained_weighs that is inside projectRegularization:\n", 740 | "\n", 741 | "https://drive.google.com/drive/folders/1IPrDpvFq9ODW7UtPAJR_T-gGzxDat_uu" 742 | ] 743 | }, 744 | { 745 | "attachments": {}, 746 | "cell_type": "markdown", 747 | "metadata": {}, 748 | "source": [ 749 | "Next step is to generate a Python file to locate the necessary pretrained weights from projectRegularization. The code below was only tested on Ubuntu, not on Windows." 
750 | ] 751 | }, 752 | { 753 | "cell_type": "code", 754 | "execution_count": null, 755 | "metadata": { 756 | "id": "yp8uKrNUjyGn" 757 | }, 758 | "outputs": [], 759 | "source": [ 760 | "# DEFINE NECESSARY PATHS FOR REGULARIZATION PART\n", 761 | "\n", 762 | "projectRegDir = os.path.join(GD_PATH, \"projectRegularization\")\n", 763 | "print(projectRegDir)\n", 764 | "\n", 765 | "ptw = os.path.join(projectRegDir, \"pretrained_weights\") \n", 766 | "print(ptw)\n", 767 | "\n", 768 | "# OUTPUT REGULARIZATIONS DIR\n", 769 | "REGULARIZATION_DIR = os.path.join(GD_PATH, \"regularizations\") + \"/\"\n", 770 | "print(REGULARIZATION_DIR)\n", 771 | "\n", 772 | "# GET THE PATHS FOR TRAINED GAN MODELS\n", 773 | "ENCODER = os.path.join(ptw, \"E140000_e1\")\n", 774 | "GENERATOR = os.path.join(ptw, \"E140000_net\")\n", 775 | "\n", 776 | "print(ENCODER)\n", 777 | "print(GENERATOR)" 778 | ] 779 | }, 780 | { 781 | "cell_type": "code", 782 | "execution_count": null, 783 | "metadata": {}, 784 | "outputs": [], 785 | "source": [ 786 | "# CREATE A NEW variables.py WITH USERS PATHS\n", 787 | "\n", 788 | "with open(projectRegDir + 'variables.py', 'w') as f:\n", 789 | " f.write('# CONFIGURE THE PATHS HERE: \\n\\n')\n", 790 | " f.write('# TRAINING \\n')\n", 791 | " f.write('DATASET_RGB = ' + '\"' + str(TRAIN_IMG_DIR + '*.tif' + '\"') + '\\n')\n", 792 | " f.write('DATASET_GTI = ' + '\"' + str(TRAIN_MASK_DIR + '*.tif' + '\"') + '\\n')\n", 793 | " f.write('DATASET_SEG = ' + '\"' + str(PREDICTIONS_DIR + '*.tif' + '\"') + '\\n')\n", 794 | " f.write('\\n')\n", 795 | " f.write('DEBUG_DIR = ' + '\"' + str('./debug/') + '\"' + '\\n')\n", 796 | " f.write('\\n')\n", 797 | " f.write('# INFERENCE \\n')\n", 798 | " f.write('INF_RGB = ' + '\"' + str(TEST_IMG_DIR + '*.tif' + '\"') + '\\n')\n", 799 | " f.write('INF_SEG = ' + '\"' + str(PREDICTIONS_DIR + '*.tif' + '\"') + '\\n')\n", 800 | " f.write('INF_OUT = ' + '\"' + str(REGULARIZATION_DIR + '\"') + '\\n')\n", 801 | " f.write('\\n')\n", 802 | " f.write('MODEL_ENCODER = ' + '\"' + str(ENCODER) + '\"' + '\\n')\n", 803 | " f.write('MODEL_GENERATOR = ' + '\"' + str(GENERATOR) + '\"' + '\\n')\n", 804 | " f.close()\n", 805 | " \n", 806 | "print(\"variables.py created with users paths...\")\n" 807 | ] 808 | }, 809 | { 810 | "attachments": {}, 811 | "cell_type": "markdown", 812 | "metadata": {}, 813 | "source": [ 814 | "#### Run projectRegularization\n", 815 | "\n", 816 | "Takes around 6-8 minutes.\n", 817 | "\n", 818 | "You only need to change the command below and replace it with the absolute path for regularize.py" 819 | ] 820 | }, 821 | { 822 | "cell_type": "code", 823 | "execution_count": null, 824 | "metadata": {}, 825 | "outputs": [], 826 | "source": [ 827 | "!python /home/shymon/Documents/mapAI-regularization/projectRegularization/regularize.py" 828 | ] 829 | }, 830 | { 831 | "attachments": {}, 832 | "cell_type": "markdown", 833 | "metadata": {}, 834 | "source": [ 835 | "### Compare predictions and regularizations on a single image" 836 | ] 837 | }, 838 | { 839 | "cell_type": "code", 840 | "execution_count": null, 841 | "metadata": { 842 | "colab": { 843 | "base_uri": "https://localhost:8080/" 844 | }, 845 | "id": "89nW6Q7F6aga", 846 | "outputId": "976d62a2-76a7-4b52-a4bc-218f63d8a122" 847 | }, 848 | "outputs": [], 849 | "source": [ 850 | "# Read Regularizations to plot and compare results\n", 851 | "\n", 852 | "regularizations = glob.glob(REGULARIZATION_DIR + \"*.tif\")\n", 853 | "regularizations.sort()\n", 854 | "\n", 855 | "print(\"# of predicted images: \", len(predictions))\n", 856 
| "print(\"# of regularized images: \", len(regularizations))" 857 | ] 858 | }, 859 | { 860 | "attachments": {}, 861 | "cell_type": "markdown", 862 | "metadata": {}, 863 | "source": [ 864 | "Code to plot RGB, GT, PREDICTION and REGULARIZATION in a single plot for comparison.\n", 865 | "\n", 866 | "Change parameter n accordingly." 867 | ] 868 | }, 869 | { 870 | "cell_type": "code", 871 | "execution_count": null, 872 | "metadata": {}, 873 | "outputs": [], 874 | "source": [ 875 | "n = 600\n", 876 | "\n", 877 | "fig = plt.figure(figsize=(18,12))\n", 878 | "ax1 = fig.add_subplot(141)\n", 879 | "\n", 880 | "ax1.set_title('RGB: ')\n", 881 | "image = cv2.imread(test_images[n])[:,:,::-1]\n", 882 | "ax1.imshow(image)\n", 883 | "ax1.set_axis_off()\n", 884 | "\n", 885 | "ax2 = fig.add_subplot(142)\n", 886 | "ax2.set_title('Ground truth: ')\n", 887 | "image = cv2.imread(test_masks[n])[:,:,::-1]\n", 888 | "image *= 255\n", 889 | "ax2.imshow(image)\n", 890 | "ax2.set_axis_off()\n", 891 | "\n", 892 | "ax3 = fig.add_subplot(143)\n", 893 | "ax3.set_title('Prediction: ')\n", 894 | "image = cv2.imread(predictions[n])[:,:,::-1]\n", 895 | "ax3.imshow(image)\n", 896 | "ax3.set_axis_off()\n", 897 | "\n", 898 | "ax4 = fig.add_subplot(144)\n", 899 | "ax4.set_title('Regularization: ')\n", 900 | "image = cv2.imread(regularizations[n])[:,:,::-1]\n", 901 | "ax4.imshow(image)\n", 902 | "ax4.set_axis_off()\n", 903 | "\n", 904 | "# DEFINE PATH FOR PLOTS TO BE SAVED\n", 905 | "figPath = GD_PATH + \"/\" + \"plots\" + \"/\" \"compare-\" + str(n) + \".png\"\n", 906 | "print(figPath)\n", 907 | "\n", 908 | "# Save plot\n", 909 | "fig.savefig(figPath)" 910 | ] 911 | }, 912 | { 913 | "attachments": {}, 914 | "cell_type": "markdown", 915 | "metadata": {}, 916 | "source": [ 917 | "### VECTORIZING THE REGULARIZED BUILDING MASKS with GDAL\n", 918 | "\n", 919 | "GDAL: https://gdal.org/'\n", 920 | "\n", 921 | "GDAL: https://www.youtube.com/watch?v=q3DLdMj5zLA\n", 922 | "\n", 923 | "I do not know if it is possible to install GDAL on WINDOWS inside a conda environment.\n", 924 | "\n", 925 | "On Ubuntu you have to follow these steps:\n", 926 | "\n", 927 | "\n", 928 | "\n", 929 | "Specific process for installation: https://stackoverflow.com/questions/44005694/no-module-named-gdal\n", 930 | "\n", 931 | "- sudo apt-get update && sudo apt upgrade -y && sudo apt autoremove \n", 932 | "- sudo apt-get install -y cdo nco gdal-bin libgdal-dev-\n", 933 | "- python -m pip install --upgrade pip setuptools wheel\n", 934 | "- python -m pip install --upgrade gdal\n", 935 | "- conda install -c conda forge libgdal\n", 936 | "- conda install -c conda-forge libgdal\n", 937 | "- conda install -c conda-forge gdal\n", 938 | "- conda install tiledb=2.2\n", 939 | "- conda install poppler\n", 940 | "\n", 941 | "When you have this you can hopefully vectorize the detected masks quite easily." 
942 | ] 943 | }, 944 | { 945 | "cell_type": "code", 946 | "execution_count": null, 947 | "metadata": {}, 948 | "outputs": [], 949 | "source": [ 950 | "def get_fname_from_path(path):\n", 951 | " \"\"\"\n", 952 | " Given a path, returns the filename after the last frontslash character.\n", 953 | " \"\"\"\n", 954 | " return path.rsplit('/', 1)[-1]\n", 955 | "\n", 956 | "def get_fname_no_extension(path):\n", 957 | " \"\"\"\n", 958 | " Given a path, returns the filename without its extension.\n", 959 | " \"\"\"\n", 960 | " filename, extension = os.path.splitext(path)\n", 961 | " return filename" 962 | ] 963 | }, 964 | { 965 | "cell_type": "code", 966 | "execution_count": null, 967 | "metadata": { 968 | "id": "TDWUhUkJaYl8" 969 | }, 970 | "outputs": [], 971 | "source": [ 972 | "import osgeo\n", 973 | "from osgeo import gdal\n", 974 | "from osgeo import ogr\n", 975 | "print('GDAL version: ', osgeo.gdal.__version__)\n", 976 | "\n", 977 | "# Choose which image to vectorize\n", 978 | "n = 0\n", 979 | "\n", 980 | "input = regularizations[n]\n", 981 | "print()\n", 982 | "print(\"INPUT: \", input)\n", 983 | "\n", 984 | "# print(get_fname_no_extension(input))\n", 985 | "\n", 986 | "# out\n", 987 | "output = get_fname_from_path(get_fname_no_extension(input)) + \".gpkg\"\n", 988 | "print(\"OUTPUT: \", output)\n", 989 | "\n", 990 | "# Open image with GDAl driver\n", 991 | "ds = gdal.Open(input)\n", 992 | "# Get the band\n", 993 | "band = ds.GetRasterBand(1)\n", 994 | "\n", 995 | "# Create the output shapefile\n", 996 | "driver = ogr.GetDriverByName(\"GPKG\")\n", 997 | "out_ds = driver.CreateDataSource(output)\n", 998 | "out_layer = out_ds.CreateLayer(output, geom_type=ogr.wkbPolygon)\n", 999 | "\n", 1000 | "# Add a field to the layer to store the pixel values\n", 1001 | "field_defn = ogr.FieldDefn(\"Pix_Value\", ogr.OFTInteger)\n", 1002 | "out_layer.CreateField(field_defn)\n", 1003 | "\n", 1004 | "# Polygonize the PNG file\n", 1005 | "gdal.Polygonize(band, None, out_layer, 0, [], callback=None)\n", 1006 | "\n", 1007 | "# Close the input and output files\n", 1008 | "out_ds = None\n", 1009 | "ds = None" 1010 | ] 1011 | }, 1012 | { 1013 | "attachments": {}, 1014 | "cell_type": "markdown", 1015 | "metadata": {}, 1016 | "source": [ 1017 | "For the builing detection case we need to only keep the vectors with pixel value 255. Easiest solution is to use: Extract by attribute. The Python solution with GDAL can be found below." 
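# A Python counterpart of the ogr2ogr command in the next cell (a sketch; the file names
# are examples). gdal.VectorTranslate exposes ogr2ogr's -where option, so only the
# polygons whose Pix_Value attribute equals 255 (the building pixels) are copied to the
# output GeoPackage.
from osgeo import gdal

gdal.UseExceptions()
src = "bergen_-5943_1104.gpkg"            # vectorized result from the cell above
dst = "bergen_-5943_1104_buildings.gpkg"  # example output name
gdal.VectorTranslate(dst, src, options=gdal.VectorTranslateOptions(where="Pix_Value = 255"))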
1018 | ] 1019 | }, 1020 | { 1021 | "cell_type": "code", 1022 | "execution_count": null, 1023 | "metadata": {}, 1024 | "outputs": [], 1025 | "source": [ 1026 | "# ogr2ogr -where ID=\"1\" OUTFILE.gpkg INFILE.gpkg\n", 1027 | "\n", 1028 | "# RUN from the command line inside Ubuntu\n", 1029 | "# Change name of input and output according to user needs\n", 1030 | "\n", 1031 | "!ogr2ogr -where Pix_Value=\"255\" bergen_-5943_1104B.gpkg bergen_-5943_1104.gpkg" 1032 | ] 1033 | } 1034 | ], 1035 | "metadata": { 1036 | "accelerator": "TPU", 1037 | "colab": { 1038 | "provenance": [] 1039 | }, 1040 | "gpuClass": "premium", 1041 | "kernelspec": { 1042 | "display_name": "torch", 1043 | "language": "python", 1044 | "name": "python3" 1045 | }, 1046 | "language_info": { 1047 | "codemirror_mode": { 1048 | "name": "ipython", 1049 | "version": 3 1050 | }, 1051 | "file_extension": ".py", 1052 | "mimetype": "text/x-python", 1053 | "name": "python", 1054 | "nbconvert_exporter": "python", 1055 | "pygments_lexer": "ipython3", 1056 | "version": "3.10.9" 1057 | } 1058 | }, 1059 | "nbformat": 4, 1060 | "nbformat_minor": 0 1061 | } 1062 | -------------------------------------------------------------------------------- /04-mapai-ft-unet-former-regularization.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": { 7 | "id": "1KJnP7zSGQg5" 8 | }, 9 | "source": [ 10 | "## Binary semantic segmentation example using FT-U-Net-Former\n", 11 | "Preparation of dataset and model training code from here:\n", 12 | "\n", 13 | "https://pyimagesearch.com/2021/11/08/u-net-training-image-segmentation-models-in-pytorch/" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": null, 19 | "metadata": { 20 | "colab": { 21 | "base_uri": "https://localhost:8080/" 22 | }, 23 | "id": "cFxHJWmXlcZk", 24 | "outputId": "b755d1a8-3650-42d9-8718-401b4458f049" 25 | }, 26 | "outputs": [], 27 | "source": [ 28 | "import os\n", 29 | "import glob\n", 30 | "import matplotlib.pyplot as plt\n", 31 | "\n", 32 | "import torch\n", 33 | "import torchvision\n", 34 | "from tqdm import tqdm\n", 35 | "\n", 36 | "print(torch.__version__)\n", 37 | "print(torchvision.__version__)\n", 38 | "\n", 39 | "DEVICE = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n", 40 | "print(DEVICE)\n", 41 | "\n", 42 | "# determine if we will be pinning memory during data loading\n", 43 | "PIN_MEMORY = True if DEVICE == \"cuda\" else False" 44 | ] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "metadata": { 49 | "id": "KVfaGZrWG63Q" 50 | }, 51 | "source": [ 52 | "#### CONFIGURE YOUR PATHS AND HYPERPARAMETERS FOR TRAINING BELOW." 
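# Optional sanity check (a sketch): after running the configuration cell below, verify
# that every expected MapAI directory exists so a wrong DATASET_PATH fails early rather
# than partway through training. The directory variables are the ones defined below.
import os

expected_dirs = {
    "train images": TRAIN_IMG_DIR, "train masks": TRAIN_MASK_DIR,
    "validation images": VAL_IMG_DIR, "validation masks": VAL_MASK_DIR,
    "test images": TEST_IMG_DIR, "test masks": TEST_MASK_DIR,
}
for name, path in expected_dirs.items():
    assert os.path.isdir(path), f"missing {name} directory: {path}"
print("all MapAI dataset directories found")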
53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": { 59 | "colab": { 60 | "base_uri": "https://localhost:8080/", 61 | "height": 235 62 | }, 63 | "id": "OjlBC-raVM2K", 64 | "outputId": "b6950142-9c43-40bd-9edb-e10b3973f745" 65 | }, 66 | "outputs": [], 67 | "source": [ 68 | "GD_PATH = os.getcwd() # get current working directory for the repo\n", 69 | "print(GD_PATH)\n", 70 | "\n", 71 | "# PROVIDE PATH TO DOWNLOADED MAPAI DATASET\n", 72 | "DATASET_PATH = \"/home/shymon/datasets/\"\n", 73 | "\n", 74 | "DATASET_PATH = os.path.join(DATASET_PATH, \"mapai_full\") # create dataset path\n", 75 | "\n", 76 | "print(DATASET_PATH)\n", 77 | "\n", 78 | "TRAIN_IMG_DIR = os.path.join(DATASET_PATH, \"train\", \"images\")\n", 79 | "TRAIN_MASK_DIR = os.path.join(DATASET_PATH, \"train\", \"masks\")\n", 80 | "\n", 81 | "print(TRAIN_IMG_DIR)\n", 82 | "print(TRAIN_MASK_DIR)\n", 83 | "\n", 84 | "VAL_IMG_DIR = os.path.join(DATASET_PATH, \"validation\", \"images\")\n", 85 | "VAL_MASK_DIR = os.path.join(DATASET_PATH, \"validation\", \"masks\")\n", 86 | "\n", 87 | "print(VAL_IMG_DIR)\n", 88 | "print(VAL_MASK_DIR)\n", 89 | "\n", 90 | "TEST_IMG_DIR = os.path.join(DATASET_PATH, \"task1_test\", \"images\")\n", 91 | "TEST_MASK_DIR = os.path.join(DATASET_PATH, \"task1_test\", \"masks\")\n", 92 | "\n", 93 | "print(TEST_IMG_DIR)\n", 94 | "print(TEST_MASK_DIR)\n", 95 | "\n", 96 | "# CONFIGURE MapAI DATASET\n", 97 | "NUM_CHANNELS = 3\n", 98 | "NUM_LEVELS = 3\n", 99 | "NUM_CLASSES = 1\n", 100 | "\n", 101 | "# IMAGE SHAPE\n", 102 | "IMG_WIDTH = 512\n", 103 | "IMG_HEIGHT = 512\n", 104 | "\n", 105 | "#---------------------------------------------------------------------------------------------------#\n", 106 | "\n", 107 | "# CONFIGURE parameters for training\n", 108 | "EPOCHS = 25\n", 109 | "init_lr = 1e-4 # learning rate\n", 110 | "BATCH_SIZE = 2\n", 111 | "\n", 112 | "THRESHOLD = 0.5\n", 113 | "base_output = \"out\"\n", 114 | "\n", 115 | "model_name = \"ft-unet-former-25-epochs.pth\" # provide name for model\n", 116 | "training_plot_name = \"ft-unet-former-25-epochs.png\"\n", 117 | "\n", 118 | "#---------------------------------------------------------------------------------------------------#\n", 119 | "\n", 120 | "# OUTPUT PATHS\n", 121 | "\n", 122 | "# Trained model path\n", 123 | "MODEL_PATH = os.path.join(GD_PATH, \"trained_models\", model_name) # change depending on the number of epochs\n", 124 | "print(MODEL_PATH)\n", 125 | "PLOT_PATH = os.path.join(GD_PATH, \"plots\", training_plot_name) # the folder to save future plots\n", 126 | "print(PLOT_PATH)" 127 | ] 128 | }, 129 | { 130 | "cell_type": "markdown", 131 | "metadata": { 132 | "id": "IfSMUZbWWdJn" 133 | }, 134 | "source": [ 135 | "### Load and read the MapAI dataset" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": null, 141 | "metadata": { 142 | "id": "TPiACQ_6VyQP" 143 | }, 144 | "outputs": [], 145 | "source": [ 146 | "import tifffile\n", 147 | "from torch.utils.data import Dataset\n", 148 | "import cv2\n", 149 | "\n", 150 | "\n", 151 | "class mapAIdataset(Dataset):\n", 152 | " def __init__(self, imagePaths, maskPaths, transforms):\n", 153 | " # store the image and mask filepaths, and augmentation\n", 154 | " # transforms\n", 155 | " self.imagePaths = imagePaths\n", 156 | " self.maskPaths = maskPaths\n", 157 | " self.transforms = transforms\n", 158 | " \n", 159 | " def __len__(self):\n", 160 | " # return the number of total samples contained in the dataset\n", 161 | " return 
len(self.imagePaths)\n", 162 | " \n", 163 | " def __getitem__(self, idx):\n", 164 | " # grab the image path from the current index\n", 165 | " imagePath = self.imagePaths[idx]\n", 166 | " # load the image from disk, swap its channels from BGR to RGB,\n", 167 | " # and read the associated mask from disk\n", 168 | " image = cv2.imread(imagePath)\n", 169 | " image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)\n", 170 | " mask = tifffile.imread(self.maskPaths[idx])\n", 171 | " # convert the mask to a float32 tensor with values in the range [0, 1]\n", 172 | " mask = mask.astype('float32')\n", 173 | " # check to see if we are applying any transformations\n", 174 | " if self.transforms is not None:\n", 175 | " # apply the transformations to both image and its mask\n", 176 | " image = self.transforms(image)\n", 177 | " mask = self.transforms(mask)\n", 178 | " \n", 179 | " # return a tuple of the image and its mask\n", 180 | " return (image, mask)" 181 | ] 182 | }, 183 | { 184 | "attachments": {}, 185 | "cell_type": "markdown", 186 | "metadata": { 187 | "id": "AKXL9bO8WnNg" 188 | }, 189 | "source": [ 190 | "### Build FT-UNET-FORMER architecture\n", 191 | "Downloaded from: https://github.com/WangLibo1995/GeoSeg/blob/main/geoseg/models/FTUNetFormer.py" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": null, 197 | "metadata": { 198 | "id": "9urE3W1iWp7v" 199 | }, 200 | "outputs": [], 201 | "source": [ 202 | "import sys\n", 203 | "subfolder = os.path.join(GD_PATH, \"models\")\n", 204 | "sys.path.insert(0, subfolder)\n", 205 | "\n", 206 | "import FTUNetFormer_model" 207 | ] 208 | }, 209 | { 210 | "cell_type": "markdown", 211 | "metadata": { 212 | "id": "22hbANvfWxmX" 213 | }, 214 | "source": [ 215 | "### Training the segmentation model\n", 216 | "Below we append the paths for TRAIN/VAL/TEST sets - images/masks." 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": null, 222 | "metadata": { 223 | "colab": { 224 | "base_uri": "https://localhost:8080/" 225 | }, 226 | "id": "G2Jha-LCW0ir", 227 | "outputId": "fcad4c67-0851-42e6-ea68-6b6dd88d26c2" 228 | }, 229 | "outputs": [], 230 | "source": [ 231 | "from torch.nn import BCEWithLogitsLoss\n", 232 | "from torch.optim import Adam\n", 233 | "from torch.utils.data import DataLoader\n", 234 | "from imutils import paths\n", 235 | "import time\n", 236 | "\n", 237 | "# TRAINING\n", 238 | "train_images = sorted(list(paths.list_images(TRAIN_IMG_DIR)))\n", 239 | "train_masks = sorted(list(paths.list_images(TRAIN_MASK_DIR)))\n", 240 | "\n", 241 | "# VALIDATION\n", 242 | "val_images = sorted(list(paths.list_images(VAL_IMG_DIR)))\n", 243 | "val_masks = sorted(list(paths.list_images(VAL_MASK_DIR)))\n", 244 | "\n", 245 | "\n", 246 | "# TEST\n", 247 | "test_images = sorted(list(paths.list_images(TEST_IMG_DIR)))\n", 248 | "test_masks = sorted(list(paths.list_images(TEST_MASK_DIR)))" 249 | ] 250 | }, 251 | { 252 | "cell_type": "markdown", 253 | "metadata": { 254 | "id": "gtqUNGR1XCa5" 255 | }, 256 | "source": [ 257 | "### Define transformations\n", 258 | "\n", 259 | "I tried out different data augmentation techniques, including Horizontal Flip, Vertical Flip, Contrast, Brightness. They did not improve my results much, the validation and training loss were actually worse than without data augmentation techniques." 
260 | ] 261 | }, 262 | { 263 | "attachments": {}, 264 | "cell_type": "markdown", 265 | "metadata": { 266 | "id": "ghW7Nj0OEQMc" 267 | }, 268 | "source": [ 269 | "https://pytorch.org/vision/stable/auto_examples/plot_transforms.html#sphx-glr-auto-examples-plot-transforms-py\n", 270 | "\n", 271 | "https://albumentations.ai/docs/getting_started/mask_augmentation/" 272 | ] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "execution_count": null, 277 | "metadata": { 278 | "colab": { 279 | "base_uri": "https://localhost:8080/" 280 | }, 281 | "id": "WR_dzdpCXCHY", 282 | "outputId": "4e9b1681-2846-489f-edf6-a6240af65563" 283 | }, 284 | "outputs": [], 285 | "source": [ 286 | "import torchvision.transforms as T\n", 287 | "\n", 288 | "# T.RandomHorizontalFlip(p=0.5),\n", 289 | "# T.RandomVerticalFlip(p=0.1),\n", 290 | "\n", 291 | "# Image augmentations applied\n", 292 | "transforms = T.Compose([T.ToPILImage(),\n", 293 | " T.Resize((IMG_HEIGHT,IMG_WIDTH)),\n", 294 | " T.ToTensor()])\n", 295 | "\n", 296 | "# create the train and test datasets\n", 297 | "trainDS = mapAIdataset(imagePaths=train_images,\n", 298 | " maskPaths=train_masks,\n", 299 | " transforms=transforms)\n", 300 | "\n", 301 | "valDS = mapAIdataset(imagePaths=val_images,\n", 302 | " maskPaths=val_masks,\n", 303 | " transforms=transforms)\n", 304 | "\n", 305 | "testDS = mapAIdataset(imagePaths=test_images,\n", 306 | " maskPaths=test_masks,\n", 307 | " transforms=transforms)\n", 308 | "\n", 309 | "print(f\"[INFO] found {len(trainDS)} examples in the TRAINING set...\")\n", 310 | "print(f\"[INFO] found {len(valDS)} examples in the VALIDATION set...\")\n", 311 | "print(f\"[INFO] found {len(testDS)} examples in the TEST set...\")\n", 312 | "\n", 313 | "# create the training and test data loaders\n", 314 | "trainLoader = DataLoader(trainDS,\n", 315 | " shuffle=True,\n", 316 | " batch_size=BATCH_SIZE,\n", 317 | " pin_memory=PIN_MEMORY,\n", 318 | " num_workers=os.cpu_count())\n", 319 | "\n", 320 | "valLoader = DataLoader(valDS,\n", 321 | " shuffle=False,\n", 322 | " batch_size=BATCH_SIZE,\n", 323 | " pin_memory=PIN_MEMORY,\n", 324 | " num_workers=os.cpu_count())\n", 325 | "\n", 326 | "testLoader = DataLoader(testDS,\n", 327 | " shuffle=False,\n", 328 | " batch_size=BATCH_SIZE,\n", 329 | " pin_memory=PIN_MEMORY,\n", 330 | " num_workers=os.cpu_count())" 331 | ] 332 | }, 333 | { 334 | "cell_type": "markdown", 335 | "metadata": { 336 | "id": "tAO9M_R4XG6q" 337 | }, 338 | "source": [ 339 | "### Initialize UNET model for training\n", 340 | "\n", 341 | "Here we initialize the defined UNET model for training and calculate the steps per epoch for train/val/test set." 
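# Optional: once the model is created in the cell below, a quick count of trainable
# parameters gives a feel for the size of FT-UNetFormer before committing to training
# (a sketch; run it after the model exists).
def count_parameters(m):
    # sum of elements over all parameters that require gradients
    return sum(p.numel() for p in m.parameters() if p.requires_grad)

# print(f"trainable parameters: {count_parameters(model):,}")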
342 | ] 343 | }, 344 | { 345 | "cell_type": "code", 346 | "execution_count": null, 347 | "metadata": { 348 | "colab": { 349 | "base_uri": "https://localhost:8080/" 350 | }, 351 | "id": "2IMsYzUaXJW7", 352 | "outputId": "5328f1e2-4a71-4c05-fa9e-23b768971315" 353 | }, 354 | "outputs": [], 355 | "source": [ 356 | "model = FTUNetFormer_model.FTUNetFormer().to(DEVICE)\n", 357 | "\n", 358 | "# loss / optimizer\n", 359 | "lossFunction = BCEWithLogitsLoss()\n", 360 | "opt = Adam(model.parameters(), lr=init_lr, weight_decay=0.001)\n", 361 | "\n", 362 | "# calculate steps per epoch for train/val/test\n", 363 | "trainSteps = len(trainDS) // BATCH_SIZE \n", 364 | "valSteps = len(valDS) // BATCH_SIZE\n", 365 | "testSteps = len(testDS) // BATCH_SIZE\n", 366 | "\n", 367 | "print(trainSteps, valSteps, testSteps)\n", 368 | "\n", 369 | "# initialize a dictionary to store training history\n", 370 | "H = {\"train_loss\": [], \"val_loss\": [], \"test_loss\": []}\n", 371 | "H" 372 | ] 373 | }, 374 | { 375 | "cell_type": "code", 376 | "execution_count": null, 377 | "metadata": { 378 | "id": "WEP-IVokbWQg" 379 | }, 380 | "outputs": [], 381 | "source": [ 382 | "torch.cuda.empty_cache() # PyTorch thing to empty cache" 383 | ] 384 | }, 385 | { 386 | "attachments": {}, 387 | "cell_type": "markdown", 388 | "metadata": { 389 | "id": "xcjuKhMeXLU-" 390 | }, 391 | "source": [ 392 | "### TRAINING THE MODEL\n", 393 | "\n", 394 | "Run this piece of code only if you want to train the model from scratch.\n", 395 | "\n", 396 | "Training locally: BATCH_SIZE = 2 takes 5035 MB of GPU memory.\n", 397 | "\n" 398 | ] 399 | }, 400 | { 401 | "cell_type": "code", 402 | "execution_count": null, 403 | "metadata": { 404 | "colab": { 405 | "base_uri": "https://localhost:8080/" 406 | }, 407 | "id": "vWuUyLUgXPNf", 408 | "outputId": "34c6485b-838b-45cf-99da-601f7cfbc4d1" 409 | }, 410 | "outputs": [], 411 | "source": [ 412 | "# loop over epochs\n", 413 | "print(\"[INFO] training UNET ...\")\n", 414 | "startTime = time.time()\n", 415 | "\n", 416 | "for epoch in tqdm(range(EPOCHS)):\n", 417 | " model.train()\n", 418 | "\n", 419 | " # initialize total training and validation loss\n", 420 | " totalTrainLoss = 0\n", 421 | " totalValLoss = 0\n", 422 | " totalTrainAcc = 0\n", 423 | " totalValAcc = 0\n", 424 | "\n", 425 | " # loop over the training set\n", 426 | " for (i, (x, y)) in enumerate(trainLoader):\n", 427 | " # send output to device\n", 428 | " (x, y) = (x.to(DEVICE), y.to(DEVICE))\n", 429 | "\n", 430 | " # perform a forward pass and calculate the training loss\n", 431 | " pred = model(x)\n", 432 | " loss = lossFunction(pred, y)\n", 433 | "\n", 434 | " # calculate the accuracy\n", 435 | " acc = ((pred > 0.5) == y).float().mean()\n", 436 | "\n", 437 | " # kill previously accumulated gradients then\n", 438 | " # perform backpropagation and update model parameters\n", 439 | " opt.zero_grad()\n", 440 | " loss.backward()\n", 441 | " opt.step()\n", 442 | "\n", 443 | " # add the loss and accuracy to the total training loss and accuracy\n", 444 | " totalTrainLoss += loss\n", 445 | " totalTrainAcc += acc\n", 446 | "\n", 447 | " # switch of autograd\n", 448 | " with torch.no_grad():\n", 449 | " # set the model in evaluation mode\n", 450 | " model.eval()\n", 451 | "\n", 452 | " # loop over the validation set\n", 453 | " for (x, y) in valLoader:\n", 454 | " # send the input to the device\n", 455 | " (x, y) = (x.to(DEVICE), y.to(DEVICE))\n", 456 | "\n", 457 | " # make the predictions and calculate the validation loss\n", 458 | " pred = model(x)\n", 459 | 
" loss = lossFunction(pred, y)\n", 460 | "\n", 461 | " # calculate the accuracy\n", 462 | " acc = ((pred > 0.5) == y).float().mean()\n", 463 | "\n", 464 | " # add the loss and accuracy to the total validation loss and accuracy\n", 465 | " totalValLoss += loss\n", 466 | " totalValAcc += acc\n", 467 | "\n", 468 | " # calculate the average training and validation loss and accuracy\n", 469 | " avgTrainLoss = totalTrainLoss / trainSteps\n", 470 | " avgValLoss = totalValLoss / valSteps\n", 471 | " avgTrainAcc = totalTrainAcc / trainSteps\n", 472 | " avgValAcc = totalValAcc / valSteps\n", 473 | " \n", 474 | " # update our training history\n", 475 | " H[\"train_loss\"].append(avgTrainLoss.cpu().detach().numpy())\n", 476 | " H[\"val_loss\"].append(avgValLoss.cpu().detach().numpy())\n", 477 | "\n", 478 | " # print the model training and validation information\n", 479 | " print(\"[INFO] EPOCH: {}/{}\".format(epoch + 1, EPOCHS))\n", 480 | " print(\"Train loss: {:.6f}, Train acc: {:.6f}, Val loss: {:.4f}, Val acc: {:.4f}\".format(\n", 481 | " avgTrainLoss, avgTrainAcc, avgValLoss, avgValAcc))\n", 482 | " \n", 483 | "# display the total time needed to perform the training\n", 484 | "endTime = time.time()\n", 485 | "print(\"[INFO] total time taken to train the model: {:.2f}s\".format(endTime - startTime))\n", 486 | " " 487 | ] 488 | }, 489 | { 490 | "attachments": {}, 491 | "cell_type": "markdown", 492 | "metadata": {}, 493 | "source": [ 494 | "Train loss: 0.001194, Val loss: 0.0013\n", 495 | "[INFO] total time taken to train the model: 27115.38s" 496 | ] 497 | }, 498 | { 499 | "cell_type": "code", 500 | "execution_count": null, 501 | "metadata": { 502 | "colab": { 503 | "base_uri": "https://localhost:8080/" 504 | }, 505 | "id": "CsJoOVn11rs9", 506 | "outputId": "c9a36460-f773-4771-cfd7-fec78711d8cc" 507 | }, 508 | "outputs": [], 509 | "source": [ 510 | "H # show traning/val loss history" 511 | ] 512 | }, 513 | { 514 | "cell_type": "markdown", 515 | "metadata": { 516 | "id": "U6ChLXHuXZHA" 517 | }, 518 | "source": [ 519 | "### Plot the training and validation loss" 520 | ] 521 | }, 522 | { 523 | "cell_type": "code", 524 | "execution_count": null, 525 | "metadata": { 526 | "colab": { 527 | "base_uri": "https://localhost:8080/", 528 | "height": 316 529 | }, 530 | "id": "j04HfubrXYvX", 531 | "outputId": "5416f4a7-2647-40e9-ce25-cac7909dea50" 532 | }, 533 | "outputs": [], 534 | "source": [ 535 | "# plot the training loss\n", 536 | "print(MODEL_PATH)\n", 537 | "print(PLOT_PATH)\n", 538 | "\n", 539 | "\n", 540 | "plt.style.use(\"ggplot\")\n", 541 | "plt.figure()\n", 542 | "plt.plot(H[\"train_loss\"], label=\"train_loss\")\n", 543 | "plt.plot(H[\"val_loss\"], label=\"val_loss\")\n", 544 | "plt.title(\"Training Loss on Dataset\")\n", 545 | "plt.xlabel(\"Epoch #\")\n", 546 | "plt.ylabel(\"Loss\")\n", 547 | "plt.legend(loc=\"lower left\")\n", 548 | "plt.savefig(PLOT_PATH)\n", 549 | "# serialize the model to disk\n", 550 | "torch.save(model, MODEL_PATH) # saves the model" 551 | ] 552 | }, 553 | { 554 | "attachments": {}, 555 | "cell_type": "markdown", 556 | "metadata": { 557 | "id": "5Y6Fx2oaWr0q" 558 | }, 559 | "source": [ 560 | "### Prediction part\n", 561 | "\n", 562 | "Here the trained model is loaded and use for prediction on test images." 
563 | ] 564 | }, 565 | { 566 | "cell_type": "code", 567 | "execution_count": null, 568 | "metadata": { 569 | "colab": { 570 | "base_uri": "https://localhost:8080/" 571 | }, 572 | "id": "qYh4flMu7O-m", 573 | "outputId": "94909220-5b1f-43ad-b52f-d37bbaa270fd" 574 | }, 575 | "outputs": [], 576 | "source": [ 577 | "# Load saved model for prediction\n", 578 | "\n", 579 | "print(MODEL_PATH)\n", 580 | "\n", 581 | "model = torch.load(MODEL_PATH) # add MODEL_PATH after training\n", 582 | "print(\"model loaded for prediction\")\n", 583 | "\n", 584 | "model" 585 | ] 586 | }, 587 | { 588 | "attachments": {}, 589 | "cell_type": "markdown", 590 | "metadata": {}, 591 | "source": [ 592 | "#### Provide test images for MapAI Dataset" 593 | ] 594 | }, 595 | { 596 | "cell_type": "code", 597 | "execution_count": null, 598 | "metadata": {}, 599 | "outputs": [], 600 | "source": [ 601 | "PREDICTIONS_DIR = os.path.join(GD_PATH, \"predictions\")\n", 602 | "PREDICTIONS_DIR" 603 | ] 604 | }, 605 | { 606 | "attachments": {}, 607 | "cell_type": "markdown", 608 | "metadata": {}, 609 | "source": [ 610 | "#### Make predictions on the entire MapAI dataset\n", 611 | "\n", 612 | "Make predictions on test images and save them to the folder named predictions." 613 | ] 614 | }, 615 | { 616 | "cell_type": "code", 617 | "execution_count": null, 618 | "metadata": {}, 619 | "outputs": [], 620 | "source": [ 621 | "import random\n", 622 | "import gc\n", 623 | "from pathlib import Path\n", 624 | "import numpy as np\n", 625 | "from PIL import Image\n", 626 | "\n", 627 | "# PLOTTING PREDICTIONS AS SINGLE IMAGES\n", 628 | "\n", 629 | "# Output folder for the predictions\n", 630 | "output_folder = PREDICTIONS_DIR + \"/\" # check for Windows to save predictions inside the folder\n", 631 | "\n", 632 | "# PLOT TEST IMAGES as RGB\n", 633 | "for n in range(len(test_images)):\n", 634 | " gc.collect()\n", 635 | " # Test image number\n", 636 | " testImgName = str(Path(test_images[n]).stem) + '.tif'\n", 637 | " #print('#', testImgName)\n", 638 | "\n", 639 | " # Make predicton on a test image specified with counter n\n", 640 | " test_img = test_images[n]\n", 641 | " test_img_input = np.expand_dims(test_img, 0)\n", 642 | " #print('#', test_img_input[0])\n", 643 | "\n", 644 | " # PyTorch --> works\n", 645 | " model.eval()\n", 646 | " with torch.no_grad():\n", 647 | " image = cv2.imread(test_img_input[0])\n", 648 | " image = cv2.resize(image, dsize = (IMG_WIDTH, IMG_HEIGHT), interpolation=cv2.INTER_CUBIC)\n", 649 | " image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)\n", 650 | " image = image.astype(\"float32\") / 255\n", 651 | " \n", 652 | " # print('SIZE: ', image.shape)\n", 653 | "\n", 654 | " # make the channel axis to be the leading one, add batch dimension\n", 655 | " image = np.transpose(image, (2, 0, 1))\n", 656 | " # create a PyTorch tensor\n", 657 | " image = np.expand_dims(image, 0)\n", 658 | " # flash the tensor to the device\n", 659 | " image = torch.from_numpy(image).to(DEVICE)\n", 660 | "\n", 661 | " # make the prediction\n", 662 | " predMask = model(image).squeeze()\n", 663 | " # pass result through sigmoid\n", 664 | " predMask = torch.sigmoid(predMask)\n", 665 | "\n", 666 | " # convert result to numpy array\n", 667 | " predMask = predMask.cpu().numpy()\n", 668 | "\n", 669 | " # filter out the weak predictions and convert them to integers\n", 670 | " predMask = (predMask > THRESHOLD) * 255\n", 671 | " predMask = predMask.astype(np.uint8)\n", 672 | "\n", 673 | " # generate image from array\n", 674 | " pIMG = Image.fromarray(predMask)\n", 
675 | " pIMG.save(str(output_folder + testImgName))\n", 676 | "\n", 677 | " print('Prediction:', testImgName, 'saved to:', output_folder)" 678 | ] 679 | }, 680 | { 681 | "attachments": {}, 682 | "cell_type": "markdown", 683 | "metadata": {}, 684 | "source": [ 685 | "#### Make predictions on single images by choice\n", 686 | "\n", 687 | "Change the parameter n to choose which image to plot." 688 | ] 689 | }, 690 | { 691 | "cell_type": "code", 692 | "execution_count": null, 693 | "metadata": { 694 | "colab": { 695 | "base_uri": "https://localhost:8080/" 696 | }, 697 | "id": "bq7BlbdrcgPB", 698 | "outputId": "6860afd9-da51-4911-d975-5a3ed78e01e1" 699 | }, 700 | "outputs": [], 701 | "source": [ 702 | "# ----------------------------------------------------------------------\n", 703 | "output_folder = PREDICTIONS_DIR + \"/\"\n", 704 | "predictions = glob.glob(output_folder + \"*.tif\")\n", 705 | "predictions.sort()\n", 706 | "print(\"# IMAGES for prediction: \", len(predictions))\n", 707 | "print(\"Choosen n can be from 0 to 1367! \")\n", 708 | "\n", 709 | "# ----------------------------------------------------------------------\n", 710 | "\n", 711 | "n = 900 # change this number depending on which image you want to test\n", 712 | "\n", 713 | "fig = plt.figure(figsize=(18,12))\n", 714 | "ax1 = fig.add_subplot(131)\n", 715 | "\n", 716 | "ax1.set_title('RGB image: ')\n", 717 | "image = cv2.imread(test_images[n])[:,:,::-1]\n", 718 | "ax1.imshow(image)\n", 719 | "ax1.set_axis_off()\n", 720 | "\n", 721 | "ax2 = fig.add_subplot(132)\n", 722 | "ax2.set_title('Ground truth: ')\n", 723 | "image = cv2.imread(test_masks[n])[:,:,::-1]\n", 724 | "image *= 255\n", 725 | "ax2.imshow(image)\n", 726 | "ax2.set_axis_off()\n", 727 | "\n", 728 | "ax3 = fig.add_subplot(133)\n", 729 | "ax3.set_title('Prediction: ')\n", 730 | "image = cv2.imread(predictions[n])[:,:,::-1]\n", 731 | "ax3.imshow(image)\n", 732 | "ax3.set_axis_off()" 733 | ] 734 | }, 735 | { 736 | "attachments": {}, 737 | "cell_type": "markdown", 738 | "metadata": {}, 739 | "source": [ 740 | "### BUILDING FOOTPRINT REGULARIZATION\n", 741 | "\n", 742 | "Used repo: https://github.com/zorzi-s/projectRegularization\n", 743 | "\n", 744 | "git clone the repo to the folder where your notebook is stored. To get curent working directory use os.getcwd().\n", 745 | "\n", 746 | "The pretrained weights need to be downloaded from the provided link and saved into the folder pretrained_weighs that is inside projectRegularization:\n", 747 | "\n", 748 | "https://drive.google.com/drive/folders/1IPrDpvFq9ODW7UtPAJR_T-gGzxDat_uu\n", 749 | "\n", 750 | "Next step is to generate a Python file to locate the necessary pretrained weights from projectRegularization. The code below was only tested on Ubuntu, not on Windows." 
751 | ] 752 | }, 753 | { 754 | "cell_type": "code", 755 | "execution_count": null, 756 | "metadata": {}, 757 | "outputs": [], 758 | "source": [ 759 | "# DEFINE NECESSARY PATHS FOR REGULARIZATION PART\n", 760 | "\n", 761 | "projectRegDir = os.path.join(GD_PATH, \"projectRegularization\")\n", 762 | "print(projectRegDir)\n", 763 | "\n", 764 | "ptw = os.path.join(projectRegDir, \"pretrained_weights\") \n", 765 | "print(ptw)\n", 766 | "\n", 767 | "# OUTPUT REGULARIZATIONS DIR\n", 768 | "REGULARIZATION_DIR = os.path.join(GD_PATH, \"regularizations\") + \"/\"\n", 769 | "print(REGULARIZATION_DIR)\n", 770 | "\n", 771 | "# GET THE PATHS FOR TRAINED GAN MODELS\n", 772 | "ENCODER = os.path.join(ptw, \"E140000_e1\")\n", 773 | "GENERATOR = os.path.join(ptw, \"E140000_net\")\n", 774 | "\n", 775 | "print(ENCODER)\n", 776 | "print(GENERATOR)" 777 | ] 778 | }, 779 | { 780 | "cell_type": "code", 781 | "execution_count": null, 782 | "metadata": {}, 783 | "outputs": [], 784 | "source": [ 785 | "# CREATE A NEW variables.py WITH USERS PATHS\n", 786 | "\n", 787 | "with open(projectRegDir + 'variables.py', 'w') as f:\n", 788 | " f.write('# CONFIGURE THE PATHS HERE: \\n\\n')\n", 789 | " f.write('# TRAINING \\n')\n", 790 | " f.write('DATASET_RGB = ' + '\"' + str(TRAIN_IMG_DIR + '*.tif' + '\"') + '\\n')\n", 791 | " f.write('DATASET_GTI = ' + '\"' + str(TRAIN_MASK_DIR + '*.tif' + '\"') + '\\n')\n", 792 | " f.write('DATASET_SEG = ' + '\"' + str(PREDICTIONS_DIR + '*.tif' + '\"') + '\\n')\n", 793 | " f.write('\\n')\n", 794 | " f.write('DEBUG_DIR = ' + '\"' + str('./debug/') + '\"' + '\\n')\n", 795 | " f.write('\\n')\n", 796 | " f.write('# INFERENCE \\n')\n", 797 | " f.write('INF_RGB = ' + '\"' + str(TEST_IMG_DIR + '*.tif' + '\"') + '\\n')\n", 798 | " f.write('INF_SEG = ' + '\"' + str(PREDICTIONS_DIR + '*.tif' + '\"') + '\\n')\n", 799 | " f.write('INF_OUT = ' + '\"' + str(REGULARIZATION_DIR + '\"') + '\\n')\n", 800 | " f.write('\\n')\n", 801 | " f.write('MODEL_ENCODER = ' + '\"' + str(ENCODER) + '\"' + '\\n')\n", 802 | " f.write('MODEL_GENERATOR = ' + '\"' + str(GENERATOR) + '\"' + '\\n')\n", 803 | " f.close()\n", 804 | " \n", 805 | "print(\"variables.py created with users paths...\")\n" 806 | ] 807 | }, 808 | { 809 | "attachments": {}, 810 | "cell_type": "markdown", 811 | "metadata": {}, 812 | "source": [ 813 | "#### Run projectRegularization\n", 814 | "\n", 815 | "Takes around 6-8 minutes.\n", 816 | "\n", 817 | "You only need to change the command below and replace it with the absolute path for regularize.py" 818 | ] 819 | }, 820 | { 821 | "cell_type": "code", 822 | "execution_count": null, 823 | "metadata": {}, 824 | "outputs": [], 825 | "source": [ 826 | "!python /home/shymon/Documents/mapAI-regularization/projectRegularization/regularize.py" 827 | ] 828 | }, 829 | { 830 | "attachments": {}, 831 | "cell_type": "markdown", 832 | "metadata": {}, 833 | "source": [ 834 | "### Compare predictions and regularizations on a single image" 835 | ] 836 | }, 837 | { 838 | "cell_type": "code", 839 | "execution_count": null, 840 | "metadata": {}, 841 | "outputs": [], 842 | "source": [ 843 | "# Read Regularizations to plot and compare results\n", 844 | "\n", 845 | "regularizations = glob.glob(REGULARIZATION_DIR + \"*.tif\")\n", 846 | "regularizations.sort()\n", 847 | "\n", 848 | "print(\"# of predicted images: \", len(predictions))\n", 849 | "print(\"# of regularized images: \", len(regularizations))" 850 | ] 851 | }, 852 | { 853 | "attachments": {}, 854 | "cell_type": "markdown", 855 | "metadata": {}, 856 | "source": [ 857 | "Code 
to plot RGB, GT, PREDICTION and REGULARIZATION in a single plot for comparison.\n", 858 | "\n", 859 | "Change parameter n accordingly." 860 | ] 861 | }, 862 | { 863 | "cell_type": "code", 864 | "execution_count": null, 865 | "metadata": {}, 866 | "outputs": [], 867 | "source": [ 868 | "n = 600\n", 869 | "\n", 870 | "fig = plt.figure(figsize=(18,12))\n", 871 | "ax1 = fig.add_subplot(141)\n", 872 | "\n", 873 | "ax1.set_title('RGB: ')\n", 874 | "image = cv2.imread(test_images[n])[:,:,::-1]\n", 875 | "ax1.imshow(image)\n", 876 | "ax1.set_axis_off()\n", 877 | "\n", 878 | "ax2 = fig.add_subplot(142)\n", 879 | "ax2.set_title('Ground truth: ')\n", 880 | "image = cv2.imread(test_masks[n])[:,:,::-1]\n", 881 | "image *= 255\n", 882 | "ax2.imshow(image)\n", 883 | "ax2.set_axis_off()\n", 884 | "\n", 885 | "ax3 = fig.add_subplot(143)\n", 886 | "ax3.set_title('Prediction: ')\n", 887 | "image = cv2.imread(predictions[n])[:,:,::-1]\n", 888 | "ax3.imshow(image)\n", 889 | "ax3.set_axis_off()\n", 890 | "\n", 891 | "ax4 = fig.add_subplot(144)\n", 892 | "ax4.set_title('Regularization: ')\n", 893 | "image = cv2.imread(regularizations[n])[:,:,::-1]\n", 894 | "ax4.imshow(image)\n", 895 | "ax4.set_axis_off()\n", 896 | "\n", 897 | "# DEFINE PATH FOR PLOTS TO BE SAVED\n", 898 | "figPath = GD_PATH + \"/\" + \"plots\" + \"/\" \"compare-\" + str(n) + \".png\"\n", 899 | "print(figPath)\n", 900 | "\n", 901 | "# Save plot\n", 902 | "fig.savefig(figPath)" 903 | ] 904 | }, 905 | { 906 | "attachments": {}, 907 | "cell_type": "markdown", 908 | "metadata": {}, 909 | "source": [ 910 | "### VECTORIZING THE REGULARIZED BUILDING MASKS with GDAL\n", 911 | "\n", 912 | "GDAL: https://gdal.org/'\n", 913 | "\n", 914 | "GDAL: https://www.youtube.com/watch?v=q3DLdMj5zLA\n", 915 | "\n", 916 | "I do not know if it is possible to install GDAL on WINDOWS inside a conda environment.\n", 917 | "\n", 918 | "On Ubuntu you have to follow these steps:\n", 919 | "\n", 920 | "\n", 921 | "\n", 922 | "Specific process for installation: https://stackoverflow.com/questions/44005694/no-module-named-gdal\n", 923 | "\n", 924 | "- sudo apt-get update && sudo apt upgrade -y && sudo apt autoremove \n", 925 | "- sudo apt-get install -y cdo nco gdal-bin libgdal-dev-\n", 926 | "- python -m pip install --upgrade pip setuptools wheel\n", 927 | "- python -m pip install --upgrade gdal\n", 928 | "- conda install -c conda forge libgdal\n", 929 | "- conda install -c conda-forge libgdal\n", 930 | "- conda install -c conda-forge gdal\n", 931 | "- conda install tiledb=2.2\n", 932 | "- conda install poppler\n", 933 | "\n", 934 | "When you have this you can hopefully vectorize the detected masks quite easily." 
935 | ] 936 | }, 937 | { 938 | "cell_type": "code", 939 | "execution_count": null, 940 | "metadata": {}, 941 | "outputs": [], 942 | "source": [ 943 | "def get_filename_from_path(path):\n", 944 | "    \"\"\"\n", 945 | "    Given a path, returns the filename after the last forward slash character.\n", 946 | "    \"\"\"\n", 947 | "    return path.rsplit('/', 1)[-1]\n", 948 | "\n", 949 | "def get_fname_no_extension(path):\n", 950 | "    \"\"\"\n", 951 | "    Given a path, returns the filename without its extension.\n", 952 | "    \"\"\"\n", 953 | "    filename, extension = os.path.splitext(path)\n", 954 | "    return filename" 955 | ] 956 | }, 957 | { 958 | "cell_type": "code", 959 | "execution_count": null, 960 | "metadata": {}, 961 | "outputs": [], 962 | "source": [ 963 | "import osgeo\n", 964 | "from osgeo import gdal\n", 965 | "from osgeo import ogr\n", 966 | "print('GDAL version: ', osgeo.gdal.__version__)\n", 967 | "\n", 968 | "# Choose which image to vectorize\n", 969 | "n = 0\n", 970 | "\n", 971 | "input = regularizations[n]\n", 972 | "print()\n", 973 | "print(\"INPUT: \", input)\n", 974 | "\n", 975 | "# print(get_fname_no_extension(input))\n", 976 | "\n", 977 | "# out\n", 978 | "output = get_filename_from_path(get_fname_no_extension(input)) + \".gpkg\"\n", 979 | "print(\"OUTPUT: \", output)\n", 980 | "\n", 981 | "# Open the image with the GDAL driver\n", 982 | "ds = gdal.Open(input)\n", 983 | "# Get the band\n", 984 | "band = ds.GetRasterBand(1)\n", 985 | "\n", 986 | "# Create the output GeoPackage\n", 987 | "driver = ogr.GetDriverByName(\"GPKG\")\n", 988 | "out_ds = driver.CreateDataSource(output)\n", 989 | "out_layer = out_ds.CreateLayer(output, geom_type=ogr.wkbPolygon)\n", 990 | "\n", 991 | "# Add a field to the layer to store the pixel values\n", 992 | "field_defn = ogr.FieldDefn(\"Pix_Value\", ogr.OFTInteger)\n", 993 | "out_layer.CreateField(field_defn)\n", 994 | "\n", 995 | "# Polygonize the raster mask\n", 996 | "gdal.Polygonize(band, None, out_layer, 0, [], callback=None)\n", 997 | "\n", 998 | "# Close the input and output files\n", 999 | "out_ds = None\n", 1000 | "ds = None" 1001 | ] 1002 | }, 1003 | { 1004 | "attachments": {}, 1005 | "cell_type": "markdown", 1006 | "metadata": {}, 1007 | "source": [ 1008 | "For the building detection case we only need to keep the vectors with pixel value 255. The easiest solution is an extract-by-attribute query: a Python sketch with GDAL is shown below, and the equivalent ogr2ogr command follows in the next cell."
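In addition to the ogr2ogr call in the next cell, the same extract-by-attribute step can be done from Python with `gdal.VectorTranslate`. A minimal sketch, assuming the illustrative file names below match the GeoPackage produced above:

```python
from osgeo import gdal

gdal.UseExceptions()

# illustrative file names -- adjust them to the tile vectorized above
src_gpkg = "bergen_-5943_1104.gpkg"   # polygonized mask (all pixel values)
dst_gpkg = "bergen_-5943_1104B.gpkg"  # buildings only

# equivalent of: ogr2ogr -where Pix_Value="255" dst_gpkg src_gpkg
gdal.VectorTranslate(
    dst_gpkg,
    src_gpkg,
    options=gdal.VectorTranslateOptions(where="Pix_Value = 255"),
)
```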
1009 | ] 1010 | }, 1011 | { 1012 | "cell_type": "code", 1013 | "execution_count": null, 1014 | "metadata": {}, 1015 | "outputs": [], 1016 | "source": [ 1017 | "# ogr2ogr -where ID=\"1\" OUTFILE.gpkg INFILE.gpkg\n", 1018 | "\n", 1019 | "# RUN from the command line inside Ubuntu\n", 1020 | "# Change name of input and output according to user needs\n", 1021 | "\n", 1022 | "!ogr2ogr -where Pix_Value=\"255\" bergen_-5943_1104B.gpkg bergen_-5943_1104.gpkg" 1023 | ] 1024 | } 1025 | ], 1026 | "metadata": { 1027 | "accelerator": "TPU", 1028 | "colab": { 1029 | "provenance": [] 1030 | }, 1031 | "gpuClass": "premium", 1032 | "kernelspec": { 1033 | "display_name": "torch", 1034 | "language": "python", 1035 | "name": "python3" 1036 | }, 1037 | "language_info": { 1038 | "codemirror_mode": { 1039 | "name": "ipython", 1040 | "version": 3 1041 | }, 1042 | "file_extension": ".py", 1043 | "mimetype": "text/x-python", 1044 | "name": "python", 1045 | "nbconvert_exporter": "python", 1046 | "pygments_lexer": "ipython3", 1047 | "version": "3.10.9" 1048 | }, 1049 | "vscode": { 1050 | "interpreter": { 1051 | "hash": "5614dd747bc595cf94d4c937a609d8df6c75b545807dd5ca7f02df8b67f4ea7c" 1052 | } 1053 | } 1054 | }, 1055 | "nbformat": 4, 1056 | "nbformat_minor": 0 1057 | } 1058 | -------------------------------------------------------------------------------- /05-mapai-dcswin-regularization.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": { 7 | "id": "1KJnP7zSGQg5" 8 | }, 9 | "source": [ 10 | "## Binary semantic segmentation example using DCSwin" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "metadata": { 17 | "colab": { 18 | "base_uri": "https://localhost:8080/" 19 | }, 20 | "id": "cFxHJWmXlcZk", 21 | "outputId": "b755d1a8-3650-42d9-8718-401b4458f049" 22 | }, 23 | "outputs": [], 24 | "source": [ 25 | "import os\n", 26 | "import glob\n", 27 | "import matplotlib.pyplot as plt\n", 28 | "\n", 29 | "import torch\n", 30 | "import torchvision\n", 31 | "from tqdm import tqdm\n", 32 | "\n", 33 | "print(torch.__version__)\n", 34 | "print(torchvision.__version__)\n", 35 | "\n", 36 | "DEVICE = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n", 37 | "print(DEVICE)\n", 38 | "\n", 39 | "# determine if we will be pinning memory during data loading\n", 40 | "PIN_MEMORY = True if DEVICE == \"cuda\" else False" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": { 46 | "id": "KVfaGZrWG63Q" 47 | }, 48 | "source": [ 49 | "#### CONFIGURE YOUR PATHS AND HYPERPARAMETERS FOR TRAINING BELOW." 
50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": null, 55 | "metadata": { 56 | "colab": { 57 | "base_uri": "https://localhost:8080/", 58 | "height": 235 59 | }, 60 | "id": "OjlBC-raVM2K", 61 | "outputId": "b6950142-9c43-40bd-9edb-e10b3973f745" 62 | }, 63 | "outputs": [], 64 | "source": [ 65 | "GD_PATH = os.getcwd() # get current working directory for the repo\n", 66 | "print(GD_PATH)\n", 67 | "\n", 68 | "# PROVIDE PATH TO DOWNLOADED MAPAI DATASET\n", 69 | "DATASET_PATH = \"/home/shymon/datasets/\"\n", 70 | "\n", 71 | "DATASET_PATH = os.path.join(DATASET_PATH, \"mapai_full\") # create dataset path\n", 72 | "\n", 73 | "print(DATASET_PATH)\n", 74 | "\n", 75 | "TRAIN_IMG_DIR = os.path.join(DATASET_PATH, \"train\", \"images\")\n", 76 | "TRAIN_MASK_DIR = os.path.join(DATASET_PATH, \"train\", \"masks\")\n", 77 | "\n", 78 | "print(TRAIN_IMG_DIR)\n", 79 | "print(TRAIN_MASK_DIR)\n", 80 | "\n", 81 | "VAL_IMG_DIR = os.path.join(DATASET_PATH, \"validation\", \"images\")\n", 82 | "VAL_MASK_DIR = os.path.join(DATASET_PATH, \"validation\", \"masks\")\n", 83 | "\n", 84 | "print(VAL_IMG_DIR)\n", 85 | "print(VAL_MASK_DIR)\n", 86 | "\n", 87 | "TEST_IMG_DIR = os.path.join(DATASET_PATH, \"task1_test\", \"images\")\n", 88 | "TEST_MASK_DIR = os.path.join(DATASET_PATH, \"task1_test\", \"masks\")\n", 89 | "\n", 90 | "print(TEST_IMG_DIR)\n", 91 | "print(TEST_MASK_DIR)\n", 92 | "\n", 93 | "# CONFIGURE MapAI DATASET\n", 94 | "NUM_CHANNELS = 3\n", 95 | "NUM_LEVELS = 3\n", 96 | "NUM_CLASSES = 1\n", 97 | "\n", 98 | "# IMAGE SHAPE\n", 99 | "IMG_WIDTH = 512\n", 100 | "IMG_HEIGHT = 512\n", 101 | "\n", 102 | "#---------------------------------------------------------------------------------------------------#\n", 103 | "\n", 104 | "# CONFIGURE parameters for training\n", 105 | "\n", 106 | "EPOCHS = 25\n", 107 | "init_lr = 1e-4 # learning rate\n", 108 | "BATCH_SIZE = 2\n", 109 | "\n", 110 | "THRESHOLD = 0.5\n", 111 | "base_output = \"out\"\n", 112 | "\n", 113 | "model_name = \"dcswin-25-epochs.pth\" # provide name for model\n", 114 | "training_plot_name = \"dcswin-25-epochs.png\"\n", 115 | "\n", 116 | "#---------------------------------------------------------------------------------------------------#\n", 117 | "\n", 118 | "# OUTPUT PATHS\n", 119 | "\n", 120 | "# Trained model path\n", 121 | "MODEL_PATH = os.path.join(GD_PATH, \"trained_models\", model_name) # change depending on the number of epochs\n", 122 | "print(MODEL_PATH)\n", 123 | "PLOT_PATH = os.path.join(GD_PATH, \"plots\", training_plot_name) # the folder to save future plots\n", 124 | "print(PLOT_PATH)" 125 | ] 126 | }, 127 | { 128 | "cell_type": "markdown", 129 | "metadata": { 130 | "id": "IfSMUZbWWdJn" 131 | }, 132 | "source": [ 133 | "### Load and read the MapAI dataset" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": null, 139 | "metadata": { 140 | "id": "TPiACQ_6VyQP" 141 | }, 142 | "outputs": [], 143 | "source": [ 144 | "import tifffile\n", 145 | "from torch.utils.data import Dataset\n", 146 | "import cv2\n", 147 | "\n", 148 | "\n", 149 | "class mapAIdataset(Dataset):\n", 150 | " def __init__(self, imagePaths, maskPaths, transforms):\n", 151 | " # store the image and mask filepaths, and augmentation\n", 152 | " # transforms\n", 153 | " self.imagePaths = imagePaths\n", 154 | " self.maskPaths = maskPaths\n", 155 | " self.transforms = transforms\n", 156 | " \n", 157 | " def __len__(self):\n", 158 | " # return the number of total samples contained in the dataset\n", 159 | " return len(self.imagePaths)\n", 160 
| " \n", 161 | " def __getitem__(self, idx):\n", 162 | " # grab the image path from the current index\n", 163 | " imagePath = self.imagePaths[idx]\n", 164 | " # load the image from disk, swap its channels from BGR to RGB,\n", 165 | " # and read the associated mask from disk\n", 166 | " image = cv2.imread(imagePath)\n", 167 | " image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)\n", 168 | " mask = tifffile.imread(self.maskPaths[idx])\n", 169 | " # convert the mask to a float32 tensor with values in the range [0, 1]\n", 170 | " mask = mask.astype('float32')\n", 171 | " # check to see if we are applying any transformations\n", 172 | " if self.transforms is not None:\n", 173 | " # apply the transformations to both image and its mask\n", 174 | " image = self.transforms(image)\n", 175 | " mask = self.transforms(mask)\n", 176 | " \n", 177 | " # return a tuple of the image and its mask\n", 178 | " return (image, mask)" 179 | ] 180 | }, 181 | { 182 | "attachments": {}, 183 | "cell_type": "markdown", 184 | "metadata": { 185 | "id": "AKXL9bO8WnNg" 186 | }, 187 | "source": [ 188 | "### Build DCSWIN architecture\n", 189 | "Downloaded from: https://github.com/WangLibo1995/GeoSeg/blob/main/geoseg/models/FTUNetFormer.py code changed for binary semantic segmentation." 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": null, 195 | "metadata": { 196 | "id": "9urE3W1iWp7v" 197 | }, 198 | "outputs": [], 199 | "source": [ 200 | "import sys\n", 201 | "subfolder = os.path.join(GD_PATH, \"models\")\n", 202 | "sys.path.insert(0, subfolder)\n", 203 | "\n", 204 | "import DCSwin_model" 205 | ] 206 | }, 207 | { 208 | "cell_type": "markdown", 209 | "metadata": { 210 | "id": "22hbANvfWxmX" 211 | }, 212 | "source": [ 213 | "### Training the segmentation model\n", 214 | "Below we append the paths for TRAIN/VAL/TEST sets - images/masks." 215 | ] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "execution_count": null, 220 | "metadata": { 221 | "colab": { 222 | "base_uri": "https://localhost:8080/" 223 | }, 224 | "id": "G2Jha-LCW0ir", 225 | "outputId": "fcad4c67-0851-42e6-ea68-6b6dd88d26c2" 226 | }, 227 | "outputs": [], 228 | "source": [ 229 | "from torch.nn import BCEWithLogitsLoss\n", 230 | "from torch.optim import Adam\n", 231 | "from torch.utils.data import DataLoader\n", 232 | "from imutils import paths\n", 233 | "import time\n", 234 | "\n", 235 | "# TRAINING\n", 236 | "train_images = sorted(list(paths.list_images(TRAIN_IMG_DIR)))\n", 237 | "train_masks = sorted(list(paths.list_images(TRAIN_MASK_DIR)))\n", 238 | "\n", 239 | "# VALIDATION\n", 240 | "val_images = sorted(list(paths.list_images(VAL_IMG_DIR)))\n", 241 | "val_masks = sorted(list(paths.list_images(VAL_MASK_DIR)))\n", 242 | "\n", 243 | "\n", 244 | "# TEST\n", 245 | "test_images = sorted(list(paths.list_images(TEST_IMG_DIR)))\n", 246 | "test_masks = sorted(list(paths.list_images(TEST_MASK_DIR)))" 247 | ] 248 | }, 249 | { 250 | "cell_type": "markdown", 251 | "metadata": { 252 | "id": "gtqUNGR1XCa5" 253 | }, 254 | "source": [ 255 | "### Define transformations\n", 256 | "\n", 257 | "I tried out different data augmentation techniques, including Horizontal Flip, Vertical Flip, Contrast, Brightness. They did not improve my results much, the validation and training loss were actually worse than without data augmentation techniques." 
258 | ] 259 | }, 260 | { 261 | "attachments": {}, 262 | "cell_type": "markdown", 263 | "metadata": { 264 | "id": "ghW7Nj0OEQMc" 265 | }, 266 | "source": [ 267 | "https://pytorch.org/vision/stable/auto_examples/plot_transforms.html#sphx-glr-auto-examples-plot-transforms-py\n", 268 | "\n", 269 | "https://albumentations.ai/docs/getting_started/mask_augmentation/" 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": null, 275 | "metadata": { 276 | "colab": { 277 | "base_uri": "https://localhost:8080/" 278 | }, 279 | "id": "WR_dzdpCXCHY", 280 | "outputId": "4e9b1681-2846-489f-edf6-a6240af65563" 281 | }, 282 | "outputs": [], 283 | "source": [ 284 | "import torchvision.transforms as T\n", 285 | "\n", 286 | "# T.RandomHorizontalFlip(p=0.5),\n", 287 | "# T.RandomVerticalFlip(p=0.1),\n", 288 | "\n", 289 | "# Image augmentations applied\n", 290 | "transforms = T.Compose([T.ToPILImage(),\n", 291 | " T.Resize((IMG_HEIGHT,IMG_WIDTH)),\n", 292 | " T.ToTensor()])\n", 293 | "\n", 294 | "# create the train and test datasets\n", 295 | "trainDS = mapAIdataset(imagePaths=train_images,\n", 296 | " maskPaths=train_masks,\n", 297 | " transforms=transforms)\n", 298 | "\n", 299 | "valDS = mapAIdataset(imagePaths=val_images,\n", 300 | " maskPaths=val_masks,\n", 301 | " transforms=transforms)\n", 302 | "\n", 303 | "testDS = mapAIdataset(imagePaths=test_images,\n", 304 | " maskPaths=test_masks,\n", 305 | " transforms=transforms)\n", 306 | "\n", 307 | "print(f\"[INFO] found {len(trainDS)} examples in the TRAINING set...\")\n", 308 | "print(f\"[INFO] found {len(valDS)} examples in the VALIDATION set...\")\n", 309 | "print(f\"[INFO] found {len(testDS)} examples in the TEST set...\")\n", 310 | "\n", 311 | "# create the training and test data loaders\n", 312 | "trainLoader = DataLoader(trainDS,\n", 313 | " shuffle=True,\n", 314 | " batch_size=BATCH_SIZE,\n", 315 | " pin_memory=PIN_MEMORY,\n", 316 | " num_workers=os.cpu_count())\n", 317 | "\n", 318 | "valLoader = DataLoader(valDS,\n", 319 | " shuffle=False,\n", 320 | " batch_size=BATCH_SIZE,\n", 321 | " pin_memory=PIN_MEMORY,\n", 322 | " num_workers=os.cpu_count())\n", 323 | "\n", 324 | "testLoader = DataLoader(testDS,\n", 325 | " shuffle=False,\n", 326 | " batch_size=BATCH_SIZE,\n", 327 | " pin_memory=PIN_MEMORY,\n", 328 | " num_workers=os.cpu_count())" 329 | ] 330 | }, 331 | { 332 | "attachments": {}, 333 | "cell_type": "markdown", 334 | "metadata": { 335 | "id": "tAO9M_R4XG6q" 336 | }, 337 | "source": [ 338 | "### Initialize DCSWIN model for training\n" 339 | ] 340 | }, 341 | { 342 | "cell_type": "code", 343 | "execution_count": null, 344 | "metadata": { 345 | "colab": { 346 | "base_uri": "https://localhost:8080/" 347 | }, 348 | "id": "2IMsYzUaXJW7", 349 | "outputId": "5328f1e2-4a71-4c05-fa9e-23b768971315" 350 | }, 351 | "outputs": [], 352 | "source": [ 353 | "model = DCSwin_model.DCSwin().to(DEVICE)\n", 354 | "\n", 355 | "# loss / optimizer\n", 356 | "lossFunction = BCEWithLogitsLoss()\n", 357 | "opt = Adam(model.parameters(), lr=init_lr)\n", 358 | "\n", 359 | "# calculate steps per epoch for train/val/test\n", 360 | "trainSteps = len(trainDS) // BATCH_SIZE \n", 361 | "valSteps = len(valDS) // BATCH_SIZE\n", 362 | "testSteps = len(testDS) // BATCH_SIZE\n", 363 | "\n", 364 | "print(trainSteps, valSteps, testSteps)\n", 365 | "\n", 366 | "# initialize a dictionary to store training history\n", 367 | "H = {\"train_loss\": [], \"val_loss\": [], \"test_loss\": []}\n", 368 | "H" 369 | ] 370 | }, 371 | { 372 | "cell_type": "code", 373 | 
"execution_count": null, 374 | "metadata": { 375 | "id": "WEP-IVokbWQg" 376 | }, 377 | "outputs": [], 378 | "source": [ 379 | "torch.cuda.empty_cache()" 380 | ] 381 | }, 382 | { 383 | "attachments": {}, 384 | "cell_type": "markdown", 385 | "metadata": { 386 | "id": "xcjuKhMeXLU-" 387 | }, 388 | "source": [ 389 | "### TRAINING THE MODEL\n", 390 | "\n", 391 | "Run this piece of code only if you want to train the model from scratch.\n", 392 | "\n", 393 | "Training locally: BATCH_SIZE = 2 takes 5035 MB of GPU memory.\n", 394 | "\n" 395 | ] 396 | }, 397 | { 398 | "cell_type": "code", 399 | "execution_count": null, 400 | "metadata": { 401 | "colab": { 402 | "base_uri": "https://localhost:8080/" 403 | }, 404 | "id": "vWuUyLUgXPNf", 405 | "outputId": "34c6485b-838b-45cf-99da-601f7cfbc4d1" 406 | }, 407 | "outputs": [], 408 | "source": [ 409 | "# loop over epochs\n", 410 | "print(\"[INFO] training DCSwin ...\")\n", 411 | "startTime = time.time()\n", 412 | "\n", 413 | "for epoch in tqdm(range(EPOCHS)):\n", 414 | " model.train()\n", 415 | "\n", 416 | " # initialize total training and validation loss\n", 417 | " totalTrainLoss = 0\n", 418 | " totalValLoss = 0\n", 419 | "\n", 420 | " # loop over the training set\n", 421 | " for (i, (x, y)) in enumerate(trainLoader):\n", 422 | " # send output to device\n", 423 | " (x, y) = (x.to(DEVICE), y.to(DEVICE))\n", 424 | "\n", 425 | " # perform a forward pass and calculate the training loss\n", 426 | " pred = model(x)\n", 427 | " loss = lossFunction(pred, y)\n", 428 | "\n", 429 | " # kill previously accumulated gradients then\n", 430 | " # perform backpropagation and update model parameters\n", 431 | " opt.zero_grad()\n", 432 | " loss.backward()\n", 433 | " opt.step()\n", 434 | "\n", 435 | " # add the loss to the total training loss\n", 436 | " totalTrainLoss += loss\n", 437 | "\n", 438 | " # switch of autograd\n", 439 | " with torch.no_grad():\n", 440 | " # set the model in evaluation mode\n", 441 | " model.eval()\n", 442 | "\n", 443 | " # loop over the validation set\n", 444 | " for (x, y) in valLoader:\n", 445 | " # send the input to the device\n", 446 | " (x, y) = (x.to(DEVICE), y.to(DEVICE))\n", 447 | "\n", 448 | " # make the predictions and calculate the validation loss\n", 449 | " pred = model(x)\n", 450 | " totalValLoss += lossFunction(pred, y)\n", 451 | "\n", 452 | " # calculate the average training and validation loss\n", 453 | " avgTrainLoss = totalTrainLoss / trainSteps\n", 454 | " avgValLoss = totalValLoss / valSteps\n", 455 | " \n", 456 | " # update our training history\n", 457 | " H[\"train_loss\"].append(avgTrainLoss.cpu().detach().numpy())\n", 458 | " H[\"val_loss\"].append(avgValLoss.cpu().detach().numpy())\n", 459 | "\n", 460 | " # print the model training and validation information\n", 461 | " print(\"[INFO] EPOCH: {}/{}\".format(epoch + 1, EPOCHS))\n", 462 | " print(\"Train loss: {:.6f}, Val loss: {:.4f}\".format(avgTrainLoss, avgValLoss))\n", 463 | " \n", 464 | "# display the total time needed to perform the training\n", 465 | "endTime = time.time()\n", 466 | "print(\"[INFO] total time taken to train the model: {:.2f}s\".format(endTime - startTime))" 467 | ] 468 | }, 469 | { 470 | "attachments": {}, 471 | "cell_type": "markdown", 472 | "metadata": {}, 473 | "source": [ 474 | "Train loss: 0.001194, Val loss: 0.0013\n", 475 | "[INFO] total time taken to train the model: 27115.38s" 476 | ] 477 | }, 478 | { 479 | "cell_type": "code", 480 | "execution_count": null, 481 | "metadata": { 482 | "colab": { 483 | "base_uri": "https://localhost:8080/" 
484 | }, 485 | "id": "CsJoOVn11rs9", 486 | "outputId": "c9a36460-f773-4771-cfd7-fec78711d8cc" 487 | }, 488 | "outputs": [], 489 | "source": [ 490 | "H # show traning/val loss history" 491 | ] 492 | }, 493 | { 494 | "cell_type": "markdown", 495 | "metadata": { 496 | "id": "U6ChLXHuXZHA" 497 | }, 498 | "source": [ 499 | "### Plot the training and validation loss" 500 | ] 501 | }, 502 | { 503 | "cell_type": "code", 504 | "execution_count": null, 505 | "metadata": { 506 | "colab": { 507 | "base_uri": "https://localhost:8080/", 508 | "height": 316 509 | }, 510 | "id": "j04HfubrXYvX", 511 | "outputId": "5416f4a7-2647-40e9-ce25-cac7909dea50" 512 | }, 513 | "outputs": [], 514 | "source": [ 515 | "# plot the training loss\n", 516 | "print(MODEL_PATH)\n", 517 | "print(PLOT_PATH)\n", 518 | "\n", 519 | "plt.style.use(\"ggplot\")\n", 520 | "plt.figure()\n", 521 | "plt.plot(H[\"train_loss\"], label=\"train_loss\")\n", 522 | "plt.plot(H[\"val_loss\"], label=\"val_loss\")\n", 523 | "plt.title(\"Training Loss on Dataset\")\n", 524 | "plt.xlabel(\"Epoch #\")\n", 525 | "plt.ylabel(\"Loss\")\n", 526 | "plt.legend(loc=\"lower left\")\n", 527 | "plt.savefig(PLOT_PATH)\n", 528 | "# serialize the model to disk\n", 529 | "torch.save(model, MODEL_PATH) # saves the model" 530 | ] 531 | }, 532 | { 533 | "attachments": {}, 534 | "cell_type": "markdown", 535 | "metadata": { 536 | "id": "5Y6Fx2oaWr0q" 537 | }, 538 | "source": [ 539 | "### Prediction part\n", 540 | "\n", 541 | "Here the trained model is loaded and use for prediction on test images." 542 | ] 543 | }, 544 | { 545 | "cell_type": "code", 546 | "execution_count": null, 547 | "metadata": { 548 | "colab": { 549 | "base_uri": "https://localhost:8080/" 550 | }, 551 | "id": "qYh4flMu7O-m", 552 | "outputId": "94909220-5b1f-43ad-b52f-d37bbaa270fd" 553 | }, 554 | "outputs": [], 555 | "source": [ 556 | "# Load saved model for prediction\n", 557 | "\n", 558 | "print(MODEL_PATH)\n", 559 | "\n", 560 | "model = torch.load(MODEL_PATH) # add MODEL_PATH after training\n", 561 | "print(\"model loaded for prediction\")\n", 562 | "\n", 563 | "model" 564 | ] 565 | }, 566 | { 567 | "attachments": {}, 568 | "cell_type": "markdown", 569 | "metadata": {}, 570 | "source": [ 571 | "#### Provide test images for MapAI Dataset" 572 | ] 573 | }, 574 | { 575 | "cell_type": "code", 576 | "execution_count": null, 577 | "metadata": {}, 578 | "outputs": [], 579 | "source": [ 580 | "PREDICTIONS_DIR = os.path.join(GD_PATH, \"predictions\")\n", 581 | "PREDICTIONS_DIR" 582 | ] 583 | }, 584 | { 585 | "attachments": {}, 586 | "cell_type": "markdown", 587 | "metadata": {}, 588 | "source": [ 589 | "#### Make predictions on the entire MapAI dataset\n", 590 | "\n", 591 | "Make predictions on test images and save them to the folder named predictions." 
592 | ] 593 | }, 594 | { 595 | "cell_type": "code", 596 | "execution_count": null, 597 | "metadata": {}, 598 | "outputs": [], 599 | "source": [ 600 | "import random\n", 601 | "import gc\n", 602 | "from pathlib import Path\n", 603 | "import numpy as np\n", 604 | "from PIL import Image\n", 605 | "\n", 606 | "# PLOTTING PREDICTIONS AS SINGLE IMAGES\n", 607 | "\n", 608 | "# Output folder for the predictions\n", 609 | "output_folder = PREDICTIONS_DIR + \"/\" # check for Windows to save predictions inside the folder\n", 610 | "\n", 611 | "# PLOT TEST IMAGES as RGB\n", 612 | "for n in range(len(test_images)):\n", 613 | " gc.collect()\n", 614 | " # Test image number\n", 615 | " testImgName = str(Path(test_images[n]).stem) + '.tif'\n", 616 | " #print('#', testImgName)\n", 617 | "\n", 618 | " # Make predicton on a test image specified with counter n\n", 619 | " test_img = test_images[n]\n", 620 | " test_img_input = np.expand_dims(test_img, 0)\n", 621 | " #print('#', test_img_input[0])\n", 622 | "\n", 623 | " # PyTorch --> works\n", 624 | " model.eval()\n", 625 | " with torch.no_grad():\n", 626 | " image = cv2.imread(test_img_input[0])\n", 627 | " image = cv2.resize(image, dsize = (IMG_WIDTH, IMG_HEIGHT), interpolation=cv2.INTER_CUBIC)\n", 628 | " image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)\n", 629 | " image = image.astype(\"float32\") / 255\n", 630 | " \n", 631 | " # print('SIZE: ', image.shape)\n", 632 | "\n", 633 | " # make the channel axis to be the leading one, add batch dimension\n", 634 | " image = np.transpose(image, (2, 0, 1))\n", 635 | " # create a PyTorch tensor\n", 636 | " image = np.expand_dims(image, 0)\n", 637 | " # flash the tensor to the device\n", 638 | " image = torch.from_numpy(image).to(DEVICE)\n", 639 | "\n", 640 | " # make the prediction\n", 641 | " predMask = model(image).squeeze()\n", 642 | " # pass result through sigmoid\n", 643 | " predMask = torch.sigmoid(predMask)\n", 644 | "\n", 645 | " # convert result to numpy array\n", 646 | " predMask = predMask.cpu().numpy()\n", 647 | "\n", 648 | " # filter out the weak predictions and convert them to integers\n", 649 | " predMask = (predMask > THRESHOLD) * 255\n", 650 | " predMask = predMask.astype(np.uint8)\n", 651 | "\n", 652 | " # generate image from array\n", 653 | " pIMG = Image.fromarray(predMask)\n", 654 | " pIMG.save(str(output_folder + testImgName))\n", 655 | "\n", 656 | " print('Prediction:', testImgName, 'saved to:', output_folder)" 657 | ] 658 | }, 659 | { 660 | "attachments": {}, 661 | "cell_type": "markdown", 662 | "metadata": {}, 663 | "source": [ 664 | "#### Make predictions on single images by choice\n", 665 | "\n", 666 | "Change the parameter n to choose which image to plot." 667 | ] 668 | }, 669 | { 670 | "cell_type": "code", 671 | "execution_count": null, 672 | "metadata": { 673 | "colab": { 674 | "base_uri": "https://localhost:8080/" 675 | }, 676 | "id": "bq7BlbdrcgPB", 677 | "outputId": "6860afd9-da51-4911-d975-5a3ed78e01e1" 678 | }, 679 | "outputs": [], 680 | "source": [ 681 | "# ----------------------------------------------------------------------\n", 682 | "\n", 683 | "output_folder = PREDICTIONS_DIR + \"/\" + \"*.tif\"\n", 684 | "\n", 685 | "predictions = glob.glob(output_folder)\n", 686 | "predictions.sort()\n", 687 | "print(\"# IMAGES for prediction: \", len(predictions))\n", 688 | "print(\"Choosen n can be from 0 o 1367! 
\")\n", 689 | "\n", 690 | "\n", 691 | "# ----------------------------------------------------------------------\n", 692 | "\n", 693 | "n = 900 # change this number depending on which image you want to test\n", 694 | "\n", 695 | "fig = plt.figure(figsize=(18,12))\n", 696 | "ax1 = fig.add_subplot(131)\n", 697 | "\n", 698 | "ax1.set_title('RGB image: ')\n", 699 | "image = cv2.imread(test_images[n])[:,:,::-1]\n", 700 | "ax1.imshow(image)\n", 701 | "ax1.set_axis_off()\n", 702 | "\n", 703 | "ax2 = fig.add_subplot(132)\n", 704 | "ax2.set_title('Ground truth: ')\n", 705 | "image = cv2.imread(test_masks[n])[:,:,::-1]\n", 706 | "image *= 255\n", 707 | "ax2.imshow(image)\n", 708 | "ax2.set_axis_off()\n", 709 | "\n", 710 | "ax3 = fig.add_subplot(133)\n", 711 | "ax3.set_title('Prediction: ')\n", 712 | "image = cv2.imread(predictions[n])[:,:,::-1]\n", 713 | "ax3.imshow(image)\n", 714 | "ax3.set_axis_off()" 715 | ] 716 | }, 717 | { 718 | "attachments": {}, 719 | "cell_type": "markdown", 720 | "metadata": {}, 721 | "source": [ 722 | "### BUILDING FOOTPRINT REGULARIZATION\n", 723 | "\n", 724 | "Used repo: https://github.com/zorzi-s/projectRegularization\n", 725 | "\n", 726 | "git clone the repo to the folder where your notebook is stored. To get curent working directory use os.getcwd().\n", 727 | "\n", 728 | "The pretrained weights need to be downloaded from the provided link and saved into the folder pretrained_weighs that is inside projectRegularization:\n", 729 | "\n", 730 | "https://drive.google.com/drive/folders/1IPrDpvFq9ODW7UtPAJR_T-gGzxDat_uu\n", 731 | "\n", 732 | "Next step is to generate a Python file to locate the necessary pretrained weights from projectRegularization. The code below was only tested on Ubuntu, not on Windows." 733 | ] 734 | }, 735 | { 736 | "cell_type": "code", 737 | "execution_count": null, 738 | "metadata": {}, 739 | "outputs": [], 740 | "source": [ 741 | "# DEFINE NECESSARY PATHS FOR REGULARIZATION PART\n", 742 | "\n", 743 | "projectRegDir = os.path.join(GD_PATH, \"projectRegularization\")\n", 744 | "print(projectRegDir)\n", 745 | "\n", 746 | "ptw = os.path.join(projectRegDir, \"pretrained_weights\") \n", 747 | "print(ptw)\n", 748 | "\n", 749 | "# OUTPUT REGULARIZATIONS DIR\n", 750 | "REGULARIZATION_DIR = os.path.join(GD_PATH, \"regularizations\") + \"/\"\n", 751 | "print(REGULARIZATION_DIR)\n", 752 | "\n", 753 | "# GET THE PATHS FOR TRAINED GAN MODELS\n", 754 | "ENCODER = os.path.join(ptw, \"E140000_e1\")\n", 755 | "GENERATOR = os.path.join(ptw, \"E140000_net\")\n", 756 | "\n", 757 | "print(ENCODER)\n", 758 | "print(GENERATOR)" 759 | ] 760 | }, 761 | { 762 | "cell_type": "code", 763 | "execution_count": null, 764 | "metadata": {}, 765 | "outputs": [], 766 | "source": [ 767 | "# CREATE A NEW variables.py WITH USERS PATHS\n", 768 | "\n", 769 | "with open(projectRegDir + 'variables.py', 'w') as f:\n", 770 | " f.write('# CONFIGURE THE PATHS HERE: \\n\\n')\n", 771 | " f.write('# TRAINING \\n')\n", 772 | " f.write('DATASET_RGB = ' + '\"' + str(TRAIN_IMG_DIR + '*.tif' + '\"') + '\\n')\n", 773 | " f.write('DATASET_GTI = ' + '\"' + str(TRAIN_MASK_DIR + '*.tif' + '\"') + '\\n')\n", 774 | " f.write('DATASET_SEG = ' + '\"' + str(PREDICTIONS_DIR + '*.tif' + '\"') + '\\n')\n", 775 | " f.write('\\n')\n", 776 | " f.write('DEBUG_DIR = ' + '\"' + str('./debug/') + '\"' + '\\n')\n", 777 | " f.write('\\n')\n", 778 | " f.write('# INFERENCE \\n')\n", 779 | " f.write('INF_RGB = ' + '\"' + str(TEST_IMG_DIR + '*.tif' + '\"') + '\\n')\n", 780 | " f.write('INF_SEG = ' + '\"' + str(PREDICTIONS_DIR + 
'*.tif' + '\"') + '\\n')\n", 781 | " f.write('INF_OUT = ' + '\"' + str(REGULARIZATION_DIR + '\"') + '\\n')\n", 782 | " f.write('\\n')\n", 783 | " f.write('MODEL_ENCODER = ' + '\"' + str(ENCODER) + '\"' + '\\n')\n", 784 | " f.write('MODEL_GENERATOR = ' + '\"' + str(GENERATOR) + '\"' + '\\n')\n", 785 | " f.close()\n", 786 | " \n", 787 | "print(\"variables.py created with users paths...\")\n" 788 | ] 789 | }, 790 | { 791 | "attachments": {}, 792 | "cell_type": "markdown", 793 | "metadata": {}, 794 | "source": [ 795 | "#### Run projectRegularization\n", 796 | "\n", 797 | "Takes around 6-8 minutes.\n", 798 | "\n", 799 | "You only need to change the command below and replace it with the absolute path for regularize.py" 800 | ] 801 | }, 802 | { 803 | "cell_type": "code", 804 | "execution_count": null, 805 | "metadata": {}, 806 | "outputs": [], 807 | "source": [ 808 | "!python /home/shymon/Documents/mapAI-regularization/projectRegularization/regularize.py" 809 | ] 810 | }, 811 | { 812 | "attachments": {}, 813 | "cell_type": "markdown", 814 | "metadata": {}, 815 | "source": [ 816 | "### Compare predictions and regularizations on a single image" 817 | ] 818 | }, 819 | { 820 | "cell_type": "code", 821 | "execution_count": null, 822 | "metadata": {}, 823 | "outputs": [], 824 | "source": [ 825 | "# Read Regularizations to plot and compare results\n", 826 | "\n", 827 | "regularizations = glob.glob(REGULARIZATION_DIR + \"*.tif\")\n", 828 | "regularizations.sort()\n", 829 | "\n", 830 | "print(\"# of predicted images: \", len(predictions))\n", 831 | "print(\"# of regularized images: \", len(regularizations))" 832 | ] 833 | }, 834 | { 835 | "attachments": {}, 836 | "cell_type": "markdown", 837 | "metadata": {}, 838 | "source": [ 839 | "Code to plot RGB, GT, PREDICTION and REGULARIZATION in a single plot for comparison.\n", 840 | "\n", 841 | "Change parameter n accordingly." 
842 | ] 843 | }, 844 | { 845 | "cell_type": "code", 846 | "execution_count": null, 847 | "metadata": {}, 848 | "outputs": [], 849 | "source": [ 850 | "n = 600\n", 851 | "\n", 852 | "fig = plt.figure(figsize=(18,12))\n", 853 | "ax1 = fig.add_subplot(141)\n", 854 | "\n", 855 | "ax1.set_title('RGB: ')\n", 856 | "image = cv2.imread(test_images[n])[:,:,::-1]\n", 857 | "ax1.imshow(image)\n", 858 | "ax1.set_axis_off()\n", 859 | "\n", 860 | "ax2 = fig.add_subplot(142)\n", 861 | "ax2.set_title('Ground truth: ')\n", 862 | "image = cv2.imread(test_masks[n])[:,:,::-1]\n", 863 | "image *= 255\n", 864 | "ax2.imshow(image)\n", 865 | "ax2.set_axis_off()\n", 866 | "\n", 867 | "ax3 = fig.add_subplot(143)\n", 868 | "ax3.set_title('Prediction: ')\n", 869 | "image = cv2.imread(predictions[n])[:,:,::-1]\n", 870 | "ax3.imshow(image)\n", 871 | "ax3.set_axis_off()\n", 872 | "\n", 873 | "ax4 = fig.add_subplot(144)\n", 874 | "ax4.set_title('Regularization: ')\n", 875 | "image = cv2.imread(regularizations[n])[:,:,::-1]\n", 876 | "ax4.imshow(image)\n", 877 | "ax4.set_axis_off()\n", 878 | "\n", 879 | "# DEFINE PATH FOR PLOTS TO BE SAVED\n", 880 | "figPath = GD_PATH + \"/\" + \"plots\" + \"/\" \"compare-\" + str(n) + \".png\"\n", 881 | "print(figPath)\n", 882 | "\n", 883 | "# Save plot\n", 884 | "fig.savefig(figPath)" 885 | ] 886 | }, 887 | { 888 | "attachments": {}, 889 | "cell_type": "markdown", 890 | "metadata": {}, 891 | "source": [ 892 | "### VECTORIZING THE REGULARIZED BUILDING MASKS with GDAL\n", 893 | "\n", 894 | "GDAL: https://gdal.org/'\n", 895 | "\n", 896 | "GDAL: https://www.youtube.com/watch?v=q3DLdMj5zLA\n", 897 | "\n", 898 | "I do not know if it is possible to install GDAL on WINDOWS inside a conda environment.\n", 899 | "\n", 900 | "On Ubuntu you have to follow these steps:\n", 901 | "\n", 902 | "\n", 903 | "\n", 904 | "Specific process for installation: https://stackoverflow.com/questions/44005694/no-module-named-gdal\n", 905 | "\n", 906 | "- sudo apt-get update && sudo apt upgrade -y && sudo apt autoremove \n", 907 | "- sudo apt-get install -y cdo nco gdal-bin libgdal-dev-\n", 908 | "- python -m pip install --upgrade pip setuptools wheel\n", 909 | "- python -m pip install --upgrade gdal\n", 910 | "- conda install -c conda forge libgdal\n", 911 | "- conda install -c conda-forge libgdal\n", 912 | "- conda install -c conda-forge gdal\n", 913 | "- conda install tiledb=2.2\n", 914 | "- conda install poppler\n", 915 | "\n", 916 | "When you have this you can hopefully vectorize the detected masks quite easily." 
917 | ] 918 | }, 919 | { 920 | "cell_type": "code", 921 | "execution_count": null, 922 | "metadata": {}, 923 | "outputs": [], 924 | "source": [ 925 | "def get_fname_from_path(path):\n", 926 | " \"\"\"\n", 927 | " Given a path, returns the filename after the last frontslash character.\n", 928 | " \"\"\"\n", 929 | " return path.rsplit('/', 1)[-1]\n", 930 | "\n", 931 | "def get_fname_no_extension(path):\n", 932 | " \"\"\"\n", 933 | " Given a path, returns the filename without its extension.\n", 934 | " \"\"\"\n", 935 | " filename, extension = os.path.splitext(path)\n", 936 | " return filename" 937 | ] 938 | }, 939 | { 940 | "cell_type": "code", 941 | "execution_count": null, 942 | "metadata": {}, 943 | "outputs": [], 944 | "source": [ 945 | "import osgeo\n", 946 | "from osgeo import gdal\n", 947 | "from osgeo import ogr\n", 948 | "print('GDAL version: ', osgeo.gdal.__version__)\n", 949 | "\n", 950 | "# Choose which image to vectorize\n", 951 | "n = 0\n", 952 | "\n", 953 | "input = regularizations[n]\n", 954 | "print()\n", 955 | "print(\"INPUT: \", input)\n", 956 | "\n", 957 | "# print(get_fname_no_extension(input))\n", 958 | "\n", 959 | "# out\n", 960 | "output = get_fname_from_path(get_fname_no_extension(input)) + \".gpkg\"\n", 961 | "print(\"OUTPUT: \", output)\n", 962 | "\n", 963 | "# Open image with GDAl driver\n", 964 | "ds = gdal.Open(input)\n", 965 | "# Get the band\n", 966 | "band = ds.GetRasterBand(1)\n", 967 | "\n", 968 | "# Create the output shapefile\n", 969 | "driver = ogr.GetDriverByName(\"GPKG\")\n", 970 | "out_ds = driver.CreateDataSource(output)\n", 971 | "out_layer = out_ds.CreateLayer(output, geom_type=ogr.wkbPolygon)\n", 972 | "\n", 973 | "# Add a field to the layer to store the pixel values\n", 974 | "field_defn = ogr.FieldDefn(\"Pix_Value\", ogr.OFTInteger)\n", 975 | "out_layer.CreateField(field_defn)\n", 976 | "\n", 977 | "# Polygonize the PNG file\n", 978 | "gdal.Polygonize(band, None, out_layer, 0, [], callback=None)\n", 979 | "\n", 980 | "# Close the input and output files\n", 981 | "out_ds = None\n", 982 | "ds = None" 983 | ] 984 | }, 985 | { 986 | "attachments": {}, 987 | "cell_type": "markdown", 988 | "metadata": {}, 989 | "source": [ 990 | "For the builing detection case we need to only keep the vectors with pixel value 255. Easiest solution is to use: Extract by attribute. The Python solution with GDAL can be found below." 
991 | ] 992 | }, 993 | { 994 | "cell_type": "code", 995 | "execution_count": null, 996 | "metadata": {}, 997 | "outputs": [], 998 | "source": [ 999 | "# ogr2ogr -where ID=\"1\" outfile.gpkg infile.\n", 1000 | "\n", 1001 | "# RUN from the command line inside Ubuntu\n", 1002 | "!ogr2ogr -where Pix_Value=\"255\" bergen_-5943_1104B.gpkg bergen_-5943_1104.gpkg" 1003 | ] 1004 | } 1005 | ], 1006 | "metadata": { 1007 | "accelerator": "TPU", 1008 | "colab": { 1009 | "provenance": [] 1010 | }, 1011 | "gpuClass": "premium", 1012 | "kernelspec": { 1013 | "display_name": "torch", 1014 | "language": "python", 1015 | "name": "python3" 1016 | }, 1017 | "language_info": { 1018 | "codemirror_mode": { 1019 | "name": "ipython", 1020 | "version": 3 1021 | }, 1022 | "file_extension": ".py", 1023 | "mimetype": "text/x-python", 1024 | "name": "python", 1025 | "nbconvert_exporter": "python", 1026 | "pygments_lexer": "ipython3", 1027 | "version": "3.10.9" 1028 | }, 1029 | "vscode": { 1030 | "interpreter": { 1031 | "hash": "5614dd747bc595cf94d4c937a609d8df6c75b545807dd5ca7f02df8b67f4ea7c" 1032 | } 1033 | } 1034 | }, 1035 | "nbformat": 4, 1036 | "nbformat_minor": 0 1037 | } 1038 | -------------------------------------------------------------------------------- /06-evaluate.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "### Notebook to evaluate results" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "metadata": {}, 15 | "outputs": [], 16 | "source": [ 17 | "import os\n", 18 | "import cv2\n", 19 | "import matplotlib.pyplot as plt\n", 20 | "from imutils import paths\n", 21 | "import time\n", 22 | "import glob\n", 23 | "\n", 24 | "import tifffile as tiff\n", 25 | "import numpy as np\n", 26 | "from PIL import Image\n", 27 | "\n", 28 | "import numpy as np\n", 29 | "from sklearn.metrics import jaccard_score" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 2, 35 | "metadata": {}, 36 | "outputs": [ 37 | { 38 | "name": "stdout", 39 | "output_type": "stream", 40 | "text": [ 41 | "# TEST IMAGES: 1368\n", 42 | "# PREDICTIONS: 1368\n", 43 | "# REGULARIZATIONS: 1368\n" 44 | ] 45 | } 46 | ], 47 | "source": [ 48 | "# CONFIGURE PATHS\n", 49 | "GD_PATH = os.getcwd() + \"/\"\n", 50 | "PLOT_PATH = GD_PATH + \"plots/\"\n", 51 | "\n", 52 | "# TEST\n", 53 | "# Task 1: \n", 54 | "TEST_IMG_DIR = \"/home/shymon/datasets/mapai_full/task1_test/images/\"\n", 55 | "TEST_MASK_DIR = \"/home/shymon/datasets/mapai_full/task1_test/masks/\"\n", 56 | "\n", 57 | "# TEST\n", 58 | "test_images = sorted(list(paths.list_images(TEST_IMG_DIR)))\n", 59 | "test_masks = sorted(list(paths.list_images(TEST_MASK_DIR)))\n", 60 | "\n", 61 | "PREDICTIONS_DIR = GD_PATH + \"predictions/\"\n", 62 | "REGULARIZATION_DIR = GD_PATH + \"regularizations/\"\n", 63 | "\n", 64 | "# read predictions\n", 65 | "predictions = glob.glob(PREDICTIONS_DIR + \"*.tif\")\n", 66 | "predictions.sort()\n", 67 | "\n", 68 | "# read regularizations\n", 69 | "regularizations = glob.glob(REGULARIZATION_DIR + \"*.tif\")\n", 70 | "regularizations.sort()\n", 71 | "\n", 72 | "print(\"# TEST IMAGES: \", len(test_images))\n", 73 | "print(\"# PREDICTIONS: \", len(predictions))\n", 74 | "print(\"# REGULARIZATIONS: \", len(predictions))\n", 75 | "\n", 76 | "# Project Regularization directory\n", 77 | "projectRegDir = GD_PATH + \"projectRegularization\" + \"/\"\n", 78 | "\n", 79 | "ptw = 
projectRegDir + \"pretrained_weights\" + \"/\"\n", 80 | "\n", 81 | "# GET THE PATHS FOR TRAINED GAN MODELS\n", 82 | "ENCODER = ptw + \"E140000_e1\"\n", 83 | "GENERATOR = ptw + \"E140000_net\"\n", 84 | "\n", 85 | "# print(ENCODER)\n", 86 | "# print(GENERATOR)" 87 | ] 88 | }, 89 | { 90 | "attachments": {}, 91 | "cell_type": "markdown", 92 | "metadata": {}, 93 | "source": [ 94 | "### Calculate Intersection over Union on the test set" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": 3, 100 | "metadata": {}, 101 | "outputs": [], 102 | "source": [ 103 | "def iou(gt_mask, pred_mask):\n", 104 | "\n", 105 | " \"\"\"\n", 106 | " Calculates the intersection over union (BIoU) between two binary semantic segmentation masks.\n", 107 | " \n", 108 | " Arguments:\n", 109 | " mask1 -- a 2D numpy array representing the first mask\n", 110 | " mask2 -- a 2D numpy array representing the second mask\n", 111 | " \n", 112 | " Returns:\n", 113 | " iou -- a float representing the BIoU between the two masks\n", 114 | " \"\"\"\n", 115 | "\n", 116 | " intersection = np.logical_and(gt_mask, pred_mask).sum()\n", 117 | " union = np.logical_or(gt_mask, pred_mask).sum()\n", 118 | " iou_score = intersection / union if union != 0 else np.nan\n", 119 | "\n", 120 | " return iou_score\n", 121 | "\n", 122 | "def biou(segA, segB, boundary_width=1):\n", 123 | " \"\"\"\n", 124 | " Calculate the Boundary Intersection over Union (BIoU) metric between two binary segmentation masks.\n", 125 | "\n", 126 | " Parameters:\n", 127 | " segA (numpy array): A 2-dimensional binary numpy array representing the first segmentation mask.\n", 128 | " segB (numpy array): A 2-dimensional binary numpy array representing the second segmentation mask.\n", 129 | " boundary_width (int): The width of the boundary region to be included in the calculation (default is 1).\n", 130 | "\n", 131 | " Returns:\n", 132 | " float: The BIoU metric between the two segmentation masks.\n", 133 | " \"\"\"\n", 134 | "\n", 135 | " # Compute the boundaries of the segmentation masks\n", 136 | " boundaryA = np.zeros_like(segA)\n", 137 | " boundaryA[:,boundary_width:-boundary_width] = segA[:,boundary_width:-boundary_width] ^ segA[:, :-2*boundary_width] ^ segA[:, 2*boundary_width:]\n", 138 | " boundaryA[boundary_width:-boundary_width,:] = boundaryA[boundary_width:-boundary_width,:] ^ segA[:-2*boundary_width,:] ^ segA[2*boundary_width:,:]\n", 139 | "\n", 140 | " boundaryB = np.zeros_like(segB)\n", 141 | " boundaryB[:,boundary_width:-boundary_width] = segB[:,boundary_width:-boundary_width] ^ segB[:, :-2*boundary_width] ^ segB[:, 2*boundary_width:]\n", 142 | " boundaryB[boundary_width:-boundary_width,:] = boundaryB[boundary_width:-boundary_width,:] ^ segB[:-2*boundary_width,:] ^ segB[2*boundary_width:,:]\n", 143 | "\n", 144 | " # Compute the coordinates of the intersection boundary\n", 145 | " intersection_boundary = boundaryA & boundaryB\n", 146 | "\n", 147 | " # Compute the coordinates of the union boundary\n", 148 | " union_boundary = boundaryA | boundaryB\n", 149 | "\n", 150 | " # Compute the area of intersection boundary\n", 151 | " intersection_boundary_area = np.count_nonzero(intersection_boundary)\n", 152 | "\n", 153 | " # Compute the area of union boundary\n", 154 | " union_boundary_area = np.count_nonzero(union_boundary)\n", 155 | "\n", 156 | " # Compute the intersection and union of the interior regions\n", 157 | " intersection = np.logical_and(segA, segB)\n", 158 | " union = np.logical_or(segA, segB)\n", 159 | "\n", 160 | " # Compute the area of 
intersection and union of the interior regions\n", 161 | " intersection_area = np.count_nonzero(intersection)\n", 162 | " union_area = np.count_nonzero(union)\n", 163 | "\n", 164 | " # Compute the BIoU metric\n", 165 | " biou = (intersection_area + intersection_boundary_area) / (union_area + union_boundary_area + 1e-6)\n", 166 | "\n", 167 | " return biou\n", 168 | "\n", 169 | "# To read the original test images from MapAI\n", 170 | "def test2arr(tif_img):\n", 171 | " img = tiff.imread(tif_img)\n", 172 | " arr = np.array(img)\n", 173 | " return arr\n", 174 | "\n", 175 | "# To read the predictions and regularizations\n", 176 | "def pr2arr(tif_img):\n", 177 | " img = tiff.imread(tif_img)\n", 178 | " img = img / 255\n", 179 | " img = cv2.resize(img, (500, 500))\n", 180 | " arr = np.array(img)\n", 181 | " arr = arr.astype(np.uint8)\n", 182 | " return arr\n" 183 | ] 184 | }, 185 | { 186 | "cell_type": "markdown", 187 | "metadata": {}, 188 | "source": [ 189 | "### Evaluation on single image by choice" 190 | ] 191 | }, 192 | { 193 | "attachments": {}, 194 | "cell_type": "markdown", 195 | "metadata": {}, 196 | "source": [ 197 | "(1) Without regularization" 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": 4, 203 | "metadata": {}, 204 | "outputs": [ 205 | { 206 | "name": "stdout", 207 | "output_type": "stream", 208 | "text": [ 209 | "Jaccard score or IoU with Scikit-learn: 0.7001\n", 210 | "Jaccard score or IoU with manual function: 0.7001\n", 211 | "Boundary Intersection over Union: 0.6959\n" 212 | ] 213 | } 214 | ], 215 | "source": [ 216 | "n = 900\n", 217 | "\n", 218 | "jaccard_sklearn = jaccard_score(test2arr(test_masks[n]), pr2arr(predictions[n]), average='micro')\n", 219 | "print(\"Jaccard score or IoU with Scikit-learn: \", round(jaccard_sklearn, 4))\n", 220 | "\n", 221 | "iou_man = iou(test2arr(test_masks[n]), pr2arr(predictions[n]))\n", 222 | "print(\"Jaccard score or IoU with manual function: \", round(iou_man, 4))\n", 223 | "\n", 224 | "biou_man = biou(test2arr(test_masks[n]), pr2arr(predictions[n]))\n", 225 | "print(\"Boundary Intersection over Union: \", round(biou_man, 4))" 226 | ] 227 | }, 228 | { 229 | "attachments": {}, 230 | "cell_type": "markdown", 231 | "metadata": {}, 232 | "source": [ 233 | "(2) With regularization" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": 5, 239 | "metadata": {}, 240 | "outputs": [ 241 | { 242 | "name": "stdout", 243 | "output_type": "stream", 244 | "text": [ 245 | "Jaccard score or IoU with Scikit-learn: 0.6841\n", 246 | "Jaccard score or IoU with manual function: 0.6841\n", 247 | "Boundary Intersection over Union: 0.6801\n" 248 | ] 249 | } 250 | ], 251 | "source": [ 252 | "n = 900\n", 253 | "\n", 254 | "jaccard_sklearn = jaccard_score(test2arr(test_masks[n]), pr2arr(regularizations[n]), average='micro')\n", 255 | "print(\"Jaccard score or IoU with Scikit-learn: \", round(jaccard_sklearn, 4))\n", 256 | "\n", 257 | "iou_man = iou(test2arr(test_masks[n]), pr2arr(regularizations[n]))\n", 258 | "print(\"Jaccard score or IoU with manual function: \", round(iou_man, 4))\n", 259 | "\n", 260 | "biou_man = biou(test2arr(test_masks[n]), pr2arr(regularizations[n]))\n", 261 | "print(\"Boundary Intersection over Union: \", round(biou_man, 4))" 262 | ] 263 | }, 264 | { 265 | "attachments": {}, 266 | "cell_type": "markdown", 267 | "metadata": {}, 268 | "source": [ 269 | "### Evaluation on entire MapAI dataset" 270 | ] 271 | }, 272 | { 273 | "attachments": {}, 274 | "cell_type": "markdown", 275 | "metadata": {}, 
276 | "source": [ 277 | "(1) Without regularization" 278 | ] 279 | }, 280 | { 281 | "cell_type": "code", 282 | "execution_count": 6, 283 | "metadata": {}, 284 | "outputs": [ 285 | { 286 | "name": "stdout", 287 | "output_type": "stream", 288 | "text": [ 289 | "Evaluation without regularization: \n", 290 | "Mean IoU for Task 1: 0.3995\n", 291 | "Mean BIoU for Task 1: 0.3766\n", 292 | "S metric for Task 1: 0.3881\n" 293 | ] 294 | } 295 | ], 296 | "source": [ 297 | "iou_mapai = np.array([])\n", 298 | "biou_mapai = np.array([])\n", 299 | "\n", 300 | "for n in range(len(test_masks)):\n", 301 | " \n", 302 | " # Calculate metrics\n", 303 | "\n", 304 | " # IoU\n", 305 | " iou_single = iou(test2arr(test_masks[n]), pr2arr(predictions[n]))\n", 306 | "\n", 307 | " # BIoU\n", 308 | " biou_single = biou(test2arr(test_masks[n]), pr2arr(predictions[n]))\n", 309 | "\n", 310 | " # Append to whole array\n", 311 | " iou_mapai = np.append(iou_mapai, iou_single)\n", 312 | " biou_mapai = np.append(biou_mapai, biou_single)\n", 313 | "\n", 314 | "#iou_mapai = iou_mapai[iou_mapai != 0]\n", 315 | "#biou_mapai = biou_mapai[biou_mapai != 0]\n", 316 | "\n", 317 | "print(\"Evaluation without regularization: \")\n", 318 | "print(\"Mean IoU for Task 1: \", round(np.nanmean(iou_mapai), 4))\n", 319 | "print(\"Mean BIoU for Task 1: \", round(np.nanmean(biou_mapai), 4))\n", 320 | "print(\"S metric for Task 1: \", round(((np.nanmean(biou_mapai) + np.nanmean(iou_mapai)) / 2 ), 4))" 321 | ] 322 | }, 323 | { 324 | "attachments": {}, 325 | "cell_type": "markdown", 326 | "metadata": {}, 327 | "source": [ 328 | "(2) With regularization" 329 | ] 330 | }, 331 | { 332 | "cell_type": "code", 333 | "execution_count": 7, 334 | "metadata": {}, 335 | "outputs": [ 336 | { 337 | "name": "stdout", 338 | "output_type": "stream", 339 | "text": [ 340 | "Mean IoU for Task 1: 0.4017\n", 341 | "Mean BIoU for Task 1: 0.378\n", 342 | "S metric for Task 1: 0.3898\n" 343 | ] 344 | } 345 | ], 346 | "source": [ 347 | "iou_mapai = np.array([])\n", 348 | "biou_mapai = np.array([])\n", 349 | "\n", 350 | "for n in range(len(test_masks)):\n", 351 | " \n", 352 | " # Calculate metrics\n", 353 | "\n", 354 | " # IoU\n", 355 | " iou_single = iou(test2arr(test_masks[n]), pr2arr(regularizations[n]))\n", 356 | "\n", 357 | " # BIoU\n", 358 | " biou_single = biou(test2arr(test_masks[n]), pr2arr(regularizations[n]))\n", 359 | "\n", 360 | " # Append to whole array\n", 361 | " iou_mapai = np.append(iou_mapai, iou_single)\n", 362 | " biou_mapai = np.append(biou_mapai, biou_single)\n", 363 | "\n", 364 | "#iou_mapai = iou_mapai[iou_mapai != 0]\n", 365 | "#biou_mapai = biou_mapai[biou_mapai != 0]\n", 366 | "\n", 367 | "print(\"Mean IoU for Task 1: \", round(np.nanmean(iou_mapai), 4))\n", 368 | "print(\"Mean BIoU for Task 1: \", round(np.nanmean(biou_mapai), 4))\n", 369 | "print(\"S metric for Task 1: \", round(((np.nanmean(biou_mapai) + np.nanmean(iou_mapai)) / 2 ), 4))" 370 | ] 371 | }, 372 | { 373 | "cell_type": "code", 374 | "execution_count": null, 375 | "metadata": {}, 376 | "outputs": [], 377 | "source": [] 378 | } 379 | ], 380 | "metadata": { 381 | "kernelspec": { 382 | "display_name": "torch", 383 | "language": "python", 384 | "name": "python3" 385 | }, 386 | "language_info": { 387 | "codemirror_mode": { 388 | "name": "ipython", 389 | "version": 3 390 | }, 391 | "file_extension": ".py", 392 | "mimetype": "text/x-python", 393 | "name": "python", 394 | "nbconvert_exporter": "python", 395 | "pygments_lexer": "ipython3", 396 | "version": "3.10.9" 397 | }, 398 | 
"orig_nbformat": 4, 399 | "vscode": { 400 | "interpreter": { 401 | "hash": "5614dd747bc595cf94d4c937a609d8df6c75b545807dd5ca7f02df8b67f4ea7c" 402 | } 403 | } 404 | }, 405 | "nbformat": 4, 406 | "nbformat_minor": 2 407 | } 408 | -------------------------------------------------------------------------------- /07-vectorize-building-footprint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "### Vectorize the predicted building footprints with GDAL" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": null, 14 | "metadata": {}, 15 | "outputs": [], 16 | "source": [ 17 | "import osgeo\n", 18 | "from osgeo import gdal\n", 19 | "from osgeo import ogr\n", 20 | "print('GDAL version: ', osgeo.gdal.__version__)\n", 21 | "\n", 22 | "# PROVIDE SEGMENTATION PATH\n", 23 | "input = \"INSERT PATH HERE\"\n", 24 | "\n", 25 | "# PROVIDE OUTPUT VECTOR FILE\n", 26 | "output = \"INSERT PATH HERE\"\n", 27 | "\n", 28 | "# Open image with GDAl driver\n", 29 | "ds = gdal.Open(input)\n", 30 | "\n", 31 | "# Get GeoTransform\n", 32 | "ds.SetGeoTransform([0,1,0,0,0,-1])\n", 33 | "\n", 34 | "# Get the band\n", 35 | "band = ds.GetRasterBand(1)\n", 36 | "# Create the output shapefile\n", 37 | "driver = ogr.GetDriverByName(\"GeoPackage\")\n", 38 | "out_ds = driver.CreateDataSource(output)\n", 39 | "out_layer = out_ds.CreateLayer(output, geom_type=ogr.wkbPolygon)\n", 40 | "\n", 41 | "# Add a field to the layer to store the pixel values\n", 42 | "field_defn = ogr.FieldDefn(\"Pix_Value\", ogr.OFTInteger)\n", 43 | "out_layer.CreateField(field_defn)\n", 44 | "\n", 45 | "# Polygonize the PNG file\n", 46 | "gdal.Polygonize(band, None, out_layer, 0, [], callback=None)\n", 47 | "\n", 48 | "# Close the input and output files\n", 49 | "out_ds = None\n", 50 | "ds = None" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": null, 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "# ogr2ogr -where ID=\"1\" OUTFILE.gpkg INFILE.gpkg\n", 60 | "\n", 61 | "# RUN from the command line inside Ubuntu\n", 62 | "# Change name of input and output according to user needs\n", 63 | "\n", 64 | "!ogr2ogr -where Pix_Value=\"255\" bergen_-5943_1104B.gpkg bergen_-5943_1104.gpkg" 65 | ] 66 | } 67 | ], 68 | "metadata": { 69 | "kernelspec": { 70 | "display_name": "Python 3", 71 | "language": "python", 72 | "name": "python3" 73 | }, 74 | "language_info": { 75 | "codemirror_mode": { 76 | "name": "ipython", 77 | "version": 3 78 | }, 79 | "file_extension": ".py", 80 | "mimetype": "text/x-python", 81 | "name": "python", 82 | "nbconvert_exporter": "python", 83 | "pygments_lexer": "ipython3", 84 | "version": "3.10.6" 85 | }, 86 | "orig_nbformat": 4, 87 | "vscode": { 88 | "interpreter": { 89 | "hash": "e7370f93d1d0cde622a1f8e1c04877d8463912d04d973331ad4851f04de6915a" 90 | } 91 | } 92 | }, 93 | "nbformat": 4, 94 | "nbformat_minor": 2 95 | } 96 | -------------------------------------------------------------------------------- /DATASET/info.txt: -------------------------------------------------------------------------------- 1 | STORE THE MAPAI DATASET IN THIS FOLDER. 
2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## mapAI-regularization 2 | 3 | The repository stores the code for our work presented at FOSS4G 2023 with the title: **AN END-TO-END DEEP LEARNING WORKFLOW FOR BUILDING SEGMENTATION, 4 | BOUNDARY REGULARIZATION AND VECTORIZATION OF BUILDING FOOTPRINTS.** 5 | 6 | ## Introduction 7 | 8 | The purpose of our research is to develop an end-to-end workflow for accurate segmentation of building footprints, consisting of three major steps: 9 | - (1) binary semantic segmentation with a CNN, 10 | - (2) applying building boundary regularization and 11 | - (3) vectorization. 12 | 13 | The dataset used for building segmentation is the NORA MapAI: Precision in Building Segmentation dataset. We have developed an implementation for building footprint segmentation. Our approach extends the segmentation by applying projectRegularization (Zorzi and Fraundorfer, 2019; Zorzi et al., 2021) to the semantic segmentation output. The link to the official repository can be accessed here: https://github.com/zorzi-s/projectRegularization. Note that this is already included in our repository. 14 | 15 | ## MapAI dataset 16 | 17 | The original MapAI: Precision in Building Segmentation dataset can be downloaded manually from Huggingface: https://huggingface.co/datasets/sjyhne/mapai_training_data 18 | 19 | or by running our first notebook. 20 | 21 | ## Installation 22 | 23 | ``` 24 | git clone https://github.com/s1m0nS/mapAI-regularization.git 25 | cd mapAI-regularization 26 | conda create --name mapai python=3.10 27 | conda activate mapai 28 | pip install -r requirements.txt 29 | ``` 30 | Installing GDAL inside a conda environment can be tricky. Follow the steps below according to your OS. 31 | 32 | **Linux:** 33 | 34 | ``` 35 | sudo apt-get update && sudo apt upgrade -y && sudo apt autoremove 36 | sudo apt-get install -y cdo nco gdal-bin libgdal-dev 37 | python -m pip install --upgrade pip setuptools wheel 38 | python -m pip install --upgrade gdal 39 | conda install -c conda-forge libgdal 40 | conda install -c conda-forge libgdal 41 | conda install -c conda-forge gdal 42 | conda install tiledb=2.2 43 | conda install poppler 44 | ``` 45 | 46 | **Windows:** 47 | 48 | Get the appropriate .whl file for your Python version from: https://www.lfd.uci.edu/~gohlke/pythonlibs/#gdal 49 | For Python 3.10, use either: 50 | - GDAL‑3.4.3‑cp310‑cp310‑win_amd64.whl or 51 | - GDAL‑3.4.3‑cp310‑cp310‑win32.whl. 52 | 53 | Then install the appropriate one as: 54 | ``` 55 | conda activate mapai 56 | python -m pip install C:\Users\...\GDAL‑3.4.3‑cp310‑cp310‑win_amd64.whl 57 | ``` 58 | 59 | Run our Jupyter Notebooks and enjoy the process. If you encounter errors, post an issue. 60 | 61 | Feedback and new ideas are welcome. 
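A quick sanity check that the GDAL Python bindings are importable after the steps above; this is only a minimal check and assumes the `mapai` environment name used in this README:

```
conda activate mapai
python -c "from osgeo import gdal; print(gdal.__version__)"
```

If a version string is printed without an ImportError, the vectorization notebook (07) should be able to import GDAL.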
62 | 63 | 64 | 65 | -------------------------------------------------------------------------------- /models/UNetFormer_model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from einops import rearrange, repeat 5 | 6 | from timm.models.layers import DropPath, to_2tuple, trunc_normal_ 7 | import timm 8 | 9 | 10 | class ConvBNReLU(nn.Sequential): 11 | def __init__(self, in_channels, out_channels, kernel_size=3, dilation=1, stride=1, norm_layer=nn.BatchNorm2d, bias=False): 12 | super(ConvBNReLU, self).__init__( 13 | nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, bias=bias, 14 | dilation=dilation, stride=stride, padding=((stride - 1) + dilation * (kernel_size - 1)) // 2), 15 | norm_layer(out_channels), 16 | nn.ReLU6() 17 | ) 18 | 19 | 20 | class ConvBN(nn.Sequential): 21 | def __init__(self, in_channels, out_channels, kernel_size=3, dilation=1, stride=1, norm_layer=nn.BatchNorm2d, bias=False): 22 | super(ConvBN, self).__init__( 23 | nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, bias=bias, 24 | dilation=dilation, stride=stride, padding=((stride - 1) + dilation * (kernel_size - 1)) // 2), 25 | norm_layer(out_channels) 26 | ) 27 | 28 | 29 | class Conv(nn.Sequential): 30 | def __init__(self, in_channels, out_channels, kernel_size=3, dilation=1, stride=1, bias=False): 31 | super(Conv, self).__init__( 32 | nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, bias=bias, 33 | dilation=dilation, stride=stride, padding=((stride - 1) + dilation * (kernel_size - 1)) // 2) 34 | ) 35 | 36 | 37 | class SeparableConvBNReLU(nn.Sequential): 38 | def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, dilation=1, 39 | norm_layer=nn.BatchNorm2d): 40 | super(SeparableConvBNReLU, self).__init__( 41 | nn.Conv2d(in_channels, in_channels, kernel_size, stride=stride, dilation=dilation, 42 | padding=((stride - 1) + dilation * (kernel_size - 1)) // 2, 43 | groups=in_channels, bias=False), 44 | norm_layer(out_channels), 45 | nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False), 46 | nn.ReLU6() 47 | ) 48 | 49 | 50 | class SeparableConvBN(nn.Sequential): 51 | def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, dilation=1, 52 | norm_layer=nn.BatchNorm2d): 53 | super(SeparableConvBN, self).__init__( 54 | nn.Conv2d(in_channels, in_channels, kernel_size, stride=stride, dilation=dilation, 55 | padding=((stride - 1) + dilation * (kernel_size - 1)) // 2, 56 | groups=in_channels, bias=False), 57 | norm_layer(out_channels), 58 | nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False) 59 | ) 60 | 61 | 62 | class SeparableConv(nn.Sequential): 63 | def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, dilation=1): 64 | super(SeparableConv, self).__init__( 65 | nn.Conv2d(in_channels, in_channels, kernel_size, stride=stride, dilation=dilation, 66 | padding=((stride - 1) + dilation * (kernel_size - 1)) // 2, 67 | groups=in_channels, bias=False), 68 | nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False) 69 | ) 70 | 71 | 72 | class Mlp(nn.Module): 73 | def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.ReLU6, drop=0.): 74 | super().__init__() 75 | out_features = out_features or in_features 76 | hidden_features = hidden_features or in_features 77 | self.fc1 = nn.Conv2d(in_features, hidden_features, 1, 1, 0, bias=True) 78 | self.act = act_layer() 79 | self.fc2 = 
nn.Conv2d(hidden_features, out_features, 1, 1, 0, bias=True) 80 | self.drop = nn.Dropout(drop, inplace=True) 81 | 82 | def forward(self, x): 83 | x = self.fc1(x) 84 | x = self.act(x) 85 | x = self.drop(x) 86 | x = self.fc2(x) 87 | x = self.drop(x) 88 | return x 89 | 90 | 91 | class GlobalLocalAttention(nn.Module): 92 | def __init__(self, 93 | dim=256, 94 | num_heads=16, 95 | qkv_bias=False, 96 | window_size=8, 97 | relative_pos_embedding=True 98 | ): 99 | super().__init__() 100 | self.num_heads = num_heads 101 | head_dim = dim // self.num_heads 102 | self.scale = head_dim ** -0.5 103 | self.ws = window_size 104 | 105 | self.qkv = Conv(dim, 3*dim, kernel_size=1, bias=qkv_bias) 106 | self.local1 = ConvBN(dim, dim, kernel_size=3) 107 | self.local2 = ConvBN(dim, dim, kernel_size=1) 108 | self.proj = SeparableConvBN(dim, dim, kernel_size=window_size) 109 | 110 | self.attn_x = nn.AvgPool2d(kernel_size=(window_size, 1), stride=1, padding=(window_size//2 - 1, 0)) 111 | self.attn_y = nn.AvgPool2d(kernel_size=(1, window_size), stride=1, padding=(0, window_size//2 - 1)) 112 | 113 | self.relative_pos_embedding = relative_pos_embedding 114 | 115 | if self.relative_pos_embedding: 116 | # define a parameter table of relative position bias 117 | self.relative_position_bias_table = nn.Parameter( 118 | torch.zeros((2 * window_size - 1) * (2 * window_size - 1), num_heads)) # 2*Wh-1 * 2*Ww-1, nH 119 | 120 | # get pair-wise relative position index for each token inside the window 121 | coords_h = torch.arange(self.ws) 122 | coords_w = torch.arange(self.ws) 123 | coords = torch.stack(torch.meshgrid([coords_h, coords_w])) # 2, Wh, Ww 124 | coords_flatten = torch.flatten(coords, 1) # 2, Wh*Ww 125 | relative_coords = coords_flatten[:, :, None] - coords_flatten[:, None, :] # 2, Wh*Ww, Wh*Ww 126 | relative_coords = relative_coords.permute(1, 2, 0).contiguous() # Wh*Ww, Wh*Ww, 2 127 | relative_coords[:, :, 0] += self.ws - 1 # shift to start from 0 128 | relative_coords[:, :, 1] += self.ws - 1 129 | relative_coords[:, :, 0] *= 2 * self.ws - 1 130 | relative_position_index = relative_coords.sum(-1) # Wh*Ww, Wh*Ww 131 | self.register_buffer("relative_position_index", relative_position_index) 132 | 133 | trunc_normal_(self.relative_position_bias_table, std=.02) 134 | 135 | def pad(self, x, ps): 136 | _, _, H, W = x.size() 137 | if W % ps != 0: 138 | x = F.pad(x, (0, ps - W % ps), mode='reflect') 139 | if H % ps != 0: 140 | x = F.pad(x, (0, 0, 0, ps - H % ps), mode='reflect') 141 | return x 142 | 143 | def pad_out(self, x): 144 | x = F.pad(x, pad=(0, 1, 0, 1), mode='reflect') 145 | return x 146 | 147 | def forward(self, x): 148 | B, C, H, W = x.shape 149 | 150 | local = self.local2(x) + self.local1(x) 151 | 152 | x = self.pad(x, self.ws) 153 | B, C, Hp, Wp = x.shape 154 | qkv = self.qkv(x) 155 | 156 | q, k, v = rearrange(qkv, 'b (qkv h d) (hh ws1) (ww ws2) -> qkv (b hh ww) h (ws1 ws2) d', h=self.num_heads, 157 | d=C//self.num_heads, hh=Hp//self.ws, ww=Wp//self.ws, qkv=3, ws1=self.ws, ws2=self.ws) 158 | 159 | dots = (q @ k.transpose(-2, -1)) * self.scale 160 | 161 | if self.relative_pos_embedding: 162 | relative_position_bias = self.relative_position_bias_table[self.relative_position_index.view(-1)].view( 163 | self.ws * self.ws, self.ws * self.ws, -1) # Wh*Ww,Wh*Ww,nH 164 | relative_position_bias = relative_position_bias.permute(2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww 165 | dots += relative_position_bias.unsqueeze(0) 166 | 167 | attn = dots.softmax(dim=-1) 168 | attn = attn @ v 169 | 170 | attn = rearrange(attn, '(b hh 
ww) h (ws1 ws2) d -> b (h d) (hh ws1) (ww ws2)', h=self.num_heads, 171 | d=C//self.num_heads, hh=Hp//self.ws, ww=Wp//self.ws, ws1=self.ws, ws2=self.ws) 172 | 173 | attn = attn[:, :, :H, :W] 174 | 175 | out = self.attn_x(F.pad(attn, pad=(0, 0, 0, 1), mode='reflect')) + \ 176 | self.attn_y(F.pad(attn, pad=(0, 1, 0, 0), mode='reflect')) 177 | 178 | out = out + local 179 | out = self.pad_out(out) 180 | out = self.proj(out) 181 | # print(out.size()) 182 | out = out[:, :, :H, :W] 183 | 184 | return out 185 | 186 | 187 | class Block(nn.Module): 188 | def __init__(self, dim=256, num_heads=16, mlp_ratio=4., qkv_bias=False, drop=0., attn_drop=0., 189 | drop_path=0., act_layer=nn.ReLU6, norm_layer=nn.BatchNorm2d, window_size=8): 190 | super().__init__() 191 | self.norm1 = norm_layer(dim) 192 | self.attn = GlobalLocalAttention(dim, num_heads=num_heads, qkv_bias=qkv_bias, window_size=window_size) 193 | 194 | self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity() 195 | mlp_hidden_dim = int(dim * mlp_ratio) 196 | self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, out_features=dim, act_layer=act_layer, drop=drop) 197 | self.norm2 = norm_layer(dim) 198 | 199 | def forward(self, x): 200 | 201 | x = x + self.drop_path(self.attn(self.norm1(x))) 202 | x = x + self.drop_path(self.mlp(self.norm2(x))) 203 | 204 | return x 205 | 206 | 207 | class WF(nn.Module): 208 | def __init__(self, in_channels=128, decode_channels=128, eps=1e-8): 209 | super(WF, self).__init__() 210 | self.pre_conv = Conv(in_channels, decode_channels, kernel_size=1) 211 | 212 | self.weights = nn.Parameter(torch.ones(2, dtype=torch.float32), requires_grad=True) 213 | self.eps = eps 214 | self.post_conv = ConvBNReLU(decode_channels, decode_channels, kernel_size=3) 215 | 216 | def forward(self, x, res): 217 | x = F.interpolate(x, scale_factor=2, mode='bilinear', align_corners=False) 218 | weights = nn.ReLU()(self.weights) 219 | fuse_weights = weights / (torch.sum(weights, dim=0) + self.eps) 220 | x = fuse_weights[0] * self.pre_conv(res) + fuse_weights[1] * x 221 | x = self.post_conv(x) 222 | return x 223 | 224 | 225 | class FeatureRefinementHead(nn.Module): 226 | def __init__(self, in_channels=64, decode_channels=64): 227 | super().__init__() 228 | self.pre_conv = Conv(in_channels, decode_channels, kernel_size=1) 229 | 230 | self.weights = nn.Parameter(torch.ones(2, dtype=torch.float32), requires_grad=True) 231 | self.eps = 1e-8 232 | self.post_conv = ConvBNReLU(decode_channels, decode_channels, kernel_size=3) 233 | 234 | self.pa = nn.Sequential(nn.Conv2d(decode_channels, decode_channels, kernel_size=3, padding=1, groups=decode_channels), 235 | nn.Sigmoid()) 236 | self.ca = nn.Sequential(nn.AdaptiveAvgPool2d(1), 237 | Conv(decode_channels, decode_channels//16, kernel_size=1), 238 | nn.ReLU6(), 239 | Conv(decode_channels//16, decode_channels, kernel_size=1), 240 | nn.Sigmoid()) 241 | 242 | self.shortcut = ConvBN(decode_channels, decode_channels, kernel_size=1) 243 | self.proj = SeparableConvBN(decode_channels, decode_channels, kernel_size=3) 244 | self.act = nn.ReLU6() 245 | 246 | def forward(self, x, res): 247 | x = F.interpolate(x, scale_factor=2, mode='bilinear', align_corners=False) 248 | weights = nn.ReLU()(self.weights) 249 | fuse_weights = weights / (torch.sum(weights, dim=0) + self.eps) 250 | x = fuse_weights[0] * self.pre_conv(res) + fuse_weights[1] * x 251 | x = self.post_conv(x) 252 | shortcut = self.shortcut(x) 253 | pa = self.pa(x) * x 254 | ca = self.ca(x) * x 255 | x = pa + ca 256 | x = self.proj(x) + 
shortcut 257 | x = self.act(x) 258 | 259 | return x 260 | 261 | 262 | class AuxHead(nn.Module): 263 | 264 | def __init__(self, in_channels=64, num_classes=8): 265 | super().__init__() 266 | self.conv = ConvBNReLU(in_channels, in_channels) 267 | self.drop = nn.Dropout(0.1) 268 | self.conv_out = Conv(in_channels, num_classes, kernel_size=1) 269 | 270 | def forward(self, x, h, w): 271 | feat = self.conv(x) 272 | feat = self.drop(feat) 273 | feat = self.conv_out(feat) 274 | feat = F.interpolate(feat, size=(h, w), mode='bilinear', align_corners=False) 275 | return feat 276 | 277 | 278 | class Decoder(nn.Module): 279 | def __init__(self, 280 | encoder_channels=(64, 128, 256, 512), 281 | decode_channels=64, 282 | dropout=0.1, 283 | window_size=8, 284 | num_classes=6): 285 | super(Decoder, self).__init__() 286 | 287 | self.pre_conv = ConvBN(encoder_channels[-1], decode_channels, kernel_size=1) 288 | self.b4 = Block(dim=decode_channels, num_heads=8, window_size=window_size) 289 | 290 | self.b3 = Block(dim=decode_channels, num_heads=8, window_size=window_size) 291 | self.p3 = WF(encoder_channels[-2], decode_channels) 292 | 293 | self.b2 = Block(dim=decode_channels, num_heads=8, window_size=window_size) 294 | self.p2 = WF(encoder_channels[-3], decode_channels) 295 | 296 | if self.training: 297 | self.up4 = nn.UpsamplingBilinear2d(scale_factor=4) 298 | self.up3 = nn.UpsamplingBilinear2d(scale_factor=2) 299 | self.aux_head = AuxHead(decode_channels, num_classes) 300 | 301 | self.p1 = FeatureRefinementHead(encoder_channels[-4], decode_channels) 302 | 303 | self.segmentation_head = nn.Sequential(ConvBNReLU(decode_channels, decode_channels), 304 | nn.Dropout2d(p=dropout, inplace=True), 305 | Conv(decode_channels, num_classes, kernel_size=1)) 306 | self.init_weight() 307 | 308 | def forward(self, res1, res2, res3, res4, h, w): 309 | if self.training: 310 | x = self.b4(self.pre_conv(res4)) 311 | h4 = self.up4(x) 312 | 313 | x = self.p3(x, res3) 314 | x = self.b3(x) 315 | h3 = self.up3(x) 316 | 317 | x = self.p2(x, res2) 318 | x = self.b2(x) 319 | h2 = x 320 | x = self.p1(x, res1) 321 | x = self.segmentation_head(x) 322 | x = F.interpolate(x, size=(h, w), mode='bilinear', align_corners=False) 323 | 324 | ah = h4 + h3 + h2 325 | ah = self.aux_head(ah, h, w) 326 | 327 | return x, ah 328 | else: 329 | x = self.b4(self.pre_conv(res4)) 330 | x = self.p3(x, res3) 331 | x = self.b3(x) 332 | 333 | x = self.p2(x, res2) 334 | x = self.b2(x) 335 | 336 | x = self.p1(x, res1) 337 | 338 | x = self.segmentation_head(x) 339 | x = F.interpolate(x, size=(h, w), mode='bilinear', align_corners=False) 340 | 341 | return x 342 | 343 | def init_weight(self): 344 | for m in self.children(): 345 | if isinstance(m, nn.Conv2d): 346 | nn.init.kaiming_normal_(m.weight, a=1) 347 | if m.bias is not None: 348 | nn.init.constant_(m.bias, 0) 349 | 350 | 351 | class UNetFormer(nn.Module): 352 | def __init__(self, 353 | decode_channels=64, 354 | dropout=0.5, 355 | backbone_name='swsl_resnet18', 356 | pretrained=True, 357 | window_size=8, 358 | num_classes=1 359 | ): 360 | super().__init__() 361 | 362 | self.backbone = timm.create_model(backbone_name, features_only=True, output_stride=32, 363 | out_indices=(1, 2, 3, 4), pretrained=pretrained) 364 | encoder_channels = self.backbone.feature_info.channels() 365 | 366 | self.decoder = Decoder(encoder_channels, decode_channels, dropout, window_size, num_classes) 367 | 368 | def forward(self, x): 369 | h, w = x.size()[-2:] 370 | res1, res2, res3, res4 = self.backbone(x) 371 | if self.training: 372 | 
x, ah = self.decoder(res1, res2, res3, res4, h, w) 373 | return x 374 | else: 375 | x = self.decoder(res1, res2, res3, res4, h, w) 376 | return x -------------------------------------------------------------------------------- /models/__pycache__/DCSwin_model.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/models/__pycache__/DCSwin_model.cpython-310.pyc -------------------------------------------------------------------------------- /models/__pycache__/FTUNetFormer_model.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/models/__pycache__/FTUNetFormer_model.cpython-310.pyc -------------------------------------------------------------------------------- /models/__pycache__/UNetFormer_model.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/models/__pycache__/UNetFormer_model.cpython-310.pyc -------------------------------------------------------------------------------- /plots/compare-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/plots/compare-1.png -------------------------------------------------------------------------------- /plots/compare-10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/plots/compare-10.png -------------------------------------------------------------------------------- /plots/compare-100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/plots/compare-100.png -------------------------------------------------------------------------------- /plots/compare-1000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/plots/compare-1000.png -------------------------------------------------------------------------------- /plots/compare-1100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/plots/compare-1100.png -------------------------------------------------------------------------------- /plots/compare-1200.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/plots/compare-1200.png -------------------------------------------------------------------------------- /plots/compare-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/plots/compare-2.png -------------------------------------------------------------------------------- /plots/compare-22.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/plots/compare-22.png -------------------------------------------------------------------------------- /plots/compare-250.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/plots/compare-250.png -------------------------------------------------------------------------------- /plots/compare-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/plots/compare-3.png -------------------------------------------------------------------------------- /plots/compare-33.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/plots/compare-33.png -------------------------------------------------------------------------------- /plots/compare-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/plots/compare-4.png -------------------------------------------------------------------------------- /plots/compare-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/plots/compare-5.png -------------------------------------------------------------------------------- /plots/compare-500.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/plots/compare-500.png -------------------------------------------------------------------------------- /plots/compare-600.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/plots/compare-600.png -------------------------------------------------------------------------------- /plots/compare-750.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/plots/compare-750.png -------------------------------------------------------------------------------- /plots/compare-800.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/plots/compare-800.png -------------------------------------------------------------------------------- /plots/compare-900.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/plots/compare-900.png -------------------------------------------------------------------------------- /plots/dcswin-25-epochs.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/plots/dcswin-25-epochs.png -------------------------------------------------------------------------------- /plots/ft-unet-former-25-epochs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/plots/ft-unet-former-25-epochs.png -------------------------------------------------------------------------------- /plots/unet-25-epochs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/plots/unet-25-epochs.png -------------------------------------------------------------------------------- /plots/unet-former-25-epochs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/plots/unet-former-25-epochs.png -------------------------------------------------------------------------------- /predictions/bergen_-5943_1104.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/predictions/bergen_-5943_1104.tif -------------------------------------------------------------------------------- /predictions/bergen_-5944_1104.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/predictions/bergen_-5944_1104.tif -------------------------------------------------------------------------------- /predictions/bergen_-5948_1107.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/predictions/bergen_-5948_1107.tif -------------------------------------------------------------------------------- /predictions/kristiansand_-4712_-1562.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/predictions/kristiansand_-4712_-1562.tif -------------------------------------------------------------------------------- /predictions/kristiansand_-4712_-1563.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/predictions/kristiansand_-4712_-1563.tif -------------------------------------------------------------------------------- /predictions/kristiansand_-4712_-1568.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/predictions/kristiansand_-4712_-1568.tif -------------------------------------------------------------------------------- /predictions/oslo_-3133_244.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/predictions/oslo_-3133_244.tif 
-------------------------------------------------------------------------------- /predictions/tromso_923_11083.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/predictions/tromso_923_11083.tif -------------------------------------------------------------------------------- /predictions/tromso_923_11084.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/predictions/tromso_923_11084.tif -------------------------------------------------------------------------------- /predictions/tromso_923_11086.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/predictions/tromso_923_11086.tif -------------------------------------------------------------------------------- /projectRegularization/INFO.txt: -------------------------------------------------------------------------------- 1 | projectRegularization was downloaded from: 2 | https://github.com/zorzi-s/projectRegularization 3 | 4 | Download and the pretrained_weights from: 5 | https://drive.google.com/drive/folders/1IPrDpvFq9ODW7UtPAJR_T-gGzxDat_uu 6 | 7 | Save them to folder: pretrained_weights 8 | -------------------------------------------------------------------------------- /projectRegularization/LICENSE: -------------------------------------------------------------------------------- 1 | 2 | SOFTWARE LICENSE AGREEMENT 3 | 4 | ICG Software – 2021, all rights reserved, hereinafter "the Software". 5 | 6 | This software has been developed by researchers of ICG (Institute of Computer Graphics and Vision). 7 | 8 | Institute of Computer Graphics and Vision (ICG), Inffeldgasse 16/II, 9 | 8010 Graz, Austria 10 | 11 | ICG holds all the ownership rights on the Software. 12 | 13 | The Software is still being currently developed. It is the ICG’s aim for the Software 14 | to be used by the scientific community so as to test it and, evaluate it so that ICG may improve it. 15 | 16 | For these reasons ICG has decided to distribute the Software. 17 | 18 | ICG grants to the academic user, a free of charge, without right to sub license non-exclusive right 19 | to use the Software for research purposes for a period of one (1) year from the date of the download 20 | of the source code. Any other use without of prior consent of ICG is prohibited. 21 | 22 | The academic user explicitly acknowledges having received from ICG all information allowing him 23 | to appreciate the adequacy between of the Software and his needs and to undertake all necessary 24 | precautions for his execution and use. 25 | 26 | The Software is provided only as a source. 
27 | 28 | In case of using the Software for a publication or other results obtained through the use of the Software, 29 | user should cite the Software as follows : 30 | 31 | @inproceedings{zorzi2021machine, 32 | title={Machine-learned regularization and polygonization of building segmentation masks}, 33 | author={Zorzi, Stefano and Bittner, Ksenia and Fraundorfer, Friedrich}, 34 | booktitle={2020 25th International Conference on Pattern Recognition (ICPR)}, 35 | pages={3098--3105}, 36 | year={2021}, 37 | organization={IEEE} 38 | } 39 | 40 | Every user of the Software could communicate to the developers [stefano.zorzi@icg.tugraz.at] 41 | his or her remarks as to the use of the Software. 42 | 43 | THE USER CANNOT USE, EXPLOIT OR COMMERCIALLY DISTRIBUTE THE SOFTWARE WITHOUT PRIOR AND EXPLICIT CONSENT 44 | OF ICG (fraundorfer@icg.tugraz.at). ANY SUCH ACTION WILL CONSTITUTE A FORGERY. 45 | 46 | THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT ANY WARRANTIES OF ANY NATURE AND ANY EXPRESS OR IMPLIED WARRANTIES, 47 | WITH REGARDS TO COMMERCIAL USE, PROFESSIONAL USE, LEGAL OR NOT, OR OTHER, OR COMMERCIALIZATION OR ADAPTATION. 48 | 49 | UNLESS EXPLICITLY PROVIDED BY LAW, IN NO EVENT, SHALL ICG OR THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 50 | INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE 51 | GOODS OR SERVICES, LOSS OF USE, DATA, OR PROFITS OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 52 | WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING FROM, 53 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 54 | -------------------------------------------------------------------------------- /projectRegularization/README.md: -------------------------------------------------------------------------------- 1 | # Regularization of Building Boundaries in Satellite and Aerial Images 2 | This repository contains the implementation for our publication "Machine-learned regularization and polygonization of building segmentation masks", ICPR 2021. 3 | If you use this implementation please cite the following publication: 4 | 5 | ~~~ 6 | @inproceedings{zorzi2021machine, 7 | title={Machine-learned regularization and polygonization of building segmentation masks}, 8 | author={Zorzi, Stefano and Bittner, Ksenia and Fraundorfer, Friedrich}, 9 | booktitle={2020 25th International Conference on Pattern Recognition (ICPR)}, 10 | pages={3098--3105}, 11 | year={2021}, 12 | organization={IEEE} 13 | } 14 | ~~~ 15 | and 16 | ~~~ 17 | @inproceedings{zorzi2019regularization, 18 | title={Regularization of building boundaries in satellite images using adversarial and regularized losses}, 19 | author={Zorzi, Stefano and Fraundorfer, Friedrich}, 20 | booktitle={IGARSS 2019-2019 IEEE International Geoscience and Remote Sensing Symposium}, 21 | pages={5140--5143}, 22 | year={2019}, 23 | organization={IEEE} 24 | } 25 | ~~~ 26 | 27 |
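The INFO.txt above points to the pretrained weights for the regularization GAN on Google Drive. Below is a minimal sketch of where they are expected to end up before the regularization step is run; the folder name follows INFO.txt, while treating variables.py and regularize.py as the entry points is an assumption to verify against the upstream projectRegularization instructions:

```
cd projectRegularization
mkdir -p pretrained_weights
# Copy the weight files downloaded from the Google Drive link in INFO.txt into
# pretrained_weights/, then check that the paths in variables.py point at this
# folder before running regularize.py (assumed entry point - see the upstream README).
```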