├── 01-download-and-prepare-mapAI.ipynb ├── 02-mapai-unet-regularization.ipynb ├── 03-mapai-unet-former-regularization.ipynb ├── 04-mapai-ft-unet-former-regularization.ipynb ├── 05-mapai-dcswin-regularization.ipynb ├── 06-evaluate.ipynb ├── 07-vectorize-building-footprint.ipynb ├── DATASET └── info.txt ├── README.md ├── models ├── DCSwin_model.py ├── FTUNetFormer_model.py ├── UNetFormer_model.py └── __pycache__ │ ├── DCSwin_model.cpython-310.pyc │ ├── FTUNetFormer_model.cpython-310.pyc │ └── UNetFormer_model.cpython-310.pyc ├── plots ├── compare-1.png ├── compare-10.png ├── compare-100.png ├── compare-1000.png ├── compare-1100.png ├── compare-1200.png ├── compare-2.png ├── compare-22.png ├── compare-250.png ├── compare-3.png ├── compare-33.png ├── compare-4.png ├── compare-5.png ├── compare-500.png ├── compare-600.png ├── compare-750.png ├── compare-800.png ├── compare-900.png ├── dcswin-25-epochs.png ├── ft-unet-former-25-epochs.png ├── unet-25-epochs.png └── unet-former-25-epochs.png ├── predictions ├── bergen_-5943_1104.tif ├── bergen_-5944_1104.tif ├── bergen_-5948_1107.tif ├── kristiansand_-4712_-1562.tif ├── kristiansand_-4712_-1563.tif ├── kristiansand_-4712_-1568.tif ├── oslo_-3133_244.tif ├── tromso_923_11083.tif ├── tromso_923_11084.tif └── tromso_923_11086.tif ├── projectRegularization ├── INFO.txt ├── LICENSE ├── README.md ├── README.png ├── __pycache__ │ ├── crf_loss.cpython-310.pyc │ ├── data_loader_gan.cpython-310.pyc │ ├── models.cpython-310.pyc │ ├── training_utils.cpython-310.pyc │ └── variables.cpython-310.pyc ├── crf_loss.py ├── data_loader_gan.py ├── gdal ├── models.py ├── regularize.py ├── train_gan_net.py ├── training_utils.py └── variables.py ├── regularizations ├── bergen_-5943_1104.tif ├── bergen_-5944_1104.tif ├── bergen_-5948_1107.tif ├── kristiansand_-4712_-1562.tif ├── kristiansand_-4712_-1563.tif ├── kristiansand_-4712_-1568.tif ├── oslo_-3133_244.tif ├── tromso_923_11083.tif ├── tromso_923_11084.tif └── tromso_923_11086.tif ├── requirements.txt └── trained_models └── Link_to_download_trained_models.txt /03-mapai-unet-former-regularization.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": { 7 | "id": "1KJnP7zSGQg5" 8 | }, 9 | "source": [ 10 | "## Binary semantic segmentation example using U-Net-Former\n", 11 | "Preparation of dataset and model training code from here:\n", 12 | "\n", 13 | "https://pyimagesearch.com/2021/11/08/u-net-training-image-segmentation-models-in-pytorch/" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": null, 19 | "metadata": { 20 | "colab": { 21 | "base_uri": "https://localhost:8080/" 22 | }, 23 | "id": "cFxHJWmXlcZk", 24 | "outputId": "b755d1a8-3650-42d9-8718-401b4458f049" 25 | }, 26 | "outputs": [], 27 | "source": [ 28 | "import os\n", 29 | "import glob\n", 30 | "import matplotlib.pyplot as plt\n", 31 | "\n", 32 | "import torch\n", 33 | "import torchvision\n", 34 | "from tqdm import tqdm\n", 35 | "\n", 36 | "print(torch.__version__)\n", 37 | "print(torchvision.__version__)\n", 38 | "\n", 39 | "DEVICE = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n", 40 | "print(DEVICE)\n", 41 | "\n", 42 | "# determine if we will be pinning memory during data loading\n", 43 | "PIN_MEMORY = True if DEVICE == \"cuda\" else False" 44 | ] 45 | }, 46 | { 47 | "attachments": {}, 48 | "cell_type": "markdown", 49 | "metadata": { 50 | "id": "KVfaGZrWG63Q" 51 | }, 52 | "source": [ 53 | "#### CONFIGURE 
PATHS AND HYPERPARAMETERS FOR TRAINING BELOW." 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "metadata": { 60 | "colab": { 61 | "base_uri": "https://localhost:8080/", 62 | "height": 235 63 | }, 64 | "id": "OjlBC-raVM2K", 65 | "outputId": "b6950142-9c43-40bd-9edb-e10b3973f745" 66 | }, 67 | "outputs": [], 68 | "source": [ 69 | "GD_PATH = os.getcwd() # get current working directory for the repo\n", 70 | "print(GD_PATH)\n", 71 | "\n", 72 | "# PROVIDE PATH TO DOWNLOADED MAPAI DATASET\n", 73 | "DATASET_PATH = \"/home/shymon/datasets/\"\n", 74 | "\n", 75 | "DATASET_PATH = os.path.join(DATASET_PATH, \"mapai_full\") # create dataset path\n", 76 | "\n", 77 | "print(DATASET_PATH)\n", 78 | "\n", 79 | "TRAIN_IMG_DIR = os.path.join(DATASET_PATH, \"train\", \"images\")\n", 80 | "TRAIN_MASK_DIR = os.path.join(DATASET_PATH, \"train\", \"masks\")\n", 81 | "\n", 82 | "print(TRAIN_IMG_DIR)\n", 83 | "print(TRAIN_MASK_DIR)\n", 84 | "\n", 85 | "VAL_IMG_DIR = os.path.join(DATASET_PATH, \"validation\", \"images\")\n", 86 | "VAL_MASK_DIR = os.path.join(DATASET_PATH, \"validation\", \"masks\")\n", 87 | "\n", 88 | "print(VAL_IMG_DIR)\n", 89 | "print(VAL_MASK_DIR)\n", 90 | "\n", 91 | "TEST_IMG_DIR = os.path.join(DATASET_PATH, \"task1_test\", \"images\")\n", 92 | "TEST_MASK_DIR = os.path.join(DATASET_PATH, \"task1_test\", \"masks\")\n", 93 | "\n", 94 | "print(TEST_IMG_DIR)\n", 95 | "print(TEST_MASK_DIR)\n", 96 | "\n", 97 | "# CONFIGURE MapAI DATASET\n", 98 | "NUM_CHANNELS = 3\n", 99 | "NUM_LEVELS = 3\n", 100 | "NUM_CLASSES = 1\n", 101 | "\n", 102 | "# IMAGE SHAPE\n", 103 | "IMG_WIDTH = 512\n", 104 | "IMG_HEIGHT = 512\n", 105 | "\n", 106 | "#---------------------------------------------------------------------------------------------------#\n", 107 | "\n", 108 | "# CONFIGURE parameters for training\n", 109 | "EPOCHS = 25\n", 110 | "init_lr = 1e-4 # learning rate\n", 111 | "BATCH_SIZE = 2\n", 112 | "\n", 113 | "THRESHOLD = 0.5\n", 114 | "base_output = \"out\"\n", 115 | "\n", 116 | "model_name = \"unet-former-25-epochs.pth\" # provide name for model\n", 117 | "training_plot_name = \"unet-former-25-epochs.png\"\n", 118 | "\n", 119 | "#---------------------------------------------------------------------------------------------------#\n", 120 | "\n", 121 | "# OUTPUT PATHS\n", 122 | "\n", 123 | "# Trained model path\n", 124 | "MODEL_PATH = os.path.join(GD_PATH, \"trained_models\", model_name) # change depending on the number of epochs\n", 125 | "print(MODEL_PATH)\n", 126 | "PLOT_PATH = os.path.join(GD_PATH, \"plots\", training_plot_name) # the folder to save future plots\n", 127 | "print(PLOT_PATH)" 128 | ] 129 | }, 130 | { 131 | "cell_type": "markdown", 132 | "metadata": { 133 | "id": "IfSMUZbWWdJn" 134 | }, 135 | "source": [ 136 | "### Load and read the MapAI dataset" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": null, 142 | "metadata": { 143 | "id": "TPiACQ_6VyQP" 144 | }, 145 | "outputs": [], 146 | "source": [ 147 | "import tifffile\n", 148 | "from torch.utils.data import Dataset\n", 149 | "import cv2\n", 150 | "\n", 151 | "\n", 152 | "class mapAIdataset(Dataset):\n", 153 | " def __init__(self, imagePaths, maskPaths, transforms):\n", 154 | " # store the image and mask filepaths, and augmentation\n", 155 | " # transforms\n", 156 | " self.imagePaths = imagePaths\n", 157 | " self.maskPaths = maskPaths\n", 158 | " self.transforms = transforms\n", 159 | " \n", 160 | " def __len__(self):\n", 161 | " # return the number of total samples contained in the 
dataset\n", 162 | " return len(self.imagePaths)\n", 163 | " \n", 164 | " def __getitem__(self, idx):\n", 165 | " # grab the image path from the current index\n", 166 | " imagePath = self.imagePaths[idx]\n", 167 | " # load the image from disk, swap its channels from BGR to RGB,\n", 168 | " # and read the associated mask from disk\n", 169 | " image = cv2.imread(imagePath)\n", 170 | " image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)\n", 171 | " mask = tifffile.imread(self.maskPaths[idx])\n", 172 | " # convert the mask to a float32 tensor with values in the range [0, 1]\n", 173 | " mask = mask.astype('float32')\n", 174 | " # check to see if we are applying any transformations\n", 175 | " if self.transforms is not None:\n", 176 | " # apply the transformations to both image and its mask\n", 177 | " image = self.transforms(image)\n", 178 | " mask = self.transforms(mask)\n", 179 | " \n", 180 | " # return a tuple of the image and its mask\n", 181 | " return (image, mask)" 182 | ] 183 | }, 184 | { 185 | "attachments": {}, 186 | "cell_type": "markdown", 187 | "metadata": { 188 | "id": "AKXL9bO8WnNg" 189 | }, 190 | "source": [ 191 | "### U-Net Former architecture\n", 192 | "\n", 193 | "Downloaded from: https://github.com/WangLibo1995/GeoSeg/blob/main/geoseg/models/UNetFormer.py\n", 194 | "saved into UNetFormer.py file, from where we import the model." 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": null, 200 | "metadata": { 201 | "id": "9urE3W1iWp7v" 202 | }, 203 | "outputs": [], 204 | "source": [ 205 | "import sys\n", 206 | "subfolder = os.path.join(GD_PATH, \"models\")\n", 207 | "sys.path.insert(0, subfolder)\n", 208 | "\n", 209 | "import UNetFormer_model" 210 | ] 211 | }, 212 | { 213 | "cell_type": "markdown", 214 | "metadata": { 215 | "id": "22hbANvfWxmX" 216 | }, 217 | "source": [ 218 | "### Training the segmentation model\n", 219 | "Below we append the paths for TRAIN/VAL/TEST sets - images/masks." 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": null, 225 | "metadata": { 226 | "colab": { 227 | "base_uri": "https://localhost:8080/" 228 | }, 229 | "id": "G2Jha-LCW0ir", 230 | "outputId": "fcad4c67-0851-42e6-ea68-6b6dd88d26c2" 231 | }, 232 | "outputs": [], 233 | "source": [ 234 | "from torch.nn import BCEWithLogitsLoss\n", 235 | "from torch.optim import Adam\n", 236 | "from torch.utils.data import DataLoader\n", 237 | "from imutils import paths\n", 238 | "import time\n", 239 | "\n", 240 | "# TRAINING\n", 241 | "train_images = sorted(list(paths.list_images(TRAIN_IMG_DIR)))\n", 242 | "train_masks = sorted(list(paths.list_images(TRAIN_MASK_DIR)))\n", 243 | "\n", 244 | "# VALIDATION\n", 245 | "val_images = sorted(list(paths.list_images(VAL_IMG_DIR)))\n", 246 | "val_masks = sorted(list(paths.list_images(VAL_MASK_DIR)))\n", 247 | "\n", 248 | "\n", 249 | "# TEST\n", 250 | "test_images = sorted(list(paths.list_images(TEST_IMG_DIR)))\n", 251 | "test_masks = sorted(list(paths.list_images(TEST_MASK_DIR)))" 252 | ] 253 | }, 254 | { 255 | "cell_type": "markdown", 256 | "metadata": { 257 | "id": "gtqUNGR1XCa5" 258 | }, 259 | "source": [ 260 | "### Define transformations\n", 261 | "\n", 262 | "I tried out different data augmentation techniques, including Horizontal Flip, Vertical Flip, Contrast, Brightness. They did not improve my results much, the validation and training loss were actually worse than without data augmentation techniques." 
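One possible reason the random flips hurt here: the dataset class above applies `self.transforms` to the image and the mask in two separate calls, so any random transform draws its randomness independently for each and the pair can end up misaligned. A minimal sketch of a paired transform that shares one random decision between image and mask (the function name, the 512×512 size, and the flip probability are assumptions, not part of this notebook):

```python
import random
import torchvision.transforms.functional as TF
from torchvision.transforms import InterpolationMode

def paired_transform(image, mask, size=(512, 512), p_flip=0.5):
    # Resize image and mask to the same shape; nearest-neighbour keeps the
    # mask binary instead of introducing interpolated values.
    image = TF.resize(TF.to_pil_image(image), size)
    mask = TF.resize(TF.to_pil_image(mask), size, interpolation=InterpolationMode.NEAREST)
    # One coin flip shared by both, so the pair stays aligned.
    if random.random() < p_flip:
        image = TF.hflip(image)
        mask = TF.hflip(mask)
    return TF.to_tensor(image), TF.to_tensor(mask)
```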
263 | ] 264 | }, 265 | { 266 | "attachments": {}, 267 | "cell_type": "markdown", 268 | "metadata": { 269 | "id": "ghW7Nj0OEQMc" 270 | }, 271 | "source": [ 272 | "https://pytorch.org/vision/stable/auto_examples/plot_transforms.html#sphx-glr-auto-examples-plot-transforms-py\n", 273 | "\n", 274 | "https://albumentations.ai/docs/getting_started/mask_augmentation/" 275 | ] 276 | }, 277 | { 278 | "cell_type": "code", 279 | "execution_count": null, 280 | "metadata": { 281 | "colab": { 282 | "base_uri": "https://localhost:8080/" 283 | }, 284 | "id": "WR_dzdpCXCHY", 285 | "outputId": "4e9b1681-2846-489f-edf6-a6240af65563" 286 | }, 287 | "outputs": [], 288 | "source": [ 289 | "import torchvision.transforms as T\n", 290 | "\n", 291 | "# T.RandomHorizontalFlip(p=0.5),\n", 292 | "# T.RandomVerticalFlip(p=0.1),\n", 293 | "\n", 294 | "# Image augmentations applied\n", 295 | "transforms = T.Compose([T.ToPILImage(),\n", 296 | " T.Resize((IMG_HEIGHT,IMG_WIDTH)),\n", 297 | " T.ToTensor()])\n", 298 | "\n", 299 | "# create the train and test datasets\n", 300 | "trainDS = mapAIdataset(imagePaths=train_images,\n", 301 | " maskPaths=train_masks,\n", 302 | " transforms=transforms)\n", 303 | "\n", 304 | "valDS = mapAIdataset(imagePaths=val_images,\n", 305 | " maskPaths=val_masks,\n", 306 | " transforms=transforms)\n", 307 | "\n", 308 | "testDS = mapAIdataset(imagePaths=test_images,\n", 309 | " maskPaths=test_masks,\n", 310 | " transforms=transforms)\n", 311 | "\n", 312 | "print(f\"[INFO] found {len(trainDS)} examples in the TRAINING set...\")\n", 313 | "print(f\"[INFO] found {len(valDS)} examples in the VALIDATION set...\")\n", 314 | "print(f\"[INFO] found {len(testDS)} examples in the TEST set...\")\n", 315 | "\n", 316 | "# create the training and test data loaders\n", 317 | "trainLoader = DataLoader(trainDS,\n", 318 | " shuffle=True,\n", 319 | " batch_size=BATCH_SIZE,\n", 320 | " pin_memory=PIN_MEMORY,\n", 321 | " num_workers=os.cpu_count())\n", 322 | "\n", 323 | "valLoader = DataLoader(valDS,\n", 324 | " shuffle=False,\n", 325 | " batch_size=BATCH_SIZE,\n", 326 | " pin_memory=PIN_MEMORY,\n", 327 | " num_workers=os.cpu_count())\n", 328 | "\n", 329 | "testLoader = DataLoader(testDS,\n", 330 | " shuffle=False,\n", 331 | " batch_size=BATCH_SIZE,\n", 332 | " pin_memory=PIN_MEMORY,\n", 333 | " num_workers=os.cpu_count())" 334 | ] 335 | }, 336 | { 337 | "attachments": {}, 338 | "cell_type": "markdown", 339 | "metadata": { 340 | "id": "tAO9M_R4XG6q" 341 | }, 342 | "source": [ 343 | "### Initialize UNET-FORMER model for training" 344 | ] 345 | }, 346 | { 347 | "cell_type": "code", 348 | "execution_count": null, 349 | "metadata": { 350 | "colab": { 351 | "base_uri": "https://localhost:8080/" 352 | }, 353 | "id": "2IMsYzUaXJW7", 354 | "outputId": "5328f1e2-4a71-4c05-fa9e-23b768971315" 355 | }, 356 | "outputs": [], 357 | "source": [ 358 | "model = UNetFormer_model.UNetFormer().to(DEVICE)\n", 359 | "\n", 360 | "# loss / optimizer\n", 361 | "lossFunction = BCEWithLogitsLoss()\n", 362 | "opt = Adam(model.parameters(), lr=init_lr, weight_decay=0.001)\n", 363 | "\n", 364 | "# calculate steps per epoch for train/val/test\n", 365 | "trainSteps = len(trainDS) // BATCH_SIZE \n", 366 | "valSteps = len(valDS) // BATCH_SIZE\n", 367 | "testSteps = len(testDS) // BATCH_SIZE\n", 368 | "\n", 369 | "print(trainSteps, valSteps, testSteps)\n", 370 | "\n", 371 | "# initialize a dictionary to store training history\n", 372 | "H = {\"train_loss\": [], \"val_loss\": [], \"test_loss\": []}\n", 373 | "H" 374 | ] 375 | }, 376 | { 377 | "cell_type": 
"code", 378 | "execution_count": null, 379 | "metadata": { 380 | "id": "WEP-IVokbWQg" 381 | }, 382 | "outputs": [], 383 | "source": [ 384 | "torch.cuda.empty_cache()" 385 | ] 386 | }, 387 | { 388 | "attachments": {}, 389 | "cell_type": "markdown", 390 | "metadata": { 391 | "id": "xcjuKhMeXLU-" 392 | }, 393 | "source": [ 394 | "### TRAINING THE MODEL\n", 395 | "\n", 396 | "Run this piece of code only if you want to train the model from scratch.\n", 397 | "\n", 398 | "Training locally: BATCH_SIZE = 2 takes 5035 MB of GPU memory.\n", 399 | "\n" 400 | ] 401 | }, 402 | { 403 | "cell_type": "code", 404 | "execution_count": null, 405 | "metadata": { 406 | "colab": { 407 | "base_uri": "https://localhost:8080/" 408 | }, 409 | "id": "vWuUyLUgXPNf", 410 | "outputId": "34c6485b-838b-45cf-99da-601f7cfbc4d1" 411 | }, 412 | "outputs": [], 413 | "source": [ 414 | "# loop over epochs\n", 415 | "print(\"[INFO] training UNET-FORMER ...\")\n", 416 | "startTime = time.time()\n", 417 | "\n", 418 | "for epoch in tqdm(range(EPOCHS)):\n", 419 | " model.train()\n", 420 | "\n", 421 | " # initialize total training and validation loss\n", 422 | " totalTrainLoss = 0\n", 423 | " totalValLoss = 0\n", 424 | " totalTrainAcc = 0\n", 425 | " totalValAcc = 0\n", 426 | "\n", 427 | " # loop over the training set\n", 428 | " for (i, (x, y)) in enumerate(trainLoader):\n", 429 | " # send output to device\n", 430 | " (x, y) = (x.to(DEVICE), y.to(DEVICE))\n", 431 | "\n", 432 | " # perform a forward pass and calculate the training loss\n", 433 | " pred = model(x)\n", 434 | " loss = lossFunction(pred, y)\n", 435 | "\n", 436 | " # calculate the accuracy\n", 437 | " acc = ((pred > 0.5) == y).float().mean()\n", 438 | "\n", 439 | " # kill previously accumulated gradients then\n", 440 | " # perform backpropagation and update model parameters\n", 441 | " opt.zero_grad()\n", 442 | " loss.backward()\n", 443 | " opt.step()\n", 444 | "\n", 445 | " # add the loss and accuracy to the total training loss and accuracy\n", 446 | " totalTrainLoss += loss\n", 447 | " totalTrainAcc += acc\n", 448 | "\n", 449 | " # switch of autograd\n", 450 | " with torch.no_grad():\n", 451 | " # set the model in evaluation mode\n", 452 | " model.eval()\n", 453 | "\n", 454 | " # loop over the validation set\n", 455 | " for (x, y) in valLoader:\n", 456 | " # send the input to the device\n", 457 | " (x, y) = (x.to(DEVICE), y.to(DEVICE))\n", 458 | "\n", 459 | " # make the predictions and calculate the validation loss\n", 460 | " pred = model(x)\n", 461 | " loss = lossFunction(pred, y)\n", 462 | "\n", 463 | " # calculate the accuracy\n", 464 | " acc = ((pred > 0.5) == y).float().mean()\n", 465 | "\n", 466 | " # add the loss and accuracy to the total validation loss and accuracy\n", 467 | " totalValLoss += loss\n", 468 | " totalValAcc += acc\n", 469 | "\n", 470 | " # calculate the average training and validation loss and accuracy\n", 471 | " avgTrainLoss = totalTrainLoss / trainSteps\n", 472 | " avgValLoss = totalValLoss / valSteps\n", 473 | " avgTrainAcc = totalTrainAcc / trainSteps\n", 474 | " avgValAcc = totalValAcc / valSteps\n", 475 | " \n", 476 | " # update our training history\n", 477 | " H[\"train_loss\"].append(avgTrainLoss.cpu().detach().numpy())\n", 478 | " H[\"val_loss\"].append(avgValLoss.cpu().detach().numpy())\n", 479 | "\n", 480 | " # print the model training and validation information\n", 481 | " print(\"[INFO] EPOCH: {}/{}\".format(epoch + 1, EPOCHS))\n", 482 | " print(\"Train loss: {:.6f}, Train acc: {:.6f}, Val loss: {:.4f}, Val acc: {:.4f}\".format(\n", 
483 | " avgTrainLoss, avgTrainAcc, avgValLoss, avgValAcc))\n", 484 | " \n", 485 | "# display the total time needed to perform the training\n", 486 | "endTime = time.time()\n", 487 | "print(\"[INFO] total time taken to train the model: {:.2f}s\".format(endTime - startTime))\n", 488 | " " 489 | ] 490 | }, 491 | { 492 | "cell_type": "code", 493 | "execution_count": null, 494 | "metadata": { 495 | "colab": { 496 | "base_uri": "https://localhost:8080/" 497 | }, 498 | "id": "CsJoOVn11rs9", 499 | "outputId": "c9a36460-f773-4771-cfd7-fec78711d8cc" 500 | }, 501 | "outputs": [], 502 | "source": [ 503 | "H # show traning/val loss history" 504 | ] 505 | }, 506 | { 507 | "cell_type": "markdown", 508 | "metadata": { 509 | "id": "U6ChLXHuXZHA" 510 | }, 511 | "source": [ 512 | "### Plot the training and validation loss" 513 | ] 514 | }, 515 | { 516 | "cell_type": "code", 517 | "execution_count": null, 518 | "metadata": { 519 | "colab": { 520 | "base_uri": "https://localhost:8080/", 521 | "height": 316 522 | }, 523 | "id": "j04HfubrXYvX", 524 | "outputId": "5416f4a7-2647-40e9-ce25-cac7909dea50" 525 | }, 526 | "outputs": [], 527 | "source": [ 528 | "# plot the training loss\n", 529 | "print(MODEL_PATH)\n", 530 | "print(PLOT_PATH)\n", 531 | "\n", 532 | "plt.style.use(\"ggplot\")\n", 533 | "plt.figure()\n", 534 | "plt.plot(H[\"train_loss\"], label=\"train_loss\")\n", 535 | "plt.plot(H[\"val_loss\"], label=\"val_loss\")\n", 536 | "plt.title(\"Training Loss on Dataset\")\n", 537 | "plt.xlabel(\"Epoch #\")\n", 538 | "plt.ylabel(\"Loss\")\n", 539 | "plt.legend(loc=\"lower left\")\n", 540 | "plt.savefig(PLOT_PATH)\n", 541 | "# serialize the model to disk\n", 542 | "torch.save(model, MODEL_PATH) # saves the model" 543 | ] 544 | }, 545 | { 546 | "cell_type": "markdown", 547 | "metadata": { 548 | "id": "5Y6Fx2oaWr0q" 549 | }, 550 | "source": [ 551 | "### Prediction part\n", 552 | "\n", 553 | "Here the trained model is loaded and use for prediction on test images." 554 | ] 555 | }, 556 | { 557 | "cell_type": "code", 558 | "execution_count": null, 559 | "metadata": { 560 | "colab": { 561 | "base_uri": "https://localhost:8080/" 562 | }, 563 | "id": "qYh4flMu7O-m", 564 | "outputId": "94909220-5b1f-43ad-b52f-d37bbaa270fd" 565 | }, 566 | "outputs": [], 567 | "source": [ 568 | "# Load saved model for prediction\n", 569 | "\n", 570 | "print(MODEL_PATH)\n", 571 | "\n", 572 | "model = torch.load(MODEL_PATH) # add MODEL_PATH after training\n", 573 | "print(\"model loaded for prediction\")\n", 574 | "\n", 575 | "model" 576 | ] 577 | }, 578 | { 579 | "attachments": {}, 580 | "cell_type": "markdown", 581 | "metadata": {}, 582 | "source": [ 583 | "#### Provide test images for MapAI Dataset" 584 | ] 585 | }, 586 | { 587 | "cell_type": "code", 588 | "execution_count": null, 589 | "metadata": {}, 590 | "outputs": [], 591 | "source": [ 592 | "PREDICTIONS_DIR = os.path.join(GD_PATH, \"predictions\")\n", 593 | "PREDICTIONS_DIR" 594 | ] 595 | }, 596 | { 597 | "attachments": {}, 598 | "cell_type": "markdown", 599 | "metadata": {}, 600 | "source": [ 601 | "#### Make predictions on the entire MapAI dataset\n", 602 | "\n", 603 | "Make predictions on test images and save them to the folder named predictions." 
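If the predictions folder is not already present in the working copy, the loop below will fail on the first save, so it is worth creating it first; a one-line sketch (not in the original notebook):

```python
# Ensure the output folder exists before the prediction loop writes into it.
os.makedirs(PREDICTIONS_DIR, exist_ok=True)
```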
604 | ] 605 | }, 606 | { 607 | "cell_type": "code", 608 | "execution_count": null, 609 | "metadata": { 610 | "colab": { 611 | "base_uri": "https://localhost:8080/" 612 | }, 613 | "id": "bq7BlbdrcgPB", 614 | "outputId": "6860afd9-da51-4911-d975-5a3ed78e01e1" 615 | }, 616 | "outputs": [], 617 | "source": [ 618 | "import random\n", 619 | "import gc\n", 620 | "from pathlib import Path\n", 621 | "import numpy as np\n", 622 | "from PIL import Image\n", 623 | "\n", 624 | "# PLOTTING PREDICTIONS AS SINGLE IMAGES\n", 625 | "\n", 626 | "# Output folder for the predictions\n", 627 | "output_folder = PREDICTIONS_DIR + \"/\" # check for Windows to save predictions inside the folder\n", 628 | "\n", 629 | "# PLOT TEST IMAGES as RGB\n", 630 | "for n in range(len(test_images)):\n", 631 | " gc.collect()\n", 632 | " # Test image number\n", 633 | " testImgName = str(Path(test_images[n]).stem) + '.tif'\n", 634 | " #print('#', testImgName)\n", 635 | "\n", 636 | " # Make predicton on a test image specified with counter n\n", 637 | " test_img = test_images[n]\n", 638 | " test_img_input = np.expand_dims(test_img, 0)\n", 639 | " #print('#', test_img_input[0])\n", 640 | "\n", 641 | " # PyTorch --> works\n", 642 | " model.eval()\n", 643 | " with torch.no_grad():\n", 644 | " image = cv2.imread(test_img_input[0])\n", 645 | " image = cv2.resize(image, dsize = (IMG_WIDTH, IMG_HEIGHT), interpolation=cv2.INTER_CUBIC)\n", 646 | " image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)\n", 647 | " image = image.astype(\"float32\") / 255\n", 648 | " \n", 649 | " # print('SIZE: ', image.shape)\n", 650 | "\n", 651 | " # make the channel axis to be the leading one, add batch dimension\n", 652 | " image = np.transpose(image, (2, 0, 1))\n", 653 | " # create a PyTorch tensor\n", 654 | " image = np.expand_dims(image, 0)\n", 655 | " # flash the tensor to the device\n", 656 | " image = torch.from_numpy(image).to(DEVICE)\n", 657 | "\n", 658 | " # make the prediction\n", 659 | " predMask = model(image).squeeze()\n", 660 | " # pass result through sigmoid\n", 661 | " predMask = torch.sigmoid(predMask)\n", 662 | "\n", 663 | " # convert result to numpy array\n", 664 | " predMask = predMask.cpu().numpy()\n", 665 | "\n", 666 | " # filter out the weak predictions and convert them to integers\n", 667 | " predMask = (predMask > THRESHOLD) * 255\n", 668 | " predMask = predMask.astype(np.uint8)\n", 669 | "\n", 670 | " # generate image from array\n", 671 | " pIMG = Image.fromarray(predMask)\n", 672 | " pIMG.save(str(output_folder + testImgName))\n", 673 | "\n", 674 | " print('Prediction:', testImgName, 'saved to:', output_folder)" 675 | ] 676 | }, 677 | { 678 | "attachments": {}, 679 | "cell_type": "markdown", 680 | "metadata": {}, 681 | "source": [ 682 | "#### Make predictions on single images by choice\n", 683 | "\n", 684 | "Change the parameter n to choose which image to plot." 685 | ] 686 | }, 687 | { 688 | "cell_type": "code", 689 | "execution_count": null, 690 | "metadata": {}, 691 | "outputs": [], 692 | "source": [ 693 | "# ----------------------------------------------------------------------\n", 694 | "\n", 695 | "predictions = glob.glob(output_folder + \"*.tif\")\n", 696 | "predictions.sort()\n", 697 | "print(\"# IMAGES for prediction: \", len(predictions))\n", 698 | "print(\"Choosen n can be from 0 o 1367! 
\")\n", 699 | "\n", 700 | "# ----------------------------------------------------------------------\n", 701 | "\n", 702 | "n = 900 # change this number depending on which image you want to test\n", 703 | "\n", 704 | "fig = plt.figure(figsize=(18,12))\n", 705 | "ax1 = fig.add_subplot(131)\n", 706 | "\n", 707 | "ax1.set_title('RGB image: ')\n", 708 | "image = cv2.imread(test_images[n])[:,:,::-1]\n", 709 | "ax1.imshow(image)\n", 710 | "ax1.set_axis_off()\n", 711 | "\n", 712 | "ax2 = fig.add_subplot(132)\n", 713 | "ax2.set_title('Ground truth: ')\n", 714 | "image = cv2.imread(test_masks[n])[:,:,::-1]\n", 715 | "image *= 255\n", 716 | "ax2.imshow(image)\n", 717 | "ax2.set_axis_off()\n", 718 | "\n", 719 | "ax3 = fig.add_subplot(133)\n", 720 | "ax3.set_title('Prediction: ')\n", 721 | "image = cv2.imread(predictions[n])[:,:,::-1]\n", 722 | "ax3.imshow(image)\n", 723 | "ax3.set_axis_off()" 724 | ] 725 | }, 726 | { 727 | "attachments": {}, 728 | "cell_type": "markdown", 729 | "metadata": { 730 | "id": "Tg_0qxbcjzfw" 731 | }, 732 | "source": [ 733 | "### BUILDING FOOTPRINT REGULARIZATION\n", 734 | "\n", 735 | "Used repo: https://github.com/zorzi-s/projectRegularization\n", 736 | "\n", 737 | "git clone the repo to the folder where your notebook is stored. To get curent working directory use os.getcwd().\n", 738 | "\n", 739 | "The pretrained weights need to be downloaded from the provided link and saved into the folder pretrained_weighs that is inside projectRegularization:\n", 740 | "\n", 741 | "https://drive.google.com/drive/folders/1IPrDpvFq9ODW7UtPAJR_T-gGzxDat_uu" 742 | ] 743 | }, 744 | { 745 | "attachments": {}, 746 | "cell_type": "markdown", 747 | "metadata": {}, 748 | "source": [ 749 | "Next step is to generate a Python file to locate the necessary pretrained weights from projectRegularization. The code below was only tested on Ubuntu, not on Windows." 
750 | ] 751 | }, 752 | { 753 | "cell_type": "code", 754 | "execution_count": null, 755 | "metadata": { 756 | "id": "yp8uKrNUjyGn" 757 | }, 758 | "outputs": [], 759 | "source": [ 760 | "# DEFINE NECESSARY PATHS FOR REGULARIZATION PART\n", 761 | "\n", 762 | "projectRegDir = os.path.join(GD_PATH, \"projectRegularization\")\n", 763 | "print(projectRegDir)\n", 764 | "\n", 765 | "ptw = os.path.join(projectRegDir, \"pretrained_weights\") \n", 766 | "print(ptw)\n", 767 | "\n", 768 | "# OUTPUT REGULARIZATIONS DIR\n", 769 | "REGULARIZATION_DIR = os.path.join(GD_PATH, \"regularizations\") + \"/\"\n", 770 | "print(REGULARIZATION_DIR)\n", 771 | "\n", 772 | "# GET THE PATHS FOR TRAINED GAN MODELS\n", 773 | "ENCODER = os.path.join(ptw, \"E140000_e1\")\n", 774 | "GENERATOR = os.path.join(ptw, \"E140000_net\")\n", 775 | "\n", 776 | "print(ENCODER)\n", 777 | "print(GENERATOR)" 778 | ] 779 | }, 780 | { 781 | "cell_type": "code", 782 | "execution_count": null, 783 | "metadata": {}, 784 | "outputs": [], 785 | "source": [ 786 | "# CREATE A NEW variables.py WITH USERS PATHS\n", 787 | "\n", 788 | "with open(projectRegDir + 'variables.py', 'w') as f:\n", 789 | " f.write('# CONFIGURE THE PATHS HERE: \\n\\n')\n", 790 | " f.write('# TRAINING \\n')\n", 791 | " f.write('DATASET_RGB = ' + '\"' + str(TRAIN_IMG_DIR + '*.tif' + '\"') + '\\n')\n", 792 | " f.write('DATASET_GTI = ' + '\"' + str(TRAIN_MASK_DIR + '*.tif' + '\"') + '\\n')\n", 793 | " f.write('DATASET_SEG = ' + '\"' + str(PREDICTIONS_DIR + '*.tif' + '\"') + '\\n')\n", 794 | " f.write('\\n')\n", 795 | " f.write('DEBUG_DIR = ' + '\"' + str('./debug/') + '\"' + '\\n')\n", 796 | " f.write('\\n')\n", 797 | " f.write('# INFERENCE \\n')\n", 798 | " f.write('INF_RGB = ' + '\"' + str(TEST_IMG_DIR + '*.tif' + '\"') + '\\n')\n", 799 | " f.write('INF_SEG = ' + '\"' + str(PREDICTIONS_DIR + '*.tif' + '\"') + '\\n')\n", 800 | " f.write('INF_OUT = ' + '\"' + str(REGULARIZATION_DIR + '\"') + '\\n')\n", 801 | " f.write('\\n')\n", 802 | " f.write('MODEL_ENCODER = ' + '\"' + str(ENCODER) + '\"' + '\\n')\n", 803 | " f.write('MODEL_GENERATOR = ' + '\"' + str(GENERATOR) + '\"' + '\\n')\n", 804 | " f.close()\n", 805 | " \n", 806 | "print(\"variables.py created with users paths...\")\n" 807 | ] 808 | }, 809 | { 810 | "attachments": {}, 811 | "cell_type": "markdown", 812 | "metadata": {}, 813 | "source": [ 814 | "#### Run projectRegularization\n", 815 | "\n", 816 | "Takes around 6-8 minutes.\n", 817 | "\n", 818 | "You only need to change the command below and replace it with the absolute path for regularize.py" 819 | ] 820 | }, 821 | { 822 | "cell_type": "code", 823 | "execution_count": null, 824 | "metadata": {}, 825 | "outputs": [], 826 | "source": [ 827 | "!python /home/shymon/Documents/mapAI-regularization/projectRegularization/regularize.py" 828 | ] 829 | }, 830 | { 831 | "attachments": {}, 832 | "cell_type": "markdown", 833 | "metadata": {}, 834 | "source": [ 835 | "### Compare predictions and regularizations on a single image" 836 | ] 837 | }, 838 | { 839 | "cell_type": "code", 840 | "execution_count": null, 841 | "metadata": { 842 | "colab": { 843 | "base_uri": "https://localhost:8080/" 844 | }, 845 | "id": "89nW6Q7F6aga", 846 | "outputId": "976d62a2-76a7-4b52-a4bc-218f63d8a122" 847 | }, 848 | "outputs": [], 849 | "source": [ 850 | "# Read Regularizations to plot and compare results\n", 851 | "\n", 852 | "regularizations = glob.glob(REGULARIZATION_DIR + \"*.tif\")\n", 853 | "regularizations.sort()\n", 854 | "\n", 855 | "print(\"# of predicted images: \", len(predictions))\n", 856 
| "print(\"# of regularized images: \", len(regularizations))" 857 | ] 858 | }, 859 | { 860 | "attachments": {}, 861 | "cell_type": "markdown", 862 | "metadata": {}, 863 | "source": [ 864 | "Code to plot RGB, GT, PREDICTION and REGULARIZATION in a single plot for comparison.\n", 865 | "\n", 866 | "Change parameter n accordingly." 867 | ] 868 | }, 869 | { 870 | "cell_type": "code", 871 | "execution_count": null, 872 | "metadata": {}, 873 | "outputs": [], 874 | "source": [ 875 | "n = 600\n", 876 | "\n", 877 | "fig = plt.figure(figsize=(18,12))\n", 878 | "ax1 = fig.add_subplot(141)\n", 879 | "\n", 880 | "ax1.set_title('RGB: ')\n", 881 | "image = cv2.imread(test_images[n])[:,:,::-1]\n", 882 | "ax1.imshow(image)\n", 883 | "ax1.set_axis_off()\n", 884 | "\n", 885 | "ax2 = fig.add_subplot(142)\n", 886 | "ax2.set_title('Ground truth: ')\n", 887 | "image = cv2.imread(test_masks[n])[:,:,::-1]\n", 888 | "image *= 255\n", 889 | "ax2.imshow(image)\n", 890 | "ax2.set_axis_off()\n", 891 | "\n", 892 | "ax3 = fig.add_subplot(143)\n", 893 | "ax3.set_title('Prediction: ')\n", 894 | "image = cv2.imread(predictions[n])[:,:,::-1]\n", 895 | "ax3.imshow(image)\n", 896 | "ax3.set_axis_off()\n", 897 | "\n", 898 | "ax4 = fig.add_subplot(144)\n", 899 | "ax4.set_title('Regularization: ')\n", 900 | "image = cv2.imread(regularizations[n])[:,:,::-1]\n", 901 | "ax4.imshow(image)\n", 902 | "ax4.set_axis_off()\n", 903 | "\n", 904 | "# DEFINE PATH FOR PLOTS TO BE SAVED\n", 905 | "figPath = GD_PATH + \"/\" + \"plots\" + \"/\" \"compare-\" + str(n) + \".png\"\n", 906 | "print(figPath)\n", 907 | "\n", 908 | "# Save plot\n", 909 | "fig.savefig(figPath)" 910 | ] 911 | }, 912 | { 913 | "attachments": {}, 914 | "cell_type": "markdown", 915 | "metadata": {}, 916 | "source": [ 917 | "### VECTORIZING THE REGULARIZED BUILDING MASKS with GDAL\n", 918 | "\n", 919 | "GDAL: https://gdal.org/'\n", 920 | "\n", 921 | "GDAL: https://www.youtube.com/watch?v=q3DLdMj5zLA\n", 922 | "\n", 923 | "I do not know if it is possible to install GDAL on WINDOWS inside a conda environment.\n", 924 | "\n", 925 | "On Ubuntu you have to follow these steps:\n", 926 | "\n", 927 | "\n", 928 | "\n", 929 | "Specific process for installation: https://stackoverflow.com/questions/44005694/no-module-named-gdal\n", 930 | "\n", 931 | "- sudo apt-get update && sudo apt upgrade -y && sudo apt autoremove \n", 932 | "- sudo apt-get install -y cdo nco gdal-bin libgdal-dev-\n", 933 | "- python -m pip install --upgrade pip setuptools wheel\n", 934 | "- python -m pip install --upgrade gdal\n", 935 | "- conda install -c conda forge libgdal\n", 936 | "- conda install -c conda-forge libgdal\n", 937 | "- conda install -c conda-forge gdal\n", 938 | "- conda install tiledb=2.2\n", 939 | "- conda install poppler\n", 940 | "\n", 941 | "When you have this you can hopefully vectorize the detected masks quite easily." 
942 | ] 943 | }, 944 | { 945 | "cell_type": "code", 946 | "execution_count": null, 947 | "metadata": {}, 948 | "outputs": [], 949 | "source": [ 950 | "def get_fname_from_path(path):\n", 951 | " \"\"\"\n", 952 | " Given a path, returns the filename after the last frontslash character.\n", 953 | " \"\"\"\n", 954 | " return path.rsplit('/', 1)[-1]\n", 955 | "\n", 956 | "def get_fname_no_extension(path):\n", 957 | " \"\"\"\n", 958 | " Given a path, returns the filename without its extension.\n", 959 | " \"\"\"\n", 960 | " filename, extension = os.path.splitext(path)\n", 961 | " return filename" 962 | ] 963 | }, 964 | { 965 | "cell_type": "code", 966 | "execution_count": null, 967 | "metadata": { 968 | "id": "TDWUhUkJaYl8" 969 | }, 970 | "outputs": [], 971 | "source": [ 972 | "import osgeo\n", 973 | "from osgeo import gdal\n", 974 | "from osgeo import ogr\n", 975 | "print('GDAL version: ', osgeo.gdal.__version__)\n", 976 | "\n", 977 | "# Choose which image to vectorize\n", 978 | "n = 0\n", 979 | "\n", 980 | "input = regularizations[n]\n", 981 | "print()\n", 982 | "print(\"INPUT: \", input)\n", 983 | "\n", 984 | "# print(get_fname_no_extension(input))\n", 985 | "\n", 986 | "# out\n", 987 | "output = get_fname_from_path(get_fname_no_extension(input)) + \".gpkg\"\n", 988 | "print(\"OUTPUT: \", output)\n", 989 | "\n", 990 | "# Open image with GDAl driver\n", 991 | "ds = gdal.Open(input)\n", 992 | "# Get the band\n", 993 | "band = ds.GetRasterBand(1)\n", 994 | "\n", 995 | "# Create the output shapefile\n", 996 | "driver = ogr.GetDriverByName(\"GPKG\")\n", 997 | "out_ds = driver.CreateDataSource(output)\n", 998 | "out_layer = out_ds.CreateLayer(output, geom_type=ogr.wkbPolygon)\n", 999 | "\n", 1000 | "# Add a field to the layer to store the pixel values\n", 1001 | "field_defn = ogr.FieldDefn(\"Pix_Value\", ogr.OFTInteger)\n", 1002 | "out_layer.CreateField(field_defn)\n", 1003 | "\n", 1004 | "# Polygonize the PNG file\n", 1005 | "gdal.Polygonize(band, None, out_layer, 0, [], callback=None)\n", 1006 | "\n", 1007 | "# Close the input and output files\n", 1008 | "out_ds = None\n", 1009 | "ds = None" 1010 | ] 1011 | }, 1012 | { 1013 | "attachments": {}, 1014 | "cell_type": "markdown", 1015 | "metadata": {}, 1016 | "source": [ 1017 | "For the builing detection case we need to only keep the vectors with pixel value 255. Easiest solution is to use: Extract by attribute. The Python solution with GDAL can be found below." 
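The cell below shells out to `ogr2ogr`; an equivalent in-Python sketch uses `gdal.VectorTranslate` with a `where` filter. The field name `Pix_Value` comes from the polygonization step above, and the file names are just the example from the command below:

```python
from osgeo import gdal

src = "bergen_-5943_1104.gpkg"    # vectorized regularization from the previous cell
dst = "bergen_-5943_1104B.gpkg"   # keep only polygons with pixel value 255 (buildings)

gdal.VectorTranslate(
    dst,
    src,
    options=gdal.VectorTranslateOptions(format="GPKG", where="Pix_Value = 255"),
)
```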
1018 | ] 1019 | }, 1020 | { 1021 | "cell_type": "code", 1022 | "execution_count": null, 1023 | "metadata": {}, 1024 | "outputs": [], 1025 | "source": [ 1026 | "# ogr2ogr -where ID=\"1\" OUTFILE.gpkg INFILE.gpkg\n", 1027 | "\n", 1028 | "# RUN from the command line inside Ubuntu\n", 1029 | "# Change name of input and output according to user needs\n", 1030 | "\n", 1031 | "!ogr2ogr -where Pix_Value=\"255\" bergen_-5943_1104B.gpkg bergen_-5943_1104.gpkg" 1032 | ] 1033 | } 1034 | ], 1035 | "metadata": { 1036 | "accelerator": "TPU", 1037 | "colab": { 1038 | "provenance": [] 1039 | }, 1040 | "gpuClass": "premium", 1041 | "kernelspec": { 1042 | "display_name": "torch", 1043 | "language": "python", 1044 | "name": "python3" 1045 | }, 1046 | "language_info": { 1047 | "codemirror_mode": { 1048 | "name": "ipython", 1049 | "version": 3 1050 | }, 1051 | "file_extension": ".py", 1052 | "mimetype": "text/x-python", 1053 | "name": "python", 1054 | "nbconvert_exporter": "python", 1055 | "pygments_lexer": "ipython3", 1056 | "version": "3.10.9" 1057 | } 1058 | }, 1059 | "nbformat": 4, 1060 | "nbformat_minor": 0 1061 | } 1062 | -------------------------------------------------------------------------------- /04-mapai-ft-unet-former-regularization.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": { 7 | "id": "1KJnP7zSGQg5" 8 | }, 9 | "source": [ 10 | "## Binary semantic segmentation example using FT-U-Net-Former\n", 11 | "Preparation of dataset and model training code from here:\n", 12 | "\n", 13 | "https://pyimagesearch.com/2021/11/08/u-net-training-image-segmentation-models-in-pytorch/" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": null, 19 | "metadata": { 20 | "colab": { 21 | "base_uri": "https://localhost:8080/" 22 | }, 23 | "id": "cFxHJWmXlcZk", 24 | "outputId": "b755d1a8-3650-42d9-8718-401b4458f049" 25 | }, 26 | "outputs": [], 27 | "source": [ 28 | "import os\n", 29 | "import glob\n", 30 | "import matplotlib.pyplot as plt\n", 31 | "\n", 32 | "import torch\n", 33 | "import torchvision\n", 34 | "from tqdm import tqdm\n", 35 | "\n", 36 | "print(torch.__version__)\n", 37 | "print(torchvision.__version__)\n", 38 | "\n", 39 | "DEVICE = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n", 40 | "print(DEVICE)\n", 41 | "\n", 42 | "# determine if we will be pinning memory during data loading\n", 43 | "PIN_MEMORY = True if DEVICE == \"cuda\" else False" 44 | ] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "metadata": { 49 | "id": "KVfaGZrWG63Q" 50 | }, 51 | "source": [ 52 | "#### CONFIGURE YOUR PATHS AND HYPERPARAMETERS FOR TRAINING BELOW." 
53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": { 59 | "colab": { 60 | "base_uri": "https://localhost:8080/", 61 | "height": 235 62 | }, 63 | "id": "OjlBC-raVM2K", 64 | "outputId": "b6950142-9c43-40bd-9edb-e10b3973f745" 65 | }, 66 | "outputs": [], 67 | "source": [ 68 | "GD_PATH = os.getcwd() # get current working directory for the repo\n", 69 | "print(GD_PATH)\n", 70 | "\n", 71 | "# PROVIDE PATH TO DOWNLOADED MAPAI DATASET\n", 72 | "DATASET_PATH = \"/home/shymon/datasets/\"\n", 73 | "\n", 74 | "DATASET_PATH = os.path.join(DATASET_PATH, \"mapai_full\") # create dataset path\n", 75 | "\n", 76 | "print(DATASET_PATH)\n", 77 | "\n", 78 | "TRAIN_IMG_DIR = os.path.join(DATASET_PATH, \"train\", \"images\")\n", 79 | "TRAIN_MASK_DIR = os.path.join(DATASET_PATH, \"train\", \"masks\")\n", 80 | "\n", 81 | "print(TRAIN_IMG_DIR)\n", 82 | "print(TRAIN_MASK_DIR)\n", 83 | "\n", 84 | "VAL_IMG_DIR = os.path.join(DATASET_PATH, \"validation\", \"images\")\n", 85 | "VAL_MASK_DIR = os.path.join(DATASET_PATH, \"validation\", \"masks\")\n", 86 | "\n", 87 | "print(VAL_IMG_DIR)\n", 88 | "print(VAL_MASK_DIR)\n", 89 | "\n", 90 | "TEST_IMG_DIR = os.path.join(DATASET_PATH, \"task1_test\", \"images\")\n", 91 | "TEST_MASK_DIR = os.path.join(DATASET_PATH, \"task1_test\", \"masks\")\n", 92 | "\n", 93 | "print(TEST_IMG_DIR)\n", 94 | "print(TEST_MASK_DIR)\n", 95 | "\n", 96 | "# CONFIGURE MapAI DATASET\n", 97 | "NUM_CHANNELS = 3\n", 98 | "NUM_LEVELS = 3\n", 99 | "NUM_CLASSES = 1\n", 100 | "\n", 101 | "# IMAGE SHAPE\n", 102 | "IMG_WIDTH = 512\n", 103 | "IMG_HEIGHT = 512\n", 104 | "\n", 105 | "#---------------------------------------------------------------------------------------------------#\n", 106 | "\n", 107 | "# CONFIGURE parameters for training\n", 108 | "EPOCHS = 25\n", 109 | "init_lr = 1e-4 # learning rate\n", 110 | "BATCH_SIZE = 2\n", 111 | "\n", 112 | "THRESHOLD = 0.5\n", 113 | "base_output = \"out\"\n", 114 | "\n", 115 | "model_name = \"ft-unet-former-25-epochs.pth\" # provide name for model\n", 116 | "training_plot_name = \"ft-unet-former-25-epochs.png\"\n", 117 | "\n", 118 | "#---------------------------------------------------------------------------------------------------#\n", 119 | "\n", 120 | "# OUTPUT PATHS\n", 121 | "\n", 122 | "# Trained model path\n", 123 | "MODEL_PATH = os.path.join(GD_PATH, \"trained_models\", model_name) # change depending on the number of epochs\n", 124 | "print(MODEL_PATH)\n", 125 | "PLOT_PATH = os.path.join(GD_PATH, \"plots\", training_plot_name) # the folder to save future plots\n", 126 | "print(PLOT_PATH)" 127 | ] 128 | }, 129 | { 130 | "cell_type": "markdown", 131 | "metadata": { 132 | "id": "IfSMUZbWWdJn" 133 | }, 134 | "source": [ 135 | "### Load and read the MapAI dataset" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": null, 141 | "metadata": { 142 | "id": "TPiACQ_6VyQP" 143 | }, 144 | "outputs": [], 145 | "source": [ 146 | "import tifffile\n", 147 | "from torch.utils.data import Dataset\n", 148 | "import cv2\n", 149 | "\n", 150 | "\n", 151 | "class mapAIdataset(Dataset):\n", 152 | " def __init__(self, imagePaths, maskPaths, transforms):\n", 153 | " # store the image and mask filepaths, and augmentation\n", 154 | " # transforms\n", 155 | " self.imagePaths = imagePaths\n", 156 | " self.maskPaths = maskPaths\n", 157 | " self.transforms = transforms\n", 158 | " \n", 159 | " def __len__(self):\n", 160 | " # return the number of total samples contained in the dataset\n", 161 | " return 
len(self.imagePaths)\n", 162 | " \n", 163 | " def __getitem__(self, idx):\n", 164 | " # grab the image path from the current index\n", 165 | " imagePath = self.imagePaths[idx]\n", 166 | " # load the image from disk, swap its channels from BGR to RGB,\n", 167 | " # and read the associated mask from disk\n", 168 | " image = cv2.imread(imagePath)\n", 169 | " image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)\n", 170 | " mask = tifffile.imread(self.maskPaths[idx])\n", 171 | " # convert the mask to a float32 tensor with values in the range [0, 1]\n", 172 | " mask = mask.astype('float32')\n", 173 | " # check to see if we are applying any transformations\n", 174 | " if self.transforms is not None:\n", 175 | " # apply the transformations to both image and its mask\n", 176 | " image = self.transforms(image)\n", 177 | " mask = self.transforms(mask)\n", 178 | " \n", 179 | " # return a tuple of the image and its mask\n", 180 | " return (image, mask)" 181 | ] 182 | }, 183 | { 184 | "attachments": {}, 185 | "cell_type": "markdown", 186 | "metadata": { 187 | "id": "AKXL9bO8WnNg" 188 | }, 189 | "source": [ 190 | "### Build FT-UNET-FORMER architecture\n", 191 | "Downloaded from: https://github.com/WangLibo1995/GeoSeg/blob/main/geoseg/models/FTUNetFormer.py" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": null, 197 | "metadata": { 198 | "id": "9urE3W1iWp7v" 199 | }, 200 | "outputs": [], 201 | "source": [ 202 | "import sys\n", 203 | "subfolder = os.path.join(GD_PATH, \"models\")\n", 204 | "sys.path.insert(0, subfolder)\n", 205 | "\n", 206 | "import FTUNetFormer_model" 207 | ] 208 | }, 209 | { 210 | "cell_type": "markdown", 211 | "metadata": { 212 | "id": "22hbANvfWxmX" 213 | }, 214 | "source": [ 215 | "### Training the segmentation model\n", 216 | "Below we append the paths for TRAIN/VAL/TEST sets - images/masks." 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": null, 222 | "metadata": { 223 | "colab": { 224 | "base_uri": "https://localhost:8080/" 225 | }, 226 | "id": "G2Jha-LCW0ir", 227 | "outputId": "fcad4c67-0851-42e6-ea68-6b6dd88d26c2" 228 | }, 229 | "outputs": [], 230 | "source": [ 231 | "from torch.nn import BCEWithLogitsLoss\n", 232 | "from torch.optim import Adam\n", 233 | "from torch.utils.data import DataLoader\n", 234 | "from imutils import paths\n", 235 | "import time\n", 236 | "\n", 237 | "# TRAINING\n", 238 | "train_images = sorted(list(paths.list_images(TRAIN_IMG_DIR)))\n", 239 | "train_masks = sorted(list(paths.list_images(TRAIN_MASK_DIR)))\n", 240 | "\n", 241 | "# VALIDATION\n", 242 | "val_images = sorted(list(paths.list_images(VAL_IMG_DIR)))\n", 243 | "val_masks = sorted(list(paths.list_images(VAL_MASK_DIR)))\n", 244 | "\n", 245 | "\n", 246 | "# TEST\n", 247 | "test_images = sorted(list(paths.list_images(TEST_IMG_DIR)))\n", 248 | "test_masks = sorted(list(paths.list_images(TEST_MASK_DIR)))" 249 | ] 250 | }, 251 | { 252 | "cell_type": "markdown", 253 | "metadata": { 254 | "id": "gtqUNGR1XCa5" 255 | }, 256 | "source": [ 257 | "### Define transformations\n", 258 | "\n", 259 | "I tried out different data augmentation techniques, including Horizontal Flip, Vertical Flip, Contrast, Brightness. They did not improve my results much, the validation and training loss were actually worse than without data augmentation techniques." 
260 | ] 261 | }, 262 | { 263 | "attachments": {}, 264 | "cell_type": "markdown", 265 | "metadata": { 266 | "id": "ghW7Nj0OEQMc" 267 | }, 268 | "source": [ 269 | "https://pytorch.org/vision/stable/auto_examples/plot_transforms.html#sphx-glr-auto-examples-plot-transforms-py\n", 270 | "\n", 271 | "https://albumentations.ai/docs/getting_started/mask_augmentation/" 272 | ] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "execution_count": null, 277 | "metadata": { 278 | "colab": { 279 | "base_uri": "https://localhost:8080/" 280 | }, 281 | "id": "WR_dzdpCXCHY", 282 | "outputId": "4e9b1681-2846-489f-edf6-a6240af65563" 283 | }, 284 | "outputs": [], 285 | "source": [ 286 | "import torchvision.transforms as T\n", 287 | "\n", 288 | "# T.RandomHorizontalFlip(p=0.5),\n", 289 | "# T.RandomVerticalFlip(p=0.1),\n", 290 | "\n", 291 | "# Image augmentations applied\n", 292 | "transforms = T.Compose([T.ToPILImage(),\n", 293 | " T.Resize((IMG_HEIGHT,IMG_WIDTH)),\n", 294 | " T.ToTensor()])\n", 295 | "\n", 296 | "# create the train and test datasets\n", 297 | "trainDS = mapAIdataset(imagePaths=train_images,\n", 298 | " maskPaths=train_masks,\n", 299 | " transforms=transforms)\n", 300 | "\n", 301 | "valDS = mapAIdataset(imagePaths=val_images,\n", 302 | " maskPaths=val_masks,\n", 303 | " transforms=transforms)\n", 304 | "\n", 305 | "testDS = mapAIdataset(imagePaths=test_images,\n", 306 | " maskPaths=test_masks,\n", 307 | " transforms=transforms)\n", 308 | "\n", 309 | "print(f\"[INFO] found {len(trainDS)} examples in the TRAINING set...\")\n", 310 | "print(f\"[INFO] found {len(valDS)} examples in the VALIDATION set...\")\n", 311 | "print(f\"[INFO] found {len(testDS)} examples in the TEST set...\")\n", 312 | "\n", 313 | "# create the training and test data loaders\n", 314 | "trainLoader = DataLoader(trainDS,\n", 315 | " shuffle=True,\n", 316 | " batch_size=BATCH_SIZE,\n", 317 | " pin_memory=PIN_MEMORY,\n", 318 | " num_workers=os.cpu_count())\n", 319 | "\n", 320 | "valLoader = DataLoader(valDS,\n", 321 | " shuffle=False,\n", 322 | " batch_size=BATCH_SIZE,\n", 323 | " pin_memory=PIN_MEMORY,\n", 324 | " num_workers=os.cpu_count())\n", 325 | "\n", 326 | "testLoader = DataLoader(testDS,\n", 327 | " shuffle=False,\n", 328 | " batch_size=BATCH_SIZE,\n", 329 | " pin_memory=PIN_MEMORY,\n", 330 | " num_workers=os.cpu_count())" 331 | ] 332 | }, 333 | { 334 | "cell_type": "markdown", 335 | "metadata": { 336 | "id": "tAO9M_R4XG6q" 337 | }, 338 | "source": [ 339 | "### Initialize UNET model for training\n", 340 | "\n", 341 | "Here we initialize the defined UNET model for training and calculate the steps per epoch for train/val/test set." 
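One detail worth flagging for the accuracy printed in the training loop further below: `BCEWithLogitsLoss` works on raw logits, so `(pred > 0.5)` thresholds logits rather than probabilities (a logit of 0.5 corresponds to a probability of roughly 0.62). A small helper that thresholds in probability space instead, not part of the original notebook:

```python
def binary_accuracy(logits, target, threshold=0.5):
    # Convert logits to probabilities first, then threshold at 0.5.
    probs = torch.sigmoid(logits)
    return ((probs > threshold) == (target > 0.5)).float().mean()
```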
342 | ] 343 | }, 344 | { 345 | "cell_type": "code", 346 | "execution_count": null, 347 | "metadata": { 348 | "colab": { 349 | "base_uri": "https://localhost:8080/" 350 | }, 351 | "id": "2IMsYzUaXJW7", 352 | "outputId": "5328f1e2-4a71-4c05-fa9e-23b768971315" 353 | }, 354 | "outputs": [], 355 | "source": [ 356 | "model = FTUNetFormer_model.FTUNetFormer().to(DEVICE)\n", 357 | "\n", 358 | "# loss / optimizer\n", 359 | "lossFunction = BCEWithLogitsLoss()\n", 360 | "opt = Adam(model.parameters(), lr=init_lr, weight_decay=0.001)\n", 361 | "\n", 362 | "# calculate steps per epoch for train/val/test\n", 363 | "trainSteps = len(trainDS) // BATCH_SIZE \n", 364 | "valSteps = len(valDS) // BATCH_SIZE\n", 365 | "testSteps = len(testDS) // BATCH_SIZE\n", 366 | "\n", 367 | "print(trainSteps, valSteps, testSteps)\n", 368 | "\n", 369 | "# initialize a dictionary to store training history\n", 370 | "H = {\"train_loss\": [], \"val_loss\": [], \"test_loss\": []}\n", 371 | "H" 372 | ] 373 | }, 374 | { 375 | "cell_type": "code", 376 | "execution_count": null, 377 | "metadata": { 378 | "id": "WEP-IVokbWQg" 379 | }, 380 | "outputs": [], 381 | "source": [ 382 | "torch.cuda.empty_cache() # PyTorch thing to empty cache" 383 | ] 384 | }, 385 | { 386 | "attachments": {}, 387 | "cell_type": "markdown", 388 | "metadata": { 389 | "id": "xcjuKhMeXLU-" 390 | }, 391 | "source": [ 392 | "### TRAINING THE MODEL\n", 393 | "\n", 394 | "Run this piece of code only if you want to train the model from scratch.\n", 395 | "\n", 396 | "Training locally: BATCH_SIZE = 2 takes 5035 MB of GPU memory.\n", 397 | "\n" 398 | ] 399 | }, 400 | { 401 | "cell_type": "code", 402 | "execution_count": null, 403 | "metadata": { 404 | "colab": { 405 | "base_uri": "https://localhost:8080/" 406 | }, 407 | "id": "vWuUyLUgXPNf", 408 | "outputId": "34c6485b-838b-45cf-99da-601f7cfbc4d1" 409 | }, 410 | "outputs": [], 411 | "source": [ 412 | "# loop over epochs\n", 413 | "print(\"[INFO] training UNET ...\")\n", 414 | "startTime = time.time()\n", 415 | "\n", 416 | "for epoch in tqdm(range(EPOCHS)):\n", 417 | " model.train()\n", 418 | "\n", 419 | " # initialize total training and validation loss\n", 420 | " totalTrainLoss = 0\n", 421 | " totalValLoss = 0\n", 422 | " totalTrainAcc = 0\n", 423 | " totalValAcc = 0\n", 424 | "\n", 425 | " # loop over the training set\n", 426 | " for (i, (x, y)) in enumerate(trainLoader):\n", 427 | " # send output to device\n", 428 | " (x, y) = (x.to(DEVICE), y.to(DEVICE))\n", 429 | "\n", 430 | " # perform a forward pass and calculate the training loss\n", 431 | " pred = model(x)\n", 432 | " loss = lossFunction(pred, y)\n", 433 | "\n", 434 | " # calculate the accuracy\n", 435 | " acc = ((pred > 0.5) == y).float().mean()\n", 436 | "\n", 437 | " # kill previously accumulated gradients then\n", 438 | " # perform backpropagation and update model parameters\n", 439 | " opt.zero_grad()\n", 440 | " loss.backward()\n", 441 | " opt.step()\n", 442 | "\n", 443 | " # add the loss and accuracy to the total training loss and accuracy\n", 444 | " totalTrainLoss += loss\n", 445 | " totalTrainAcc += acc\n", 446 | "\n", 447 | " # switch of autograd\n", 448 | " with torch.no_grad():\n", 449 | " # set the model in evaluation mode\n", 450 | " model.eval()\n", 451 | "\n", 452 | " # loop over the validation set\n", 453 | " for (x, y) in valLoader:\n", 454 | " # send the input to the device\n", 455 | " (x, y) = (x.to(DEVICE), y.to(DEVICE))\n", 456 | "\n", 457 | " # make the predictions and calculate the validation loss\n", 458 | " pred = model(x)\n", 459 | 
" loss = lossFunction(pred, y)\n", 460 | "\n", 461 | " # calculate the accuracy\n", 462 | " acc = ((pred > 0.5) == y).float().mean()\n", 463 | "\n", 464 | " # add the loss and accuracy to the total validation loss and accuracy\n", 465 | " totalValLoss += loss\n", 466 | " totalValAcc += acc\n", 467 | "\n", 468 | " # calculate the average training and validation loss and accuracy\n", 469 | " avgTrainLoss = totalTrainLoss / trainSteps\n", 470 | " avgValLoss = totalValLoss / valSteps\n", 471 | " avgTrainAcc = totalTrainAcc / trainSteps\n", 472 | " avgValAcc = totalValAcc / valSteps\n", 473 | " \n", 474 | " # update our training history\n", 475 | " H[\"train_loss\"].append(avgTrainLoss.cpu().detach().numpy())\n", 476 | " H[\"val_loss\"].append(avgValLoss.cpu().detach().numpy())\n", 477 | "\n", 478 | " # print the model training and validation information\n", 479 | " print(\"[INFO] EPOCH: {}/{}\".format(epoch + 1, EPOCHS))\n", 480 | " print(\"Train loss: {:.6f}, Train acc: {:.6f}, Val loss: {:.4f}, Val acc: {:.4f}\".format(\n", 481 | " avgTrainLoss, avgTrainAcc, avgValLoss, avgValAcc))\n", 482 | " \n", 483 | "# display the total time needed to perform the training\n", 484 | "endTime = time.time()\n", 485 | "print(\"[INFO] total time taken to train the model: {:.2f}s\".format(endTime - startTime))\n", 486 | " " 487 | ] 488 | }, 489 | { 490 | "attachments": {}, 491 | "cell_type": "markdown", 492 | "metadata": {}, 493 | "source": [ 494 | "Train loss: 0.001194, Val loss: 0.0013\n", 495 | "[INFO] total time taken to train the model: 27115.38s" 496 | ] 497 | }, 498 | { 499 | "cell_type": "code", 500 | "execution_count": null, 501 | "metadata": { 502 | "colab": { 503 | "base_uri": "https://localhost:8080/" 504 | }, 505 | "id": "CsJoOVn11rs9", 506 | "outputId": "c9a36460-f773-4771-cfd7-fec78711d8cc" 507 | }, 508 | "outputs": [], 509 | "source": [ 510 | "H # show traning/val loss history" 511 | ] 512 | }, 513 | { 514 | "cell_type": "markdown", 515 | "metadata": { 516 | "id": "U6ChLXHuXZHA" 517 | }, 518 | "source": [ 519 | "### Plot the training and validation loss" 520 | ] 521 | }, 522 | { 523 | "cell_type": "code", 524 | "execution_count": null, 525 | "metadata": { 526 | "colab": { 527 | "base_uri": "https://localhost:8080/", 528 | "height": 316 529 | }, 530 | "id": "j04HfubrXYvX", 531 | "outputId": "5416f4a7-2647-40e9-ce25-cac7909dea50" 532 | }, 533 | "outputs": [], 534 | "source": [ 535 | "# plot the training loss\n", 536 | "print(MODEL_PATH)\n", 537 | "print(PLOT_PATH)\n", 538 | "\n", 539 | "\n", 540 | "plt.style.use(\"ggplot\")\n", 541 | "plt.figure()\n", 542 | "plt.plot(H[\"train_loss\"], label=\"train_loss\")\n", 543 | "plt.plot(H[\"val_loss\"], label=\"val_loss\")\n", 544 | "plt.title(\"Training Loss on Dataset\")\n", 545 | "plt.xlabel(\"Epoch #\")\n", 546 | "plt.ylabel(\"Loss\")\n", 547 | "plt.legend(loc=\"lower left\")\n", 548 | "plt.savefig(PLOT_PATH)\n", 549 | "# serialize the model to disk\n", 550 | "torch.save(model, MODEL_PATH) # saves the model" 551 | ] 552 | }, 553 | { 554 | "attachments": {}, 555 | "cell_type": "markdown", 556 | "metadata": { 557 | "id": "5Y6Fx2oaWr0q" 558 | }, 559 | "source": [ 560 | "### Prediction part\n", 561 | "\n", 562 | "Here the trained model is loaded and use for prediction on test images." 
563 | ] 564 | }, 565 | { 566 | "cell_type": "code", 567 | "execution_count": null, 568 | "metadata": { 569 | "colab": { 570 | "base_uri": "https://localhost:8080/" 571 | }, 572 | "id": "qYh4flMu7O-m", 573 | "outputId": "94909220-5b1f-43ad-b52f-d37bbaa270fd" 574 | }, 575 | "outputs": [], 576 | "source": [ 577 | "# Load saved model for prediction\n", 578 | "\n", 579 | "print(MODEL_PATH)\n", 580 | "\n", 581 | "model = torch.load(MODEL_PATH) # add MODEL_PATH after training\n", 582 | "print(\"model loaded for prediction\")\n", 583 | "\n", 584 | "model" 585 | ] 586 | }, 587 | { 588 | "attachments": {}, 589 | "cell_type": "markdown", 590 | "metadata": {}, 591 | "source": [ 592 | "#### Provide test images for MapAI Dataset" 593 | ] 594 | }, 595 | { 596 | "cell_type": "code", 597 | "execution_count": null, 598 | "metadata": {}, 599 | "outputs": [], 600 | "source": [ 601 | "PREDICTIONS_DIR = os.path.join(GD_PATH, \"predictions\")\n", 602 | "PREDICTIONS_DIR" 603 | ] 604 | }, 605 | { 606 | "attachments": {}, 607 | "cell_type": "markdown", 608 | "metadata": {}, 609 | "source": [ 610 | "#### Make predictions on the entire MapAI dataset\n", 611 | "\n", 612 | "Make predictions on test images and save them to the folder named predictions." 613 | ] 614 | }, 615 | { 616 | "cell_type": "code", 617 | "execution_count": null, 618 | "metadata": {}, 619 | "outputs": [], 620 | "source": [ 621 | "import random\n", 622 | "import gc\n", 623 | "from pathlib import Path\n", 624 | "import numpy as np\n", 625 | "from PIL import Image\n", 626 | "\n", 627 | "# PLOTTING PREDICTIONS AS SINGLE IMAGES\n", 628 | "\n", 629 | "# Output folder for the predictions\n", 630 | "output_folder = PREDICTIONS_DIR + \"/\" # check for Windows to save predictions inside the folder\n", 631 | "\n", 632 | "# PLOT TEST IMAGES as RGB\n", 633 | "for n in range(len(test_images)):\n", 634 | " gc.collect()\n", 635 | " # Test image number\n", 636 | " testImgName = str(Path(test_images[n]).stem) + '.tif'\n", 637 | " #print('#', testImgName)\n", 638 | "\n", 639 | " # Make predicton on a test image specified with counter n\n", 640 | " test_img = test_images[n]\n", 641 | " test_img_input = np.expand_dims(test_img, 0)\n", 642 | " #print('#', test_img_input[0])\n", 643 | "\n", 644 | " # PyTorch --> works\n", 645 | " model.eval()\n", 646 | " with torch.no_grad():\n", 647 | " image = cv2.imread(test_img_input[0])\n", 648 | " image = cv2.resize(image, dsize = (IMG_WIDTH, IMG_HEIGHT), interpolation=cv2.INTER_CUBIC)\n", 649 | " image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)\n", 650 | " image = image.astype(\"float32\") / 255\n", 651 | " \n", 652 | " # print('SIZE: ', image.shape)\n", 653 | "\n", 654 | " # make the channel axis to be the leading one, add batch dimension\n", 655 | " image = np.transpose(image, (2, 0, 1))\n", 656 | " # create a PyTorch tensor\n", 657 | " image = np.expand_dims(image, 0)\n", 658 | " # flash the tensor to the device\n", 659 | " image = torch.from_numpy(image).to(DEVICE)\n", 660 | "\n", 661 | " # make the prediction\n", 662 | " predMask = model(image).squeeze()\n", 663 | " # pass result through sigmoid\n", 664 | " predMask = torch.sigmoid(predMask)\n", 665 | "\n", 666 | " # convert result to numpy array\n", 667 | " predMask = predMask.cpu().numpy()\n", 668 | "\n", 669 | " # filter out the weak predictions and convert them to integers\n", 670 | " predMask = (predMask > THRESHOLD) * 255\n", 671 | " predMask = predMask.astype(np.uint8)\n", 672 | "\n", 673 | " # generate image from array\n", 674 | " pIMG = Image.fromarray(predMask)\n", 
675 | " pIMG.save(str(output_folder + testImgName))\n", 676 | "\n", 677 | " print('Prediction:', testImgName, 'saved to:', output_folder)" 678 | ] 679 | }, 680 | { 681 | "attachments": {}, 682 | "cell_type": "markdown", 683 | "metadata": {}, 684 | "source": [ 685 | "#### Make predictions on single images by choice\n", 686 | "\n", 687 | "Change the parameter n to choose which image to plot." 688 | ] 689 | }, 690 | { 691 | "cell_type": "code", 692 | "execution_count": null, 693 | "metadata": { 694 | "colab": { 695 | "base_uri": "https://localhost:8080/" 696 | }, 697 | "id": "bq7BlbdrcgPB", 698 | "outputId": "6860afd9-da51-4911-d975-5a3ed78e01e1" 699 | }, 700 | "outputs": [], 701 | "source": [ 702 | "# ----------------------------------------------------------------------\n", 703 | "output_folder = PREDICTIONS_DIR + \"/\"\n", 704 | "predictions = glob.glob(output_folder + \"*.tif\")\n", 705 | "predictions.sort()\n", 706 | "print(\"# IMAGES for prediction: \", len(predictions))\n", 707 | "print(\"Choosen n can be from 0 to 1367! \")\n", 708 | "\n", 709 | "# ----------------------------------------------------------------------\n", 710 | "\n", 711 | "n = 900 # change this number depending on which image you want to test\n", 712 | "\n", 713 | "fig = plt.figure(figsize=(18,12))\n", 714 | "ax1 = fig.add_subplot(131)\n", 715 | "\n", 716 | "ax1.set_title('RGB image: ')\n", 717 | "image = cv2.imread(test_images[n])[:,:,::-1]\n", 718 | "ax1.imshow(image)\n", 719 | "ax1.set_axis_off()\n", 720 | "\n", 721 | "ax2 = fig.add_subplot(132)\n", 722 | "ax2.set_title('Ground truth: ')\n", 723 | "image = cv2.imread(test_masks[n])[:,:,::-1]\n", 724 | "image *= 255\n", 725 | "ax2.imshow(image)\n", 726 | "ax2.set_axis_off()\n", 727 | "\n", 728 | "ax3 = fig.add_subplot(133)\n", 729 | "ax3.set_title('Prediction: ')\n", 730 | "image = cv2.imread(predictions[n])[:,:,::-1]\n", 731 | "ax3.imshow(image)\n", 732 | "ax3.set_axis_off()" 733 | ] 734 | }, 735 | { 736 | "attachments": {}, 737 | "cell_type": "markdown", 738 | "metadata": {}, 739 | "source": [ 740 | "### BUILDING FOOTPRINT REGULARIZATION\n", 741 | "\n", 742 | "Used repo: https://github.com/zorzi-s/projectRegularization\n", 743 | "\n", 744 | "git clone the repo to the folder where your notebook is stored. To get curent working directory use os.getcwd().\n", 745 | "\n", 746 | "The pretrained weights need to be downloaded from the provided link and saved into the folder pretrained_weighs that is inside projectRegularization:\n", 747 | "\n", 748 | "https://drive.google.com/drive/folders/1IPrDpvFq9ODW7UtPAJR_T-gGzxDat_uu\n", 749 | "\n", 750 | "Next step is to generate a Python file to locate the necessary pretrained weights from projectRegularization. The code below was only tested on Ubuntu, not on Windows." 
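Before wiring up the regularization, it can save time to check that the cloned repository and the downloaded pretrained weights sit where the following cells expect them. A minimal sketch (the folder layout and the weight file names `E140000_e1` / `E140000_net` are taken from the cells below; adjust them if your download differs):

```python
# Sketch: sanity-check the projectRegularization setup before using it.
import os
from pathlib import Path

repo_dir = Path(os.getcwd()) / "projectRegularization"   # cloned repository
weights_dir = repo_dir / "pretrained_weights"             # downloaded pretrained weights

for name in ["E140000_e1", "E140000_net"]:                # encoder / generator checkpoints
    path = weights_dir / name
    print(path, "->", "OK" if path.exists() else "MISSING")
```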
751 | ] 752 | }, 753 | { 754 | "cell_type": "code", 755 | "execution_count": null, 756 | "metadata": {}, 757 | "outputs": [], 758 | "source": [ 759 | "# DEFINE NECESSARY PATHS FOR REGULARIZATION PART\n", 760 | "\n", 761 | "projectRegDir = os.path.join(GD_PATH, \"projectRegularization\")\n", 762 | "print(projectRegDir)\n", 763 | "\n", 764 | "ptw = os.path.join(projectRegDir, \"pretrained_weights\") \n", 765 | "print(ptw)\n", 766 | "\n", 767 | "# OUTPUT REGULARIZATIONS DIR\n", 768 | "REGULARIZATION_DIR = os.path.join(GD_PATH, \"regularizations\") + \"/\"\n", 769 | "print(REGULARIZATION_DIR)\n", 770 | "\n", 771 | "# GET THE PATHS FOR TRAINED GAN MODELS\n", 772 | "ENCODER = os.path.join(ptw, \"E140000_e1\")\n", 773 | "GENERATOR = os.path.join(ptw, \"E140000_net\")\n", 774 | "\n", 775 | "print(ENCODER)\n", 776 | "print(GENERATOR)" 777 | ] 778 | }, 779 | { 780 | "cell_type": "code", 781 | "execution_count": null, 782 | "metadata": {}, 783 | "outputs": [], 784 | "source": [ 785 | "# CREATE A NEW variables.py WITH USERS PATHS\n", 786 | "\n", 787 | "with open(projectRegDir + 'variables.py', 'w') as f:\n", 788 | " f.write('# CONFIGURE THE PATHS HERE: \\n\\n')\n", 789 | " f.write('# TRAINING \\n')\n", 790 | " f.write('DATASET_RGB = ' + '\"' + str(TRAIN_IMG_DIR + '*.tif' + '\"') + '\\n')\n", 791 | " f.write('DATASET_GTI = ' + '\"' + str(TRAIN_MASK_DIR + '*.tif' + '\"') + '\\n')\n", 792 | " f.write('DATASET_SEG = ' + '\"' + str(PREDICTIONS_DIR + '*.tif' + '\"') + '\\n')\n", 793 | " f.write('\\n')\n", 794 | " f.write('DEBUG_DIR = ' + '\"' + str('./debug/') + '\"' + '\\n')\n", 795 | " f.write('\\n')\n", 796 | " f.write('# INFERENCE \\n')\n", 797 | " f.write('INF_RGB = ' + '\"' + str(TEST_IMG_DIR + '*.tif' + '\"') + '\\n')\n", 798 | " f.write('INF_SEG = ' + '\"' + str(PREDICTIONS_DIR + '*.tif' + '\"') + '\\n')\n", 799 | " f.write('INF_OUT = ' + '\"' + str(REGULARIZATION_DIR + '\"') + '\\n')\n", 800 | " f.write('\\n')\n", 801 | " f.write('MODEL_ENCODER = ' + '\"' + str(ENCODER) + '\"' + '\\n')\n", 802 | " f.write('MODEL_GENERATOR = ' + '\"' + str(GENERATOR) + '\"' + '\\n')\n", 803 | " f.close()\n", 804 | " \n", 805 | "print(\"variables.py created with users paths...\")\n" 806 | ] 807 | }, 808 | { 809 | "attachments": {}, 810 | "cell_type": "markdown", 811 | "metadata": {}, 812 | "source": [ 813 | "#### Run projectRegularization\n", 814 | "\n", 815 | "Takes around 6-8 minutes.\n", 816 | "\n", 817 | "You only need to change the command below and replace it with the absolute path for regularize.py" 818 | ] 819 | }, 820 | { 821 | "cell_type": "code", 822 | "execution_count": null, 823 | "metadata": {}, 824 | "outputs": [], 825 | "source": [ 826 | "!python /home/shymon/Documents/mapAI-regularization/projectRegularization/regularize.py" 827 | ] 828 | }, 829 | { 830 | "attachments": {}, 831 | "cell_type": "markdown", 832 | "metadata": {}, 833 | "source": [ 834 | "### Compare predictions and regularizations on a single image" 835 | ] 836 | }, 837 | { 838 | "cell_type": "code", 839 | "execution_count": null, 840 | "metadata": {}, 841 | "outputs": [], 842 | "source": [ 843 | "# Read Regularizations to plot and compare results\n", 844 | "\n", 845 | "regularizations = glob.glob(REGULARIZATION_DIR + \"*.tif\")\n", 846 | "regularizations.sort()\n", 847 | "\n", 848 | "print(\"# of predicted images: \", len(predictions))\n", 849 | "print(\"# of regularized images: \", len(regularizations))" 850 | ] 851 | }, 852 | { 853 | "attachments": {}, 854 | "cell_type": "markdown", 855 | "metadata": {}, 856 | "source": [ 857 | "Code 
to plot RGB, GT, PREDICTION and REGULARIZATION in a single plot for comparison.\n", 858 | "\n", 859 | "Change parameter n accordingly." 860 | ] 861 | }, 862 | { 863 | "cell_type": "code", 864 | "execution_count": null, 865 | "metadata": {}, 866 | "outputs": [], 867 | "source": [ 868 | "n = 600\n", 869 | "\n", 870 | "fig = plt.figure(figsize=(18,12))\n", 871 | "ax1 = fig.add_subplot(141)\n", 872 | "\n", 873 | "ax1.set_title('RGB: ')\n", 874 | "image = cv2.imread(test_images[n])[:,:,::-1]\n", 875 | "ax1.imshow(image)\n", 876 | "ax1.set_axis_off()\n", 877 | "\n", 878 | "ax2 = fig.add_subplot(142)\n", 879 | "ax2.set_title('Ground truth: ')\n", 880 | "image = cv2.imread(test_masks[n])[:,:,::-1]\n", 881 | "image *= 255\n", 882 | "ax2.imshow(image)\n", 883 | "ax2.set_axis_off()\n", 884 | "\n", 885 | "ax3 = fig.add_subplot(143)\n", 886 | "ax3.set_title('Prediction: ')\n", 887 | "image = cv2.imread(predictions[n])[:,:,::-1]\n", 888 | "ax3.imshow(image)\n", 889 | "ax3.set_axis_off()\n", 890 | "\n", 891 | "ax4 = fig.add_subplot(144)\n", 892 | "ax4.set_title('Regularization: ')\n", 893 | "image = cv2.imread(regularizations[n])[:,:,::-1]\n", 894 | "ax4.imshow(image)\n", 895 | "ax4.set_axis_off()\n", 896 | "\n", 897 | "# DEFINE PATH FOR PLOTS TO BE SAVED\n", 898 | "figPath = GD_PATH + \"/\" + \"plots\" + \"/\" \"compare-\" + str(n) + \".png\"\n", 899 | "print(figPath)\n", 900 | "\n", 901 | "# Save plot\n", 902 | "fig.savefig(figPath)" 903 | ] 904 | }, 905 | { 906 | "attachments": {}, 907 | "cell_type": "markdown", 908 | "metadata": {}, 909 | "source": [ 910 | "### VECTORIZING THE REGULARIZED BUILDING MASKS with GDAL\n", 911 | "\n", 912 | "GDAL: https://gdal.org/'\n", 913 | "\n", 914 | "GDAL: https://www.youtube.com/watch?v=q3DLdMj5zLA\n", 915 | "\n", 916 | "I do not know if it is possible to install GDAL on WINDOWS inside a conda environment.\n", 917 | "\n", 918 | "On Ubuntu you have to follow these steps:\n", 919 | "\n", 920 | "\n", 921 | "\n", 922 | "Specific process for installation: https://stackoverflow.com/questions/44005694/no-module-named-gdal\n", 923 | "\n", 924 | "- sudo apt-get update && sudo apt upgrade -y && sudo apt autoremove \n", 925 | "- sudo apt-get install -y cdo nco gdal-bin libgdal-dev-\n", 926 | "- python -m pip install --upgrade pip setuptools wheel\n", 927 | "- python -m pip install --upgrade gdal\n", 928 | "- conda install -c conda forge libgdal\n", 929 | "- conda install -c conda-forge libgdal\n", 930 | "- conda install -c conda-forge gdal\n", 931 | "- conda install tiledb=2.2\n", 932 | "- conda install poppler\n", 933 | "\n", 934 | "When you have this you can hopefully vectorize the detected masks quite easily." 
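Once GDAL is installed, a quick check from Python confirms that the bindings import correctly and that the GeoPackage driver used further down in this notebook is available. A minimal sketch:

```python
# Sketch: verify the GDAL/OGR bindings and the GPKG driver used below.
from osgeo import gdal, ogr

print("GDAL version:", gdal.__version__)
print("GPKG driver available:", ogr.GetDriverByName("GPKG") is not None)
```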
935 | ] 936 | }, 937 | { 938 | "cell_type": "code", 939 | "execution_count": null, 940 | "metadata": {}, 941 | "outputs": [], 942 | "source": [ 943 | "def get_filename_from_path(path):\n", 944 | " \"\"\"\n", 945 | " Given a path, returns the filename after the last frontslash character.\n", 946 | " \"\"\"\n", 947 | " return path.rsplit('/', 1)[-1]\n", 948 | "\n", 949 | "def get_fname_no_extension(path):\n", 950 | " \"\"\"\n", 951 | " Given a path, returns the filename without its extension.\n", 952 | " \"\"\"\n", 953 | " filename, extension = os.path.splitext(path)\n", 954 | " return filename" 955 | ] 956 | }, 957 | { 958 | "cell_type": "code", 959 | "execution_count": null, 960 | "metadata": {}, 961 | "outputs": [], 962 | "source": [ 963 | "import osgeo\n", 964 | "from osgeo import gdal\n", 965 | "from osgeo import ogr\n", 966 | "print('GDAL version: ', osgeo.gdal.__version__)\n", 967 | "\n", 968 | "# Choose which image to vectorize\n", 969 | "n = 0\n", 970 | "\n", 971 | "input = regularizations[n]\n", 972 | "print()\n", 973 | "print(\"INPUT: \", input)\n", 974 | "\n", 975 | "# print(get_fname_no_extension(input))\n", 976 | "\n", 977 | "# out\n", 978 | "output = get_filename_from_path(get_fname_no_extension(input)) + \".gpkg\"\n", 979 | "print(\"OUTPUT: \", output)\n", 980 | "\n", 981 | "# Open image with GDAl driver\n", 982 | "ds = gdal.Open(input)\n", 983 | "# Get the band\n", 984 | "band = ds.GetRasterBand(1)\n", 985 | "\n", 986 | "# Create the output shapefile\n", 987 | "driver = ogr.GetDriverByName(\"GPKG\")\n", 988 | "out_ds = driver.CreateDataSource(output)\n", 989 | "out_layer = out_ds.CreateLayer(output, geom_type=ogr.wkbPolygon)\n", 990 | "\n", 991 | "# Add a field to the layer to store the pixel values\n", 992 | "field_defn = ogr.FieldDefn(\"Pix_Value\", ogr.OFTInteger)\n", 993 | "out_layer.CreateField(field_defn)\n", 994 | "\n", 995 | "# Polygonize the PNG file\n", 996 | "gdal.Polygonize(band, None, out_layer, 0, [], callback=None)\n", 997 | "\n", 998 | "# Close the input and output files\n", 999 | "out_ds = None\n", 1000 | "ds = None" 1001 | ] 1002 | }, 1003 | { 1004 | "attachments": {}, 1005 | "cell_type": "markdown", 1006 | "metadata": {}, 1007 | "source": [ 1008 | "For the builing detection case we need to only keep the vectors with pixel value 255. Easiest solution is to use: Extract by attribute. The Python solution with GDAL can be found below." 
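The ogr2ogr command in the next cell can also be run without leaving Python by using GDAL's `VectorTranslate` utility, which accepts the same attribute filter. A minimal sketch (the file names are just the example used in the next cell, and a `VectorTranslate` call with a `where` clause is assumed to be available in your GDAL build):

```python
# Sketch: keep only the polygons with Pix_Value = 255, the Python counterpart
# of the ogr2ogr command in the next cell.
from osgeo import gdal

src = "bergen_-5943_1104.gpkg"    # polygonized output from the cell above
dst = "bergen_-5943_1104B.gpkg"   # filtered result

gdal.VectorTranslate(dst, src, where="Pix_Value = 255")
```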
1009 | ] 1010 | }, 1011 | { 1012 | "cell_type": "code", 1013 | "execution_count": null, 1014 | "metadata": {}, 1015 | "outputs": [], 1016 | "source": [ 1017 | "# ogr2ogr -where ID=\"1\" OUTFILE.gpkg INFILE.gpkg\n", 1018 | "\n", 1019 | "# RUN from the command line inside Ubuntu\n", 1020 | "# Change name of input and output according to user needs\n", 1021 | "\n", 1022 | "!ogr2ogr -where Pix_Value=\"255\" bergen_-5943_1104B.gpkg bergen_-5943_1104.gpkg" 1023 | ] 1024 | } 1025 | ], 1026 | "metadata": { 1027 | "accelerator": "TPU", 1028 | "colab": { 1029 | "provenance": [] 1030 | }, 1031 | "gpuClass": "premium", 1032 | "kernelspec": { 1033 | "display_name": "torch", 1034 | "language": "python", 1035 | "name": "python3" 1036 | }, 1037 | "language_info": { 1038 | "codemirror_mode": { 1039 | "name": "ipython", 1040 | "version": 3 1041 | }, 1042 | "file_extension": ".py", 1043 | "mimetype": "text/x-python", 1044 | "name": "python", 1045 | "nbconvert_exporter": "python", 1046 | "pygments_lexer": "ipython3", 1047 | "version": "3.10.9" 1048 | }, 1049 | "vscode": { 1050 | "interpreter": { 1051 | "hash": "5614dd747bc595cf94d4c937a609d8df6c75b545807dd5ca7f02df8b67f4ea7c" 1052 | } 1053 | } 1054 | }, 1055 | "nbformat": 4, 1056 | "nbformat_minor": 0 1057 | } 1058 | -------------------------------------------------------------------------------- /05-mapai-dcswin-regularization.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": { 7 | "id": "1KJnP7zSGQg5" 8 | }, 9 | "source": [ 10 | "## Binary semantic segmentation example using DCSwin" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "metadata": { 17 | "colab": { 18 | "base_uri": "https://localhost:8080/" 19 | }, 20 | "id": "cFxHJWmXlcZk", 21 | "outputId": "b755d1a8-3650-42d9-8718-401b4458f049" 22 | }, 23 | "outputs": [], 24 | "source": [ 25 | "import os\n", 26 | "import glob\n", 27 | "import matplotlib.pyplot as plt\n", 28 | "\n", 29 | "import torch\n", 30 | "import torchvision\n", 31 | "from tqdm import tqdm\n", 32 | "\n", 33 | "print(torch.__version__)\n", 34 | "print(torchvision.__version__)\n", 35 | "\n", 36 | "DEVICE = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n", 37 | "print(DEVICE)\n", 38 | "\n", 39 | "# determine if we will be pinning memory during data loading\n", 40 | "PIN_MEMORY = True if DEVICE == \"cuda\" else False" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": { 46 | "id": "KVfaGZrWG63Q" 47 | }, 48 | "source": [ 49 | "#### CONFIGURE YOUR PATHS AND HYPERPARAMETERS FOR TRAINING BELOW." 
50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": null, 55 | "metadata": { 56 | "colab": { 57 | "base_uri": "https://localhost:8080/", 58 | "height": 235 59 | }, 60 | "id": "OjlBC-raVM2K", 61 | "outputId": "b6950142-9c43-40bd-9edb-e10b3973f745" 62 | }, 63 | "outputs": [], 64 | "source": [ 65 | "GD_PATH = os.getcwd() # get current working directory for the repo\n", 66 | "print(GD_PATH)\n", 67 | "\n", 68 | "# PROVIDE PATH TO DOWNLOADED MAPAI DATASET\n", 69 | "DATASET_PATH = \"/home/shymon/datasets/\"\n", 70 | "\n", 71 | "DATASET_PATH = os.path.join(DATASET_PATH, \"mapai_full\") # create dataset path\n", 72 | "\n", 73 | "print(DATASET_PATH)\n", 74 | "\n", 75 | "TRAIN_IMG_DIR = os.path.join(DATASET_PATH, \"train\", \"images\")\n", 76 | "TRAIN_MASK_DIR = os.path.join(DATASET_PATH, \"train\", \"masks\")\n", 77 | "\n", 78 | "print(TRAIN_IMG_DIR)\n", 79 | "print(TRAIN_MASK_DIR)\n", 80 | "\n", 81 | "VAL_IMG_DIR = os.path.join(DATASET_PATH, \"validation\", \"images\")\n", 82 | "VAL_MASK_DIR = os.path.join(DATASET_PATH, \"validation\", \"masks\")\n", 83 | "\n", 84 | "print(VAL_IMG_DIR)\n", 85 | "print(VAL_MASK_DIR)\n", 86 | "\n", 87 | "TEST_IMG_DIR = os.path.join(DATASET_PATH, \"task1_test\", \"images\")\n", 88 | "TEST_MASK_DIR = os.path.join(DATASET_PATH, \"task1_test\", \"masks\")\n", 89 | "\n", 90 | "print(TEST_IMG_DIR)\n", 91 | "print(TEST_MASK_DIR)\n", 92 | "\n", 93 | "# CONFIGURE MapAI DATASET\n", 94 | "NUM_CHANNELS = 3\n", 95 | "NUM_LEVELS = 3\n", 96 | "NUM_CLASSES = 1\n", 97 | "\n", 98 | "# IMAGE SHAPE\n", 99 | "IMG_WIDTH = 512\n", 100 | "IMG_HEIGHT = 512\n", 101 | "\n", 102 | "#---------------------------------------------------------------------------------------------------#\n", 103 | "\n", 104 | "# CONFIGURE parameters for training\n", 105 | "\n", 106 | "EPOCHS = 25\n", 107 | "init_lr = 1e-4 # learning rate\n", 108 | "BATCH_SIZE = 2\n", 109 | "\n", 110 | "THRESHOLD = 0.5\n", 111 | "base_output = \"out\"\n", 112 | "\n", 113 | "model_name = \"dcswin-25-epochs.pth\" # provide name for model\n", 114 | "training_plot_name = \"dcswin-25-epochs.png\"\n", 115 | "\n", 116 | "#---------------------------------------------------------------------------------------------------#\n", 117 | "\n", 118 | "# OUTPUT PATHS\n", 119 | "\n", 120 | "# Trained model path\n", 121 | "MODEL_PATH = os.path.join(GD_PATH, \"trained_models\", model_name) # change depending on the number of epochs\n", 122 | "print(MODEL_PATH)\n", 123 | "PLOT_PATH = os.path.join(GD_PATH, \"plots\", training_plot_name) # the folder to save future plots\n", 124 | "print(PLOT_PATH)" 125 | ] 126 | }, 127 | { 128 | "cell_type": "markdown", 129 | "metadata": { 130 | "id": "IfSMUZbWWdJn" 131 | }, 132 | "source": [ 133 | "### Load and read the MapAI dataset" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": null, 139 | "metadata": { 140 | "id": "TPiACQ_6VyQP" 141 | }, 142 | "outputs": [], 143 | "source": [ 144 | "import tifffile\n", 145 | "from torch.utils.data import Dataset\n", 146 | "import cv2\n", 147 | "\n", 148 | "\n", 149 | "class mapAIdataset(Dataset):\n", 150 | " def __init__(self, imagePaths, maskPaths, transforms):\n", 151 | " # store the image and mask filepaths, and augmentation\n", 152 | " # transforms\n", 153 | " self.imagePaths = imagePaths\n", 154 | " self.maskPaths = maskPaths\n", 155 | " self.transforms = transforms\n", 156 | " \n", 157 | " def __len__(self):\n", 158 | " # return the number of total samples contained in the dataset\n", 159 | " return len(self.imagePaths)\n", 160 
| " \n", 161 | " def __getitem__(self, idx):\n", 162 | " # grab the image path from the current index\n", 163 | " imagePath = self.imagePaths[idx]\n", 164 | " # load the image from disk, swap its channels from BGR to RGB,\n", 165 | " # and read the associated mask from disk\n", 166 | " image = cv2.imread(imagePath)\n", 167 | " image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)\n", 168 | " mask = tifffile.imread(self.maskPaths[idx])\n", 169 | " # convert the mask to a float32 tensor with values in the range [0, 1]\n", 170 | " mask = mask.astype('float32')\n", 171 | " # check to see if we are applying any transformations\n", 172 | " if self.transforms is not None:\n", 173 | " # apply the transformations to both image and its mask\n", 174 | " image = self.transforms(image)\n", 175 | " mask = self.transforms(mask)\n", 176 | " \n", 177 | " # return a tuple of the image and its mask\n", 178 | " return (image, mask)" 179 | ] 180 | }, 181 | { 182 | "attachments": {}, 183 | "cell_type": "markdown", 184 | "metadata": { 185 | "id": "AKXL9bO8WnNg" 186 | }, 187 | "source": [ 188 | "### Build DCSWIN architecture\n", 189 | "Downloaded from: https://github.com/WangLibo1995/GeoSeg/blob/main/geoseg/models/FTUNetFormer.py code changed for binary semantic segmentation." 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": null, 195 | "metadata": { 196 | "id": "9urE3W1iWp7v" 197 | }, 198 | "outputs": [], 199 | "source": [ 200 | "import sys\n", 201 | "subfolder = os.path.join(GD_PATH, \"models\")\n", 202 | "sys.path.insert(0, subfolder)\n", 203 | "\n", 204 | "import DCSwin_model" 205 | ] 206 | }, 207 | { 208 | "cell_type": "markdown", 209 | "metadata": { 210 | "id": "22hbANvfWxmX" 211 | }, 212 | "source": [ 213 | "### Training the segmentation model\n", 214 | "Below we append the paths for TRAIN/VAL/TEST sets - images/masks." 215 | ] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "execution_count": null, 220 | "metadata": { 221 | "colab": { 222 | "base_uri": "https://localhost:8080/" 223 | }, 224 | "id": "G2Jha-LCW0ir", 225 | "outputId": "fcad4c67-0851-42e6-ea68-6b6dd88d26c2" 226 | }, 227 | "outputs": [], 228 | "source": [ 229 | "from torch.nn import BCEWithLogitsLoss\n", 230 | "from torch.optim import Adam\n", 231 | "from torch.utils.data import DataLoader\n", 232 | "from imutils import paths\n", 233 | "import time\n", 234 | "\n", 235 | "# TRAINING\n", 236 | "train_images = sorted(list(paths.list_images(TRAIN_IMG_DIR)))\n", 237 | "train_masks = sorted(list(paths.list_images(TRAIN_MASK_DIR)))\n", 238 | "\n", 239 | "# VALIDATION\n", 240 | "val_images = sorted(list(paths.list_images(VAL_IMG_DIR)))\n", 241 | "val_masks = sorted(list(paths.list_images(VAL_MASK_DIR)))\n", 242 | "\n", 243 | "\n", 244 | "# TEST\n", 245 | "test_images = sorted(list(paths.list_images(TEST_IMG_DIR)))\n", 246 | "test_masks = sorted(list(paths.list_images(TEST_MASK_DIR)))" 247 | ] 248 | }, 249 | { 250 | "cell_type": "markdown", 251 | "metadata": { 252 | "id": "gtqUNGR1XCa5" 253 | }, 254 | "source": [ 255 | "### Define transformations\n", 256 | "\n", 257 | "I tried out different data augmentation techniques, including Horizontal Flip, Vertical Flip, Contrast, Brightness. They did not improve my results much, the validation and training loss were actually worse than without data augmentation techniques." 
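One plausible reason the random flips did not help is that the `mapAIdataset` class above applies `self.transforms` to the image and the mask in two separate calls, so a random flip can hit one without the other. If augmentation is revisited, the random decision has to be shared by the pair; a minimal sketch using `torchvision.transforms.functional` (a suggested pattern, not part of the current pipeline):

```python
# Sketch: apply the SAME random horizontal flip to image and mask, so the
# pair stays aligned (independently drawn random transforms would desynchronize them).
import random
import torchvision.transforms.functional as TF

def paired_hflip(image, mask, p=0.5):
    """Flip image and mask together with probability p."""
    if random.random() < p:
        image = TF.hflip(image)
        mask = TF.hflip(mask)
    return image, mask
```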
258 | ] 259 | }, 260 | { 261 | "attachments": {}, 262 | "cell_type": "markdown", 263 | "metadata": { 264 | "id": "ghW7Nj0OEQMc" 265 | }, 266 | "source": [ 267 | "https://pytorch.org/vision/stable/auto_examples/plot_transforms.html#sphx-glr-auto-examples-plot-transforms-py\n", 268 | "\n", 269 | "https://albumentations.ai/docs/getting_started/mask_augmentation/" 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": null, 275 | "metadata": { 276 | "colab": { 277 | "base_uri": "https://localhost:8080/" 278 | }, 279 | "id": "WR_dzdpCXCHY", 280 | "outputId": "4e9b1681-2846-489f-edf6-a6240af65563" 281 | }, 282 | "outputs": [], 283 | "source": [ 284 | "import torchvision.transforms as T\n", 285 | "\n", 286 | "# T.RandomHorizontalFlip(p=0.5),\n", 287 | "# T.RandomVerticalFlip(p=0.1),\n", 288 | "\n", 289 | "# Image augmentations applied\n", 290 | "transforms = T.Compose([T.ToPILImage(),\n", 291 | " T.Resize((IMG_HEIGHT,IMG_WIDTH)),\n", 292 | " T.ToTensor()])\n", 293 | "\n", 294 | "# create the train and test datasets\n", 295 | "trainDS = mapAIdataset(imagePaths=train_images,\n", 296 | " maskPaths=train_masks,\n", 297 | " transforms=transforms)\n", 298 | "\n", 299 | "valDS = mapAIdataset(imagePaths=val_images,\n", 300 | " maskPaths=val_masks,\n", 301 | " transforms=transforms)\n", 302 | "\n", 303 | "testDS = mapAIdataset(imagePaths=test_images,\n", 304 | " maskPaths=test_masks,\n", 305 | " transforms=transforms)\n", 306 | "\n", 307 | "print(f\"[INFO] found {len(trainDS)} examples in the TRAINING set...\")\n", 308 | "print(f\"[INFO] found {len(valDS)} examples in the VALIDATION set...\")\n", 309 | "print(f\"[INFO] found {len(testDS)} examples in the TEST set...\")\n", 310 | "\n", 311 | "# create the training and test data loaders\n", 312 | "trainLoader = DataLoader(trainDS,\n", 313 | " shuffle=True,\n", 314 | " batch_size=BATCH_SIZE,\n", 315 | " pin_memory=PIN_MEMORY,\n", 316 | " num_workers=os.cpu_count())\n", 317 | "\n", 318 | "valLoader = DataLoader(valDS,\n", 319 | " shuffle=False,\n", 320 | " batch_size=BATCH_SIZE,\n", 321 | " pin_memory=PIN_MEMORY,\n", 322 | " num_workers=os.cpu_count())\n", 323 | "\n", 324 | "testLoader = DataLoader(testDS,\n", 325 | " shuffle=False,\n", 326 | " batch_size=BATCH_SIZE,\n", 327 | " pin_memory=PIN_MEMORY,\n", 328 | " num_workers=os.cpu_count())" 329 | ] 330 | }, 331 | { 332 | "attachments": {}, 333 | "cell_type": "markdown", 334 | "metadata": { 335 | "id": "tAO9M_R4XG6q" 336 | }, 337 | "source": [ 338 | "### Initialize DCSWIN model for training\n" 339 | ] 340 | }, 341 | { 342 | "cell_type": "code", 343 | "execution_count": null, 344 | "metadata": { 345 | "colab": { 346 | "base_uri": "https://localhost:8080/" 347 | }, 348 | "id": "2IMsYzUaXJW7", 349 | "outputId": "5328f1e2-4a71-4c05-fa9e-23b768971315" 350 | }, 351 | "outputs": [], 352 | "source": [ 353 | "model = DCSwin_model.DCSwin().to(DEVICE)\n", 354 | "\n", 355 | "# loss / optimizer\n", 356 | "lossFunction = BCEWithLogitsLoss()\n", 357 | "opt = Adam(model.parameters(), lr=init_lr)\n", 358 | "\n", 359 | "# calculate steps per epoch for train/val/test\n", 360 | "trainSteps = len(trainDS) // BATCH_SIZE \n", 361 | "valSteps = len(valDS) // BATCH_SIZE\n", 362 | "testSteps = len(testDS) // BATCH_SIZE\n", 363 | "\n", 364 | "print(trainSteps, valSteps, testSteps)\n", 365 | "\n", 366 | "# initialize a dictionary to store training history\n", 367 | "H = {\"train_loss\": [], \"val_loss\": [], \"test_loss\": []}\n", 368 | "H" 369 | ] 370 | }, 371 | { 372 | "cell_type": "code", 373 | 
"execution_count": null, 374 | "metadata": { 375 | "id": "WEP-IVokbWQg" 376 | }, 377 | "outputs": [], 378 | "source": [ 379 | "torch.cuda.empty_cache()" 380 | ] 381 | }, 382 | { 383 | "attachments": {}, 384 | "cell_type": "markdown", 385 | "metadata": { 386 | "id": "xcjuKhMeXLU-" 387 | }, 388 | "source": [ 389 | "### TRAINING THE MODEL\n", 390 | "\n", 391 | "Run this piece of code only if you want to train the model from scratch.\n", 392 | "\n", 393 | "Training locally: BATCH_SIZE = 2 takes 5035 MB of GPU memory.\n", 394 | "\n" 395 | ] 396 | }, 397 | { 398 | "cell_type": "code", 399 | "execution_count": null, 400 | "metadata": { 401 | "colab": { 402 | "base_uri": "https://localhost:8080/" 403 | }, 404 | "id": "vWuUyLUgXPNf", 405 | "outputId": "34c6485b-838b-45cf-99da-601f7cfbc4d1" 406 | }, 407 | "outputs": [], 408 | "source": [ 409 | "# loop over epochs\n", 410 | "print(\"[INFO] training DCSwin ...\")\n", 411 | "startTime = time.time()\n", 412 | "\n", 413 | "for epoch in tqdm(range(EPOCHS)):\n", 414 | " model.train()\n", 415 | "\n", 416 | " # initialize total training and validation loss\n", 417 | " totalTrainLoss = 0\n", 418 | " totalValLoss = 0\n", 419 | "\n", 420 | " # loop over the training set\n", 421 | " for (i, (x, y)) in enumerate(trainLoader):\n", 422 | " # send output to device\n", 423 | " (x, y) = (x.to(DEVICE), y.to(DEVICE))\n", 424 | "\n", 425 | " # perform a forward pass and calculate the training loss\n", 426 | " pred = model(x)\n", 427 | " loss = lossFunction(pred, y)\n", 428 | "\n", 429 | " # kill previously accumulated gradients then\n", 430 | " # perform backpropagation and update model parameters\n", 431 | " opt.zero_grad()\n", 432 | " loss.backward()\n", 433 | " opt.step()\n", 434 | "\n", 435 | " # add the loss to the total training loss\n", 436 | " totalTrainLoss += loss\n", 437 | "\n", 438 | " # switch of autograd\n", 439 | " with torch.no_grad():\n", 440 | " # set the model in evaluation mode\n", 441 | " model.eval()\n", 442 | "\n", 443 | " # loop over the validation set\n", 444 | " for (x, y) in valLoader:\n", 445 | " # send the input to the device\n", 446 | " (x, y) = (x.to(DEVICE), y.to(DEVICE))\n", 447 | "\n", 448 | " # make the predictions and calculate the validation loss\n", 449 | " pred = model(x)\n", 450 | " totalValLoss += lossFunction(pred, y)\n", 451 | "\n", 452 | " # calculate the average training and validation loss\n", 453 | " avgTrainLoss = totalTrainLoss / trainSteps\n", 454 | " avgValLoss = totalValLoss / valSteps\n", 455 | " \n", 456 | " # update our training history\n", 457 | " H[\"train_loss\"].append(avgTrainLoss.cpu().detach().numpy())\n", 458 | " H[\"val_loss\"].append(avgValLoss.cpu().detach().numpy())\n", 459 | "\n", 460 | " # print the model training and validation information\n", 461 | " print(\"[INFO] EPOCH: {}/{}\".format(epoch + 1, EPOCHS))\n", 462 | " print(\"Train loss: {:.6f}, Val loss: {:.4f}\".format(avgTrainLoss, avgValLoss))\n", 463 | " \n", 464 | "# display the total time needed to perform the training\n", 465 | "endTime = time.time()\n", 466 | "print(\"[INFO] total time taken to train the model: {:.2f}s\".format(endTime - startTime))" 467 | ] 468 | }, 469 | { 470 | "attachments": {}, 471 | "cell_type": "markdown", 472 | "metadata": {}, 473 | "source": [ 474 | "Train loss: 0.001194, Val loss: 0.0013\n", 475 | "[INFO] total time taken to train the model: 27115.38s" 476 | ] 477 | }, 478 | { 479 | "cell_type": "code", 480 | "execution_count": null, 481 | "metadata": { 482 | "colab": { 483 | "base_uri": "https://localhost:8080/" 
484 | }, 485 | "id": "CsJoOVn11rs9", 486 | "outputId": "c9a36460-f773-4771-cfd7-fec78711d8cc" 487 | }, 488 | "outputs": [], 489 | "source": [ 490 | "H # show traning/val loss history" 491 | ] 492 | }, 493 | { 494 | "cell_type": "markdown", 495 | "metadata": { 496 | "id": "U6ChLXHuXZHA" 497 | }, 498 | "source": [ 499 | "### Plot the training and validation loss" 500 | ] 501 | }, 502 | { 503 | "cell_type": "code", 504 | "execution_count": null, 505 | "metadata": { 506 | "colab": { 507 | "base_uri": "https://localhost:8080/", 508 | "height": 316 509 | }, 510 | "id": "j04HfubrXYvX", 511 | "outputId": "5416f4a7-2647-40e9-ce25-cac7909dea50" 512 | }, 513 | "outputs": [], 514 | "source": [ 515 | "# plot the training loss\n", 516 | "print(MODEL_PATH)\n", 517 | "print(PLOT_PATH)\n", 518 | "\n", 519 | "plt.style.use(\"ggplot\")\n", 520 | "plt.figure()\n", 521 | "plt.plot(H[\"train_loss\"], label=\"train_loss\")\n", 522 | "plt.plot(H[\"val_loss\"], label=\"val_loss\")\n", 523 | "plt.title(\"Training Loss on Dataset\")\n", 524 | "plt.xlabel(\"Epoch #\")\n", 525 | "plt.ylabel(\"Loss\")\n", 526 | "plt.legend(loc=\"lower left\")\n", 527 | "plt.savefig(PLOT_PATH)\n", 528 | "# serialize the model to disk\n", 529 | "torch.save(model, MODEL_PATH) # saves the model" 530 | ] 531 | }, 532 | { 533 | "attachments": {}, 534 | "cell_type": "markdown", 535 | "metadata": { 536 | "id": "5Y6Fx2oaWr0q" 537 | }, 538 | "source": [ 539 | "### Prediction part\n", 540 | "\n", 541 | "Here the trained model is loaded and use for prediction on test images." 542 | ] 543 | }, 544 | { 545 | "cell_type": "code", 546 | "execution_count": null, 547 | "metadata": { 548 | "colab": { 549 | "base_uri": "https://localhost:8080/" 550 | }, 551 | "id": "qYh4flMu7O-m", 552 | "outputId": "94909220-5b1f-43ad-b52f-d37bbaa270fd" 553 | }, 554 | "outputs": [], 555 | "source": [ 556 | "# Load saved model for prediction\n", 557 | "\n", 558 | "print(MODEL_PATH)\n", 559 | "\n", 560 | "model = torch.load(MODEL_PATH) # add MODEL_PATH after training\n", 561 | "print(\"model loaded for prediction\")\n", 562 | "\n", 563 | "model" 564 | ] 565 | }, 566 | { 567 | "attachments": {}, 568 | "cell_type": "markdown", 569 | "metadata": {}, 570 | "source": [ 571 | "#### Provide test images for MapAI Dataset" 572 | ] 573 | }, 574 | { 575 | "cell_type": "code", 576 | "execution_count": null, 577 | "metadata": {}, 578 | "outputs": [], 579 | "source": [ 580 | "PREDICTIONS_DIR = os.path.join(GD_PATH, \"predictions\")\n", 581 | "PREDICTIONS_DIR" 582 | ] 583 | }, 584 | { 585 | "attachments": {}, 586 | "cell_type": "markdown", 587 | "metadata": {}, 588 | "source": [ 589 | "#### Make predictions on the entire MapAI dataset\n", 590 | "\n", 591 | "Make predictions on test images and save them to the folder named predictions." 
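The prediction loop in the next cell re-reads and preprocesses every test image with OpenCV one at a time. Since a `testLoader` with `shuffle=False` is already defined above, the same masks can also be produced in batches, which is usually faster on a GPU. A minimal sketch (it assumes the loader preserves the order of the sorted `test_images` list, which holds because shuffling is disabled):

```python
# Sketch: batched inference with the existing testLoader; shuffle=False keeps
# the batch order aligned with the sorted test_images list used for file names.
import os
from pathlib import Path
from PIL import Image

model.eval()
idx = 0
with torch.no_grad():
    for (x, _) in testLoader:
        probs = torch.sigmoid(model(x.to(DEVICE)))                      # (B, 1, H, W)
        masks = ((probs > THRESHOLD).to(torch.uint8) * 255).cpu().numpy()
        for m in masks:
            name = Path(test_images[idx]).stem + ".tif"
            Image.fromarray(m.squeeze()).save(os.path.join(PREDICTIONS_DIR, name))
            idx += 1
```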
592 | ] 593 | }, 594 | { 595 | "cell_type": "code", 596 | "execution_count": null, 597 | "metadata": {}, 598 | "outputs": [], 599 | "source": [ 600 | "import random\n", 601 | "import gc\n", 602 | "from pathlib import Path\n", 603 | "import numpy as np\n", 604 | "from PIL import Image\n", 605 | "\n", 606 | "# PLOTTING PREDICTIONS AS SINGLE IMAGES\n", 607 | "\n", 608 | "# Output folder for the predictions\n", 609 | "output_folder = PREDICTIONS_DIR + \"/\" # check for Windows to save predictions inside the folder\n", 610 | "\n", 611 | "# PLOT TEST IMAGES as RGB\n", 612 | "for n in range(len(test_images)):\n", 613 | " gc.collect()\n", 614 | " # Test image number\n", 615 | " testImgName = str(Path(test_images[n]).stem) + '.tif'\n", 616 | " #print('#', testImgName)\n", 617 | "\n", 618 | " # Make predicton on a test image specified with counter n\n", 619 | " test_img = test_images[n]\n", 620 | " test_img_input = np.expand_dims(test_img, 0)\n", 621 | " #print('#', test_img_input[0])\n", 622 | "\n", 623 | " # PyTorch --> works\n", 624 | " model.eval()\n", 625 | " with torch.no_grad():\n", 626 | " image = cv2.imread(test_img_input[0])\n", 627 | " image = cv2.resize(image, dsize = (IMG_WIDTH, IMG_HEIGHT), interpolation=cv2.INTER_CUBIC)\n", 628 | " image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)\n", 629 | " image = image.astype(\"float32\") / 255\n", 630 | " \n", 631 | " # print('SIZE: ', image.shape)\n", 632 | "\n", 633 | " # make the channel axis to be the leading one, add batch dimension\n", 634 | " image = np.transpose(image, (2, 0, 1))\n", 635 | " # create a PyTorch tensor\n", 636 | " image = np.expand_dims(image, 0)\n", 637 | " # flash the tensor to the device\n", 638 | " image = torch.from_numpy(image).to(DEVICE)\n", 639 | "\n", 640 | " # make the prediction\n", 641 | " predMask = model(image).squeeze()\n", 642 | " # pass result through sigmoid\n", 643 | " predMask = torch.sigmoid(predMask)\n", 644 | "\n", 645 | " # convert result to numpy array\n", 646 | " predMask = predMask.cpu().numpy()\n", 647 | "\n", 648 | " # filter out the weak predictions and convert them to integers\n", 649 | " predMask = (predMask > THRESHOLD) * 255\n", 650 | " predMask = predMask.astype(np.uint8)\n", 651 | "\n", 652 | " # generate image from array\n", 653 | " pIMG = Image.fromarray(predMask)\n", 654 | " pIMG.save(str(output_folder + testImgName))\n", 655 | "\n", 656 | " print('Prediction:', testImgName, 'saved to:', output_folder)" 657 | ] 658 | }, 659 | { 660 | "attachments": {}, 661 | "cell_type": "markdown", 662 | "metadata": {}, 663 | "source": [ 664 | "#### Make predictions on single images by choice\n", 665 | "\n", 666 | "Change the parameter n to choose which image to plot." 667 | ] 668 | }, 669 | { 670 | "cell_type": "code", 671 | "execution_count": null, 672 | "metadata": { 673 | "colab": { 674 | "base_uri": "https://localhost:8080/" 675 | }, 676 | "id": "bq7BlbdrcgPB", 677 | "outputId": "6860afd9-da51-4911-d975-5a3ed78e01e1" 678 | }, 679 | "outputs": [], 680 | "source": [ 681 | "# ----------------------------------------------------------------------\n", 682 | "\n", 683 | "output_folder = PREDICTIONS_DIR + \"/\" + \"*.tif\"\n", 684 | "\n", 685 | "predictions = glob.glob(output_folder)\n", 686 | "predictions.sort()\n", 687 | "print(\"# IMAGES for prediction: \", len(predictions))\n", 688 | "print(\"Choosen n can be from 0 o 1367! 
\")\n", 689 | "\n", 690 | "\n", 691 | "# ----------------------------------------------------------------------\n", 692 | "\n", 693 | "n = 900 # change this number depending on which image you want to test\n", 694 | "\n", 695 | "fig = plt.figure(figsize=(18,12))\n", 696 | "ax1 = fig.add_subplot(131)\n", 697 | "\n", 698 | "ax1.set_title('RGB image: ')\n", 699 | "image = cv2.imread(test_images[n])[:,:,::-1]\n", 700 | "ax1.imshow(image)\n", 701 | "ax1.set_axis_off()\n", 702 | "\n", 703 | "ax2 = fig.add_subplot(132)\n", 704 | "ax2.set_title('Ground truth: ')\n", 705 | "image = cv2.imread(test_masks[n])[:,:,::-1]\n", 706 | "image *= 255\n", 707 | "ax2.imshow(image)\n", 708 | "ax2.set_axis_off()\n", 709 | "\n", 710 | "ax3 = fig.add_subplot(133)\n", 711 | "ax3.set_title('Prediction: ')\n", 712 | "image = cv2.imread(predictions[n])[:,:,::-1]\n", 713 | "ax3.imshow(image)\n", 714 | "ax3.set_axis_off()" 715 | ] 716 | }, 717 | { 718 | "attachments": {}, 719 | "cell_type": "markdown", 720 | "metadata": {}, 721 | "source": [ 722 | "### BUILDING FOOTPRINT REGULARIZATION\n", 723 | "\n", 724 | "Used repo: https://github.com/zorzi-s/projectRegularization\n", 725 | "\n", 726 | "git clone the repo to the folder where your notebook is stored. To get curent working directory use os.getcwd().\n", 727 | "\n", 728 | "The pretrained weights need to be downloaded from the provided link and saved into the folder pretrained_weighs that is inside projectRegularization:\n", 729 | "\n", 730 | "https://drive.google.com/drive/folders/1IPrDpvFq9ODW7UtPAJR_T-gGzxDat_uu\n", 731 | "\n", 732 | "Next step is to generate a Python file to locate the necessary pretrained weights from projectRegularization. The code below was only tested on Ubuntu, not on Windows." 733 | ] 734 | }, 735 | { 736 | "cell_type": "code", 737 | "execution_count": null, 738 | "metadata": {}, 739 | "outputs": [], 740 | "source": [ 741 | "# DEFINE NECESSARY PATHS FOR REGULARIZATION PART\n", 742 | "\n", 743 | "projectRegDir = os.path.join(GD_PATH, \"projectRegularization\")\n", 744 | "print(projectRegDir)\n", 745 | "\n", 746 | "ptw = os.path.join(projectRegDir, \"pretrained_weights\") \n", 747 | "print(ptw)\n", 748 | "\n", 749 | "# OUTPUT REGULARIZATIONS DIR\n", 750 | "REGULARIZATION_DIR = os.path.join(GD_PATH, \"regularizations\") + \"/\"\n", 751 | "print(REGULARIZATION_DIR)\n", 752 | "\n", 753 | "# GET THE PATHS FOR TRAINED GAN MODELS\n", 754 | "ENCODER = os.path.join(ptw, \"E140000_e1\")\n", 755 | "GENERATOR = os.path.join(ptw, \"E140000_net\")\n", 756 | "\n", 757 | "print(ENCODER)\n", 758 | "print(GENERATOR)" 759 | ] 760 | }, 761 | { 762 | "cell_type": "code", 763 | "execution_count": null, 764 | "metadata": {}, 765 | "outputs": [], 766 | "source": [ 767 | "# CREATE A NEW variables.py WITH USERS PATHS\n", 768 | "\n", 769 | "with open(projectRegDir + 'variables.py', 'w') as f:\n", 770 | " f.write('# CONFIGURE THE PATHS HERE: \\n\\n')\n", 771 | " f.write('# TRAINING \\n')\n", 772 | " f.write('DATASET_RGB = ' + '\"' + str(TRAIN_IMG_DIR + '*.tif' + '\"') + '\\n')\n", 773 | " f.write('DATASET_GTI = ' + '\"' + str(TRAIN_MASK_DIR + '*.tif' + '\"') + '\\n')\n", 774 | " f.write('DATASET_SEG = ' + '\"' + str(PREDICTIONS_DIR + '*.tif' + '\"') + '\\n')\n", 775 | " f.write('\\n')\n", 776 | " f.write('DEBUG_DIR = ' + '\"' + str('./debug/') + '\"' + '\\n')\n", 777 | " f.write('\\n')\n", 778 | " f.write('# INFERENCE \\n')\n", 779 | " f.write('INF_RGB = ' + '\"' + str(TEST_IMG_DIR + '*.tif' + '\"') + '\\n')\n", 780 | " f.write('INF_SEG = ' + '\"' + str(PREDICTIONS_DIR + 
'*.tif' + '\"') + '\\n')\n", 781 | " f.write('INF_OUT = ' + '\"' + str(REGULARIZATION_DIR + '\"') + '\\n')\n", 782 | " f.write('\\n')\n", 783 | " f.write('MODEL_ENCODER = ' + '\"' + str(ENCODER) + '\"' + '\\n')\n", 784 | " f.write('MODEL_GENERATOR = ' + '\"' + str(GENERATOR) + '\"' + '\\n')\n", 785 | " f.close()\n", 786 | " \n", 787 | "print(\"variables.py created with users paths...\")\n" 788 | ] 789 | }, 790 | { 791 | "attachments": {}, 792 | "cell_type": "markdown", 793 | "metadata": {}, 794 | "source": [ 795 | "#### Run projectRegularization\n", 796 | "\n", 797 | "Takes around 6-8 minutes.\n", 798 | "\n", 799 | "You only need to change the command below and replace it with the absolute path for regularize.py" 800 | ] 801 | }, 802 | { 803 | "cell_type": "code", 804 | "execution_count": null, 805 | "metadata": {}, 806 | "outputs": [], 807 | "source": [ 808 | "!python /home/shymon/Documents/mapAI-regularization/projectRegularization/regularize.py" 809 | ] 810 | }, 811 | { 812 | "attachments": {}, 813 | "cell_type": "markdown", 814 | "metadata": {}, 815 | "source": [ 816 | "### Compare predictions and regularizations on a single image" 817 | ] 818 | }, 819 | { 820 | "cell_type": "code", 821 | "execution_count": null, 822 | "metadata": {}, 823 | "outputs": [], 824 | "source": [ 825 | "# Read Regularizations to plot and compare results\n", 826 | "\n", 827 | "regularizations = glob.glob(REGULARIZATION_DIR + \"*.tif\")\n", 828 | "regularizations.sort()\n", 829 | "\n", 830 | "print(\"# of predicted images: \", len(predictions))\n", 831 | "print(\"# of regularized images: \", len(regularizations))" 832 | ] 833 | }, 834 | { 835 | "attachments": {}, 836 | "cell_type": "markdown", 837 | "metadata": {}, 838 | "source": [ 839 | "Code to plot RGB, GT, PREDICTION and REGULARIZATION in a single plot for comparison.\n", 840 | "\n", 841 | "Change parameter n accordingly." 
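A side note on the `!python .../regularize.py` call a few cells above: the hard-coded absolute path can be avoided by building the command from `projectRegDir`, which also keeps the call inside the active environment. A minimal sketch (the comparison plot continues in the next cell):

```python
# Sketch: run regularize.py without hard-coding an absolute path.
# sys.executable reuses the Python of the active (mapai) environment.
import os
import subprocess
import sys

reg_script = os.path.join(projectRegDir, "regularize.py")
subprocess.run([sys.executable, reg_script], check=True, cwd=projectRegDir)
```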
842 | ] 843 | }, 844 | { 845 | "cell_type": "code", 846 | "execution_count": null, 847 | "metadata": {}, 848 | "outputs": [], 849 | "source": [ 850 | "n = 600\n", 851 | "\n", 852 | "fig = plt.figure(figsize=(18,12))\n", 853 | "ax1 = fig.add_subplot(141)\n", 854 | "\n", 855 | "ax1.set_title('RGB: ')\n", 856 | "image = cv2.imread(test_images[n])[:,:,::-1]\n", 857 | "ax1.imshow(image)\n", 858 | "ax1.set_axis_off()\n", 859 | "\n", 860 | "ax2 = fig.add_subplot(142)\n", 861 | "ax2.set_title('Ground truth: ')\n", 862 | "image = cv2.imread(test_masks[n])[:,:,::-1]\n", 863 | "image *= 255\n", 864 | "ax2.imshow(image)\n", 865 | "ax2.set_axis_off()\n", 866 | "\n", 867 | "ax3 = fig.add_subplot(143)\n", 868 | "ax3.set_title('Prediction: ')\n", 869 | "image = cv2.imread(predictions[n])[:,:,::-1]\n", 870 | "ax3.imshow(image)\n", 871 | "ax3.set_axis_off()\n", 872 | "\n", 873 | "ax4 = fig.add_subplot(144)\n", 874 | "ax4.set_title('Regularization: ')\n", 875 | "image = cv2.imread(regularizations[n])[:,:,::-1]\n", 876 | "ax4.imshow(image)\n", 877 | "ax4.set_axis_off()\n", 878 | "\n", 879 | "# DEFINE PATH FOR PLOTS TO BE SAVED\n", 880 | "figPath = GD_PATH + \"/\" + \"plots\" + \"/\" \"compare-\" + str(n) + \".png\"\n", 881 | "print(figPath)\n", 882 | "\n", 883 | "# Save plot\n", 884 | "fig.savefig(figPath)" 885 | ] 886 | }, 887 | { 888 | "attachments": {}, 889 | "cell_type": "markdown", 890 | "metadata": {}, 891 | "source": [ 892 | "### VECTORIZING THE REGULARIZED BUILDING MASKS with GDAL\n", 893 | "\n", 894 | "GDAL: https://gdal.org/'\n", 895 | "\n", 896 | "GDAL: https://www.youtube.com/watch?v=q3DLdMj5zLA\n", 897 | "\n", 898 | "I do not know if it is possible to install GDAL on WINDOWS inside a conda environment.\n", 899 | "\n", 900 | "On Ubuntu you have to follow these steps:\n", 901 | "\n", 902 | "\n", 903 | "\n", 904 | "Specific process for installation: https://stackoverflow.com/questions/44005694/no-module-named-gdal\n", 905 | "\n", 906 | "- sudo apt-get update && sudo apt upgrade -y && sudo apt autoremove \n", 907 | "- sudo apt-get install -y cdo nco gdal-bin libgdal-dev-\n", 908 | "- python -m pip install --upgrade pip setuptools wheel\n", 909 | "- python -m pip install --upgrade gdal\n", 910 | "- conda install -c conda forge libgdal\n", 911 | "- conda install -c conda-forge libgdal\n", 912 | "- conda install -c conda-forge gdal\n", 913 | "- conda install tiledb=2.2\n", 914 | "- conda install poppler\n", 915 | "\n", 916 | "When you have this you can hopefully vectorize the detected masks quite easily." 
917 | ] 918 | }, 919 | { 920 | "cell_type": "code", 921 | "execution_count": null, 922 | "metadata": {}, 923 | "outputs": [], 924 | "source": [ 925 | "def get_fname_from_path(path):\n", 926 | " \"\"\"\n", 927 | " Given a path, returns the filename after the last frontslash character.\n", 928 | " \"\"\"\n", 929 | " return path.rsplit('/', 1)[-1]\n", 930 | "\n", 931 | "def get_fname_no_extension(path):\n", 932 | " \"\"\"\n", 933 | " Given a path, returns the filename without its extension.\n", 934 | " \"\"\"\n", 935 | " filename, extension = os.path.splitext(path)\n", 936 | " return filename" 937 | ] 938 | }, 939 | { 940 | "cell_type": "code", 941 | "execution_count": null, 942 | "metadata": {}, 943 | "outputs": [], 944 | "source": [ 945 | "import osgeo\n", 946 | "from osgeo import gdal\n", 947 | "from osgeo import ogr\n", 948 | "print('GDAL version: ', osgeo.gdal.__version__)\n", 949 | "\n", 950 | "# Choose which image to vectorize\n", 951 | "n = 0\n", 952 | "\n", 953 | "input = regularizations[n]\n", 954 | "print()\n", 955 | "print(\"INPUT: \", input)\n", 956 | "\n", 957 | "# print(get_fname_no_extension(input))\n", 958 | "\n", 959 | "# out\n", 960 | "output = get_fname_from_path(get_fname_no_extension(input)) + \".gpkg\"\n", 961 | "print(\"OUTPUT: \", output)\n", 962 | "\n", 963 | "# Open image with GDAl driver\n", 964 | "ds = gdal.Open(input)\n", 965 | "# Get the band\n", 966 | "band = ds.GetRasterBand(1)\n", 967 | "\n", 968 | "# Create the output shapefile\n", 969 | "driver = ogr.GetDriverByName(\"GPKG\")\n", 970 | "out_ds = driver.CreateDataSource(output)\n", 971 | "out_layer = out_ds.CreateLayer(output, geom_type=ogr.wkbPolygon)\n", 972 | "\n", 973 | "# Add a field to the layer to store the pixel values\n", 974 | "field_defn = ogr.FieldDefn(\"Pix_Value\", ogr.OFTInteger)\n", 975 | "out_layer.CreateField(field_defn)\n", 976 | "\n", 977 | "# Polygonize the PNG file\n", 978 | "gdal.Polygonize(band, None, out_layer, 0, [], callback=None)\n", 979 | "\n", 980 | "# Close the input and output files\n", 981 | "out_ds = None\n", 982 | "ds = None" 983 | ] 984 | }, 985 | { 986 | "attachments": {}, 987 | "cell_type": "markdown", 988 | "metadata": {}, 989 | "source": [ 990 | "For the builing detection case we need to only keep the vectors with pixel value 255. Easiest solution is to use: Extract by attribute. The Python solution with GDAL can be found below." 
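The polygonize cell above handles one raster at a time. Wrapping those same GDAL calls in a small helper makes it straightforward to vectorize every regularized mask in a single loop; the sketch below only rearranges calls that already appear in the cell above (the layer name is an assumption):

```python
# Sketch: the polygonize steps from the cell above wrapped in a function,
# so all regularized masks can be vectorized in one loop.
from osgeo import gdal, ogr

def vectorize_mask(input_tif, output_gpkg, layer_name="buildings"):
    """Polygonize a binary mask raster into a GeoPackage layer with a Pix_Value field."""
    ds = gdal.Open(input_tif)
    band = ds.GetRasterBand(1)

    driver = ogr.GetDriverByName("GPKG")
    out_ds = driver.CreateDataSource(output_gpkg)
    out_layer = out_ds.CreateLayer(layer_name, geom_type=ogr.wkbPolygon)
    out_layer.CreateField(ogr.FieldDefn("Pix_Value", ogr.OFTInteger))

    gdal.Polygonize(band, None, out_layer, 0, [], callback=None)

    # close the datasets so the GeoPackage is flushed to disk
    out_ds = None
    ds = None

# example usage over all regularized masks:
# for reg in regularizations:
#     vectorize_mask(reg, get_fname_from_path(get_fname_no_extension(reg)) + ".gpkg")
```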
991 | ] 992 | }, 993 | { 994 | "cell_type": "code", 995 | "execution_count": null, 996 | "metadata": {}, 997 | "outputs": [], 998 | "source": [ 999 | "# ogr2ogr -where ID=\"1\" outfile.gpkg infile.\n", 1000 | "\n", 1001 | "# RUN from the command line inside Ubuntu\n", 1002 | "!ogr2ogr -where Pix_Value=\"255\" bergen_-5943_1104B.gpkg bergen_-5943_1104.gpkg" 1003 | ] 1004 | } 1005 | ], 1006 | "metadata": { 1007 | "accelerator": "TPU", 1008 | "colab": { 1009 | "provenance": [] 1010 | }, 1011 | "gpuClass": "premium", 1012 | "kernelspec": { 1013 | "display_name": "torch", 1014 | "language": "python", 1015 | "name": "python3" 1016 | }, 1017 | "language_info": { 1018 | "codemirror_mode": { 1019 | "name": "ipython", 1020 | "version": 3 1021 | }, 1022 | "file_extension": ".py", 1023 | "mimetype": "text/x-python", 1024 | "name": "python", 1025 | "nbconvert_exporter": "python", 1026 | "pygments_lexer": "ipython3", 1027 | "version": "3.10.9" 1028 | }, 1029 | "vscode": { 1030 | "interpreter": { 1031 | "hash": "5614dd747bc595cf94d4c937a609d8df6c75b545807dd5ca7f02df8b67f4ea7c" 1032 | } 1033 | } 1034 | }, 1035 | "nbformat": 4, 1036 | "nbformat_minor": 0 1037 | } 1038 | -------------------------------------------------------------------------------- /06-evaluate.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "### Notebook to evaluate results" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "metadata": {}, 15 | "outputs": [], 16 | "source": [ 17 | "import os\n", 18 | "import cv2\n", 19 | "import matplotlib.pyplot as plt\n", 20 | "from imutils import paths\n", 21 | "import time\n", 22 | "import glob\n", 23 | "\n", 24 | "import tifffile as tiff\n", 25 | "import numpy as np\n", 26 | "from PIL import Image\n", 27 | "\n", 28 | "import numpy as np\n", 29 | "from sklearn.metrics import jaccard_score" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 2, 35 | "metadata": {}, 36 | "outputs": [ 37 | { 38 | "name": "stdout", 39 | "output_type": "stream", 40 | "text": [ 41 | "# TEST IMAGES: 1368\n", 42 | "# PREDICTIONS: 1368\n", 43 | "# REGULARIZATIONS: 1368\n" 44 | ] 45 | } 46 | ], 47 | "source": [ 48 | "# CONFIGURE PATHS\n", 49 | "GD_PATH = os.getcwd() + \"/\"\n", 50 | "PLOT_PATH = GD_PATH + \"plots/\"\n", 51 | "\n", 52 | "# TEST\n", 53 | "# Task 1: \n", 54 | "TEST_IMG_DIR = \"/home/shymon/datasets/mapai_full/task1_test/images/\"\n", 55 | "TEST_MASK_DIR = \"/home/shymon/datasets/mapai_full/task1_test/masks/\"\n", 56 | "\n", 57 | "# TEST\n", 58 | "test_images = sorted(list(paths.list_images(TEST_IMG_DIR)))\n", 59 | "test_masks = sorted(list(paths.list_images(TEST_MASK_DIR)))\n", 60 | "\n", 61 | "PREDICTIONS_DIR = GD_PATH + \"predictions/\"\n", 62 | "REGULARIZATION_DIR = GD_PATH + \"regularizations/\"\n", 63 | "\n", 64 | "# read predictions\n", 65 | "predictions = glob.glob(PREDICTIONS_DIR + \"*.tif\")\n", 66 | "predictions.sort()\n", 67 | "\n", 68 | "# read regularizations\n", 69 | "regularizations = glob.glob(REGULARIZATION_DIR + \"*.tif\")\n", 70 | "regularizations.sort()\n", 71 | "\n", 72 | "print(\"# TEST IMAGES: \", len(test_images))\n", 73 | "print(\"# PREDICTIONS: \", len(predictions))\n", 74 | "print(\"# REGULARIZATIONS: \", len(predictions))\n", 75 | "\n", 76 | "# Project Regularization directory\n", 77 | "projectRegDir = GD_PATH + \"projectRegularization\" + \"/\"\n", 78 | "\n", 79 | "ptw = 
projectRegDir + \"pretrained_weights\" + \"/\"\n", 80 | "\n", 81 | "# GET THE PATHS FOR TRAINED GAN MODELS\n", 82 | "ENCODER = ptw + \"E140000_e1\"\n", 83 | "GENERATOR = ptw + \"E140000_net\"\n", 84 | "\n", 85 | "# print(ENCODER)\n", 86 | "# print(GENERATOR)" 87 | ] 88 | }, 89 | { 90 | "attachments": {}, 91 | "cell_type": "markdown", 92 | "metadata": {}, 93 | "source": [ 94 | "### Calculate Intersection over Union on the test set" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": 3, 100 | "metadata": {}, 101 | "outputs": [], 102 | "source": [ 103 | "def iou(gt_mask, pred_mask):\n", 104 | "\n", 105 | " \"\"\"\n", 106 | " Calculates the intersection over union (BIoU) between two binary semantic segmentation masks.\n", 107 | " \n", 108 | " Arguments:\n", 109 | " mask1 -- a 2D numpy array representing the first mask\n", 110 | " mask2 -- a 2D numpy array representing the second mask\n", 111 | " \n", 112 | " Returns:\n", 113 | " iou -- a float representing the BIoU between the two masks\n", 114 | " \"\"\"\n", 115 | "\n", 116 | " intersection = np.logical_and(gt_mask, pred_mask).sum()\n", 117 | " union = np.logical_or(gt_mask, pred_mask).sum()\n", 118 | " iou_score = intersection / union if union != 0 else np.nan\n", 119 | "\n", 120 | " return iou_score\n", 121 | "\n", 122 | "def biou(segA, segB, boundary_width=1):\n", 123 | " \"\"\"\n", 124 | " Calculate the Boundary Intersection over Union (BIoU) metric between two binary segmentation masks.\n", 125 | "\n", 126 | " Parameters:\n", 127 | " segA (numpy array): A 2-dimensional binary numpy array representing the first segmentation mask.\n", 128 | " segB (numpy array): A 2-dimensional binary numpy array representing the second segmentation mask.\n", 129 | " boundary_width (int): The width of the boundary region to be included in the calculation (default is 1).\n", 130 | "\n", 131 | " Returns:\n", 132 | " float: The BIoU metric between the two segmentation masks.\n", 133 | " \"\"\"\n", 134 | "\n", 135 | " # Compute the boundaries of the segmentation masks\n", 136 | " boundaryA = np.zeros_like(segA)\n", 137 | " boundaryA[:,boundary_width:-boundary_width] = segA[:,boundary_width:-boundary_width] ^ segA[:, :-2*boundary_width] ^ segA[:, 2*boundary_width:]\n", 138 | " boundaryA[boundary_width:-boundary_width,:] = boundaryA[boundary_width:-boundary_width,:] ^ segA[:-2*boundary_width,:] ^ segA[2*boundary_width:,:]\n", 139 | "\n", 140 | " boundaryB = np.zeros_like(segB)\n", 141 | " boundaryB[:,boundary_width:-boundary_width] = segB[:,boundary_width:-boundary_width] ^ segB[:, :-2*boundary_width] ^ segB[:, 2*boundary_width:]\n", 142 | " boundaryB[boundary_width:-boundary_width,:] = boundaryB[boundary_width:-boundary_width,:] ^ segB[:-2*boundary_width,:] ^ segB[2*boundary_width:,:]\n", 143 | "\n", 144 | " # Compute the coordinates of the intersection boundary\n", 145 | " intersection_boundary = boundaryA & boundaryB\n", 146 | "\n", 147 | " # Compute the coordinates of the union boundary\n", 148 | " union_boundary = boundaryA | boundaryB\n", 149 | "\n", 150 | " # Compute the area of intersection boundary\n", 151 | " intersection_boundary_area = np.count_nonzero(intersection_boundary)\n", 152 | "\n", 153 | " # Compute the area of union boundary\n", 154 | " union_boundary_area = np.count_nonzero(union_boundary)\n", 155 | "\n", 156 | " # Compute the intersection and union of the interior regions\n", 157 | " intersection = np.logical_and(segA, segB)\n", 158 | " union = np.logical_or(segA, segB)\n", 159 | "\n", 160 | " # Compute the area of 
intersection and union of the interior regions\n", 161 | " intersection_area = np.count_nonzero(intersection)\n", 162 | " union_area = np.count_nonzero(union)\n", 163 | "\n", 164 | " # Compute the BIoU metric\n", 165 | " biou = (intersection_area + intersection_boundary_area) / (union_area + union_boundary_area + 1e-6)\n", 166 | "\n", 167 | " return biou\n", 168 | "\n", 169 | "# To read the original test images from MapAI\n", 170 | "def test2arr(tif_img):\n", 171 | " img = tiff.imread(tif_img)\n", 172 | " arr = np.array(img)\n", 173 | " return arr\n", 174 | "\n", 175 | "# To read the predictions and regularizations\n", 176 | "def pr2arr(tif_img):\n", 177 | " img = tiff.imread(tif_img)\n", 178 | " img = img / 255\n", 179 | " img = cv2.resize(img, (500, 500))\n", 180 | " arr = np.array(img)\n", 181 | " arr = arr.astype(np.uint8)\n", 182 | " return arr\n" 183 | ] 184 | }, 185 | { 186 | "cell_type": "markdown", 187 | "metadata": {}, 188 | "source": [ 189 | "### Evaluation on single image by choice" 190 | ] 191 | }, 192 | { 193 | "attachments": {}, 194 | "cell_type": "markdown", 195 | "metadata": {}, 196 | "source": [ 197 | "(1) Without regularization" 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": 4, 203 | "metadata": {}, 204 | "outputs": [ 205 | { 206 | "name": "stdout", 207 | "output_type": "stream", 208 | "text": [ 209 | "Jaccard score or IoU with Scikit-learn: 0.7001\n", 210 | "Jaccard score or IoU with manual function: 0.7001\n", 211 | "Boundary Intersection over Union: 0.6959\n" 212 | ] 213 | } 214 | ], 215 | "source": [ 216 | "n = 900\n", 217 | "\n", 218 | "jaccard_sklearn = jaccard_score(test2arr(test_masks[n]), pr2arr(predictions[n]), average='micro')\n", 219 | "print(\"Jaccard score or IoU with Scikit-learn: \", round(jaccard_sklearn, 4))\n", 220 | "\n", 221 | "iou_man = iou(test2arr(test_masks[n]), pr2arr(predictions[n]))\n", 222 | "print(\"Jaccard score or IoU with manual function: \", round(iou_man, 4))\n", 223 | "\n", 224 | "biou_man = biou(test2arr(test_masks[n]), pr2arr(predictions[n]))\n", 225 | "print(\"Boundary Intersection over Union: \", round(biou_man, 4))" 226 | ] 227 | }, 228 | { 229 | "attachments": {}, 230 | "cell_type": "markdown", 231 | "metadata": {}, 232 | "source": [ 233 | "(2) With regularization" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": 5, 239 | "metadata": {}, 240 | "outputs": [ 241 | { 242 | "name": "stdout", 243 | "output_type": "stream", 244 | "text": [ 245 | "Jaccard score or IoU with Scikit-learn: 0.6841\n", 246 | "Jaccard score or IoU with manual function: 0.6841\n", 247 | "Boundary Intersection over Union: 0.6801\n" 248 | ] 249 | } 250 | ], 251 | "source": [ 252 | "n = 900\n", 253 | "\n", 254 | "jaccard_sklearn = jaccard_score(test2arr(test_masks[n]), pr2arr(regularizations[n]), average='micro')\n", 255 | "print(\"Jaccard score or IoU with Scikit-learn: \", round(jaccard_sklearn, 4))\n", 256 | "\n", 257 | "iou_man = iou(test2arr(test_masks[n]), pr2arr(regularizations[n]))\n", 258 | "print(\"Jaccard score or IoU with manual function: \", round(iou_man, 4))\n", 259 | "\n", 260 | "biou_man = biou(test2arr(test_masks[n]), pr2arr(regularizations[n]))\n", 261 | "print(\"Boundary Intersection over Union: \", round(biou_man, 4))" 262 | ] 263 | }, 264 | { 265 | "attachments": {}, 266 | "cell_type": "markdown", 267 | "metadata": {}, 268 | "source": [ 269 | "### Evaluation on entire MapAI dataset" 270 | ] 271 | }, 272 | { 273 | "attachments": {}, 274 | "cell_type": "markdown", 275 | "metadata": {}, 
276 | "source": [ 277 | "(1) Without regularization" 278 | ] 279 | }, 280 | { 281 | "cell_type": "code", 282 | "execution_count": 6, 283 | "metadata": {}, 284 | "outputs": [ 285 | { 286 | "name": "stdout", 287 | "output_type": "stream", 288 | "text": [ 289 | "Evaluation without regularization: \n", 290 | "Mean IoU for Task 1: 0.3995\n", 291 | "Mean BIoU for Task 1: 0.3766\n", 292 | "S metric for Task 1: 0.3881\n" 293 | ] 294 | } 295 | ], 296 | "source": [ 297 | "iou_mapai = np.array([])\n", 298 | "biou_mapai = np.array([])\n", 299 | "\n", 300 | "for n in range(len(test_masks)):\n", 301 | " \n", 302 | " # Calculate metrics\n", 303 | "\n", 304 | " # IoU\n", 305 | " iou_single = iou(test2arr(test_masks[n]), pr2arr(predictions[n]))\n", 306 | "\n", 307 | " # BIoU\n", 308 | " biou_single = biou(test2arr(test_masks[n]), pr2arr(predictions[n]))\n", 309 | "\n", 310 | " # Append to whole array\n", 311 | " iou_mapai = np.append(iou_mapai, iou_single)\n", 312 | " biou_mapai = np.append(biou_mapai, biou_single)\n", 313 | "\n", 314 | "#iou_mapai = iou_mapai[iou_mapai != 0]\n", 315 | "#biou_mapai = biou_mapai[biou_mapai != 0]\n", 316 | "\n", 317 | "print(\"Evaluation without regularization: \")\n", 318 | "print(\"Mean IoU for Task 1: \", round(np.nanmean(iou_mapai), 4))\n", 319 | "print(\"Mean BIoU for Task 1: \", round(np.nanmean(biou_mapai), 4))\n", 320 | "print(\"S metric for Task 1: \", round(((np.nanmean(biou_mapai) + np.nanmean(iou_mapai)) / 2 ), 4))" 321 | ] 322 | }, 323 | { 324 | "attachments": {}, 325 | "cell_type": "markdown", 326 | "metadata": {}, 327 | "source": [ 328 | "(2) With regularization" 329 | ] 330 | }, 331 | { 332 | "cell_type": "code", 333 | "execution_count": 7, 334 | "metadata": {}, 335 | "outputs": [ 336 | { 337 | "name": "stdout", 338 | "output_type": "stream", 339 | "text": [ 340 | "Mean IoU for Task 1: 0.4017\n", 341 | "Mean BIoU for Task 1: 0.378\n", 342 | "S metric for Task 1: 0.3898\n" 343 | ] 344 | } 345 | ], 346 | "source": [ 347 | "iou_mapai = np.array([])\n", 348 | "biou_mapai = np.array([])\n", 349 | "\n", 350 | "for n in range(len(test_masks)):\n", 351 | " \n", 352 | " # Calculate metrics\n", 353 | "\n", 354 | " # IoU\n", 355 | " iou_single = iou(test2arr(test_masks[n]), pr2arr(regularizations[n]))\n", 356 | "\n", 357 | " # BIoU\n", 358 | " biou_single = biou(test2arr(test_masks[n]), pr2arr(regularizations[n]))\n", 359 | "\n", 360 | " # Append to whole array\n", 361 | " iou_mapai = np.append(iou_mapai, iou_single)\n", 362 | " biou_mapai = np.append(biou_mapai, biou_single)\n", 363 | "\n", 364 | "#iou_mapai = iou_mapai[iou_mapai != 0]\n", 365 | "#biou_mapai = biou_mapai[biou_mapai != 0]\n", 366 | "\n", 367 | "print(\"Mean IoU for Task 1: \", round(np.nanmean(iou_mapai), 4))\n", 368 | "print(\"Mean BIoU for Task 1: \", round(np.nanmean(biou_mapai), 4))\n", 369 | "print(\"S metric for Task 1: \", round(((np.nanmean(biou_mapai) + np.nanmean(iou_mapai)) / 2 ), 4))" 370 | ] 371 | }, 372 | { 373 | "cell_type": "code", 374 | "execution_count": null, 375 | "metadata": {}, 376 | "outputs": [], 377 | "source": [] 378 | } 379 | ], 380 | "metadata": { 381 | "kernelspec": { 382 | "display_name": "torch", 383 | "language": "python", 384 | "name": "python3" 385 | }, 386 | "language_info": { 387 | "codemirror_mode": { 388 | "name": "ipython", 389 | "version": 3 390 | }, 391 | "file_extension": ".py", 392 | "mimetype": "text/x-python", 393 | "name": "python", 394 | "nbconvert_exporter": "python", 395 | "pygments_lexer": "ipython3", 396 | "version": "3.10.9" 397 | }, 398 | 
"orig_nbformat": 4, 399 | "vscode": { 400 | "interpreter": { 401 | "hash": "5614dd747bc595cf94d4c937a609d8df6c75b545807dd5ca7f02df8b67f4ea7c" 402 | } 403 | } 404 | }, 405 | "nbformat": 4, 406 | "nbformat_minor": 2 407 | } 408 | -------------------------------------------------------------------------------- /07-vectorize-building-footprint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "### Vectorize the predicted building footprints with GDAL" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": null, 14 | "metadata": {}, 15 | "outputs": [], 16 | "source": [ 17 | "import osgeo\n", 18 | "from osgeo import gdal\n", 19 | "from osgeo import ogr\n", 20 | "print('GDAL version: ', osgeo.gdal.__version__)\n", 21 | "\n", 22 | "# PROVIDE SEGMENTATION PATH\n", 23 | "input = \"INSERT PATH HERE\"\n", 24 | "\n", 25 | "# PROVIDE OUTPUT VECTOR FILE\n", 26 | "output = \"INSERT PATH HERE\"\n", 27 | "\n", 28 | "# Open image with GDAl driver\n", 29 | "ds = gdal.Open(input)\n", 30 | "\n", 31 | "# Get GeoTransform\n", 32 | "ds.SetGeoTransform([0,1,0,0,0,-1])\n", 33 | "\n", 34 | "# Get the band\n", 35 | "band = ds.GetRasterBand(1)\n", 36 | "# Create the output shapefile\n", 37 | "driver = ogr.GetDriverByName(\"GeoPackage\")\n", 38 | "out_ds = driver.CreateDataSource(output)\n", 39 | "out_layer = out_ds.CreateLayer(output, geom_type=ogr.wkbPolygon)\n", 40 | "\n", 41 | "# Add a field to the layer to store the pixel values\n", 42 | "field_defn = ogr.FieldDefn(\"Pix_Value\", ogr.OFTInteger)\n", 43 | "out_layer.CreateField(field_defn)\n", 44 | "\n", 45 | "# Polygonize the PNG file\n", 46 | "gdal.Polygonize(band, None, out_layer, 0, [], callback=None)\n", 47 | "\n", 48 | "# Close the input and output files\n", 49 | "out_ds = None\n", 50 | "ds = None" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": null, 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "# ogr2ogr -where ID=\"1\" OUTFILE.gpkg INFILE.gpkg\n", 60 | "\n", 61 | "# RUN from the command line inside Ubuntu\n", 62 | "# Change name of input and output according to user needs\n", 63 | "\n", 64 | "!ogr2ogr -where Pix_Value=\"255\" bergen_-5943_1104B.gpkg bergen_-5943_1104.gpkg" 65 | ] 66 | } 67 | ], 68 | "metadata": { 69 | "kernelspec": { 70 | "display_name": "Python 3", 71 | "language": "python", 72 | "name": "python3" 73 | }, 74 | "language_info": { 75 | "codemirror_mode": { 76 | "name": "ipython", 77 | "version": 3 78 | }, 79 | "file_extension": ".py", 80 | "mimetype": "text/x-python", 81 | "name": "python", 82 | "nbconvert_exporter": "python", 83 | "pygments_lexer": "ipython3", 84 | "version": "3.10.6" 85 | }, 86 | "orig_nbformat": 4, 87 | "vscode": { 88 | "interpreter": { 89 | "hash": "e7370f93d1d0cde622a1f8e1c04877d8463912d04d973331ad4851f04de6915a" 90 | } 91 | } 92 | }, 93 | "nbformat": 4, 94 | "nbformat_minor": 2 95 | } 96 | -------------------------------------------------------------------------------- /DATASET/info.txt: -------------------------------------------------------------------------------- 1 | STORE THE MAPAI DATASET IN THIS FOLDER. 
2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## mapAI-regularization 2 | 3 | The repository stores the code for our work presented at FOSS4G 2023 with the title: **AN END-TO-END DEEP LEARNING WORKFLOW FOR BUILDING SEGMENTATION, 4 | BOUNDARY REGULARIZATION AND VECTORIZATION OF BUILDING FOOTPRINTS.** 5 | 6 | ## Introduction 7 | 8 | The purpose of our research is to develop an end-to-end workflow for accurate segmentation of building footprints, consisting of three major steps: 9 | - (1) binary semantic segmentation with a CNN, 10 | - (2) building boundary regularization and 11 | - (3) vectorization. 12 | 13 | The dataset used for building segmentation is the NORA MapAI: Precision in Building Segmentation dataset. We have developed an implementation for building footprint segmentation. Our approach extends the segmentation by applying projectRegularization (Zorzi and Fraundorfer, 2019; Zorzi et al., 2021) to the semantic segmentation results. The link to the official repository can be accessed here: https://github.com/zorzi-s/projectRegularization. Note that this is already included in our repository. 14 | 15 | ## MapAI dataset 16 | 17 | The original MapAI: Precision in Building Segmentation dataset can be downloaded manually from Huggingface: https://huggingface.co/datasets/sjyhne/mapai_training_data 18 | 19 | or by running our first notebook. 20 | 21 | ## Installation 22 | 23 | ``` 24 | git clone https://github.com/s1m0nS/mapAI-regularization.git 25 | cd mapAI-regularization 26 | conda create --name mapai python=3.10 27 | conda activate mapai 28 | pip install -r requirements.txt 29 | ``` 30 | Installing GDAL inside a conda environment can be tricky. Follow the steps below according to your OS. 31 | 32 | **Linux:** 33 | 34 | ``` 35 | sudo apt-get update && sudo apt upgrade -y && sudo apt autoremove 36 | sudo apt-get install -y cdo nco gdal-bin libgdal-dev 37 | python -m pip install --upgrade pip setuptools wheel 38 | python -m pip install --upgrade gdal 39 | conda install -c conda-forge libgdal 40 | conda install -c conda-forge libgdal 41 | conda install -c conda-forge gdal 42 | conda install tiledb=2.2 43 | conda install poppler 44 | ``` 45 | 46 | **Windows:** 47 | 48 | Get the appropriate .whl file for your Python version from: https://www.lfd.uci.edu/~gohlke/pythonlibs/#gdal 49 | For Python 3.10 use either: 50 | - GDAL-3.4.3-cp310-cp310-win_amd64.whl or 51 | - GDAL-3.4.3-cp310-cp310-win32.whl. 52 | 53 | Then install the appropriate one as: 54 | ``` 55 | conda activate mapai 56 | python -m pip install C:\Users\...\GDAL-3.4.3-cp310-cp310-win_amd64.whl 57 | ``` 58 | 59 | Run our Jupyter Notebooks and enjoy the process. If you encounter errors, post an issue. 60 | 61 | Feedback and new ideas are welcome.
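As a quick sanity check after installation (a minimal sketch; it assumes the packages from requirements.txt and the GDAL Python bindings are installed in the `mapai` environment, and the printed version numbers will differ on your machine), the following one-liner should print the PyTorch and GDAL versions without raising an ImportError:

```
conda activate mapai
python -c "import torch, torchvision, timm, einops; from osgeo import gdal; print(torch.__version__, gdal.__version__)"
```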
62 | 63 | 64 | 65 | -------------------------------------------------------------------------------- /models/UNetFormer_model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from einops import rearrange, repeat 5 | 6 | from timm.models.layers import DropPath, to_2tuple, trunc_normal_ 7 | import timm 8 | 9 | 10 | class ConvBNReLU(nn.Sequential): 11 | def __init__(self, in_channels, out_channels, kernel_size=3, dilation=1, stride=1, norm_layer=nn.BatchNorm2d, bias=False): 12 | super(ConvBNReLU, self).__init__( 13 | nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, bias=bias, 14 | dilation=dilation, stride=stride, padding=((stride - 1) + dilation * (kernel_size - 1)) // 2), 15 | norm_layer(out_channels), 16 | nn.ReLU6() 17 | ) 18 | 19 | 20 | class ConvBN(nn.Sequential): 21 | def __init__(self, in_channels, out_channels, kernel_size=3, dilation=1, stride=1, norm_layer=nn.BatchNorm2d, bias=False): 22 | super(ConvBN, self).__init__( 23 | nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, bias=bias, 24 | dilation=dilation, stride=stride, padding=((stride - 1) + dilation * (kernel_size - 1)) // 2), 25 | norm_layer(out_channels) 26 | ) 27 | 28 | 29 | class Conv(nn.Sequential): 30 | def __init__(self, in_channels, out_channels, kernel_size=3, dilation=1, stride=1, bias=False): 31 | super(Conv, self).__init__( 32 | nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, bias=bias, 33 | dilation=dilation, stride=stride, padding=((stride - 1) + dilation * (kernel_size - 1)) // 2) 34 | ) 35 | 36 | 37 | class SeparableConvBNReLU(nn.Sequential): 38 | def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, dilation=1, 39 | norm_layer=nn.BatchNorm2d): 40 | super(SeparableConvBNReLU, self).__init__( 41 | nn.Conv2d(in_channels, in_channels, kernel_size, stride=stride, dilation=dilation, 42 | padding=((stride - 1) + dilation * (kernel_size - 1)) // 2, 43 | groups=in_channels, bias=False), 44 | norm_layer(out_channels), 45 | nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False), 46 | nn.ReLU6() 47 | ) 48 | 49 | 50 | class SeparableConvBN(nn.Sequential): 51 | def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, dilation=1, 52 | norm_layer=nn.BatchNorm2d): 53 | super(SeparableConvBN, self).__init__( 54 | nn.Conv2d(in_channels, in_channels, kernel_size, stride=stride, dilation=dilation, 55 | padding=((stride - 1) + dilation * (kernel_size - 1)) // 2, 56 | groups=in_channels, bias=False), 57 | norm_layer(out_channels), 58 | nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False) 59 | ) 60 | 61 | 62 | class SeparableConv(nn.Sequential): 63 | def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, dilation=1): 64 | super(SeparableConv, self).__init__( 65 | nn.Conv2d(in_channels, in_channels, kernel_size, stride=stride, dilation=dilation, 66 | padding=((stride - 1) + dilation * (kernel_size - 1)) // 2, 67 | groups=in_channels, bias=False), 68 | nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False) 69 | ) 70 | 71 | 72 | class Mlp(nn.Module): 73 | def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.ReLU6, drop=0.): 74 | super().__init__() 75 | out_features = out_features or in_features 76 | hidden_features = hidden_features or in_features 77 | self.fc1 = nn.Conv2d(in_features, hidden_features, 1, 1, 0, bias=True) 78 | self.act = act_layer() 79 | self.fc2 = 
nn.Conv2d(hidden_features, out_features, 1, 1, 0, bias=True) 80 | self.drop = nn.Dropout(drop, inplace=True) 81 | 82 | def forward(self, x): 83 | x = self.fc1(x) 84 | x = self.act(x) 85 | x = self.drop(x) 86 | x = self.fc2(x) 87 | x = self.drop(x) 88 | return x 89 | 90 | 91 | class GlobalLocalAttention(nn.Module): 92 | def __init__(self, 93 | dim=256, 94 | num_heads=16, 95 | qkv_bias=False, 96 | window_size=8, 97 | relative_pos_embedding=True 98 | ): 99 | super().__init__() 100 | self.num_heads = num_heads 101 | head_dim = dim // self.num_heads 102 | self.scale = head_dim ** -0.5 103 | self.ws = window_size 104 | 105 | self.qkv = Conv(dim, 3*dim, kernel_size=1, bias=qkv_bias) 106 | self.local1 = ConvBN(dim, dim, kernel_size=3) 107 | self.local2 = ConvBN(dim, dim, kernel_size=1) 108 | self.proj = SeparableConvBN(dim, dim, kernel_size=window_size) 109 | 110 | self.attn_x = nn.AvgPool2d(kernel_size=(window_size, 1), stride=1, padding=(window_size//2 - 1, 0)) 111 | self.attn_y = nn.AvgPool2d(kernel_size=(1, window_size), stride=1, padding=(0, window_size//2 - 1)) 112 | 113 | self.relative_pos_embedding = relative_pos_embedding 114 | 115 | if self.relative_pos_embedding: 116 | # define a parameter table of relative position bias 117 | self.relative_position_bias_table = nn.Parameter( 118 | torch.zeros((2 * window_size - 1) * (2 * window_size - 1), num_heads)) # 2*Wh-1 * 2*Ww-1, nH 119 | 120 | # get pair-wise relative position index for each token inside the window 121 | coords_h = torch.arange(self.ws) 122 | coords_w = torch.arange(self.ws) 123 | coords = torch.stack(torch.meshgrid([coords_h, coords_w])) # 2, Wh, Ww 124 | coords_flatten = torch.flatten(coords, 1) # 2, Wh*Ww 125 | relative_coords = coords_flatten[:, :, None] - coords_flatten[:, None, :] # 2, Wh*Ww, Wh*Ww 126 | relative_coords = relative_coords.permute(1, 2, 0).contiguous() # Wh*Ww, Wh*Ww, 2 127 | relative_coords[:, :, 0] += self.ws - 1 # shift to start from 0 128 | relative_coords[:, :, 1] += self.ws - 1 129 | relative_coords[:, :, 0] *= 2 * self.ws - 1 130 | relative_position_index = relative_coords.sum(-1) # Wh*Ww, Wh*Ww 131 | self.register_buffer("relative_position_index", relative_position_index) 132 | 133 | trunc_normal_(self.relative_position_bias_table, std=.02) 134 | 135 | def pad(self, x, ps): 136 | _, _, H, W = x.size() 137 | if W % ps != 0: 138 | x = F.pad(x, (0, ps - W % ps), mode='reflect') 139 | if H % ps != 0: 140 | x = F.pad(x, (0, 0, 0, ps - H % ps), mode='reflect') 141 | return x 142 | 143 | def pad_out(self, x): 144 | x = F.pad(x, pad=(0, 1, 0, 1), mode='reflect') 145 | return x 146 | 147 | def forward(self, x): 148 | B, C, H, W = x.shape 149 | 150 | local = self.local2(x) + self.local1(x) 151 | 152 | x = self.pad(x, self.ws) 153 | B, C, Hp, Wp = x.shape 154 | qkv = self.qkv(x) 155 | 156 | q, k, v = rearrange(qkv, 'b (qkv h d) (hh ws1) (ww ws2) -> qkv (b hh ww) h (ws1 ws2) d', h=self.num_heads, 157 | d=C//self.num_heads, hh=Hp//self.ws, ww=Wp//self.ws, qkv=3, ws1=self.ws, ws2=self.ws) 158 | 159 | dots = (q @ k.transpose(-2, -1)) * self.scale 160 | 161 | if self.relative_pos_embedding: 162 | relative_position_bias = self.relative_position_bias_table[self.relative_position_index.view(-1)].view( 163 | self.ws * self.ws, self.ws * self.ws, -1) # Wh*Ww,Wh*Ww,nH 164 | relative_position_bias = relative_position_bias.permute(2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww 165 | dots += relative_position_bias.unsqueeze(0) 166 | 167 | attn = dots.softmax(dim=-1) 168 | attn = attn @ v 169 | 170 | attn = rearrange(attn, '(b hh 
ww) h (ws1 ws2) d -> b (h d) (hh ws1) (ww ws2)', h=self.num_heads, 171 | d=C//self.num_heads, hh=Hp//self.ws, ww=Wp//self.ws, ws1=self.ws, ws2=self.ws) 172 | 173 | attn = attn[:, :, :H, :W] 174 | 175 | out = self.attn_x(F.pad(attn, pad=(0, 0, 0, 1), mode='reflect')) + \ 176 | self.attn_y(F.pad(attn, pad=(0, 1, 0, 0), mode='reflect')) 177 | 178 | out = out + local 179 | out = self.pad_out(out) 180 | out = self.proj(out) 181 | # print(out.size()) 182 | out = out[:, :, :H, :W] 183 | 184 | return out 185 | 186 | 187 | class Block(nn.Module): 188 | def __init__(self, dim=256, num_heads=16, mlp_ratio=4., qkv_bias=False, drop=0., attn_drop=0., 189 | drop_path=0., act_layer=nn.ReLU6, norm_layer=nn.BatchNorm2d, window_size=8): 190 | super().__init__() 191 | self.norm1 = norm_layer(dim) 192 | self.attn = GlobalLocalAttention(dim, num_heads=num_heads, qkv_bias=qkv_bias, window_size=window_size) 193 | 194 | self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity() 195 | mlp_hidden_dim = int(dim * mlp_ratio) 196 | self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, out_features=dim, act_layer=act_layer, drop=drop) 197 | self.norm2 = norm_layer(dim) 198 | 199 | def forward(self, x): 200 | 201 | x = x + self.drop_path(self.attn(self.norm1(x))) 202 | x = x + self.drop_path(self.mlp(self.norm2(x))) 203 | 204 | return x 205 | 206 | 207 | class WF(nn.Module): 208 | def __init__(self, in_channels=128, decode_channels=128, eps=1e-8): 209 | super(WF, self).__init__() 210 | self.pre_conv = Conv(in_channels, decode_channels, kernel_size=1) 211 | 212 | self.weights = nn.Parameter(torch.ones(2, dtype=torch.float32), requires_grad=True) 213 | self.eps = eps 214 | self.post_conv = ConvBNReLU(decode_channels, decode_channels, kernel_size=3) 215 | 216 | def forward(self, x, res): 217 | x = F.interpolate(x, scale_factor=2, mode='bilinear', align_corners=False) 218 | weights = nn.ReLU()(self.weights) 219 | fuse_weights = weights / (torch.sum(weights, dim=0) + self.eps) 220 | x = fuse_weights[0] * self.pre_conv(res) + fuse_weights[1] * x 221 | x = self.post_conv(x) 222 | return x 223 | 224 | 225 | class FeatureRefinementHead(nn.Module): 226 | def __init__(self, in_channels=64, decode_channels=64): 227 | super().__init__() 228 | self.pre_conv = Conv(in_channels, decode_channels, kernel_size=1) 229 | 230 | self.weights = nn.Parameter(torch.ones(2, dtype=torch.float32), requires_grad=True) 231 | self.eps = 1e-8 232 | self.post_conv = ConvBNReLU(decode_channels, decode_channels, kernel_size=3) 233 | 234 | self.pa = nn.Sequential(nn.Conv2d(decode_channels, decode_channels, kernel_size=3, padding=1, groups=decode_channels), 235 | nn.Sigmoid()) 236 | self.ca = nn.Sequential(nn.AdaptiveAvgPool2d(1), 237 | Conv(decode_channels, decode_channels//16, kernel_size=1), 238 | nn.ReLU6(), 239 | Conv(decode_channels//16, decode_channels, kernel_size=1), 240 | nn.Sigmoid()) 241 | 242 | self.shortcut = ConvBN(decode_channels, decode_channels, kernel_size=1) 243 | self.proj = SeparableConvBN(decode_channels, decode_channels, kernel_size=3) 244 | self.act = nn.ReLU6() 245 | 246 | def forward(self, x, res): 247 | x = F.interpolate(x, scale_factor=2, mode='bilinear', align_corners=False) 248 | weights = nn.ReLU()(self.weights) 249 | fuse_weights = weights / (torch.sum(weights, dim=0) + self.eps) 250 | x = fuse_weights[0] * self.pre_conv(res) + fuse_weights[1] * x 251 | x = self.post_conv(x) 252 | shortcut = self.shortcut(x) 253 | pa = self.pa(x) * x 254 | ca = self.ca(x) * x 255 | x = pa + ca 256 | x = self.proj(x) + 
shortcut 257 | x = self.act(x) 258 | 259 | return x 260 | 261 | 262 | class AuxHead(nn.Module): 263 | 264 | def __init__(self, in_channels=64, num_classes=8): 265 | super().__init__() 266 | self.conv = ConvBNReLU(in_channels, in_channels) 267 | self.drop = nn.Dropout(0.1) 268 | self.conv_out = Conv(in_channels, num_classes, kernel_size=1) 269 | 270 | def forward(self, x, h, w): 271 | feat = self.conv(x) 272 | feat = self.drop(feat) 273 | feat = self.conv_out(feat) 274 | feat = F.interpolate(feat, size=(h, w), mode='bilinear', align_corners=False) 275 | return feat 276 | 277 | 278 | class Decoder(nn.Module): 279 | def __init__(self, 280 | encoder_channels=(64, 128, 256, 512), 281 | decode_channels=64, 282 | dropout=0.1, 283 | window_size=8, 284 | num_classes=6): 285 | super(Decoder, self).__init__() 286 | 287 | self.pre_conv = ConvBN(encoder_channels[-1], decode_channels, kernel_size=1) 288 | self.b4 = Block(dim=decode_channels, num_heads=8, window_size=window_size) 289 | 290 | self.b3 = Block(dim=decode_channels, num_heads=8, window_size=window_size) 291 | self.p3 = WF(encoder_channels[-2], decode_channels) 292 | 293 | self.b2 = Block(dim=decode_channels, num_heads=8, window_size=window_size) 294 | self.p2 = WF(encoder_channels[-3], decode_channels) 295 | 296 | if self.training: 297 | self.up4 = nn.UpsamplingBilinear2d(scale_factor=4) 298 | self.up3 = nn.UpsamplingBilinear2d(scale_factor=2) 299 | self.aux_head = AuxHead(decode_channels, num_classes) 300 | 301 | self.p1 = FeatureRefinementHead(encoder_channels[-4], decode_channels) 302 | 303 | self.segmentation_head = nn.Sequential(ConvBNReLU(decode_channels, decode_channels), 304 | nn.Dropout2d(p=dropout, inplace=True), 305 | Conv(decode_channels, num_classes, kernel_size=1)) 306 | self.init_weight() 307 | 308 | def forward(self, res1, res2, res3, res4, h, w): 309 | if self.training: 310 | x = self.b4(self.pre_conv(res4)) 311 | h4 = self.up4(x) 312 | 313 | x = self.p3(x, res3) 314 | x = self.b3(x) 315 | h3 = self.up3(x) 316 | 317 | x = self.p2(x, res2) 318 | x = self.b2(x) 319 | h2 = x 320 | x = self.p1(x, res1) 321 | x = self.segmentation_head(x) 322 | x = F.interpolate(x, size=(h, w), mode='bilinear', align_corners=False) 323 | 324 | ah = h4 + h3 + h2 325 | ah = self.aux_head(ah, h, w) 326 | 327 | return x, ah 328 | else: 329 | x = self.b4(self.pre_conv(res4)) 330 | x = self.p3(x, res3) 331 | x = self.b3(x) 332 | 333 | x = self.p2(x, res2) 334 | x = self.b2(x) 335 | 336 | x = self.p1(x, res1) 337 | 338 | x = self.segmentation_head(x) 339 | x = F.interpolate(x, size=(h, w), mode='bilinear', align_corners=False) 340 | 341 | return x 342 | 343 | def init_weight(self): 344 | for m in self.children(): 345 | if isinstance(m, nn.Conv2d): 346 | nn.init.kaiming_normal_(m.weight, a=1) 347 | if m.bias is not None: 348 | nn.init.constant_(m.bias, 0) 349 | 350 | 351 | class UNetFormer(nn.Module): 352 | def __init__(self, 353 | decode_channels=64, 354 | dropout=0.5, 355 | backbone_name='swsl_resnet18', 356 | pretrained=True, 357 | window_size=8, 358 | num_classes=1 359 | ): 360 | super().__init__() 361 | 362 | self.backbone = timm.create_model(backbone_name, features_only=True, output_stride=32, 363 | out_indices=(1, 2, 3, 4), pretrained=pretrained) 364 | encoder_channels = self.backbone.feature_info.channels() 365 | 366 | self.decoder = Decoder(encoder_channels, decode_channels, dropout, window_size, num_classes) 367 | 368 | def forward(self, x): 369 | h, w = x.size()[-2:] 370 | res1, res2, res3, res4 = self.backbone(x) 371 | if self.training: 372 | 
x, ah = self.decoder(res1, res2, res3, res4, h, w) 373 | return x 374 | else: 375 | x = self.decoder(res1, res2, res3, res4, h, w) 376 | return x -------------------------------------------------------------------------------- /models/__pycache__/DCSwin_model.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/models/__pycache__/DCSwin_model.cpython-310.pyc -------------------------------------------------------------------------------- /models/__pycache__/FTUNetFormer_model.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/models/__pycache__/FTUNetFormer_model.cpython-310.pyc -------------------------------------------------------------------------------- /models/__pycache__/UNetFormer_model.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/models/__pycache__/UNetFormer_model.cpython-310.pyc -------------------------------------------------------------------------------- /plots/compare-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/plots/compare-1.png -------------------------------------------------------------------------------- /plots/compare-10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/plots/compare-10.png -------------------------------------------------------------------------------- /plots/compare-100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/plots/compare-100.png -------------------------------------------------------------------------------- /plots/compare-1000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/plots/compare-1000.png -------------------------------------------------------------------------------- /plots/compare-1100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/plots/compare-1100.png -------------------------------------------------------------------------------- /plots/compare-1200.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/plots/compare-1200.png -------------------------------------------------------------------------------- /plots/compare-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/plots/compare-2.png -------------------------------------------------------------------------------- /plots/compare-22.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/plots/compare-22.png -------------------------------------------------------------------------------- /plots/compare-250.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/plots/compare-250.png -------------------------------------------------------------------------------- /plots/compare-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/plots/compare-3.png -------------------------------------------------------------------------------- /plots/compare-33.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/plots/compare-33.png -------------------------------------------------------------------------------- /plots/compare-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/plots/compare-4.png -------------------------------------------------------------------------------- /plots/compare-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/plots/compare-5.png -------------------------------------------------------------------------------- /plots/compare-500.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/plots/compare-500.png -------------------------------------------------------------------------------- /plots/compare-600.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/plots/compare-600.png -------------------------------------------------------------------------------- /plots/compare-750.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/plots/compare-750.png -------------------------------------------------------------------------------- /plots/compare-800.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/plots/compare-800.png -------------------------------------------------------------------------------- /plots/compare-900.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/plots/compare-900.png -------------------------------------------------------------------------------- /plots/dcswin-25-epochs.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/plots/dcswin-25-epochs.png -------------------------------------------------------------------------------- /plots/ft-unet-former-25-epochs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/plots/ft-unet-former-25-epochs.png -------------------------------------------------------------------------------- /plots/unet-25-epochs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/plots/unet-25-epochs.png -------------------------------------------------------------------------------- /plots/unet-former-25-epochs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/plots/unet-former-25-epochs.png -------------------------------------------------------------------------------- /predictions/bergen_-5943_1104.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/predictions/bergen_-5943_1104.tif -------------------------------------------------------------------------------- /predictions/bergen_-5944_1104.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/predictions/bergen_-5944_1104.tif -------------------------------------------------------------------------------- /predictions/bergen_-5948_1107.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/predictions/bergen_-5948_1107.tif -------------------------------------------------------------------------------- /predictions/kristiansand_-4712_-1562.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/predictions/kristiansand_-4712_-1562.tif -------------------------------------------------------------------------------- /predictions/kristiansand_-4712_-1563.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/predictions/kristiansand_-4712_-1563.tif -------------------------------------------------------------------------------- /predictions/kristiansand_-4712_-1568.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/predictions/kristiansand_-4712_-1568.tif -------------------------------------------------------------------------------- /predictions/oslo_-3133_244.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/predictions/oslo_-3133_244.tif 
-------------------------------------------------------------------------------- /predictions/tromso_923_11083.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/predictions/tromso_923_11083.tif -------------------------------------------------------------------------------- /predictions/tromso_923_11084.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/predictions/tromso_923_11084.tif -------------------------------------------------------------------------------- /predictions/tromso_923_11086.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/predictions/tromso_923_11086.tif -------------------------------------------------------------------------------- /projectRegularization/INFO.txt: -------------------------------------------------------------------------------- 1 | projectRegularization was downloaded from: 2 | https://github.com/zorzi-s/projectRegularization 3 | 4 | Download and the pretrained_weights from: 5 | https://drive.google.com/drive/folders/1IPrDpvFq9ODW7UtPAJR_T-gGzxDat_uu 6 | 7 | Save them to folder: pretrained_weights 8 | -------------------------------------------------------------------------------- /projectRegularization/LICENSE: -------------------------------------------------------------------------------- 1 | 2 | SOFTWARE LICENSE AGREEMENT 3 | 4 | ICG Software – 2021, all rights reserved, hereinafter "the Software". 5 | 6 | This software has been developed by researchers of ICG (Institute of Computer Graphics and Vision). 7 | 8 | Institute of Computer Graphics and Vision (ICG), Inffeldgasse 16/II, 9 | 8010 Graz, Austria 10 | 11 | ICG holds all the ownership rights on the Software. 12 | 13 | The Software is still being currently developed. It is the ICG’s aim for the Software 14 | to be used by the scientific community so as to test it and, evaluate it so that ICG may improve it. 15 | 16 | For these reasons ICG has decided to distribute the Software. 17 | 18 | ICG grants to the academic user, a free of charge, without right to sub license non-exclusive right 19 | to use the Software for research purposes for a period of one (1) year from the date of the download 20 | of the source code. Any other use without of prior consent of ICG is prohibited. 21 | 22 | The academic user explicitly acknowledges having received from ICG all information allowing him 23 | to appreciate the adequacy between of the Software and his needs and to undertake all necessary 24 | precautions for his execution and use. 25 | 26 | The Software is provided only as a source. 
27 | 28 | In case of using the Software for a publication or other results obtained through the use of the Software, 29 | user should cite the Software as follows : 30 | 31 | @inproceedings{zorzi2021machine, 32 | title={Machine-learned regularization and polygonization of building segmentation masks}, 33 | author={Zorzi, Stefano and Bittner, Ksenia and Fraundorfer, Friedrich}, 34 | booktitle={2020 25th International Conference on Pattern Recognition (ICPR)}, 35 | pages={3098--3105}, 36 | year={2021}, 37 | organization={IEEE} 38 | } 39 | 40 | Every user of the Software could communicate to the developers [stefano.zorzi@icg.tugraz.at] 41 | his or her remarks as to the use of the Software. 42 | 43 | THE USER CANNOT USE, EXPLOIT OR COMMERCIALLY DISTRIBUTE THE SOFTWARE WITHOUT PRIOR AND EXPLICIT CONSENT 44 | OF ICG (fraundorfer@icg.tugraz.at). ANY SUCH ACTION WILL CONSTITUTE A FORGERY. 45 | 46 | THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT ANY WARRANTIES OF ANY NATURE AND ANY EXPRESS OR IMPLIED WARRANTIES, 47 | WITH REGARDS TO COMMERCIAL USE, PROFESSIONAL USE, LEGAL OR NOT, OR OTHER, OR COMMERCIALIZATION OR ADAPTATION. 48 | 49 | UNLESS EXPLICITLY PROVIDED BY LAW, IN NO EVENT, SHALL ICG OR THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 50 | INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE 51 | GOODS OR SERVICES, LOSS OF USE, DATA, OR PROFITS OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 52 | WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING FROM, 53 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 54 | -------------------------------------------------------------------------------- /projectRegularization/README.md: -------------------------------------------------------------------------------- 1 | # Regularization of Building Boundaries in Satellite and Aerial Images 2 | This repository contains the implementation for our publication "Machine-learned regularization and polygonization of building segmentation masks", ICPR 2021. 3 | If you use this implementation please cite the following publication: 4 | 5 | ~~~ 6 | @inproceedings{zorzi2021machine, 7 | title={Machine-learned regularization and polygonization of building segmentation masks}, 8 | author={Zorzi, Stefano and Bittner, Ksenia and Fraundorfer, Friedrich}, 9 | booktitle={2020 25th International Conference on Pattern Recognition (ICPR)}, 10 | pages={3098--3105}, 11 | year={2021}, 12 | organization={IEEE} 13 | } 14 | ~~~ 15 | and 16 | ~~~ 17 | @inproceedings{zorzi2019regularization, 18 | title={Regularization of building boundaries in satellite images using adversarial and regularized losses}, 19 | author={Zorzi, Stefano and Fraundorfer, Friedrich}, 20 | booktitle={IGARSS 2019-2019 IEEE International Geoscience and Remote Sensing Symposium}, 21 | pages={5140--5143}, 22 | year={2019}, 23 | organization={IEEE} 24 | } 25 | ~~~ 26 | 27 |

28 | 29 | Explanatory video of the approach: 30 | 31 | [![Watch the video](https://img.youtube.com/vi/07YQOlwIOMs/0.jpg)](https://www.youtube.com/watch?v=07YQOlwIOMs) 32 | 33 | # Dependencies 34 | 35 | * cuda 10.2 36 | * pytorch >= 1.3 37 | * opencv 38 | * gdal 39 | 40 | # Running the implementation 41 | After installing all of the required dependencies above you can download the pretrained weights from [here](https://drive.google.com/drive/folders/1IPrDpvFq9ODW7UtPAJR_T-gGzxDat_uu?usp=sharing). 42 | 43 | Unzip the archive and place *saved_models_gan* folder in the main *projectRegularization* directory. 44 | 45 | Please note that the polygonization step is not yet available! 46 | 47 | ## Evaluation 48 | Modify *variables.py* accordingly, then run the prediction issuing the command 49 | 50 | ~~~ 51 | python regularize.py 52 | ~~~ 53 | 54 | ## Training 55 | Modify *variables.py* accordingly, then run the training issuing the command 56 | 57 | ~~~ 58 | python train_gan_net.py 59 | ~~~ 60 | -------------------------------------------------------------------------------- /projectRegularization/README.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/projectRegularization/README.png -------------------------------------------------------------------------------- /projectRegularization/__pycache__/crf_loss.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/projectRegularization/__pycache__/crf_loss.cpython-310.pyc -------------------------------------------------------------------------------- /projectRegularization/__pycache__/data_loader_gan.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/projectRegularization/__pycache__/data_loader_gan.cpython-310.pyc -------------------------------------------------------------------------------- /projectRegularization/__pycache__/models.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/projectRegularization/__pycache__/models.cpython-310.pyc -------------------------------------------------------------------------------- /projectRegularization/__pycache__/training_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/projectRegularization/__pycache__/training_utils.cpython-310.pyc -------------------------------------------------------------------------------- /projectRegularization/__pycache__/variables.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/projectRegularization/__pycache__/variables.cpython-310.pyc -------------------------------------------------------------------------------- /projectRegularization/crf_loss.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import numpy as np 4 | import 
math 5 | import itertools 6 | import time 7 | import datetime 8 | import sys 9 | from math import exp 10 | import random 11 | 12 | #from torchvision.utils import save_image 13 | #from torchvision import datasets 14 | 15 | from torch.utils.data import DataLoader 16 | from torch.autograd import Variable 17 | 18 | import torch.nn as nn 19 | import torch.nn.functional as F 20 | import torch 21 | 22 | kernel_size = 9 #gaussian kernel dimension 23 | dilation = 1 #cheating :) The "real" dimension of the gaussian kernel is kernel size, but the "effective" dimension is (kernel_size*dilation + 1) 24 | padding = (kernel_size // 2) * dilation #do not touch this 25 | bs = 4 #batch size 26 | win = 256 #window size 27 | 28 | sigma_X = 3.0 #for distance gaussian 29 | sigma_I = 0.1 #for RGB/grayscale gaussian 30 | 31 | sample_interval = 20 # sample image every 32 | 33 | class kernel_loss(torch.nn.Module): 34 | 35 | def sub_kernel(self): 36 | filters = kernel_size * kernel_size 37 | middle = kernel_size // 2 38 | kernel = Variable(torch.zeros((filters, 1, kernel_size, kernel_size))).cuda() 39 | for i in range(kernel_size): 40 | for j in range(kernel_size): 41 | kernel[i*kernel_size+j, 0, i, j] = -1 42 | kernel[i*kernel_size+j, 0, middle, middle] = kernel[i*kernel_size+j, 0, middle, middle] + 1 43 | return kernel 44 | 45 | def dist_kernel(self): 46 | filters = kernel_size * kernel_size 47 | middle = kernel_size // 2 48 | kernel = Variable(torch.zeros((bs, filters, 1, 1))).cuda() 49 | 50 | for i in range(kernel_size): 51 | for j in range(kernel_size): 52 | ii = i - middle 53 | jj = j - middle 54 | distance = pow(ii,2) + pow(jj,2) 55 | kernel[:, i*kernel_size+j, 0, 0] = exp(-distance / pow(sigma_X,2)) 56 | #print(kernel.view(4,1,kernel_size,kernel_size)) 57 | return kernel 58 | 59 | def central_kernel(self): 60 | filters = kernel_size * kernel_size 61 | middle = kernel_size // 2 62 | kernel = Variable(torch.zeros((filters, 1, kernel_size, kernel_size))).cuda() 63 | for i in range(kernel_size): 64 | for j in range(kernel_size): 65 | kernel[i*kernel_size+j, 0, middle, middle] = 1 66 | return kernel 67 | 68 | def select_kernel(self): 69 | filters = kernel_size * kernel_size 70 | middle = kernel_size // 2 71 | kernel = Variable(torch.zeros((filters, 1, kernel_size, kernel_size))).cuda() 72 | for i in range(kernel_size): 73 | for j in range(kernel_size): 74 | kernel[i*kernel_size+j, 0, i, j] = 1 75 | return kernel 76 | 77 | def color_tensor(self, x): 78 | result = Variable(torch.zeros((bs, kernel_size*kernel_size, win-2*padding, win-2*padding))).cuda() 79 | 80 | for i in range(x.shape[1]): 81 | channel = x[:,i,:,:].unsqueeze(1) 82 | sub = nn.Conv2d(in_channels=1, out_channels=kernel_size*kernel_size, kernel_size=kernel_size, bias=False, padding=0, dilation=dilation) 83 | sub.weight.data = self.sub_matrix 84 | color = sub(channel) 85 | color = torch.pow(color,2) 86 | result = result + color 87 | 88 | result = torch.exp(-result / pow(sigma_I,2)) 89 | return result 90 | 91 | def probability_tensor(self, y): 92 | conv = nn.Conv2d(in_channels=1, out_channels=kernel_size*kernel_size, kernel_size=kernel_size, bias=False, padding=0, dilation=dilation) 93 | conv.weight.data = self.select_matrix 94 | prob = conv(y) 95 | return prob 96 | 97 | #def probability_central(self, y): 98 | # conv = nn.Conv2d(in_channels=1, out_channels=kernel_size*kernel_size, kernel_size=kernel_size, bias=False, padding=padding) 99 | # conv.weight.data = self.one_matrix 100 | # prob = conv(y) 101 | # return prob 102 | 103 | def __init__(self): 104 | 
super(kernel_loss,self).__init__() 105 | #self.softmax = nn.Softmax(dim=1) 106 | self.dist_tensor = self.dist_kernel() 107 | #self.one_matrix = self.central_kernel() 108 | self.select_matrix = self.select_kernel() 109 | self.sub_matrix = self.sub_kernel() #shape: [filters, 1, h, w] 110 | 111 | 112 | def forward(self,x,y): 113 | """ 114 | x --> Image. It can also have just 1 channel (grayscale). Values between 0 and 1 115 | y --> Mask. Values between 0 and 1 116 | """ 117 | #y = self.softmax(y) 118 | y0 = y[:,0,:,:].unsqueeze(1) #build: 0, background: 1, default 1 119 | y1 = y[:,1,:,:].unsqueeze(1) #build: 1, background: 0, default 0 120 | y0p = y0[:,:,padding:-padding,padding:-padding] 121 | y1p = y1[:,:,padding:-padding,padding:-padding] 122 | 123 | W = self.color_tensor(x) 124 | W = (W * self.dist_tensor.expand_as(W)) 125 | 126 | potts_loss_0 = y0p.expand_as(W) * W * self.probability_tensor(y1) 127 | potts_loss_1 = y1p.expand_as(W) * W * self.probability_tensor(y0) 128 | 129 | numel = potts_loss_0.numel() 130 | #ncut_loss_0 = (potts_loss_0 / (self.probability_tensor(y0) * W)).mean() 131 | #ncut_loss_1 = (potts_loss_1 / (self.probability_tensor(y1) * W)).mean() 132 | 133 | """ 134 | if random.randint(0,sample_interval) == 0: 135 | r = random.randint(0,20) 136 | 137 | img = torch.mean(W, dim=1).unsqueeze(1) 138 | #amin = torch.min(img) 139 | #amax = torch.max(img) 140 | #img = (img - amin) / (amax - amin) 141 | save_image(img, "./debug/%d_img.png" % r, nrow=2) 142 | 143 | #img2 = torch.mean(potts_loss_0, dim=1).unsqueeze(1) 144 | #amin = torch.min(img2) 145 | #amax = torch.max(img2) 146 | #img2 = (img2 - amin) / (amax - amin) 147 | #save_image(img2, "./debug/%d_b.png" % r, nrow=2) 148 | 149 | img3 = torch.mean(potts_loss_0, dim=1).unsqueeze(1) 150 | #amin = torch.min(img3) 151 | #amax = torch.max(img3) 152 | #img3 = (img3 - amin) / (amax - amin) 153 | save_image(img3, "./debug/%d_loss.png" % r, nrow=2) 154 | 155 | #img4 = torch.mean(loss_matrix, dim=1).unsqueeze(1) 156 | ##amin = torch.min(img4) 157 | ##amax = torch.max(img4) 158 | ##img4 = (img4 - amin) / (amax - amin) 159 | #save_image(img4, "./debug/%d_d.png" % r, nrow=2) 160 | save_image(x, "./debug/%d_map.png" % r, nrow=2) 161 | """ 162 | 163 | potts_loss_0 = (potts_loss_0).mean() 164 | potts_loss_1 = (potts_loss_1).mean() 165 | potts_loss = potts_loss_0 + potts_loss_1 166 | 167 | return potts_loss 168 | 169 | """ 170 | #ncut_loss_0 = potts_loss_0 / (self.probability_tensor(y0) * W).mean() 171 | #ncut_loss_1 = potts_loss_1 / (self.probability_tensor(y1) * W).mean() 172 | ncut_loss_0 = potts_loss_0 / (y0p.expand_as(W) * W).mean() 173 | ncut_loss_1 = potts_loss_1 / (y1p.expand_as(W) * W).mean() 174 | 175 | #ncut_loss_0 = ncut_loss_0.mean() 176 | #ncut_loss_1 = ncut_loss_1.mean() 177 | ncut_loss = ncut_loss_0 + ncut_loss_1 178 | 179 | #potts_loss = potts_loss_0 + potts_loss_1 180 | #ncut_loss = ncut_loss_0 + ncut_loss_1 181 | 182 | return (potts_loss, ncut_loss, numel) 183 | """ 184 | 185 | -------------------------------------------------------------------------------- /projectRegularization/data_loader_gan.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | from glob import glob 4 | from tqdm import tqdm 5 | import random 6 | from skimage import io 7 | from skimage.segmentation import mark_boundaries 8 | from skimage.transform import rotate 9 | import variables as var 10 | 11 | TEST = False 12 | 13 | def to_categorical(y, num_classes=None, dtype='float32'): 14 | 
15 | y = np.array(y, dtype='int') 16 | input_shape = y.shape 17 | if input_shape and input_shape[-1] == 1 and len(input_shape) > 1: 18 | input_shape = tuple(input_shape[:-1]) 19 | y = y.ravel() 20 | if not num_classes: 21 | num_classes = np.max(y) + 1 22 | n = y.shape[0] 23 | categorical = np.zeros((n, num_classes), dtype=dtype) 24 | categorical[np.arange(n), y] = 1 25 | output_shape = input_shape + (num_classes,) 26 | categorical = np.reshape(categorical, output_shape) 27 | return categorical 28 | 29 | class DataLoader(): 30 | 31 | def __init__(self, ws=512, nb=10000, bs=8): 32 | self.nb = nb 33 | self.bs = bs 34 | self.ws = ws 35 | 36 | #self.rgb_files = self.rgb_files[:10] 37 | #self.dsm_files = self.dsm_files[:10] 38 | #self.gti_files = self.gti_files[:10] 39 | 40 | self.load_data() 41 | self.num_tiles = len(self.rgb_imgs) 42 | self.sliding_index = 0 43 | 44 | def generator(self): 45 | for _ in range(self.nb): 46 | batch_rgb = [] 47 | batch_gti = [] 48 | batch_seg = [] 49 | for _ in range(self.bs): 50 | rgb, gti, seg = self.extract_image() 51 | 52 | batch_rgb.append(rgb) 53 | 54 | # the ground truth is categorized 55 | gti = to_categorical(gti != 0, 2) 56 | batch_gti.append(gti) 57 | 58 | # the segmentation is categorized 59 | seg = to_categorical(seg != 0, 2) 60 | batch_seg.append(seg) 61 | 62 | batch_rgb = np.asarray(batch_rgb) 63 | batch_gti = np.asarray(batch_gti) 64 | batch_seg = np.asarray(batch_seg) 65 | batch_rgb = batch_rgb / 255.0 66 | 67 | #batch_gti = batch_gti[:,:,:,np.newaxis] / 255.0 68 | 69 | yield (batch_rgb, batch_gti, batch_seg) 70 | 71 | 72 | def test_shape(self, a): 73 | ri = a.shape[0] % self.ws 74 | rj = a.shape[1] % self.ws 75 | return a[:-ri,:-rj] 76 | 77 | 78 | def random_hsv(self, img, value_h=30, value_s=30, value_v=30): 79 | hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV) 80 | h, s, v = cv2.split(hsv) 81 | 82 | h = np.int16(h) 83 | s = np.int16(s) 84 | v = np.int16(v) 85 | 86 | h += value_h 87 | h[h < 0] = 0 88 | h[h > 255] = 255 89 | 90 | s += value_s 91 | s[s < 0] = 0 92 | s[s > 255] = 255 93 | 94 | v += value_v 95 | v[v < 0] = 0 96 | v[v > 255] = 255 97 | 98 | h = np.uint8(h) 99 | s = np.uint8(s) 100 | v = np.uint8(v) 101 | 102 | final_hsv = cv2.merge((h, s, v)) 103 | img = cv2.cvtColor(final_hsv, cv2.COLOR_HSV2BGR) 104 | return img 105 | 106 | 107 | def extract_image(self, mode="sequential"): 108 | if mode == "random": 109 | rand_t = random.randint(0, self.num_tiles-1) 110 | else: 111 | if self.sliding_index < self.num_tiles: 112 | rand_t = self.sliding_index 113 | self.sliding_index = self.sliding_index + 1 114 | else: 115 | rand_t = 0 116 | self.sliding_index = 0 117 | 118 | rgb = self.rgb_imgs[rand_t].copy() 119 | gti = self.gti_imgs[rand_t].copy() 120 | seg = self.seg_imgs[rand_t].copy() 121 | 122 | h = rgb.shape[1] 123 | w = rgb.shape[0] 124 | 125 | void = True 126 | while void: 127 | rot = random.randint(0,90) 128 | ri = random.randint(0, int(h-self.ws*2)) 129 | rj = random.randint(0, int(w-self.ws*2)) 130 | win_rgb = rgb[ri:ri+int(self.ws*2), rj:rj+int(self.ws*2)] 131 | win_gti = gti[ri:ri+int(self.ws*2), rj:rj+int(self.ws*2)] 132 | win_seg = seg[ri:ri+int(self.ws*2), rj:rj+int(self.ws*2)] 133 | 134 | win_rgb = np.uint8(rotate(win_rgb, rot, resize=False, preserve_range=True)) 135 | win_gti = np.uint8(rotate(win_gti, rot, resize=False, preserve_range=True)) 136 | win_seg = np.uint8(rotate(win_seg, rot, resize=False, preserve_range=True)) 137 | 138 | win_rgb = win_rgb[self.ws//2:-self.ws//2, self.ws//2:-self.ws//2] 139 | win_gti = 
win_gti[self.ws//2:-self.ws//2, self.ws//2:-self.ws//2] 140 | win_seg = win_seg[self.ws//2:-self.ws//2, self.ws//2:-self.ws//2] 141 | 142 | if np.count_nonzero(win_seg): 143 | void = False 144 | 145 | # Perform some data augmentation 146 | rot = random.randint(0,3) 147 | win_rgb = np.rot90(win_rgb, k=rot) 148 | win_gti = np.rot90(win_gti, k=rot) 149 | win_seg = np.rot90(win_seg, k=rot) 150 | if random.randint(0,1) == 1: 151 | win_rgb = np.fliplr(win_rgb) 152 | win_gti = np.fliplr(win_gti) 153 | win_seg = np.fliplr(win_seg) 154 | 155 | r_h = random.randint(-20,20) 156 | r_s = random.randint(-20,20) 157 | r_v = random.randint(-20,20) 158 | win_rgb = self.random_hsv(win_rgb, r_h, r_s, r_v) 159 | 160 | win_rgb = win_rgb.astype(np.float32) 161 | win_gti = win_gti.astype(np.float32) 162 | win_seg = win_seg.astype(np.float32) 163 | return (win_rgb, win_gti, win_seg) 164 | 165 | 166 | def load_data(self): 167 | self.rgb_imgs = [] 168 | self.gti_imgs = [] 169 | self.seg_imgs = [] 170 | 171 | rgb_files = glob(var.DATASET_RGB) 172 | gti_files = glob(var.DATASET_GTI) 173 | seg_files = glob(var.DATASET_SEG) 174 | 175 | rgb_files.sort() 176 | gti_files.sort() 177 | seg_files.sort() 178 | 179 | combined = list(zip(rgb_files, gti_files, seg_files)) 180 | random.shuffle(combined) 181 | 182 | rgb_files[:], gti_files[:], seg_files[:] = zip(*combined) 183 | 184 | if TEST: 185 | rgb_files = rgb_files[:4] 186 | gti_files = gti_files[:4] 187 | seg_files = seg_files[:4] 188 | 189 | for rgb_name, gti_name, seg_name in tqdm(zip(rgb_files, gti_files, seg_files), total=len(rgb_files), desc="Loading dataset into RAM"): 190 | 191 | tmp = io.imread(rgb_name) 192 | tmp = tmp.astype(np.uint8) 193 | self.rgb_imgs.append(tmp) 194 | 195 | tmp = io.imread(gti_name) 196 | tmp = tmp.astype(np.uint8) 197 | self.gti_imgs.append(tmp) 198 | 199 | tmp = io.imread(seg_name) 200 | tmp = tmp.astype(np.uint8) 201 | self.seg_imgs.append(tmp) 202 | 203 | -------------------------------------------------------------------------------- /projectRegularization/gdal: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/projectRegularization/gdal -------------------------------------------------------------------------------- /projectRegularization/models.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | import torch 4 | 5 | 6 | def weights_init_normal(m): 7 | classname = m.__class__.__name__ 8 | if classname.find("Conv") != -1: 9 | torch.nn.init.normal_(m.weight.data, 0.0, 0.02) 10 | if hasattr(m, "bias") and m.bias is not None: 11 | torch.nn.init.constant_(m.bias.data, 0.0) 12 | elif classname.find("BatchNorm2d") != -1: 13 | torch.nn.init.normal_(m.weight.data, 1.0, 0.02) 14 | torch.nn.init.constant_(m.bias.data, 0.0) 15 | 16 | 17 | 18 | class ResidualBlock(nn.Module): 19 | def __init__(self, in_features): 20 | super(ResidualBlock, self).__init__() 21 | 22 | self.block = nn.Sequential( 23 | #nn.ReflectionPad2d(1), 24 | nn.Conv2d(in_features, in_features, 3, stride=1, padding=1), 25 | nn.InstanceNorm2d(in_features), 26 | nn.ReLU(inplace=True), 27 | #nn.ReflectionPad2d(1), 28 | nn.Conv2d(in_features, in_features, 3, stride=1, padding=1), 29 | nn.InstanceNorm2d(in_features), 30 | nn.ReLU(inplace=True), 31 | ) 32 | 33 | def forward(self, x): 34 | return x + self.block(x) 35 | 36 | 37 | 38 | class 
GeneratorResNet(nn.Module): 39 | def __init__(self, num_residual_blocks=8, in_features=256): 40 | super(GeneratorResNet, self).__init__() 41 | 42 | out_features = in_features 43 | 44 | model = [] 45 | 46 | # Residual blocks 47 | for _ in range(num_residual_blocks): 48 | model += [ResidualBlock(out_features)] 49 | 50 | # Upsampling 51 | for _ in range(2): 52 | out_features //= 2 53 | model += [ 54 | nn.Upsample(scale_factor=2), 55 | nn.Conv2d(in_features, out_features, 3, stride=1, padding=1), 56 | nn.InstanceNorm2d(out_features), 57 | nn.ReLU(inplace=True), 58 | nn.Conv2d(out_features, out_features, 3, stride=1, padding=1), 59 | nn.InstanceNorm2d(out_features), 60 | nn.ReLU(inplace=True), 61 | nn.Conv2d(out_features, out_features, 3, stride=1, padding=1), 62 | nn.InstanceNorm2d(out_features), 63 | nn.ReLU(inplace=True), 64 | ] 65 | in_features = out_features 66 | 67 | # Output layer 68 | #model += [nn.ReflectionPad2d(2), nn.Conv2d(out_features, 2, 7), nn.Softmax()] 69 | model += [nn.Conv2d(out_features, 2, 7, stride=1, padding=3), nn.Sigmoid()] 70 | 71 | self.model = nn.Sequential(*model) 72 | 73 | def forward(self, feature_map): 74 | x = self.model(feature_map) 75 | return x 76 | 77 | 78 | class Encoder(nn.Module): 79 | def __init__(self, channels=3+2): 80 | super(Encoder, self).__init__() 81 | 82 | # Initial convolution block 83 | out_features = 64 84 | model = [ 85 | nn.Conv2d(channels, out_features, 7, stride=1, padding=3), 86 | nn.InstanceNorm2d(out_features), 87 | nn.ReLU(inplace=True), 88 | ] 89 | in_features = out_features 90 | 91 | # Downsampling 92 | for _ in range(2): 93 | out_features *= 2 94 | model += [ 95 | nn.Conv2d(in_features, out_features, 3, stride=1, padding=1), 96 | nn.InstanceNorm2d(out_features), 97 | nn.ReLU(inplace=True), 98 | nn.Conv2d(out_features, out_features, 3, stride=1, padding=1), 99 | nn.InstanceNorm2d(out_features), 100 | nn.ReLU(inplace=True), 101 | nn.MaxPool2d(2, stride=2), 102 | ] 103 | in_features = out_features 104 | 105 | self.model = nn.Sequential(*model) 106 | 107 | def forward(self, arguments): 108 | x = torch.cat(arguments, dim=1) 109 | x = self.model(x) 110 | return x 111 | 112 | 113 | class Discriminator(nn.Module): 114 | def __init__(self): 115 | super(Discriminator, self).__init__() 116 | 117 | channels = 2 118 | out_channels = 2 119 | 120 | def discriminator_block(in_filters, out_filters, normalize=True): 121 | """Returns downsampling layers of each discriminator block""" 122 | layers = [nn.Conv2d(in_filters, out_filters, 3, stride=1, padding=1)] 123 | if normalize: 124 | layers.append(nn.InstanceNorm2d(out_filters)) 125 | layers.append(nn.ReLU()) 126 | 127 | layers.append(nn.Conv2d(out_filters, out_filters, 3, stride=1, padding=1)) 128 | if normalize: 129 | layers.append(nn.InstanceNorm2d(out_filters)) 130 | layers.append(nn.ReLU()) 131 | layers.append(nn.MaxPool2d(2, stride=2)) 132 | return layers 133 | 134 | self.model = nn.Sequential( 135 | *discriminator_block(channels, 64, normalize=False), 136 | *discriminator_block(64, 128), 137 | *discriminator_block(128, 256), 138 | *discriminator_block(256, 512), 139 | nn.Conv2d(512, out_channels, 3, padding=1), 140 | nn.Sigmoid() 141 | ) 142 | 143 | def forward(self, img): 144 | #img = torch.cat((rgb, mask), dim=1) 145 | img = self.model(img) 146 | return img 147 | -------------------------------------------------------------------------------- /projectRegularization/regularize.py: -------------------------------------------------------------------------------- 1 | import random 2 | from 
skimage import io 3 | from skimage.transform import rotate 4 | import numpy as np 5 | import torch 6 | from tqdm import tqdm 7 | from osgeo import gdal 8 | import os 9 | import glob 10 | from skimage.segmentation import mark_boundaries 11 | from PIL import Image, ImageDraw, ImageFont 12 | from numpy.linalg import svd 13 | import cv2 14 | from skimage import measure 15 | 16 | from models import GeneratorResNet, Encoder 17 | from skimage.transform import rescale 18 | import variables as var 19 | 20 | 21 | 22 | 23 | def compute_IoU(mask, pred): 24 | mask = mask!=0 25 | pred = pred!=0 26 | 27 | m1 = np.logical_and(mask, pred) 28 | m2 = np.logical_and(np.logical_not(mask), np.logical_not(pred)) 29 | m3 = np.logical_and(mask==0, pred==1) 30 | m4 = np.logical_and(mask==1, pred==0) 31 | m5 = np.logical_or(mask, pred) 32 | 33 | tp = np.count_nonzero(m1) 34 | fp = np.count_nonzero(m3) 35 | fn = np.count_nonzero(m4) 36 | 37 | IoU = tp/(tp+(fn+fp)) 38 | return IoU 39 | 40 | 41 | def to_categorical(y, num_classes=None, dtype='float32'): 42 | y = np.array(y, dtype='int') 43 | input_shape = y.shape 44 | if input_shape and input_shape[-1] == 1 and len(input_shape) > 1: 45 | input_shape = tuple(input_shape[:-1]) 46 | y = y.ravel() 47 | if not num_classes: 48 | num_classes = np.max(y) + 1 49 | n = y.shape[0] 50 | categorical = np.zeros((n, num_classes), dtype=dtype) 51 | categorical[np.arange(n), y] = 1 52 | output_shape = input_shape + (num_classes,) 53 | categorical = np.reshape(categorical, output_shape) 54 | return categorical 55 | 56 | 57 | def predict_building(rgb, mask, model): 58 | Tensor = torch.cuda.FloatTensor 59 | 60 | mask = to_categorical(mask, 2) 61 | 62 | rgb = rgb[np.newaxis, :, :, :] 63 | mask = mask[np.newaxis, :, :, :] 64 | 65 | E, G = model 66 | 67 | rgb = Tensor(rgb) 68 | mask = Tensor(mask) 69 | rgb = rgb.permute(0,3,1,2) 70 | mask = mask.permute(0,3,1,2) 71 | 72 | rgb = rgb / 255.0 73 | 74 | # PREDICTION 75 | pred = G(E([rgb, mask])) 76 | pred = pred.permute(0,2,3,1) 77 | 78 | pred = pred.detach().cpu().numpy() 79 | 80 | pred = np.argmax(pred[0,:,:,:], axis=-1) 81 | return pred 82 | 83 | 84 | 85 | def fix_limits(i_min, i_max, j_min, j_max, min_image_size=256): 86 | 87 | def closest_divisible_size(size, factor=4): 88 | while size % factor: 89 | size += 1 90 | return size 91 | 92 | height = i_max - i_min 93 | width = j_max - j_min 94 | 95 | # pad the rows 96 | if height < min_image_size: 97 | diff = min_image_size - height 98 | else: 99 | diff = closest_divisible_size(height) - height + 16 100 | 101 | i_min -= (diff // 2) 102 | i_max += (diff // 2 + diff % 2) 103 | 104 | # pad the columns 105 | if width < min_image_size: 106 | diff = min_image_size - width 107 | else: 108 | diff = closest_divisible_size(width) - width + 16 109 | 110 | j_min -= (diff // 2) 111 | j_max += (diff // 2 + diff % 2) 112 | 113 | return i_min, i_max, j_min, j_max 114 | 115 | 116 | 117 | def regularization(rgb, ins_segmentation, model, in_mode="semantic", out_mode="semantic", min_size=10): 118 | assert in_mode == "instance" or in_mode == "semantic" 119 | assert out_mode == "instance" or out_mode == "semantic" 120 | 121 | if in_mode == "semantic": 122 | ins_segmentation = np.uint16(measure.label(ins_segmentation, background=0)) 123 | 124 | max_instance = np.amax(ins_segmentation) 125 | border = 256 126 | 127 | ins_segmentation = np.uint16(cv2.copyMakeBorder(ins_segmentation,border,border,border,border,cv2.BORDER_CONSTANT,value=0)) 128 | rgb = 
np.uint8(cv2.copyMakeBorder(rgb,border,border,border,border,cv2.BORDER_CONSTANT,value=(0,0,0))) 129 | 130 | regularization = np.zeros(ins_segmentation.shape, dtype=np.uint16) 131 | 132 | for ins in tqdm(range(1, max_instance+1), desc="Regularization"): 133 | indices = np.argwhere(ins_segmentation==ins) 134 | building_size = indices.shape[0] 135 | if building_size > min_size: 136 | i_min = np.amin(indices[:,0]) 137 | i_max = np.amax(indices[:,0]) 138 | j_min = np.amin(indices[:,1]) 139 | j_max = np.amax(indices[:,1]) 140 | 141 | i_min, i_max, j_min, j_max = fix_limits(i_min, i_max, j_min, j_max) 142 | 143 | mask = np.copy(ins_segmentation[i_min:i_max, j_min:j_max] == ins) 144 | rgb_mask = np.copy(rgb[i_min:i_max, j_min:j_max, :]) 145 | 146 | 147 | 148 | max_building_size = 1024 149 | rescaled = False 150 | if mask.shape[0] > max_building_size and mask.shape[0] >= mask.shape[1]: 151 | f = max_building_size / mask.shape[0] 152 | mask = rescale(mask, f, anti_aliasing=False, preserve_range=True) 153 | rgb_mask = rescale(rgb_mask, f, anti_aliasing=False) 154 | rescaled = True 155 | elif mask.shape[1] > max_building_size and mask.shape[1] >= mask.shape[0]: 156 | f = max_building_size / mask.shape[1] 157 | mask = rescale(mask, f, anti_aliasing=False) 158 | rgb_mask = rescale(rgb_mask, f, anti_aliasing=False, preserve_range=True) 159 | rescaled = True 160 | 161 | pred = predict_building(rgb_mask, mask, model) 162 | 163 | if rescaled: 164 | pred = rescale(pred, 1/f, anti_aliasing=False, preserve_range=True) 165 | 166 | 167 | 168 | pred_indices = np.argwhere(pred != 0) 169 | 170 | if pred_indices.shape[0] > 0: 171 | pred_indices[:,0] = pred_indices[:,0] + i_min 172 | pred_indices[:,1] = pred_indices[:,1] + j_min 173 | x, y = zip(*pred_indices) 174 | if out_mode == "semantic": 175 | regularization[x,y] = 1 176 | else: 177 | regularization[x,y] = ins 178 | 179 | return regularization[border:-border, border:-border] 180 | 181 | 182 | 183 | def copyGeoreference(inp, output): 184 | dataset = gdal.Open(inp) 185 | if dataset is None: 186 | print('Unable to open', inp, 'for reading') 187 | sys.exit(1) 188 | 189 | projection = dataset.GetProjection() 190 | geotransform = dataset.GetGeoTransform() 191 | 192 | if projection is None and geotransform is None: 193 | print('No projection or geotransform found on file' + input) 194 | sys.exit(1) 195 | 196 | dataset2 = gdal.Open(output, gdal.GA_Update) 197 | 198 | if dataset2 is None: 199 | print('Unable to open', output, 'for writing') 200 | sys.exit(1) 201 | 202 | if geotransform is not None and geotransform != (0, 1, 0, 0, 0, 1): 203 | dataset2.SetGeoTransform(geotransform) 204 | 205 | if projection is not None and projection != '': 206 | dataset2.SetProjection(projection) 207 | 208 | gcp_count = dataset.GetGCPCount() 209 | if gcp_count != 0: 210 | dataset2.SetGCPs(dataset.GetGCPs(), dataset.GetGCPProjection()) 211 | 212 | dataset = None 213 | dataset2 = None 214 | 215 | 216 | 217 | def regularize_segmentations(img_folder, seg_folder, out_folder, in_mode="semantic", out_mode="semantic", samples=None): 218 | """ 219 | BUILDING REGULARIZATION 220 | Inputs: 221 | - satellite image (3 channels) 222 | - building segmentation (1 channel) 223 | Output: 224 | - regularized mask 225 | """ 226 | 227 | img_files = glob.glob(img_folder) 228 | seg_files = glob.glob(seg_folder) 229 | 230 | img_files.sort() 231 | seg_files.sort() 232 | 233 | for num, (satellite_image_file, building_segmentation_file) in enumerate(zip(img_files, seg_files)): 234 | print(satellite_image_file, 
building_segmentation_file) 235 | _, rgb_name = os.path.split(satellite_image_file) 236 | _, seg_name = os.path.split(building_segmentation_file) 237 | assert rgb_name == seg_name 238 | 239 | output_file = out_folder + seg_name 240 | 241 | E1 = Encoder() 242 | G = GeneratorResNet() 243 | G.load_state_dict(torch.load(var.MODEL_GENERATOR)) 244 | E1.load_state_dict(torch.load(var.MODEL_ENCODER)) 245 | E1 = E1.cuda() 246 | G = G.cuda() 247 | 248 | model = [E1,G] 249 | 250 | M = io.imread(building_segmentation_file) 251 | M = np.uint16(M) 252 | P = io.imread(satellite_image_file) 253 | P = np.uint8(P) 254 | 255 | R = regularization(P, M, model, in_mode=in_mode, out_mode=out_mode) 256 | 257 | if out_mode == "instance": 258 | io.imsave(output_file, np.uint16(R)) 259 | else: 260 | io.imsave(output_file, np.uint8(R*255)) 261 | 262 | if samples is not None: 263 | i = 1000 264 | j = 1000 265 | h, w = 1080, 1920 266 | P = P[i:i+h, j:j+w] 267 | R = R[i:i+h, j:j+w] 268 | M = M[i:i+h, j:j+w] 269 | 270 | R = mark_boundaries(P, R, mode="thick") 271 | M = mark_boundaries(P, M, mode="thick") 272 | 273 | R = np.uint8(R*255) 274 | M = np.uint8(M*255) 275 | 276 | font = cv2.FONT_HERSHEY_SIMPLEX 277 | bottomLeftCornerOfText = (20,1060) 278 | fontScale = 1 279 | fontColor = (255,255,0) 280 | lineType = 2 281 | 282 | cv2.putText(R, "INRIA dataset, " + rgb_name + ", regularization", 283 | bottomLeftCornerOfText, 284 | font, 285 | fontScale, 286 | fontColor, 287 | lineType) 288 | 289 | cv2.putText(M, "INRIA dataset, " + rgb_name + ", segmentation", 290 | bottomLeftCornerOfText, 291 | font, 292 | fontScale, 293 | fontColor, 294 | lineType) 295 | 296 | io.imsave(samples + "./%d_2reg.png" % num, np.uint8(R)) 297 | io.imsave(samples + "./%d_1seg.png" % num, np.uint8(M)) 298 | 299 | copyGeoreference(satellite_image_file, output_file) 300 | copyGeoreference(satellite_image_file, building_segmentation_file) 301 | 302 | 303 | regularize_segmentations(img_folder=var.INF_RGB, seg_folder=var.INF_SEG, out_folder=var.INF_OUT, in_mode="semantic", out_mode="semantic", samples=None) 304 | -------------------------------------------------------------------------------- /projectRegularization/train_gan_net.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | 4 | import torch 5 | from torch import nn 6 | from torch import optim 7 | from torch.optim.lr_scheduler import MultiStepLR 8 | from torch.autograd import Variable 9 | from torch.utils.data import DataLoader 10 | 11 | from tqdm import tqdm 12 | import click 13 | import numpy as np 14 | import cv2 15 | from skimage.segmentation import mark_boundaries 16 | from skimage import io 17 | import itertools 18 | 19 | from models import GeneratorResNet, Encoder, Discriminator 20 | from data_loader_gan import DataLoader 21 | from training_utils import sample_images, LossBuffer, LambdaLR 22 | import variables as var 23 | from crf_loss import kernel_loss 24 | 25 | 26 | 27 | def crf_factor(batch_index, start_crf_batch, end_crf_batch, crf_initial_factor, crf_final_factor): 28 | if batch_index <= start_crf_batch: 29 | return 0.0 30 | elif start_crf_batch < batch_index < end_crf_batch: 31 | return crf_initial_factor + ((crf_final_factor - crf_initial_factor) * (batch_index - start_crf_batch) / (end_crf_batch - start_crf_batch)) 32 | else: 33 | return crf_final_factor 34 | 35 | 36 | def train( 37 | models_path='./saved_models_gan/', \ 38 | restore=False, \ 39 | batch_size=4, \ 40 | start_batch=0, n_batches=140000, \ 41 | 
start_crf_batch=60000, end_crf_batch=120000, crf_initial_factor=0.0, crf_final_factor=175.0, \ 42 | start_lr_decay=120000, \ 43 | start_lr=0.00004, win_size=256, sample_interval=20, backup_interval=5000): 44 | 45 | patch_size = int(win_size / pow(2, 4)) 46 | 47 | Tensor = torch.cuda.FloatTensor 48 | 49 | e1 = Encoder(channels=3+2) 50 | e2 = Encoder(channels=2) 51 | net = GeneratorResNet() 52 | disc = Discriminator() 53 | 54 | if restore: 55 | print("Restoring model number %d" % start_batch) 56 | e1.load_state_dict(torch.load(models_path + "E%d_e1" % start_batch)) 57 | e2.load_state_dict(torch.load(models_path + "E%d_e2" % start_batch)) 58 | net.load_state_dict(torch.load(models_path + "E%d_net" % start_batch)) 59 | disc.load_state_dict(torch.load(models_path + "E%d_disc" % start_batch)) 60 | 61 | e1 = e1.cuda() 62 | e2 = e2.cuda() 63 | net = net.cuda() 64 | disc = disc.cuda() 65 | 66 | os.makedirs(models_path, exist_ok=True) 67 | 68 | loss_0_buffer = LossBuffer() 69 | loss_1_buffer = LossBuffer() 70 | loss_2_buffer = LossBuffer() 71 | loss_3_buffer = LossBuffer() 72 | loss_4_buffer = LossBuffer() 73 | loss_5_buffer = LossBuffer() 74 | 75 | gen_obj = DataLoader(bs=batch_size, nb=n_batches, ws=win_size) 76 | 77 | # Optimizers 78 | optimizer_G = torch.optim.Adam(itertools.chain(net.parameters(), e1.parameters(), e2.parameters()), lr=start_lr) 79 | optimizer_D = torch.optim.Adam(disc.parameters(), lr=start_lr) 80 | 81 | # Learning rate update schedulers 82 | lr_scheduler_G = torch.optim.lr_scheduler.LambdaLR(optimizer_G, lr_lambda=LambdaLR(n_batches, start_lr_decay).step) 83 | lr_scheduler_D = torch.optim.lr_scheduler.LambdaLR(optimizer_D, lr_lambda=LambdaLR(n_batches, start_lr_decay).step) 84 | 85 | bce_criterion = nn.BCELoss() 86 | bce_criterion = bce_criterion.cuda() 87 | 88 | densecrflosslayer = kernel_loss() 89 | densecrflosslayer = densecrflosslayer.cuda() 90 | 91 | loader = gen_obj.generator() 92 | train_iterator = tqdm(loader, total=(n_batches + 1 - start_batch)) 93 | img_index = 0 94 | 95 | for batch_index, (rgb, gti, seg) in enumerate(train_iterator): 96 | 97 | batch_index = batch_index + start_batch 98 | 99 | rgb = Variable(Tensor(rgb)) 100 | gti = Variable(Tensor(gti)) 101 | seg = Variable(Tensor(seg)) 102 | 103 | rgb = rgb.permute(0,3,1,2) 104 | gti = gti.permute(0,3,1,2) 105 | seg = seg.permute(0,3,1,2) 106 | 107 | # Adversarial ground truths 108 | ones = Variable(Tensor(np.ones((batch_size, 1, patch_size, patch_size))), requires_grad=False) 109 | zeros = Variable(Tensor(np.zeros((batch_size, 1, patch_size, patch_size))), requires_grad=False) 110 | valid = torch.cat((ones, zeros), dim=1) 111 | fake = torch.cat((zeros, ones), dim=1) 112 | 113 | # ------------------ 114 | # Train Generators 115 | # ------------------ 116 | 117 | #e1.train() 118 | #e2.train() 119 | #net.train() 120 | 121 | optimizer_G.zero_grad() 122 | 123 | reg = net(e1([rgb, seg])) 124 | rec = net(e2([gti])) 125 | 126 | # Identity loss (reconstruction loss) 127 | loss_rec_1 = bce_criterion(reg, seg) 128 | loss_rec_2 = bce_criterion(rec, gti) 129 | 130 | # GAN loss 131 | loss_GAN = bce_criterion(disc(reg), valid) 132 | 133 | # CRF loss 134 | pot_multiplier = crf_factor(batch_index, start_crf_batch, end_crf_batch, crf_initial_factor, crf_final_factor) 135 | loss_pot = densecrflosslayer(rgb, reg) 136 | loss_pot = loss_pot.cuda() 137 | 138 | # Total loss 139 | loss_G = 3 * loss_GAN + 1 * loss_rec_1 + 3 * loss_rec_2 + pot_multiplier * loss_pot 140 | 141 | loss_G.backward() 142 | optimizer_G.step() 143 | 144 | 145 | # 
----------------------- 146 | # Train Discriminator A 147 | # ----------------------- 148 | 149 | #disc.train() 150 | 151 | optimizer_D.zero_grad() 152 | 153 | loss_real = bce_criterion(disc(rec.detach()), valid) 154 | loss_fake = bce_criterion(disc(reg.detach()), fake) 155 | 156 | # Total loss 157 | loss_D = (loss_real + loss_fake) / 2 158 | 159 | loss_D.backward() 160 | optimizer_D.step() 161 | 162 | # -------------- 163 | # Update LR 164 | # -------------- 165 | 166 | lr_scheduler_G.step(batch_index) 167 | lr_scheduler_D.step(batch_index) 168 | 169 | for g in optimizer_D.param_groups: 170 | current_lr = g['lr'] 171 | 172 | # -------------- 173 | # Log Progress 174 | # -------------- 175 | 176 | status = "[Batch %d][D loss: %f][G loss: %f, adv: %f, rec1: %f, rec2: %f][pot: %f, pot_mul: %f][lr: %f]" % \ 177 | (batch_index, \ 178 | loss_0_buffer.push(loss_D.item()), \ 179 | loss_1_buffer.push(loss_G.item()), loss_2_buffer.push(loss_GAN.item()), loss_3_buffer.push(loss_rec_1.item()), loss_4_buffer.push(loss_rec_2.item()), 180 | loss_5_buffer.push(loss_pot.item()), pot_multiplier, current_lr, ) 181 | 182 | train_iterator.set_description(status) 183 | 184 | if (batch_index % sample_interval == 0): 185 | img_index += 1 186 | void_mask = torch.zeros(gti.shape).cuda() 187 | sample_images(img_index, rgb, [void_mask, gti, rec, seg, reg]) 188 | if img_index >= 100: 189 | img_index = 0 190 | 191 | if (batch_index % backup_interval == 0): 192 | torch.save(e1.state_dict(), models_path + "E" + str(batch_index) + "_e1") 193 | torch.save(e2.state_dict(), models_path + "E" + str(batch_index) + "_e2") 194 | torch.save(net.state_dict(), models_path + "E" + str(batch_index) + "_net") 195 | torch.save(disc.state_dict(), models_path + "E" + str(batch_index) + "_disc") 196 | 197 | 198 | if __name__ == '__main__': 199 | train() 200 | -------------------------------------------------------------------------------- /projectRegularization/training_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import glob 4 | from tqdm import tqdm 5 | import random 6 | from skimage import io 7 | from skimage.segmentation import mark_boundaries 8 | 9 | import random 10 | import time 11 | import datetime 12 | import sys 13 | 14 | from torch.autograd import Variable 15 | import torch 16 | import numpy as np 17 | 18 | from osgeo import gdal 19 | 20 | import variables as var 21 | 22 | 23 | def sample_images(sample_index, img, masks): 24 | batch = img.shape[0] 25 | 26 | img = img.permute(0,2,3,1) 27 | 28 | for i in range(len(masks)): 29 | masks[i] = masks[i].permute(0,2,3,1) 30 | 31 | img = img.cpu().numpy() 32 | ip = np.uint8(img * 255) 33 | for i in range(len(masks)): 34 | masks[i] = masks[i].detach().cpu().numpy() 35 | masks[i] = np.argmax(masks[i], axis=-1) 36 | masks[i] = np.uint8(masks[i] * 255) 37 | 38 | line_mode = "inner" 39 | 40 | for i in range(len(masks)): 41 | row = np.copy(ip[0,:,:,:]) 42 | line = cv2.Canny(masks[i][0,:,:], 0, 255) 43 | row = mark_boundaries(row, line, color=(1,1,0), mode=line_mode) * 255#, outline_color=(self.red,self.greed,0)) 44 | for b in range(1,batch): 45 | pic = np.copy(ip[b,:,:,:]) 46 | line = cv2.Canny(masks[i][b,:,:], 0, 255) 47 | pic = mark_boundaries(pic, line, color=(1,1,0), mode=line_mode) * 255#, outline_color=(self.red,self.greed,0)) 48 | row = np.concatenate((row, pic), 1) 49 | masks[i] = row 50 | 51 | img = np.concatenate(masks, 0) 52 | img = np.uint8(img) 53 | io.imsave(var.DEBUG_DIR + "debug_%s.png" % 
str(sample_index), img) 54 | 55 | 56 | class LossBuffer(): 57 | def __init__(self, max_size=100): 58 | self.data = [] 59 | self.max_size = max_size 60 | 61 | def push(self, data): 62 | self.data.append(data) 63 | if len(self.data) > self.max_size: 64 | self.data = self.data[1:] 65 | return sum(self.data) / len(self.data) 66 | 67 | 68 | class LambdaLR(): 69 | def __init__(self, n_batches, decay_start_batch): 70 | assert ((n_batches - decay_start_batch) > 0), "Decay must start before the training session ends!" 71 | self.n_batches = n_batches 72 | self.decay_start_batch = decay_start_batch 73 | 74 | def step(self, batch): 75 | if batch > self.decay_start_batch: 76 | factor = 1.0 - (batch - self.decay_start_batch) / (self.n_batches - self.decay_start_batch) 77 | if factor > 0: 78 | return factor 79 | else: 80 | return 0.0 81 | else: 82 | return 1.0 83 | -------------------------------------------------------------------------------- /projectRegularization/variables.py: -------------------------------------------------------------------------------- 1 | # CONFIGURE THE PATHS HERE: 2 | 3 | # TRAINING 4 | DATASET_RGB = "/home/shymon/datasets/mapai_full/train/images/*.tif" 5 | DATASET_GTI = "/home/shymon/datasets/mapai_full/train/masks/*.tif" 6 | DATASET_SEG = "/home/shymon/Documents/mapAI-regularization/predictions/*.tif" 7 | 8 | DEBUG_DIR = "./debug/" 9 | 10 | # INFERENCE 11 | INF_RGB = "/home/shymon/datasets/mapai_full/task1_test/images/*.tif" 12 | INF_SEG = "/home/shymon/Documents/mapAI-regularization/predictions/*.tif" 13 | INF_OUT = "/home/shymon/Documents/mapAI-regularization/regularizations/" 14 | 15 | MODEL_ENCODER = "/home/shymon/Documents/mapAI-regularization/projectRegularization/pretrained_weights/E140000_e1" 16 | MODEL_GENERATOR = "/home/shymon/Documents/mapAI-regularization/projectRegularization/pretrained_weights/E140000_net" 17 | -------------------------------------------------------------------------------- /regularizations/bergen_-5943_1104.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/regularizations/bergen_-5943_1104.tif -------------------------------------------------------------------------------- /regularizations/bergen_-5944_1104.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/regularizations/bergen_-5944_1104.tif -------------------------------------------------------------------------------- /regularizations/bergen_-5948_1107.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/regularizations/bergen_-5948_1107.tif -------------------------------------------------------------------------------- /regularizations/kristiansand_-4712_-1562.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/regularizations/kristiansand_-4712_-1562.tif -------------------------------------------------------------------------------- /regularizations/kristiansand_-4712_-1563.tif: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/regularizations/kristiansand_-4712_-1563.tif -------------------------------------------------------------------------------- /regularizations/kristiansand_-4712_-1568.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/regularizations/kristiansand_-4712_-1568.tif -------------------------------------------------------------------------------- /regularizations/oslo_-3133_244.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/regularizations/oslo_-3133_244.tif -------------------------------------------------------------------------------- /regularizations/tromso_923_11083.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/regularizations/tromso_923_11083.tif -------------------------------------------------------------------------------- /regularizations/tromso_923_11084.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/regularizations/tromso_923_11084.tif -------------------------------------------------------------------------------- /regularizations/tromso_923_11086.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/regularizations/tromso_923_11086.tif -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | aiohttp==3.8.4 2 | aiosignal==1.3.1 3 | anyio==3.6.2 4 | argon2-cffi==21.3.0 5 | argon2-cffi-bindings==21.2.0 6 | asttokens==2.2.1 7 | async-timeout==4.0.2 8 | attrs==22.2.0 9 | backcall==0.2.0 10 | beautifulsoup4==4.11.2 11 | bleach==6.0.0 12 | brotlipy==0.7.0 13 | certifi==2022.12.7 14 | cffi==1.15.1 15 | charset-normalizer==2.0.4 16 | click==8.1.3 17 | click-plugins==1.1.1 18 | cligj==0.7.2 19 | comm==0.1.3 20 | contourpy==1.0.7 21 | cryptography==39.0.1 22 | cycler==0.11.0 23 | datasets==2.10.1 24 | debugpy==1.6.7 25 | decorator==5.1.1 26 | defusedxml==0.7.1 27 | dill==0.3.6 28 | efficientnet-pytorch==0.7.1 29 | einops==0.6.0 30 | entrypoints==0.4 31 | executing==1.2.0 32 | fastjsonschema==2.16.3 33 | filelock==3.9.0 34 | Fiona==1.9.1 35 | flit_core==3.6.0 36 | fonttools==4.39.0 37 | frozenlist==1.3.3 38 | fsspec==2023.3.0 39 | GDAL==3.6.3 40 | geopandas==0.12.2 41 | huggingface-hub==0.12.1 42 | idna==3.4 43 | imagecodecs==2023.3.16 44 | imageio==2.26.0 45 | importlib-metadata==6.0.0 46 | importlib-resources==5.12.0 47 | imutils==0.5.4 48 | ipykernel==6.22.0 49 | ipython==8.12.0 50 | ipython-genutils==0.2.0 51 | jedi==0.18.2 52 | Jinja2==3.1.2 53 | joblib==1.2.0 54 | jsonschema==4.17.3 55 | jupyter_client==8.1.0 56 | jupyter_core==5.3.0 57 | jupyter-server==1.23.6 58 | jupyterlab-pygments==0.2.2 59 | kiwisolver==1.4.4 60 | lazy_loader==0.1 61 | MarkupSafe==2.1.1 62 | matplotlib==3.7.1 63 | matplotlib-inline==0.1.6 64 | mistune==2.0.5 65 | mkl-fft==1.3.1 66 | mkl-random==1.2.2 
67 | mkl-service==2.4.0 68 | multidict==6.0.4 69 | multiprocess==0.70.14 70 | munch==2.5.0 71 | nbclassic==0.5.2 72 | nbclient==0.7.2 73 | nbconvert==7.2.9 74 | nbformat==5.7.3 75 | nest-asyncio==1.5.6 76 | networkx==3.0 77 | notebook==6.5.3 78 | notebook_shim==0.2.2 79 | numpy==1.23.5 80 | opencv-python==4.7.0.72 81 | packaging==23.0 82 | pandas==1.5.3 83 | pandocfilters==1.5.0 84 | parso==0.8.3 85 | pexpect==4.8.0 86 | pickleshare==0.7.5 87 | Pillow==9.4.0 88 | pip==23.0.1 89 | pkgutil_resolve_name==1.3.10 90 | platformdirs==3.2.0 91 | potrace==0.3 92 | pretrainedmodels==0.7.4 93 | prometheus-client==0.16.0 94 | prompt-toolkit==3.0.38 95 | psutil==5.9.0 96 | ptyprocess==0.7.0 97 | pure-eval==0.2.2 98 | pyarrow==11.0.0 99 | pycparser==2.21 100 | Pygments==2.11.2 101 | pylsd-nova==1.2.0 102 | pyOpenSSL==23.0.0 103 | pyparsing==3.0.9 104 | pyproj==3.4.1 105 | pyrsistent==0.18.0 106 | PySocks==1.7.1 107 | python-dateutil==2.8.2 108 | pytz==2022.7.1 109 | PyWavelets==1.4.1 110 | PyYAML==6.0 111 | pyzmq==25.0.2 112 | requests==2.28.1 113 | responses==0.18.0 114 | scikit-image==0.20.0 115 | scikit-learn==1.2.2 116 | scipy==1.10.1 117 | segmentation-models-pytorch==0.3.2 118 | Send2Trash==1.8.0 119 | setuptools==67.6.0 120 | shapely==2.0.1 121 | six==1.16.0 122 | sniffio==1.3.0 123 | soupsieve==2.3.2.post1 124 | stack-data==0.6.2 125 | terminado==0.17.1 126 | threadpoolctl==3.1.0 127 | tifffile==2023.2.28 128 | timm==0.6.12 129 | tinycss2==1.2.1 130 | torch==1.13.1 131 | torchaudio==0.13.1 132 | torchvision==0.14.1 133 | tornado==6.2 134 | tqdm==4.65.0 135 | traitlets==5.9.0 136 | typing_extensions==4.4.0 137 | urllib3==1.26.14 138 | wcwidth==0.2.6 139 | webencodings==0.5.1 140 | websocket-client==1.5.1 141 | wheel==0.38.4 142 | xxhash==3.2.0 143 | yarl==1.8.2 144 | zipp==3.15.0 145 | -------------------------------------------------------------------------------- /trained_models/Link_to_download_trained_models.txt: -------------------------------------------------------------------------------- 1 | DOWNLOAD TRAINED MODELS FROM: 2 | https://drive.google.com/drive/folders/1P6xTVq-o_mChcaDKBYugEnAX7Q2ZAzuM?usp=sharing 3 | 4 | Have a nice day. --------------------------------------------------------------------------------