├── 01-download-and-prepare-mapAI.ipynb ├── 02-mapai-unet-regularization.ipynb ├── 03-mapai-unet-former-regularization.ipynb ├── 04-mapai-ft-unet-former-regularization.ipynb ├── 05-mapai-dcswin-regularization.ipynb ├── 06-evaluate.ipynb ├── 07-vectorize-building-footprint.ipynb ├── DATASET └── info.txt ├── README.md ├── models ├── DCSwin_model.py ├── FTUNetFormer_model.py ├── UNetFormer_model.py └── __pycache__ │ ├── DCSwin_model.cpython-310.pyc │ ├── FTUNetFormer_model.cpython-310.pyc │ └── UNetFormer_model.cpython-310.pyc ├── plots ├── compare-1.png ├── compare-10.png ├── compare-100.png ├── compare-1000.png ├── compare-1100.png ├── compare-1200.png ├── compare-2.png ├── compare-22.png ├── compare-250.png ├── compare-3.png ├── compare-33.png ├── compare-4.png ├── compare-5.png ├── compare-500.png ├── compare-600.png ├── compare-750.png ├── compare-800.png ├── compare-900.png ├── dcswin-25-epochs.png ├── ft-unet-former-25-epochs.png ├── unet-25-epochs.png └── unet-former-25-epochs.png ├── predictions ├── bergen_-5943_1104.tif ├── bergen_-5944_1104.tif ├── bergen_-5948_1107.tif ├── kristiansand_-4712_-1562.tif ├── kristiansand_-4712_-1563.tif ├── kristiansand_-4712_-1568.tif ├── oslo_-3133_244.tif ├── tromso_923_11083.tif ├── tromso_923_11084.tif └── tromso_923_11086.tif ├── projectRegularization ├── INFO.txt ├── LICENSE ├── README.md ├── README.png ├── __pycache__ │ ├── crf_loss.cpython-310.pyc │ ├── data_loader_gan.cpython-310.pyc │ ├── models.cpython-310.pyc │ ├── training_utils.cpython-310.pyc │ └── variables.cpython-310.pyc ├── crf_loss.py ├── data_loader_gan.py ├── gdal ├── models.py ├── regularize.py ├── train_gan_net.py ├── training_utils.py └── variables.py ├── regularizations ├── bergen_-5943_1104.tif ├── bergen_-5944_1104.tif ├── bergen_-5948_1107.tif ├── kristiansand_-4712_-1562.tif ├── kristiansand_-4712_-1563.tif ├── kristiansand_-4712_-1568.tif ├── oslo_-3133_244.tif ├── tromso_923_11083.tif ├── tromso_923_11084.tif └── tromso_923_11086.tif ├── requirements.txt └── trained_models └── Link_to_download_trained_models.txt /03-mapai-unet-former-regularization.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": { 7 | "id": "1KJnP7zSGQg5" 8 | }, 9 | "source": [ 10 | "## Binary semantic segmentation example using U-Net-Former\n", 11 | "Preparation of dataset and model training code from here:\n", 12 | "\n", 13 | "https://pyimagesearch.com/2021/11/08/u-net-training-image-segmentation-models-in-pytorch/" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": null, 19 | "metadata": { 20 | "colab": { 21 | "base_uri": "https://localhost:8080/" 22 | }, 23 | "id": "cFxHJWmXlcZk", 24 | "outputId": "b755d1a8-3650-42d9-8718-401b4458f049" 25 | }, 26 | "outputs": [], 27 | "source": [ 28 | "import os\n", 29 | "import glob\n", 30 | "import matplotlib.pyplot as plt\n", 31 | "\n", 32 | "import torch\n", 33 | "import torchvision\n", 34 | "from tqdm import tqdm\n", 35 | "\n", 36 | "print(torch.__version__)\n", 37 | "print(torchvision.__version__)\n", 38 | "\n", 39 | "DEVICE = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n", 40 | "print(DEVICE)\n", 41 | "\n", 42 | "# determine if we will be pinning memory during data loading\n", 43 | "PIN_MEMORY = True if DEVICE == \"cuda\" else False" 44 | ] 45 | }, 46 | { 47 | "attachments": {}, 48 | "cell_type": "markdown", 49 | "metadata": { 50 | "id": "KVfaGZrWG63Q" 51 | }, 52 | "source": [ 53 | "#### CONFIGURE 
PATHS AND HYPERPARAMETERS FOR TRAINING BELOW." 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "metadata": { 60 | "colab": { 61 | "base_uri": "https://localhost:8080/", 62 | "height": 235 63 | }, 64 | "id": "OjlBC-raVM2K", 65 | "outputId": "b6950142-9c43-40bd-9edb-e10b3973f745" 66 | }, 67 | "outputs": [], 68 | "source": [ 69 | "GD_PATH = os.getcwd() # get current working directory for the repo\n", 70 | "print(GD_PATH)\n", 71 | "\n", 72 | "# PROVIDE PATH TO DOWNLOADED MAPAI DATASET\n", 73 | "DATASET_PATH = \"/home/shymon/datasets/\"\n", 74 | "\n", 75 | "DATASET_PATH = os.path.join(DATASET_PATH, \"mapai_full\") # create dataset path\n", 76 | "\n", 77 | "print(DATASET_PATH)\n", 78 | "\n", 79 | "TRAIN_IMG_DIR = os.path.join(DATASET_PATH, \"train\", \"images\")\n", 80 | "TRAIN_MASK_DIR = os.path.join(DATASET_PATH, \"train\", \"masks\")\n", 81 | "\n", 82 | "print(TRAIN_IMG_DIR)\n", 83 | "print(TRAIN_MASK_DIR)\n", 84 | "\n", 85 | "VAL_IMG_DIR = os.path.join(DATASET_PATH, \"validation\", \"images\")\n", 86 | "VAL_MASK_DIR = os.path.join(DATASET_PATH, \"validation\", \"masks\")\n", 87 | "\n", 88 | "print(VAL_IMG_DIR)\n", 89 | "print(VAL_MASK_DIR)\n", 90 | "\n", 91 | "TEST_IMG_DIR = os.path.join(DATASET_PATH, \"task1_test\", \"images\")\n", 92 | "TEST_MASK_DIR = os.path.join(DATASET_PATH, \"task1_test\", \"masks\")\n", 93 | "\n", 94 | "print(TEST_IMG_DIR)\n", 95 | "print(TEST_MASK_DIR)\n", 96 | "\n", 97 | "# CONFIGURE MapAI DATASET\n", 98 | "NUM_CHANNELS = 3\n", 99 | "NUM_LEVELS = 3\n", 100 | "NUM_CLASSES = 1\n", 101 | "\n", 102 | "# IMAGE SHAPE\n", 103 | "IMG_WIDTH = 512\n", 104 | "IMG_HEIGHT = 512\n", 105 | "\n", 106 | "#---------------------------------------------------------------------------------------------------#\n", 107 | "\n", 108 | "# CONFIGURE parameters for training\n", 109 | "EPOCHS = 25\n", 110 | "init_lr = 1e-4 # learning rate\n", 111 | "BATCH_SIZE = 2\n", 112 | "\n", 113 | "THRESHOLD = 0.5\n", 114 | "base_output = \"out\"\n", 115 | "\n", 116 | "model_name = \"unet-former-25-epochs.pth\" # provide name for model\n", 117 | "training_plot_name = \"unet-former-25-epochs.png\"\n", 118 | "\n", 119 | "#---------------------------------------------------------------------------------------------------#\n", 120 | "\n", 121 | "# OUTPUT PATHS\n", 122 | "\n", 123 | "# Trained model path\n", 124 | "MODEL_PATH = os.path.join(GD_PATH, \"trained_models\", model_name) # change depending on the number of epochs\n", 125 | "print(MODEL_PATH)\n", 126 | "PLOT_PATH = os.path.join(GD_PATH, \"plots\", training_plot_name) # the folder to save future plots\n", 127 | "print(PLOT_PATH)" 128 | ] 129 | }, 130 | { 131 | "cell_type": "markdown", 132 | "metadata": { 133 | "id": "IfSMUZbWWdJn" 134 | }, 135 | "source": [ 136 | "### Load and read the MapAI dataset" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": null, 142 | "metadata": { 143 | "id": "TPiACQ_6VyQP" 144 | }, 145 | "outputs": [], 146 | "source": [ 147 | "import tifffile\n", 148 | "from torch.utils.data import Dataset\n", 149 | "import cv2\n", 150 | "\n", 151 | "\n", 152 | "class mapAIdataset(Dataset):\n", 153 | " def __init__(self, imagePaths, maskPaths, transforms):\n", 154 | " # store the image and mask filepaths, and augmentation\n", 155 | " # transforms\n", 156 | " self.imagePaths = imagePaths\n", 157 | " self.maskPaths = maskPaths\n", 158 | " self.transforms = transforms\n", 159 | " \n", 160 | " def __len__(self):\n", 161 | " # return the number of total samples contained in the 
dataset\n", 162 | " return len(self.imagePaths)\n", 163 | " \n", 164 | " def __getitem__(self, idx):\n", 165 | " # grab the image path from the current index\n", 166 | " imagePath = self.imagePaths[idx]\n", 167 | " # load the image from disk, swap its channels from BGR to RGB,\n", 168 | " # and read the associated mask from disk\n", 169 | " image = cv2.imread(imagePath)\n", 170 | " image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)\n", 171 | " mask = tifffile.imread(self.maskPaths[idx])\n", 172 | " # convert the mask to a float32 tensor with values in the range [0, 1]\n", 173 | " mask = mask.astype('float32')\n", 174 | " # check to see if we are applying any transformations\n", 175 | " if self.transforms is not None:\n", 176 | " # apply the transformations to both image and its mask\n", 177 | " image = self.transforms(image)\n", 178 | " mask = self.transforms(mask)\n", 179 | " \n", 180 | " # return a tuple of the image and its mask\n", 181 | " return (image, mask)" 182 | ] 183 | }, 184 | { 185 | "attachments": {}, 186 | "cell_type": "markdown", 187 | "metadata": { 188 | "id": "AKXL9bO8WnNg" 189 | }, 190 | "source": [ 191 | "### U-Net Former architecture\n", 192 | "\n", 193 | "Downloaded from: https://github.com/WangLibo1995/GeoSeg/blob/main/geoseg/models/UNetFormer.py\n", 194 | "saved into UNetFormer.py file, from where we import the model." 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": null, 200 | "metadata": { 201 | "id": "9urE3W1iWp7v" 202 | }, 203 | "outputs": [], 204 | "source": [ 205 | "import sys\n", 206 | "subfolder = os.path.join(GD_PATH, \"models\")\n", 207 | "sys.path.insert(0, subfolder)\n", 208 | "\n", 209 | "import UNetFormer_model" 210 | ] 211 | }, 212 | { 213 | "cell_type": "markdown", 214 | "metadata": { 215 | "id": "22hbANvfWxmX" 216 | }, 217 | "source": [ 218 | "### Training the segmentation model\n", 219 | "Below we append the paths for TRAIN/VAL/TEST sets - images/masks." 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": null, 225 | "metadata": { 226 | "colab": { 227 | "base_uri": "https://localhost:8080/" 228 | }, 229 | "id": "G2Jha-LCW0ir", 230 | "outputId": "fcad4c67-0851-42e6-ea68-6b6dd88d26c2" 231 | }, 232 | "outputs": [], 233 | "source": [ 234 | "from torch.nn import BCEWithLogitsLoss\n", 235 | "from torch.optim import Adam\n", 236 | "from torch.utils.data import DataLoader\n", 237 | "from imutils import paths\n", 238 | "import time\n", 239 | "\n", 240 | "# TRAINING\n", 241 | "train_images = sorted(list(paths.list_images(TRAIN_IMG_DIR)))\n", 242 | "train_masks = sorted(list(paths.list_images(TRAIN_MASK_DIR)))\n", 243 | "\n", 244 | "# VALIDATION\n", 245 | "val_images = sorted(list(paths.list_images(VAL_IMG_DIR)))\n", 246 | "val_masks = sorted(list(paths.list_images(VAL_MASK_DIR)))\n", 247 | "\n", 248 | "\n", 249 | "# TEST\n", 250 | "test_images = sorted(list(paths.list_images(TEST_IMG_DIR)))\n", 251 | "test_masks = sorted(list(paths.list_images(TEST_MASK_DIR)))" 252 | ] 253 | }, 254 | { 255 | "cell_type": "markdown", 256 | "metadata": { 257 | "id": "gtqUNGR1XCa5" 258 | }, 259 | "source": [ 260 | "### Define transformations\n", 261 | "\n", 262 | "I tried out different data augmentation techniques, including Horizontal Flip, Vertical Flip, Contrast, Brightness. They did not improve my results much, the validation and training loss were actually worse than without data augmentation techniques." 
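One possible reason the random flips hurt here: the dataset class above applies `self.transforms` to the image and the mask in two separate calls, so any random transform draws its randomness independently for each and the pair can end up misaligned. A minimal sketch of a paired transform that shares one random decision between image and mask (the function name, the 512×512 size, and the flip probability are assumptions, not part of this notebook):

```python
import random
import torchvision.transforms.functional as TF
from torchvision.transforms import InterpolationMode

def paired_transform(image, mask, size=(512, 512), p_flip=0.5):
    # Resize image and mask to the same shape; nearest-neighbour keeps the
    # mask binary instead of introducing interpolated values.
    image = TF.resize(TF.to_pil_image(image), size)
    mask = TF.resize(TF.to_pil_image(mask), size, interpolation=InterpolationMode.NEAREST)
    # One coin flip shared by both, so the pair stays aligned.
    if random.random() < p_flip:
        image = TF.hflip(image)
        mask = TF.hflip(mask)
    return TF.to_tensor(image), TF.to_tensor(mask)
```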
263 | ] 264 | }, 265 | { 266 | "attachments": {}, 267 | "cell_type": "markdown", 268 | "metadata": { 269 | "id": "ghW7Nj0OEQMc" 270 | }, 271 | "source": [ 272 | "https://pytorch.org/vision/stable/auto_examples/plot_transforms.html#sphx-glr-auto-examples-plot-transforms-py\n", 273 | "\n", 274 | "https://albumentations.ai/docs/getting_started/mask_augmentation/" 275 | ] 276 | }, 277 | { 278 | "cell_type": "code", 279 | "execution_count": null, 280 | "metadata": { 281 | "colab": { 282 | "base_uri": "https://localhost:8080/" 283 | }, 284 | "id": "WR_dzdpCXCHY", 285 | "outputId": "4e9b1681-2846-489f-edf6-a6240af65563" 286 | }, 287 | "outputs": [], 288 | "source": [ 289 | "import torchvision.transforms as T\n", 290 | "\n", 291 | "# T.RandomHorizontalFlip(p=0.5),\n", 292 | "# T.RandomVerticalFlip(p=0.1),\n", 293 | "\n", 294 | "# Image augmentations applied\n", 295 | "transforms = T.Compose([T.ToPILImage(),\n", 296 | " T.Resize((IMG_HEIGHT,IMG_WIDTH)),\n", 297 | " T.ToTensor()])\n", 298 | "\n", 299 | "# create the train and test datasets\n", 300 | "trainDS = mapAIdataset(imagePaths=train_images,\n", 301 | " maskPaths=train_masks,\n", 302 | " transforms=transforms)\n", 303 | "\n", 304 | "valDS = mapAIdataset(imagePaths=val_images,\n", 305 | " maskPaths=val_masks,\n", 306 | " transforms=transforms)\n", 307 | "\n", 308 | "testDS = mapAIdataset(imagePaths=test_images,\n", 309 | " maskPaths=test_masks,\n", 310 | " transforms=transforms)\n", 311 | "\n", 312 | "print(f\"[INFO] found {len(trainDS)} examples in the TRAINING set...\")\n", 313 | "print(f\"[INFO] found {len(valDS)} examples in the VALIDATION set...\")\n", 314 | "print(f\"[INFO] found {len(testDS)} examples in the TEST set...\")\n", 315 | "\n", 316 | "# create the training and test data loaders\n", 317 | "trainLoader = DataLoader(trainDS,\n", 318 | " shuffle=True,\n", 319 | " batch_size=BATCH_SIZE,\n", 320 | " pin_memory=PIN_MEMORY,\n", 321 | " num_workers=os.cpu_count())\n", 322 | "\n", 323 | "valLoader = DataLoader(valDS,\n", 324 | " shuffle=False,\n", 325 | " batch_size=BATCH_SIZE,\n", 326 | " pin_memory=PIN_MEMORY,\n", 327 | " num_workers=os.cpu_count())\n", 328 | "\n", 329 | "testLoader = DataLoader(testDS,\n", 330 | " shuffle=False,\n", 331 | " batch_size=BATCH_SIZE,\n", 332 | " pin_memory=PIN_MEMORY,\n", 333 | " num_workers=os.cpu_count())" 334 | ] 335 | }, 336 | { 337 | "attachments": {}, 338 | "cell_type": "markdown", 339 | "metadata": { 340 | "id": "tAO9M_R4XG6q" 341 | }, 342 | "source": [ 343 | "### Initialize UNET-FORMER model for training" 344 | ] 345 | }, 346 | { 347 | "cell_type": "code", 348 | "execution_count": null, 349 | "metadata": { 350 | "colab": { 351 | "base_uri": "https://localhost:8080/" 352 | }, 353 | "id": "2IMsYzUaXJW7", 354 | "outputId": "5328f1e2-4a71-4c05-fa9e-23b768971315" 355 | }, 356 | "outputs": [], 357 | "source": [ 358 | "model = UNetFormer_model.UNetFormer().to(DEVICE)\n", 359 | "\n", 360 | "# loss / optimizer\n", 361 | "lossFunction = BCEWithLogitsLoss()\n", 362 | "opt = Adam(model.parameters(), lr=init_lr, weight_decay=0.001)\n", 363 | "\n", 364 | "# calculate steps per epoch for train/val/test\n", 365 | "trainSteps = len(trainDS) // BATCH_SIZE \n", 366 | "valSteps = len(valDS) // BATCH_SIZE\n", 367 | "testSteps = len(testDS) // BATCH_SIZE\n", 368 | "\n", 369 | "print(trainSteps, valSteps, testSteps)\n", 370 | "\n", 371 | "# initialize a dictionary to store training history\n", 372 | "H = {\"train_loss\": [], \"val_loss\": [], \"test_loss\": []}\n", 373 | "H" 374 | ] 375 | }, 376 | { 377 | "cell_type": 
"code", 378 | "execution_count": null, 379 | "metadata": { 380 | "id": "WEP-IVokbWQg" 381 | }, 382 | "outputs": [], 383 | "source": [ 384 | "torch.cuda.empty_cache()" 385 | ] 386 | }, 387 | { 388 | "attachments": {}, 389 | "cell_type": "markdown", 390 | "metadata": { 391 | "id": "xcjuKhMeXLU-" 392 | }, 393 | "source": [ 394 | "### TRAINING THE MODEL\n", 395 | "\n", 396 | "Run this piece of code only if you want to train the model from scratch.\n", 397 | "\n", 398 | "Training locally: BATCH_SIZE = 2 takes 5035 MB of GPU memory.\n", 399 | "\n" 400 | ] 401 | }, 402 | { 403 | "cell_type": "code", 404 | "execution_count": null, 405 | "metadata": { 406 | "colab": { 407 | "base_uri": "https://localhost:8080/" 408 | }, 409 | "id": "vWuUyLUgXPNf", 410 | "outputId": "34c6485b-838b-45cf-99da-601f7cfbc4d1" 411 | }, 412 | "outputs": [], 413 | "source": [ 414 | "# loop over epochs\n", 415 | "print(\"[INFO] training UNET-FORMER ...\")\n", 416 | "startTime = time.time()\n", 417 | "\n", 418 | "for epoch in tqdm(range(EPOCHS)):\n", 419 | " model.train()\n", 420 | "\n", 421 | " # initialize total training and validation loss\n", 422 | " totalTrainLoss = 0\n", 423 | " totalValLoss = 0\n", 424 | " totalTrainAcc = 0\n", 425 | " totalValAcc = 0\n", 426 | "\n", 427 | " # loop over the training set\n", 428 | " for (i, (x, y)) in enumerate(trainLoader):\n", 429 | " # send output to device\n", 430 | " (x, y) = (x.to(DEVICE), y.to(DEVICE))\n", 431 | "\n", 432 | " # perform a forward pass and calculate the training loss\n", 433 | " pred = model(x)\n", 434 | " loss = lossFunction(pred, y)\n", 435 | "\n", 436 | " # calculate the accuracy\n", 437 | " acc = ((pred > 0.5) == y).float().mean()\n", 438 | "\n", 439 | " # kill previously accumulated gradients then\n", 440 | " # perform backpropagation and update model parameters\n", 441 | " opt.zero_grad()\n", 442 | " loss.backward()\n", 443 | " opt.step()\n", 444 | "\n", 445 | " # add the loss and accuracy to the total training loss and accuracy\n", 446 | " totalTrainLoss += loss\n", 447 | " totalTrainAcc += acc\n", 448 | "\n", 449 | " # switch of autograd\n", 450 | " with torch.no_grad():\n", 451 | " # set the model in evaluation mode\n", 452 | " model.eval()\n", 453 | "\n", 454 | " # loop over the validation set\n", 455 | " for (x, y) in valLoader:\n", 456 | " # send the input to the device\n", 457 | " (x, y) = (x.to(DEVICE), y.to(DEVICE))\n", 458 | "\n", 459 | " # make the predictions and calculate the validation loss\n", 460 | " pred = model(x)\n", 461 | " loss = lossFunction(pred, y)\n", 462 | "\n", 463 | " # calculate the accuracy\n", 464 | " acc = ((pred > 0.5) == y).float().mean()\n", 465 | "\n", 466 | " # add the loss and accuracy to the total validation loss and accuracy\n", 467 | " totalValLoss += loss\n", 468 | " totalValAcc += acc\n", 469 | "\n", 470 | " # calculate the average training and validation loss and accuracy\n", 471 | " avgTrainLoss = totalTrainLoss / trainSteps\n", 472 | " avgValLoss = totalValLoss / valSteps\n", 473 | " avgTrainAcc = totalTrainAcc / trainSteps\n", 474 | " avgValAcc = totalValAcc / valSteps\n", 475 | " \n", 476 | " # update our training history\n", 477 | " H[\"train_loss\"].append(avgTrainLoss.cpu().detach().numpy())\n", 478 | " H[\"val_loss\"].append(avgValLoss.cpu().detach().numpy())\n", 479 | "\n", 480 | " # print the model training and validation information\n", 481 | " print(\"[INFO] EPOCH: {}/{}\".format(epoch + 1, EPOCHS))\n", 482 | " print(\"Train loss: {:.6f}, Train acc: {:.6f}, Val loss: {:.4f}, Val acc: {:.4f}\".format(\n", 
483 | " avgTrainLoss, avgTrainAcc, avgValLoss, avgValAcc))\n", 484 | " \n", 485 | "# display the total time needed to perform the training\n", 486 | "endTime = time.time()\n", 487 | "print(\"[INFO] total time taken to train the model: {:.2f}s\".format(endTime - startTime))\n", 488 | " " 489 | ] 490 | }, 491 | { 492 | "cell_type": "code", 493 | "execution_count": null, 494 | "metadata": { 495 | "colab": { 496 | "base_uri": "https://localhost:8080/" 497 | }, 498 | "id": "CsJoOVn11rs9", 499 | "outputId": "c9a36460-f773-4771-cfd7-fec78711d8cc" 500 | }, 501 | "outputs": [], 502 | "source": [ 503 | "H # show traning/val loss history" 504 | ] 505 | }, 506 | { 507 | "cell_type": "markdown", 508 | "metadata": { 509 | "id": "U6ChLXHuXZHA" 510 | }, 511 | "source": [ 512 | "### Plot the training and validation loss" 513 | ] 514 | }, 515 | { 516 | "cell_type": "code", 517 | "execution_count": null, 518 | "metadata": { 519 | "colab": { 520 | "base_uri": "https://localhost:8080/", 521 | "height": 316 522 | }, 523 | "id": "j04HfubrXYvX", 524 | "outputId": "5416f4a7-2647-40e9-ce25-cac7909dea50" 525 | }, 526 | "outputs": [], 527 | "source": [ 528 | "# plot the training loss\n", 529 | "print(MODEL_PATH)\n", 530 | "print(PLOT_PATH)\n", 531 | "\n", 532 | "plt.style.use(\"ggplot\")\n", 533 | "plt.figure()\n", 534 | "plt.plot(H[\"train_loss\"], label=\"train_loss\")\n", 535 | "plt.plot(H[\"val_loss\"], label=\"val_loss\")\n", 536 | "plt.title(\"Training Loss on Dataset\")\n", 537 | "plt.xlabel(\"Epoch #\")\n", 538 | "plt.ylabel(\"Loss\")\n", 539 | "plt.legend(loc=\"lower left\")\n", 540 | "plt.savefig(PLOT_PATH)\n", 541 | "# serialize the model to disk\n", 542 | "torch.save(model, MODEL_PATH) # saves the model" 543 | ] 544 | }, 545 | { 546 | "cell_type": "markdown", 547 | "metadata": { 548 | "id": "5Y6Fx2oaWr0q" 549 | }, 550 | "source": [ 551 | "### Prediction part\n", 552 | "\n", 553 | "Here the trained model is loaded and use for prediction on test images." 554 | ] 555 | }, 556 | { 557 | "cell_type": "code", 558 | "execution_count": null, 559 | "metadata": { 560 | "colab": { 561 | "base_uri": "https://localhost:8080/" 562 | }, 563 | "id": "qYh4flMu7O-m", 564 | "outputId": "94909220-5b1f-43ad-b52f-d37bbaa270fd" 565 | }, 566 | "outputs": [], 567 | "source": [ 568 | "# Load saved model for prediction\n", 569 | "\n", 570 | "print(MODEL_PATH)\n", 571 | "\n", 572 | "model = torch.load(MODEL_PATH) # add MODEL_PATH after training\n", 573 | "print(\"model loaded for prediction\")\n", 574 | "\n", 575 | "model" 576 | ] 577 | }, 578 | { 579 | "attachments": {}, 580 | "cell_type": "markdown", 581 | "metadata": {}, 582 | "source": [ 583 | "#### Provide test images for MapAI Dataset" 584 | ] 585 | }, 586 | { 587 | "cell_type": "code", 588 | "execution_count": null, 589 | "metadata": {}, 590 | "outputs": [], 591 | "source": [ 592 | "PREDICTIONS_DIR = os.path.join(GD_PATH, \"predictions\")\n", 593 | "PREDICTIONS_DIR" 594 | ] 595 | }, 596 | { 597 | "attachments": {}, 598 | "cell_type": "markdown", 599 | "metadata": {}, 600 | "source": [ 601 | "#### Make predictions on the entire MapAI dataset\n", 602 | "\n", 603 | "Make predictions on test images and save them to the folder named predictions." 
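If the predictions folder is not already present in the working copy, the loop below will fail on the first save, so it is worth creating it first; a one-line sketch (not in the original notebook):

```python
# Ensure the output folder exists before the prediction loop writes into it.
os.makedirs(PREDICTIONS_DIR, exist_ok=True)
```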
604 | ] 605 | }, 606 | { 607 | "cell_type": "code", 608 | "execution_count": null, 609 | "metadata": { 610 | "colab": { 611 | "base_uri": "https://localhost:8080/" 612 | }, 613 | "id": "bq7BlbdrcgPB", 614 | "outputId": "6860afd9-da51-4911-d975-5a3ed78e01e1" 615 | }, 616 | "outputs": [], 617 | "source": [ 618 | "import random\n", 619 | "import gc\n", 620 | "from pathlib import Path\n", 621 | "import numpy as np\n", 622 | "from PIL import Image\n", 623 | "\n", 624 | "# PLOTTING PREDICTIONS AS SINGLE IMAGES\n", 625 | "\n", 626 | "# Output folder for the predictions\n", 627 | "output_folder = PREDICTIONS_DIR + \"/\" # check for Windows to save predictions inside the folder\n", 628 | "\n", 629 | "# PLOT TEST IMAGES as RGB\n", 630 | "for n in range(len(test_images)):\n", 631 | " gc.collect()\n", 632 | " # Test image number\n", 633 | " testImgName = str(Path(test_images[n]).stem) + '.tif'\n", 634 | " #print('#', testImgName)\n", 635 | "\n", 636 | " # Make predicton on a test image specified with counter n\n", 637 | " test_img = test_images[n]\n", 638 | " test_img_input = np.expand_dims(test_img, 0)\n", 639 | " #print('#', test_img_input[0])\n", 640 | "\n", 641 | " # PyTorch --> works\n", 642 | " model.eval()\n", 643 | " with torch.no_grad():\n", 644 | " image = cv2.imread(test_img_input[0])\n", 645 | " image = cv2.resize(image, dsize = (IMG_WIDTH, IMG_HEIGHT), interpolation=cv2.INTER_CUBIC)\n", 646 | " image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)\n", 647 | " image = image.astype(\"float32\") / 255\n", 648 | " \n", 649 | " # print('SIZE: ', image.shape)\n", 650 | "\n", 651 | " # make the channel axis to be the leading one, add batch dimension\n", 652 | " image = np.transpose(image, (2, 0, 1))\n", 653 | " # create a PyTorch tensor\n", 654 | " image = np.expand_dims(image, 0)\n", 655 | " # flash the tensor to the device\n", 656 | " image = torch.from_numpy(image).to(DEVICE)\n", 657 | "\n", 658 | " # make the prediction\n", 659 | " predMask = model(image).squeeze()\n", 660 | " # pass result through sigmoid\n", 661 | " predMask = torch.sigmoid(predMask)\n", 662 | "\n", 663 | " # convert result to numpy array\n", 664 | " predMask = predMask.cpu().numpy()\n", 665 | "\n", 666 | " # filter out the weak predictions and convert them to integers\n", 667 | " predMask = (predMask > THRESHOLD) * 255\n", 668 | " predMask = predMask.astype(np.uint8)\n", 669 | "\n", 670 | " # generate image from array\n", 671 | " pIMG = Image.fromarray(predMask)\n", 672 | " pIMG.save(str(output_folder + testImgName))\n", 673 | "\n", 674 | " print('Prediction:', testImgName, 'saved to:', output_folder)" 675 | ] 676 | }, 677 | { 678 | "attachments": {}, 679 | "cell_type": "markdown", 680 | "metadata": {}, 681 | "source": [ 682 | "#### Make predictions on single images by choice\n", 683 | "\n", 684 | "Change the parameter n to choose which image to plot." 685 | ] 686 | }, 687 | { 688 | "cell_type": "code", 689 | "execution_count": null, 690 | "metadata": {}, 691 | "outputs": [], 692 | "source": [ 693 | "# ----------------------------------------------------------------------\n", 694 | "\n", 695 | "predictions = glob.glob(output_folder + \"*.tif\")\n", 696 | "predictions.sort()\n", 697 | "print(\"# IMAGES for prediction: \", len(predictions))\n", 698 | "print(\"Choosen n can be from 0 o 1367! 
\")\n", 699 | "\n", 700 | "# ----------------------------------------------------------------------\n", 701 | "\n", 702 | "n = 900 # change this number depending on which image you want to test\n", 703 | "\n", 704 | "fig = plt.figure(figsize=(18,12))\n", 705 | "ax1 = fig.add_subplot(131)\n", 706 | "\n", 707 | "ax1.set_title('RGB image: ')\n", 708 | "image = cv2.imread(test_images[n])[:,:,::-1]\n", 709 | "ax1.imshow(image)\n", 710 | "ax1.set_axis_off()\n", 711 | "\n", 712 | "ax2 = fig.add_subplot(132)\n", 713 | "ax2.set_title('Ground truth: ')\n", 714 | "image = cv2.imread(test_masks[n])[:,:,::-1]\n", 715 | "image *= 255\n", 716 | "ax2.imshow(image)\n", 717 | "ax2.set_axis_off()\n", 718 | "\n", 719 | "ax3 = fig.add_subplot(133)\n", 720 | "ax3.set_title('Prediction: ')\n", 721 | "image = cv2.imread(predictions[n])[:,:,::-1]\n", 722 | "ax3.imshow(image)\n", 723 | "ax3.set_axis_off()" 724 | ] 725 | }, 726 | { 727 | "attachments": {}, 728 | "cell_type": "markdown", 729 | "metadata": { 730 | "id": "Tg_0qxbcjzfw" 731 | }, 732 | "source": [ 733 | "### BUILDING FOOTPRINT REGULARIZATION\n", 734 | "\n", 735 | "Used repo: https://github.com/zorzi-s/projectRegularization\n", 736 | "\n", 737 | "git clone the repo to the folder where your notebook is stored. To get curent working directory use os.getcwd().\n", 738 | "\n", 739 | "The pretrained weights need to be downloaded from the provided link and saved into the folder pretrained_weighs that is inside projectRegularization:\n", 740 | "\n", 741 | "https://drive.google.com/drive/folders/1IPrDpvFq9ODW7UtPAJR_T-gGzxDat_uu" 742 | ] 743 | }, 744 | { 745 | "attachments": {}, 746 | "cell_type": "markdown", 747 | "metadata": {}, 748 | "source": [ 749 | "Next step is to generate a Python file to locate the necessary pretrained weights from projectRegularization. The code below was only tested on Ubuntu, not on Windows." 
750 | ] 751 | }, 752 | { 753 | "cell_type": "code", 754 | "execution_count": null, 755 | "metadata": { 756 | "id": "yp8uKrNUjyGn" 757 | }, 758 | "outputs": [], 759 | "source": [ 760 | "# DEFINE NECESSARY PATHS FOR REGULARIZATION PART\n", 761 | "\n", 762 | "projectRegDir = os.path.join(GD_PATH, \"projectRegularization\")\n", 763 | "print(projectRegDir)\n", 764 | "\n", 765 | "ptw = os.path.join(projectRegDir, \"pretrained_weights\") \n", 766 | "print(ptw)\n", 767 | "\n", 768 | "# OUTPUT REGULARIZATIONS DIR\n", 769 | "REGULARIZATION_DIR = os.path.join(GD_PATH, \"regularizations\") + \"/\"\n", 770 | "print(REGULARIZATION_DIR)\n", 771 | "\n", 772 | "# GET THE PATHS FOR TRAINED GAN MODELS\n", 773 | "ENCODER = os.path.join(ptw, \"E140000_e1\")\n", 774 | "GENERATOR = os.path.join(ptw, \"E140000_net\")\n", 775 | "\n", 776 | "print(ENCODER)\n", 777 | "print(GENERATOR)" 778 | ] 779 | }, 780 | { 781 | "cell_type": "code", 782 | "execution_count": null, 783 | "metadata": {}, 784 | "outputs": [], 785 | "source": [ 786 | "# CREATE A NEW variables.py WITH USERS PATHS\n", 787 | "\n", 788 | "with open(projectRegDir + 'variables.py', 'w') as f:\n", 789 | " f.write('# CONFIGURE THE PATHS HERE: \\n\\n')\n", 790 | " f.write('# TRAINING \\n')\n", 791 | " f.write('DATASET_RGB = ' + '\"' + str(TRAIN_IMG_DIR + '*.tif' + '\"') + '\\n')\n", 792 | " f.write('DATASET_GTI = ' + '\"' + str(TRAIN_MASK_DIR + '*.tif' + '\"') + '\\n')\n", 793 | " f.write('DATASET_SEG = ' + '\"' + str(PREDICTIONS_DIR + '*.tif' + '\"') + '\\n')\n", 794 | " f.write('\\n')\n", 795 | " f.write('DEBUG_DIR = ' + '\"' + str('./debug/') + '\"' + '\\n')\n", 796 | " f.write('\\n')\n", 797 | " f.write('# INFERENCE \\n')\n", 798 | " f.write('INF_RGB = ' + '\"' + str(TEST_IMG_DIR + '*.tif' + '\"') + '\\n')\n", 799 | " f.write('INF_SEG = ' + '\"' + str(PREDICTIONS_DIR + '*.tif' + '\"') + '\\n')\n", 800 | " f.write('INF_OUT = ' + '\"' + str(REGULARIZATION_DIR + '\"') + '\\n')\n", 801 | " f.write('\\n')\n", 802 | " f.write('MODEL_ENCODER = ' + '\"' + str(ENCODER) + '\"' + '\\n')\n", 803 | " f.write('MODEL_GENERATOR = ' + '\"' + str(GENERATOR) + '\"' + '\\n')\n", 804 | " f.close()\n", 805 | " \n", 806 | "print(\"variables.py created with users paths...\")\n" 807 | ] 808 | }, 809 | { 810 | "attachments": {}, 811 | "cell_type": "markdown", 812 | "metadata": {}, 813 | "source": [ 814 | "#### Run projectRegularization\n", 815 | "\n", 816 | "Takes around 6-8 minutes.\n", 817 | "\n", 818 | "You only need to change the command below and replace it with the absolute path for regularize.py" 819 | ] 820 | }, 821 | { 822 | "cell_type": "code", 823 | "execution_count": null, 824 | "metadata": {}, 825 | "outputs": [], 826 | "source": [ 827 | "!python /home/shymon/Documents/mapAI-regularization/projectRegularization/regularize.py" 828 | ] 829 | }, 830 | { 831 | "attachments": {}, 832 | "cell_type": "markdown", 833 | "metadata": {}, 834 | "source": [ 835 | "### Compare predictions and regularizations on a single image" 836 | ] 837 | }, 838 | { 839 | "cell_type": "code", 840 | "execution_count": null, 841 | "metadata": { 842 | "colab": { 843 | "base_uri": "https://localhost:8080/" 844 | }, 845 | "id": "89nW6Q7F6aga", 846 | "outputId": "976d62a2-76a7-4b52-a4bc-218f63d8a122" 847 | }, 848 | "outputs": [], 849 | "source": [ 850 | "# Read Regularizations to plot and compare results\n", 851 | "\n", 852 | "regularizations = glob.glob(REGULARIZATION_DIR + \"*.tif\")\n", 853 | "regularizations.sort()\n", 854 | "\n", 855 | "print(\"# of predicted images: \", len(predictions))\n", 856 
| "print(\"# of regularized images: \", len(regularizations))" 857 | ] 858 | }, 859 | { 860 | "attachments": {}, 861 | "cell_type": "markdown", 862 | "metadata": {}, 863 | "source": [ 864 | "Code to plot RGB, GT, PREDICTION and REGULARIZATION in a single plot for comparison.\n", 865 | "\n", 866 | "Change parameter n accordingly." 867 | ] 868 | }, 869 | { 870 | "cell_type": "code", 871 | "execution_count": null, 872 | "metadata": {}, 873 | "outputs": [], 874 | "source": [ 875 | "n = 600\n", 876 | "\n", 877 | "fig = plt.figure(figsize=(18,12))\n", 878 | "ax1 = fig.add_subplot(141)\n", 879 | "\n", 880 | "ax1.set_title('RGB: ')\n", 881 | "image = cv2.imread(test_images[n])[:,:,::-1]\n", 882 | "ax1.imshow(image)\n", 883 | "ax1.set_axis_off()\n", 884 | "\n", 885 | "ax2 = fig.add_subplot(142)\n", 886 | "ax2.set_title('Ground truth: ')\n", 887 | "image = cv2.imread(test_masks[n])[:,:,::-1]\n", 888 | "image *= 255\n", 889 | "ax2.imshow(image)\n", 890 | "ax2.set_axis_off()\n", 891 | "\n", 892 | "ax3 = fig.add_subplot(143)\n", 893 | "ax3.set_title('Prediction: ')\n", 894 | "image = cv2.imread(predictions[n])[:,:,::-1]\n", 895 | "ax3.imshow(image)\n", 896 | "ax3.set_axis_off()\n", 897 | "\n", 898 | "ax4 = fig.add_subplot(144)\n", 899 | "ax4.set_title('Regularization: ')\n", 900 | "image = cv2.imread(regularizations[n])[:,:,::-1]\n", 901 | "ax4.imshow(image)\n", 902 | "ax4.set_axis_off()\n", 903 | "\n", 904 | "# DEFINE PATH FOR PLOTS TO BE SAVED\n", 905 | "figPath = GD_PATH + \"/\" + \"plots\" + \"/\" \"compare-\" + str(n) + \".png\"\n", 906 | "print(figPath)\n", 907 | "\n", 908 | "# Save plot\n", 909 | "fig.savefig(figPath)" 910 | ] 911 | }, 912 | { 913 | "attachments": {}, 914 | "cell_type": "markdown", 915 | "metadata": {}, 916 | "source": [ 917 | "### VECTORIZING THE REGULARIZED BUILDING MASKS with GDAL\n", 918 | "\n", 919 | "GDAL: https://gdal.org/'\n", 920 | "\n", 921 | "GDAL: https://www.youtube.com/watch?v=q3DLdMj5zLA\n", 922 | "\n", 923 | "I do not know if it is possible to install GDAL on WINDOWS inside a conda environment.\n", 924 | "\n", 925 | "On Ubuntu you have to follow these steps:\n", 926 | "\n", 927 | "\n", 928 | "\n", 929 | "Specific process for installation: https://stackoverflow.com/questions/44005694/no-module-named-gdal\n", 930 | "\n", 931 | "- sudo apt-get update && sudo apt upgrade -y && sudo apt autoremove \n", 932 | "- sudo apt-get install -y cdo nco gdal-bin libgdal-dev-\n", 933 | "- python -m pip install --upgrade pip setuptools wheel\n", 934 | "- python -m pip install --upgrade gdal\n", 935 | "- conda install -c conda forge libgdal\n", 936 | "- conda install -c conda-forge libgdal\n", 937 | "- conda install -c conda-forge gdal\n", 938 | "- conda install tiledb=2.2\n", 939 | "- conda install poppler\n", 940 | "\n", 941 | "When you have this you can hopefully vectorize the detected masks quite easily." 
942 | ] 943 | }, 944 | { 945 | "cell_type": "code", 946 | "execution_count": null, 947 | "metadata": {}, 948 | "outputs": [], 949 | "source": [ 950 | "def get_fname_from_path(path):\n", 951 | " \"\"\"\n", 952 | " Given a path, returns the filename after the last frontslash character.\n", 953 | " \"\"\"\n", 954 | " return path.rsplit('/', 1)[-1]\n", 955 | "\n", 956 | "def get_fname_no_extension(path):\n", 957 | " \"\"\"\n", 958 | " Given a path, returns the filename without its extension.\n", 959 | " \"\"\"\n", 960 | " filename, extension = os.path.splitext(path)\n", 961 | " return filename" 962 | ] 963 | }, 964 | { 965 | "cell_type": "code", 966 | "execution_count": null, 967 | "metadata": { 968 | "id": "TDWUhUkJaYl8" 969 | }, 970 | "outputs": [], 971 | "source": [ 972 | "import osgeo\n", 973 | "from osgeo import gdal\n", 974 | "from osgeo import ogr\n", 975 | "print('GDAL version: ', osgeo.gdal.__version__)\n", 976 | "\n", 977 | "# Choose which image to vectorize\n", 978 | "n = 0\n", 979 | "\n", 980 | "input = regularizations[n]\n", 981 | "print()\n", 982 | "print(\"INPUT: \", input)\n", 983 | "\n", 984 | "# print(get_fname_no_extension(input))\n", 985 | "\n", 986 | "# out\n", 987 | "output = get_fname_from_path(get_fname_no_extension(input)) + \".gpkg\"\n", 988 | "print(\"OUTPUT: \", output)\n", 989 | "\n", 990 | "# Open image with GDAl driver\n", 991 | "ds = gdal.Open(input)\n", 992 | "# Get the band\n", 993 | "band = ds.GetRasterBand(1)\n", 994 | "\n", 995 | "# Create the output shapefile\n", 996 | "driver = ogr.GetDriverByName(\"GPKG\")\n", 997 | "out_ds = driver.CreateDataSource(output)\n", 998 | "out_layer = out_ds.CreateLayer(output, geom_type=ogr.wkbPolygon)\n", 999 | "\n", 1000 | "# Add a field to the layer to store the pixel values\n", 1001 | "field_defn = ogr.FieldDefn(\"Pix_Value\", ogr.OFTInteger)\n", 1002 | "out_layer.CreateField(field_defn)\n", 1003 | "\n", 1004 | "# Polygonize the PNG file\n", 1005 | "gdal.Polygonize(band, None, out_layer, 0, [], callback=None)\n", 1006 | "\n", 1007 | "# Close the input and output files\n", 1008 | "out_ds = None\n", 1009 | "ds = None" 1010 | ] 1011 | }, 1012 | { 1013 | "attachments": {}, 1014 | "cell_type": "markdown", 1015 | "metadata": {}, 1016 | "source": [ 1017 | "For the builing detection case we need to only keep the vectors with pixel value 255. Easiest solution is to use: Extract by attribute. The Python solution with GDAL can be found below." 
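The cell below shells out to `ogr2ogr`; an equivalent in-Python sketch uses `gdal.VectorTranslate` with a `where` filter. The field name `Pix_Value` comes from the polygonization step above, and the file names are just the example from the command below:

```python
from osgeo import gdal

src = "bergen_-5943_1104.gpkg"    # vectorized regularization from the previous cell
dst = "bergen_-5943_1104B.gpkg"   # keep only polygons with pixel value 255 (buildings)

gdal.VectorTranslate(
    dst,
    src,
    options=gdal.VectorTranslateOptions(format="GPKG", where="Pix_Value = 255"),
)
```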
1018 | ] 1019 | }, 1020 | { 1021 | "cell_type": "code", 1022 | "execution_count": null, 1023 | "metadata": {}, 1024 | "outputs": [], 1025 | "source": [ 1026 | "# ogr2ogr -where ID=\"1\" OUTFILE.gpkg INFILE.gpkg\n", 1027 | "\n", 1028 | "# RUN from the command line inside Ubuntu\n", 1029 | "# Change name of input and output according to user needs\n", 1030 | "\n", 1031 | "!ogr2ogr -where Pix_Value=\"255\" bergen_-5943_1104B.gpkg bergen_-5943_1104.gpkg" 1032 | ] 1033 | } 1034 | ], 1035 | "metadata": { 1036 | "accelerator": "TPU", 1037 | "colab": { 1038 | "provenance": [] 1039 | }, 1040 | "gpuClass": "premium", 1041 | "kernelspec": { 1042 | "display_name": "torch", 1043 | "language": "python", 1044 | "name": "python3" 1045 | }, 1046 | "language_info": { 1047 | "codemirror_mode": { 1048 | "name": "ipython", 1049 | "version": 3 1050 | }, 1051 | "file_extension": ".py", 1052 | "mimetype": "text/x-python", 1053 | "name": "python", 1054 | "nbconvert_exporter": "python", 1055 | "pygments_lexer": "ipython3", 1056 | "version": "3.10.9" 1057 | } 1058 | }, 1059 | "nbformat": 4, 1060 | "nbformat_minor": 0 1061 | } 1062 | -------------------------------------------------------------------------------- /04-mapai-ft-unet-former-regularization.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": { 7 | "id": "1KJnP7zSGQg5" 8 | }, 9 | "source": [ 10 | "## Binary semantic segmentation example using FT-U-Net-Former\n", 11 | "Preparation of dataset and model training code from here:\n", 12 | "\n", 13 | "https://pyimagesearch.com/2021/11/08/u-net-training-image-segmentation-models-in-pytorch/" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": null, 19 | "metadata": { 20 | "colab": { 21 | "base_uri": "https://localhost:8080/" 22 | }, 23 | "id": "cFxHJWmXlcZk", 24 | "outputId": "b755d1a8-3650-42d9-8718-401b4458f049" 25 | }, 26 | "outputs": [], 27 | "source": [ 28 | "import os\n", 29 | "import glob\n", 30 | "import matplotlib.pyplot as plt\n", 31 | "\n", 32 | "import torch\n", 33 | "import torchvision\n", 34 | "from tqdm import tqdm\n", 35 | "\n", 36 | "print(torch.__version__)\n", 37 | "print(torchvision.__version__)\n", 38 | "\n", 39 | "DEVICE = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n", 40 | "print(DEVICE)\n", 41 | "\n", 42 | "# determine if we will be pinning memory during data loading\n", 43 | "PIN_MEMORY = True if DEVICE == \"cuda\" else False" 44 | ] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "metadata": { 49 | "id": "KVfaGZrWG63Q" 50 | }, 51 | "source": [ 52 | "#### CONFIGURE YOUR PATHS AND HYPERPARAMETERS FOR TRAINING BELOW." 
53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": { 59 | "colab": { 60 | "base_uri": "https://localhost:8080/", 61 | "height": 235 62 | }, 63 | "id": "OjlBC-raVM2K", 64 | "outputId": "b6950142-9c43-40bd-9edb-e10b3973f745" 65 | }, 66 | "outputs": [], 67 | "source": [ 68 | "GD_PATH = os.getcwd() # get current working directory for the repo\n", 69 | "print(GD_PATH)\n", 70 | "\n", 71 | "# PROVIDE PATH TO DOWNLOADED MAPAI DATASET\n", 72 | "DATASET_PATH = \"/home/shymon/datasets/\"\n", 73 | "\n", 74 | "DATASET_PATH = os.path.join(DATASET_PATH, \"mapai_full\") # create dataset path\n", 75 | "\n", 76 | "print(DATASET_PATH)\n", 77 | "\n", 78 | "TRAIN_IMG_DIR = os.path.join(DATASET_PATH, \"train\", \"images\")\n", 79 | "TRAIN_MASK_DIR = os.path.join(DATASET_PATH, \"train\", \"masks\")\n", 80 | "\n", 81 | "print(TRAIN_IMG_DIR)\n", 82 | "print(TRAIN_MASK_DIR)\n", 83 | "\n", 84 | "VAL_IMG_DIR = os.path.join(DATASET_PATH, \"validation\", \"images\")\n", 85 | "VAL_MASK_DIR = os.path.join(DATASET_PATH, \"validation\", \"masks\")\n", 86 | "\n", 87 | "print(VAL_IMG_DIR)\n", 88 | "print(VAL_MASK_DIR)\n", 89 | "\n", 90 | "TEST_IMG_DIR = os.path.join(DATASET_PATH, \"task1_test\", \"images\")\n", 91 | "TEST_MASK_DIR = os.path.join(DATASET_PATH, \"task1_test\", \"masks\")\n", 92 | "\n", 93 | "print(TEST_IMG_DIR)\n", 94 | "print(TEST_MASK_DIR)\n", 95 | "\n", 96 | "# CONFIGURE MapAI DATASET\n", 97 | "NUM_CHANNELS = 3\n", 98 | "NUM_LEVELS = 3\n", 99 | "NUM_CLASSES = 1\n", 100 | "\n", 101 | "# IMAGE SHAPE\n", 102 | "IMG_WIDTH = 512\n", 103 | "IMG_HEIGHT = 512\n", 104 | "\n", 105 | "#---------------------------------------------------------------------------------------------------#\n", 106 | "\n", 107 | "# CONFIGURE parameters for training\n", 108 | "EPOCHS = 25\n", 109 | "init_lr = 1e-4 # learning rate\n", 110 | "BATCH_SIZE = 2\n", 111 | "\n", 112 | "THRESHOLD = 0.5\n", 113 | "base_output = \"out\"\n", 114 | "\n", 115 | "model_name = \"ft-unet-former-25-epochs.pth\" # provide name for model\n", 116 | "training_plot_name = \"ft-unet-former-25-epochs.png\"\n", 117 | "\n", 118 | "#---------------------------------------------------------------------------------------------------#\n", 119 | "\n", 120 | "# OUTPUT PATHS\n", 121 | "\n", 122 | "# Trained model path\n", 123 | "MODEL_PATH = os.path.join(GD_PATH, \"trained_models\", model_name) # change depending on the number of epochs\n", 124 | "print(MODEL_PATH)\n", 125 | "PLOT_PATH = os.path.join(GD_PATH, \"plots\", training_plot_name) # the folder to save future plots\n", 126 | "print(PLOT_PATH)" 127 | ] 128 | }, 129 | { 130 | "cell_type": "markdown", 131 | "metadata": { 132 | "id": "IfSMUZbWWdJn" 133 | }, 134 | "source": [ 135 | "### Load and read the MapAI dataset" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": null, 141 | "metadata": { 142 | "id": "TPiACQ_6VyQP" 143 | }, 144 | "outputs": [], 145 | "source": [ 146 | "import tifffile\n", 147 | "from torch.utils.data import Dataset\n", 148 | "import cv2\n", 149 | "\n", 150 | "\n", 151 | "class mapAIdataset(Dataset):\n", 152 | " def __init__(self, imagePaths, maskPaths, transforms):\n", 153 | " # store the image and mask filepaths, and augmentation\n", 154 | " # transforms\n", 155 | " self.imagePaths = imagePaths\n", 156 | " self.maskPaths = maskPaths\n", 157 | " self.transforms = transforms\n", 158 | " \n", 159 | " def __len__(self):\n", 160 | " # return the number of total samples contained in the dataset\n", 161 | " return 
len(self.imagePaths)\n", 162 | " \n", 163 | " def __getitem__(self, idx):\n", 164 | " # grab the image path from the current index\n", 165 | " imagePath = self.imagePaths[idx]\n", 166 | " # load the image from disk, swap its channels from BGR to RGB,\n", 167 | " # and read the associated mask from disk\n", 168 | " image = cv2.imread(imagePath)\n", 169 | " image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)\n", 170 | " mask = tifffile.imread(self.maskPaths[idx])\n", 171 | " # convert the mask to a float32 tensor with values in the range [0, 1]\n", 172 | " mask = mask.astype('float32')\n", 173 | " # check to see if we are applying any transformations\n", 174 | " if self.transforms is not None:\n", 175 | " # apply the transformations to both image and its mask\n", 176 | " image = self.transforms(image)\n", 177 | " mask = self.transforms(mask)\n", 178 | " \n", 179 | " # return a tuple of the image and its mask\n", 180 | " return (image, mask)" 181 | ] 182 | }, 183 | { 184 | "attachments": {}, 185 | "cell_type": "markdown", 186 | "metadata": { 187 | "id": "AKXL9bO8WnNg" 188 | }, 189 | "source": [ 190 | "### Build FT-UNET-FORMER architecture\n", 191 | "Downloaded from: https://github.com/WangLibo1995/GeoSeg/blob/main/geoseg/models/FTUNetFormer.py" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": null, 197 | "metadata": { 198 | "id": "9urE3W1iWp7v" 199 | }, 200 | "outputs": [], 201 | "source": [ 202 | "import sys\n", 203 | "subfolder = os.path.join(GD_PATH, \"models\")\n", 204 | "sys.path.insert(0, subfolder)\n", 205 | "\n", 206 | "import FTUNetFormer_model" 207 | ] 208 | }, 209 | { 210 | "cell_type": "markdown", 211 | "metadata": { 212 | "id": "22hbANvfWxmX" 213 | }, 214 | "source": [ 215 | "### Training the segmentation model\n", 216 | "Below we append the paths for TRAIN/VAL/TEST sets - images/masks." 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": null, 222 | "metadata": { 223 | "colab": { 224 | "base_uri": "https://localhost:8080/" 225 | }, 226 | "id": "G2Jha-LCW0ir", 227 | "outputId": "fcad4c67-0851-42e6-ea68-6b6dd88d26c2" 228 | }, 229 | "outputs": [], 230 | "source": [ 231 | "from torch.nn import BCEWithLogitsLoss\n", 232 | "from torch.optim import Adam\n", 233 | "from torch.utils.data import DataLoader\n", 234 | "from imutils import paths\n", 235 | "import time\n", 236 | "\n", 237 | "# TRAINING\n", 238 | "train_images = sorted(list(paths.list_images(TRAIN_IMG_DIR)))\n", 239 | "train_masks = sorted(list(paths.list_images(TRAIN_MASK_DIR)))\n", 240 | "\n", 241 | "# VALIDATION\n", 242 | "val_images = sorted(list(paths.list_images(VAL_IMG_DIR)))\n", 243 | "val_masks = sorted(list(paths.list_images(VAL_MASK_DIR)))\n", 244 | "\n", 245 | "\n", 246 | "# TEST\n", 247 | "test_images = sorted(list(paths.list_images(TEST_IMG_DIR)))\n", 248 | "test_masks = sorted(list(paths.list_images(TEST_MASK_DIR)))" 249 | ] 250 | }, 251 | { 252 | "cell_type": "markdown", 253 | "metadata": { 254 | "id": "gtqUNGR1XCa5" 255 | }, 256 | "source": [ 257 | "### Define transformations\n", 258 | "\n", 259 | "I tried out different data augmentation techniques, including Horizontal Flip, Vertical Flip, Contrast, Brightness. They did not improve my results much, the validation and training loss were actually worse than without data augmentation techniques." 
260 | ] 261 | }, 262 | { 263 | "attachments": {}, 264 | "cell_type": "markdown", 265 | "metadata": { 266 | "id": "ghW7Nj0OEQMc" 267 | }, 268 | "source": [ 269 | "https://pytorch.org/vision/stable/auto_examples/plot_transforms.html#sphx-glr-auto-examples-plot-transforms-py\n", 270 | "\n", 271 | "https://albumentations.ai/docs/getting_started/mask_augmentation/" 272 | ] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "execution_count": null, 277 | "metadata": { 278 | "colab": { 279 | "base_uri": "https://localhost:8080/" 280 | }, 281 | "id": "WR_dzdpCXCHY", 282 | "outputId": "4e9b1681-2846-489f-edf6-a6240af65563" 283 | }, 284 | "outputs": [], 285 | "source": [ 286 | "import torchvision.transforms as T\n", 287 | "\n", 288 | "# T.RandomHorizontalFlip(p=0.5),\n", 289 | "# T.RandomVerticalFlip(p=0.1),\n", 290 | "\n", 291 | "# Image augmentations applied\n", 292 | "transforms = T.Compose([T.ToPILImage(),\n", 293 | " T.Resize((IMG_HEIGHT,IMG_WIDTH)),\n", 294 | " T.ToTensor()])\n", 295 | "\n", 296 | "# create the train and test datasets\n", 297 | "trainDS = mapAIdataset(imagePaths=train_images,\n", 298 | " maskPaths=train_masks,\n", 299 | " transforms=transforms)\n", 300 | "\n", 301 | "valDS = mapAIdataset(imagePaths=val_images,\n", 302 | " maskPaths=val_masks,\n", 303 | " transforms=transforms)\n", 304 | "\n", 305 | "testDS = mapAIdataset(imagePaths=test_images,\n", 306 | " maskPaths=test_masks,\n", 307 | " transforms=transforms)\n", 308 | "\n", 309 | "print(f\"[INFO] found {len(trainDS)} examples in the TRAINING set...\")\n", 310 | "print(f\"[INFO] found {len(valDS)} examples in the VALIDATION set...\")\n", 311 | "print(f\"[INFO] found {len(testDS)} examples in the TEST set...\")\n", 312 | "\n", 313 | "# create the training and test data loaders\n", 314 | "trainLoader = DataLoader(trainDS,\n", 315 | " shuffle=True,\n", 316 | " batch_size=BATCH_SIZE,\n", 317 | " pin_memory=PIN_MEMORY,\n", 318 | " num_workers=os.cpu_count())\n", 319 | "\n", 320 | "valLoader = DataLoader(valDS,\n", 321 | " shuffle=False,\n", 322 | " batch_size=BATCH_SIZE,\n", 323 | " pin_memory=PIN_MEMORY,\n", 324 | " num_workers=os.cpu_count())\n", 325 | "\n", 326 | "testLoader = DataLoader(testDS,\n", 327 | " shuffle=False,\n", 328 | " batch_size=BATCH_SIZE,\n", 329 | " pin_memory=PIN_MEMORY,\n", 330 | " num_workers=os.cpu_count())" 331 | ] 332 | }, 333 | { 334 | "cell_type": "markdown", 335 | "metadata": { 336 | "id": "tAO9M_R4XG6q" 337 | }, 338 | "source": [ 339 | "### Initialize UNET model for training\n", 340 | "\n", 341 | "Here we initialize the defined UNET model for training and calculate the steps per epoch for train/val/test set." 
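One detail worth flagging for the accuracy printed in the training loop further below: `BCEWithLogitsLoss` works on raw logits, so `(pred > 0.5)` thresholds logits rather than probabilities (a logit of 0.5 corresponds to a probability of roughly 0.62). A small helper that thresholds in probability space instead, not part of the original notebook:

```python
def binary_accuracy(logits, target, threshold=0.5):
    # Convert logits to probabilities first, then threshold at 0.5.
    probs = torch.sigmoid(logits)
    return ((probs > threshold) == (target > 0.5)).float().mean()
```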
342 | ] 343 | }, 344 | { 345 | "cell_type": "code", 346 | "execution_count": null, 347 | "metadata": { 348 | "colab": { 349 | "base_uri": "https://localhost:8080/" 350 | }, 351 | "id": "2IMsYzUaXJW7", 352 | "outputId": "5328f1e2-4a71-4c05-fa9e-23b768971315" 353 | }, 354 | "outputs": [], 355 | "source": [ 356 | "model = FTUNetFormer_model.FTUNetFormer().to(DEVICE)\n", 357 | "\n", 358 | "# loss / optimizer\n", 359 | "lossFunction = BCEWithLogitsLoss()\n", 360 | "opt = Adam(model.parameters(), lr=init_lr, weight_decay=0.001)\n", 361 | "\n", 362 | "# calculate steps per epoch for train/val/test\n", 363 | "trainSteps = len(trainDS) // BATCH_SIZE \n", 364 | "valSteps = len(valDS) // BATCH_SIZE\n", 365 | "testSteps = len(testDS) // BATCH_SIZE\n", 366 | "\n", 367 | "print(trainSteps, valSteps, testSteps)\n", 368 | "\n", 369 | "# initialize a dictionary to store training history\n", 370 | "H = {\"train_loss\": [], \"val_loss\": [], \"test_loss\": []}\n", 371 | "H" 372 | ] 373 | }, 374 | { 375 | "cell_type": "code", 376 | "execution_count": null, 377 | "metadata": { 378 | "id": "WEP-IVokbWQg" 379 | }, 380 | "outputs": [], 381 | "source": [ 382 | "torch.cuda.empty_cache() # PyTorch thing to empty cache" 383 | ] 384 | }, 385 | { 386 | "attachments": {}, 387 | "cell_type": "markdown", 388 | "metadata": { 389 | "id": "xcjuKhMeXLU-" 390 | }, 391 | "source": [ 392 | "### TRAINING THE MODEL\n", 393 | "\n", 394 | "Run this piece of code only if you want to train the model from scratch.\n", 395 | "\n", 396 | "Training locally: BATCH_SIZE = 2 takes 5035 MB of GPU memory.\n", 397 | "\n" 398 | ] 399 | }, 400 | { 401 | "cell_type": "code", 402 | "execution_count": null, 403 | "metadata": { 404 | "colab": { 405 | "base_uri": "https://localhost:8080/" 406 | }, 407 | "id": "vWuUyLUgXPNf", 408 | "outputId": "34c6485b-838b-45cf-99da-601f7cfbc4d1" 409 | }, 410 | "outputs": [], 411 | "source": [ 412 | "# loop over epochs\n", 413 | "print(\"[INFO] training UNET ...\")\n", 414 | "startTime = time.time()\n", 415 | "\n", 416 | "for epoch in tqdm(range(EPOCHS)):\n", 417 | " model.train()\n", 418 | "\n", 419 | " # initialize total training and validation loss\n", 420 | " totalTrainLoss = 0\n", 421 | " totalValLoss = 0\n", 422 | " totalTrainAcc = 0\n", 423 | " totalValAcc = 0\n", 424 | "\n", 425 | " # loop over the training set\n", 426 | " for (i, (x, y)) in enumerate(trainLoader):\n", 427 | " # send output to device\n", 428 | " (x, y) = (x.to(DEVICE), y.to(DEVICE))\n", 429 | "\n", 430 | " # perform a forward pass and calculate the training loss\n", 431 | " pred = model(x)\n", 432 | " loss = lossFunction(pred, y)\n", 433 | "\n", 434 | " # calculate the accuracy\n", 435 | " acc = ((pred > 0.5) == y).float().mean()\n", 436 | "\n", 437 | " # kill previously accumulated gradients then\n", 438 | " # perform backpropagation and update model parameters\n", 439 | " opt.zero_grad()\n", 440 | " loss.backward()\n", 441 | " opt.step()\n", 442 | "\n", 443 | " # add the loss and accuracy to the total training loss and accuracy\n", 444 | " totalTrainLoss += loss\n", 445 | " totalTrainAcc += acc\n", 446 | "\n", 447 | " # switch of autograd\n", 448 | " with torch.no_grad():\n", 449 | " # set the model in evaluation mode\n", 450 | " model.eval()\n", 451 | "\n", 452 | " # loop over the validation set\n", 453 | " for (x, y) in valLoader:\n", 454 | " # send the input to the device\n", 455 | " (x, y) = (x.to(DEVICE), y.to(DEVICE))\n", 456 | "\n", 457 | " # make the predictions and calculate the validation loss\n", 458 | " pred = model(x)\n", 459 | 
" loss = lossFunction(pred, y)\n", 460 | "\n", 461 | " # calculate the accuracy\n", 462 | " acc = ((pred > 0.5) == y).float().mean()\n", 463 | "\n", 464 | " # add the loss and accuracy to the total validation loss and accuracy\n", 465 | " totalValLoss += loss\n", 466 | " totalValAcc += acc\n", 467 | "\n", 468 | " # calculate the average training and validation loss and accuracy\n", 469 | " avgTrainLoss = totalTrainLoss / trainSteps\n", 470 | " avgValLoss = totalValLoss / valSteps\n", 471 | " avgTrainAcc = totalTrainAcc / trainSteps\n", 472 | " avgValAcc = totalValAcc / valSteps\n", 473 | " \n", 474 | " # update our training history\n", 475 | " H[\"train_loss\"].append(avgTrainLoss.cpu().detach().numpy())\n", 476 | " H[\"val_loss\"].append(avgValLoss.cpu().detach().numpy())\n", 477 | "\n", 478 | " # print the model training and validation information\n", 479 | " print(\"[INFO] EPOCH: {}/{}\".format(epoch + 1, EPOCHS))\n", 480 | " print(\"Train loss: {:.6f}, Train acc: {:.6f}, Val loss: {:.4f}, Val acc: {:.4f}\".format(\n", 481 | " avgTrainLoss, avgTrainAcc, avgValLoss, avgValAcc))\n", 482 | " \n", 483 | "# display the total time needed to perform the training\n", 484 | "endTime = time.time()\n", 485 | "print(\"[INFO] total time taken to train the model: {:.2f}s\".format(endTime - startTime))\n", 486 | " " 487 | ] 488 | }, 489 | { 490 | "attachments": {}, 491 | "cell_type": "markdown", 492 | "metadata": {}, 493 | "source": [ 494 | "Train loss: 0.001194, Val loss: 0.0013\n", 495 | "[INFO] total time taken to train the model: 27115.38s" 496 | ] 497 | }, 498 | { 499 | "cell_type": "code", 500 | "execution_count": null, 501 | "metadata": { 502 | "colab": { 503 | "base_uri": "https://localhost:8080/" 504 | }, 505 | "id": "CsJoOVn11rs9", 506 | "outputId": "c9a36460-f773-4771-cfd7-fec78711d8cc" 507 | }, 508 | "outputs": [], 509 | "source": [ 510 | "H # show traning/val loss history" 511 | ] 512 | }, 513 | { 514 | "cell_type": "markdown", 515 | "metadata": { 516 | "id": "U6ChLXHuXZHA" 517 | }, 518 | "source": [ 519 | "### Plot the training and validation loss" 520 | ] 521 | }, 522 | { 523 | "cell_type": "code", 524 | "execution_count": null, 525 | "metadata": { 526 | "colab": { 527 | "base_uri": "https://localhost:8080/", 528 | "height": 316 529 | }, 530 | "id": "j04HfubrXYvX", 531 | "outputId": "5416f4a7-2647-40e9-ce25-cac7909dea50" 532 | }, 533 | "outputs": [], 534 | "source": [ 535 | "# plot the training loss\n", 536 | "print(MODEL_PATH)\n", 537 | "print(PLOT_PATH)\n", 538 | "\n", 539 | "\n", 540 | "plt.style.use(\"ggplot\")\n", 541 | "plt.figure()\n", 542 | "plt.plot(H[\"train_loss\"], label=\"train_loss\")\n", 543 | "plt.plot(H[\"val_loss\"], label=\"val_loss\")\n", 544 | "plt.title(\"Training Loss on Dataset\")\n", 545 | "plt.xlabel(\"Epoch #\")\n", 546 | "plt.ylabel(\"Loss\")\n", 547 | "plt.legend(loc=\"lower left\")\n", 548 | "plt.savefig(PLOT_PATH)\n", 549 | "# serialize the model to disk\n", 550 | "torch.save(model, MODEL_PATH) # saves the model" 551 | ] 552 | }, 553 | { 554 | "attachments": {}, 555 | "cell_type": "markdown", 556 | "metadata": { 557 | "id": "5Y6Fx2oaWr0q" 558 | }, 559 | "source": [ 560 | "### Prediction part\n", 561 | "\n", 562 | "Here the trained model is loaded and use for prediction on test images." 
563 | ] 564 | }, 565 | { 566 | "cell_type": "code", 567 | "execution_count": null, 568 | "metadata": { 569 | "colab": { 570 | "base_uri": "https://localhost:8080/" 571 | }, 572 | "id": "qYh4flMu7O-m", 573 | "outputId": "94909220-5b1f-43ad-b52f-d37bbaa270fd" 574 | }, 575 | "outputs": [], 576 | "source": [ 577 | "# Load saved model for prediction\n", 578 | "\n", 579 | "print(MODEL_PATH)\n", 580 | "\n", 581 | "model = torch.load(MODEL_PATH) # add MODEL_PATH after training\n", 582 | "print(\"model loaded for prediction\")\n", 583 | "\n", 584 | "model" 585 | ] 586 | }, 587 | { 588 | "attachments": {}, 589 | "cell_type": "markdown", 590 | "metadata": {}, 591 | "source": [ 592 | "#### Provide test images for MapAI Dataset" 593 | ] 594 | }, 595 | { 596 | "cell_type": "code", 597 | "execution_count": null, 598 | "metadata": {}, 599 | "outputs": [], 600 | "source": [ 601 | "PREDICTIONS_DIR = os.path.join(GD_PATH, \"predictions\")\n", 602 | "PREDICTIONS_DIR" 603 | ] 604 | }, 605 | { 606 | "attachments": {}, 607 | "cell_type": "markdown", 608 | "metadata": {}, 609 | "source": [ 610 | "#### Make predictions on the entire MapAI dataset\n", 611 | "\n", 612 | "Make predictions on test images and save them to the folder named predictions." 613 | ] 614 | }, 615 | { 616 | "cell_type": "code", 617 | "execution_count": null, 618 | "metadata": {}, 619 | "outputs": [], 620 | "source": [ 621 | "import random\n", 622 | "import gc\n", 623 | "from pathlib import Path\n", 624 | "import numpy as np\n", 625 | "from PIL import Image\n", 626 | "\n", 627 | "# PLOTTING PREDICTIONS AS SINGLE IMAGES\n", 628 | "\n", 629 | "# Output folder for the predictions\n", 630 | "output_folder = PREDICTIONS_DIR + \"/\" # check for Windows to save predictions inside the folder\n", 631 | "\n", 632 | "# PLOT TEST IMAGES as RGB\n", 633 | "for n in range(len(test_images)):\n", 634 | " gc.collect()\n", 635 | " # Test image number\n", 636 | " testImgName = str(Path(test_images[n]).stem) + '.tif'\n", 637 | " #print('#', testImgName)\n", 638 | "\n", 639 | " # Make predicton on a test image specified with counter n\n", 640 | " test_img = test_images[n]\n", 641 | " test_img_input = np.expand_dims(test_img, 0)\n", 642 | " #print('#', test_img_input[0])\n", 643 | "\n", 644 | " # PyTorch --> works\n", 645 | " model.eval()\n", 646 | " with torch.no_grad():\n", 647 | " image = cv2.imread(test_img_input[0])\n", 648 | " image = cv2.resize(image, dsize = (IMG_WIDTH, IMG_HEIGHT), interpolation=cv2.INTER_CUBIC)\n", 649 | " image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)\n", 650 | " image = image.astype(\"float32\") / 255\n", 651 | " \n", 652 | " # print('SIZE: ', image.shape)\n", 653 | "\n", 654 | " # make the channel axis to be the leading one, add batch dimension\n", 655 | " image = np.transpose(image, (2, 0, 1))\n", 656 | " # create a PyTorch tensor\n", 657 | " image = np.expand_dims(image, 0)\n", 658 | " # flash the tensor to the device\n", 659 | " image = torch.from_numpy(image).to(DEVICE)\n", 660 | "\n", 661 | " # make the prediction\n", 662 | " predMask = model(image).squeeze()\n", 663 | " # pass result through sigmoid\n", 664 | " predMask = torch.sigmoid(predMask)\n", 665 | "\n", 666 | " # convert result to numpy array\n", 667 | " predMask = predMask.cpu().numpy()\n", 668 | "\n", 669 | " # filter out the weak predictions and convert them to integers\n", 670 | " predMask = (predMask > THRESHOLD) * 255\n", 671 | " predMask = predMask.astype(np.uint8)\n", 672 | "\n", 673 | " # generate image from array\n", 674 | " pIMG = Image.fromarray(predMask)\n", 
675 | " pIMG.save(str(output_folder + testImgName))\n", 676 | "\n", 677 | " print('Prediction:', testImgName, 'saved to:', output_folder)" 678 | ] 679 | }, 680 | { 681 | "attachments": {}, 682 | "cell_type": "markdown", 683 | "metadata": {}, 684 | "source": [ 685 | "#### Make predictions on single images by choice\n", 686 | "\n", 687 | "Change the parameter n to choose which image to plot." 688 | ] 689 | }, 690 | { 691 | "cell_type": "code", 692 | "execution_count": null, 693 | "metadata": { 694 | "colab": { 695 | "base_uri": "https://localhost:8080/" 696 | }, 697 | "id": "bq7BlbdrcgPB", 698 | "outputId": "6860afd9-da51-4911-d975-5a3ed78e01e1" 699 | }, 700 | "outputs": [], 701 | "source": [ 702 | "# ----------------------------------------------------------------------\n", 703 | "output_folder = PREDICTIONS_DIR + \"/\"\n", 704 | "predictions = glob.glob(output_folder + \"*.tif\")\n", 705 | "predictions.sort()\n", 706 | "print(\"# IMAGES for prediction: \", len(predictions))\n", 707 | "print(\"Choosen n can be from 0 to 1367! \")\n", 708 | "\n", 709 | "# ----------------------------------------------------------------------\n", 710 | "\n", 711 | "n = 900 # change this number depending on which image you want to test\n", 712 | "\n", 713 | "fig = plt.figure(figsize=(18,12))\n", 714 | "ax1 = fig.add_subplot(131)\n", 715 | "\n", 716 | "ax1.set_title('RGB image: ')\n", 717 | "image = cv2.imread(test_images[n])[:,:,::-1]\n", 718 | "ax1.imshow(image)\n", 719 | "ax1.set_axis_off()\n", 720 | "\n", 721 | "ax2 = fig.add_subplot(132)\n", 722 | "ax2.set_title('Ground truth: ')\n", 723 | "image = cv2.imread(test_masks[n])[:,:,::-1]\n", 724 | "image *= 255\n", 725 | "ax2.imshow(image)\n", 726 | "ax2.set_axis_off()\n", 727 | "\n", 728 | "ax3 = fig.add_subplot(133)\n", 729 | "ax3.set_title('Prediction: ')\n", 730 | "image = cv2.imread(predictions[n])[:,:,::-1]\n", 731 | "ax3.imshow(image)\n", 732 | "ax3.set_axis_off()" 733 | ] 734 | }, 735 | { 736 | "attachments": {}, 737 | "cell_type": "markdown", 738 | "metadata": {}, 739 | "source": [ 740 | "### BUILDING FOOTPRINT REGULARIZATION\n", 741 | "\n", 742 | "Used repo: https://github.com/zorzi-s/projectRegularization\n", 743 | "\n", 744 | "git clone the repo to the folder where your notebook is stored. To get curent working directory use os.getcwd().\n", 745 | "\n", 746 | "The pretrained weights need to be downloaded from the provided link and saved into the folder pretrained_weighs that is inside projectRegularization:\n", 747 | "\n", 748 | "https://drive.google.com/drive/folders/1IPrDpvFq9ODW7UtPAJR_T-gGzxDat_uu\n", 749 | "\n", 750 | "Next step is to generate a Python file to locate the necessary pretrained weights from projectRegularization. The code below was only tested on Ubuntu, not on Windows." 
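Before wiring up the regularization, it can save time to check that the cloned repository and the downloaded pretrained weights sit where the following cells expect them. A minimal sketch (the folder layout and the weight file names `E140000_e1` / `E140000_net` are taken from the cells below; adjust them if your download differs):

```python
# Sketch: sanity-check the projectRegularization setup before using it.
import os
from pathlib import Path

repo_dir = Path(os.getcwd()) / "projectRegularization"   # cloned repository
weights_dir = repo_dir / "pretrained_weights"             # downloaded pretrained weights

for name in ["E140000_e1", "E140000_net"]:                # encoder / generator checkpoints
    path = weights_dir / name
    print(path, "->", "OK" if path.exists() else "MISSING")
```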
751 | ] 752 | }, 753 | { 754 | "cell_type": "code", 755 | "execution_count": null, 756 | "metadata": {}, 757 | "outputs": [], 758 | "source": [ 759 | "# DEFINE NECESSARY PATHS FOR REGULARIZATION PART\n", 760 | "\n", 761 | "projectRegDir = os.path.join(GD_PATH, \"projectRegularization\")\n", 762 | "print(projectRegDir)\n", 763 | "\n", 764 | "ptw = os.path.join(projectRegDir, \"pretrained_weights\") \n", 765 | "print(ptw)\n", 766 | "\n", 767 | "# OUTPUT REGULARIZATIONS DIR\n", 768 | "REGULARIZATION_DIR = os.path.join(GD_PATH, \"regularizations\") + \"/\"\n", 769 | "print(REGULARIZATION_DIR)\n", 770 | "\n", 771 | "# GET THE PATHS FOR TRAINED GAN MODELS\n", 772 | "ENCODER = os.path.join(ptw, \"E140000_e1\")\n", 773 | "GENERATOR = os.path.join(ptw, \"E140000_net\")\n", 774 | "\n", 775 | "print(ENCODER)\n", 776 | "print(GENERATOR)" 777 | ] 778 | }, 779 | { 780 | "cell_type": "code", 781 | "execution_count": null, 782 | "metadata": {}, 783 | "outputs": [], 784 | "source": [ 785 | "# CREATE A NEW variables.py WITH USERS PATHS\n", 786 | "\n", 787 | "with open(projectRegDir + 'variables.py', 'w') as f:\n", 788 | " f.write('# CONFIGURE THE PATHS HERE: \\n\\n')\n", 789 | " f.write('# TRAINING \\n')\n", 790 | " f.write('DATASET_RGB = ' + '\"' + str(TRAIN_IMG_DIR + '*.tif' + '\"') + '\\n')\n", 791 | " f.write('DATASET_GTI = ' + '\"' + str(TRAIN_MASK_DIR + '*.tif' + '\"') + '\\n')\n", 792 | " f.write('DATASET_SEG = ' + '\"' + str(PREDICTIONS_DIR + '*.tif' + '\"') + '\\n')\n", 793 | " f.write('\\n')\n", 794 | " f.write('DEBUG_DIR = ' + '\"' + str('./debug/') + '\"' + '\\n')\n", 795 | " f.write('\\n')\n", 796 | " f.write('# INFERENCE \\n')\n", 797 | " f.write('INF_RGB = ' + '\"' + str(TEST_IMG_DIR + '*.tif' + '\"') + '\\n')\n", 798 | " f.write('INF_SEG = ' + '\"' + str(PREDICTIONS_DIR + '*.tif' + '\"') + '\\n')\n", 799 | " f.write('INF_OUT = ' + '\"' + str(REGULARIZATION_DIR + '\"') + '\\n')\n", 800 | " f.write('\\n')\n", 801 | " f.write('MODEL_ENCODER = ' + '\"' + str(ENCODER) + '\"' + '\\n')\n", 802 | " f.write('MODEL_GENERATOR = ' + '\"' + str(GENERATOR) + '\"' + '\\n')\n", 803 | " f.close()\n", 804 | " \n", 805 | "print(\"variables.py created with users paths...\")\n" 806 | ] 807 | }, 808 | { 809 | "attachments": {}, 810 | "cell_type": "markdown", 811 | "metadata": {}, 812 | "source": [ 813 | "#### Run projectRegularization\n", 814 | "\n", 815 | "Takes around 6-8 minutes.\n", 816 | "\n", 817 | "You only need to change the command below and replace it with the absolute path for regularize.py" 818 | ] 819 | }, 820 | { 821 | "cell_type": "code", 822 | "execution_count": null, 823 | "metadata": {}, 824 | "outputs": [], 825 | "source": [ 826 | "!python /home/shymon/Documents/mapAI-regularization/projectRegularization/regularize.py" 827 | ] 828 | }, 829 | { 830 | "attachments": {}, 831 | "cell_type": "markdown", 832 | "metadata": {}, 833 | "source": [ 834 | "### Compare predictions and regularizations on a single image" 835 | ] 836 | }, 837 | { 838 | "cell_type": "code", 839 | "execution_count": null, 840 | "metadata": {}, 841 | "outputs": [], 842 | "source": [ 843 | "# Read Regularizations to plot and compare results\n", 844 | "\n", 845 | "regularizations = glob.glob(REGULARIZATION_DIR + \"*.tif\")\n", 846 | "regularizations.sort()\n", 847 | "\n", 848 | "print(\"# of predicted images: \", len(predictions))\n", 849 | "print(\"# of regularized images: \", len(regularizations))" 850 | ] 851 | }, 852 | { 853 | "attachments": {}, 854 | "cell_type": "markdown", 855 | "metadata": {}, 856 | "source": [ 857 | "Code 
to plot RGB, GT, PREDICTION and REGULARIZATION in a single plot for comparison.\n", 858 | "\n", 859 | "Change parameter n accordingly." 860 | ] 861 | }, 862 | { 863 | "cell_type": "code", 864 | "execution_count": null, 865 | "metadata": {}, 866 | "outputs": [], 867 | "source": [ 868 | "n = 600\n", 869 | "\n", 870 | "fig = plt.figure(figsize=(18,12))\n", 871 | "ax1 = fig.add_subplot(141)\n", 872 | "\n", 873 | "ax1.set_title('RGB: ')\n", 874 | "image = cv2.imread(test_images[n])[:,:,::-1]\n", 875 | "ax1.imshow(image)\n", 876 | "ax1.set_axis_off()\n", 877 | "\n", 878 | "ax2 = fig.add_subplot(142)\n", 879 | "ax2.set_title('Ground truth: ')\n", 880 | "image = cv2.imread(test_masks[n])[:,:,::-1]\n", 881 | "image *= 255\n", 882 | "ax2.imshow(image)\n", 883 | "ax2.set_axis_off()\n", 884 | "\n", 885 | "ax3 = fig.add_subplot(143)\n", 886 | "ax3.set_title('Prediction: ')\n", 887 | "image = cv2.imread(predictions[n])[:,:,::-1]\n", 888 | "ax3.imshow(image)\n", 889 | "ax3.set_axis_off()\n", 890 | "\n", 891 | "ax4 = fig.add_subplot(144)\n", 892 | "ax4.set_title('Regularization: ')\n", 893 | "image = cv2.imread(regularizations[n])[:,:,::-1]\n", 894 | "ax4.imshow(image)\n", 895 | "ax4.set_axis_off()\n", 896 | "\n", 897 | "# DEFINE PATH FOR PLOTS TO BE SAVED\n", 898 | "figPath = GD_PATH + \"/\" + \"plots\" + \"/\" \"compare-\" + str(n) + \".png\"\n", 899 | "print(figPath)\n", 900 | "\n", 901 | "# Save plot\n", 902 | "fig.savefig(figPath)" 903 | ] 904 | }, 905 | { 906 | "attachments": {}, 907 | "cell_type": "markdown", 908 | "metadata": {}, 909 | "source": [ 910 | "### VECTORIZING THE REGULARIZED BUILDING MASKS with GDAL\n", 911 | "\n", 912 | "GDAL: https://gdal.org/'\n", 913 | "\n", 914 | "GDAL: https://www.youtube.com/watch?v=q3DLdMj5zLA\n", 915 | "\n", 916 | "I do not know if it is possible to install GDAL on WINDOWS inside a conda environment.\n", 917 | "\n", 918 | "On Ubuntu you have to follow these steps:\n", 919 | "\n", 920 | "\n", 921 | "\n", 922 | "Specific process for installation: https://stackoverflow.com/questions/44005694/no-module-named-gdal\n", 923 | "\n", 924 | "- sudo apt-get update && sudo apt upgrade -y && sudo apt autoremove \n", 925 | "- sudo apt-get install -y cdo nco gdal-bin libgdal-dev-\n", 926 | "- python -m pip install --upgrade pip setuptools wheel\n", 927 | "- python -m pip install --upgrade gdal\n", 928 | "- conda install -c conda forge libgdal\n", 929 | "- conda install -c conda-forge libgdal\n", 930 | "- conda install -c conda-forge gdal\n", 931 | "- conda install tiledb=2.2\n", 932 | "- conda install poppler\n", 933 | "\n", 934 | "When you have this you can hopefully vectorize the detected masks quite easily." 
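Once GDAL is installed, a quick check from Python confirms that the bindings import correctly and that the GeoPackage driver used further down in this notebook is available. A minimal sketch:

```python
# Sketch: verify the GDAL/OGR bindings and the GPKG driver used below.
from osgeo import gdal, ogr

print("GDAL version:", gdal.__version__)
print("GPKG driver available:", ogr.GetDriverByName("GPKG") is not None)
```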
935 | ] 936 | }, 937 | { 938 | "cell_type": "code", 939 | "execution_count": null, 940 | "metadata": {}, 941 | "outputs": [], 942 | "source": [ 943 | "def get_filename_from_path(path):\n", 944 | " \"\"\"\n", 945 | " Given a path, returns the filename after the last frontslash character.\n", 946 | " \"\"\"\n", 947 | " return path.rsplit('/', 1)[-1]\n", 948 | "\n", 949 | "def get_fname_no_extension(path):\n", 950 | " \"\"\"\n", 951 | " Given a path, returns the filename without its extension.\n", 952 | " \"\"\"\n", 953 | " filename, extension = os.path.splitext(path)\n", 954 | " return filename" 955 | ] 956 | }, 957 | { 958 | "cell_type": "code", 959 | "execution_count": null, 960 | "metadata": {}, 961 | "outputs": [], 962 | "source": [ 963 | "import osgeo\n", 964 | "from osgeo import gdal\n", 965 | "from osgeo import ogr\n", 966 | "print('GDAL version: ', osgeo.gdal.__version__)\n", 967 | "\n", 968 | "# Choose which image to vectorize\n", 969 | "n = 0\n", 970 | "\n", 971 | "input = regularizations[n]\n", 972 | "print()\n", 973 | "print(\"INPUT: \", input)\n", 974 | "\n", 975 | "# print(get_fname_no_extension(input))\n", 976 | "\n", 977 | "# out\n", 978 | "output = get_filename_from_path(get_fname_no_extension(input)) + \".gpkg\"\n", 979 | "print(\"OUTPUT: \", output)\n", 980 | "\n", 981 | "# Open image with GDAl driver\n", 982 | "ds = gdal.Open(input)\n", 983 | "# Get the band\n", 984 | "band = ds.GetRasterBand(1)\n", 985 | "\n", 986 | "# Create the output shapefile\n", 987 | "driver = ogr.GetDriverByName(\"GPKG\")\n", 988 | "out_ds = driver.CreateDataSource(output)\n", 989 | "out_layer = out_ds.CreateLayer(output, geom_type=ogr.wkbPolygon)\n", 990 | "\n", 991 | "# Add a field to the layer to store the pixel values\n", 992 | "field_defn = ogr.FieldDefn(\"Pix_Value\", ogr.OFTInteger)\n", 993 | "out_layer.CreateField(field_defn)\n", 994 | "\n", 995 | "# Polygonize the PNG file\n", 996 | "gdal.Polygonize(band, None, out_layer, 0, [], callback=None)\n", 997 | "\n", 998 | "# Close the input and output files\n", 999 | "out_ds = None\n", 1000 | "ds = None" 1001 | ] 1002 | }, 1003 | { 1004 | "attachments": {}, 1005 | "cell_type": "markdown", 1006 | "metadata": {}, 1007 | "source": [ 1008 | "For the builing detection case we need to only keep the vectors with pixel value 255. Easiest solution is to use: Extract by attribute. The Python solution with GDAL can be found below." 
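The ogr2ogr command in the next cell can also be run without leaving Python by using GDAL's `VectorTranslate` utility, which accepts the same attribute filter. A minimal sketch (the file names are just the example used in the next cell, and a `VectorTranslate` call with a `where` clause is assumed to be available in your GDAL build):

```python
# Sketch: keep only the polygons with Pix_Value = 255, the Python counterpart
# of the ogr2ogr command in the next cell.
from osgeo import gdal

src = "bergen_-5943_1104.gpkg"    # polygonized output from the cell above
dst = "bergen_-5943_1104B.gpkg"   # filtered result

gdal.VectorTranslate(dst, src, where="Pix_Value = 255")
```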
1009 | ] 1010 | }, 1011 | { 1012 | "cell_type": "code", 1013 | "execution_count": null, 1014 | "metadata": {}, 1015 | "outputs": [], 1016 | "source": [ 1017 | "# ogr2ogr -where ID=\"1\" OUTFILE.gpkg INFILE.gpkg\n", 1018 | "\n", 1019 | "# RUN from the command line inside Ubuntu\n", 1020 | "# Change name of input and output according to user needs\n", 1021 | "\n", 1022 | "!ogr2ogr -where Pix_Value=\"255\" bergen_-5943_1104B.gpkg bergen_-5943_1104.gpkg" 1023 | ] 1024 | } 1025 | ], 1026 | "metadata": { 1027 | "accelerator": "TPU", 1028 | "colab": { 1029 | "provenance": [] 1030 | }, 1031 | "gpuClass": "premium", 1032 | "kernelspec": { 1033 | "display_name": "torch", 1034 | "language": "python", 1035 | "name": "python3" 1036 | }, 1037 | "language_info": { 1038 | "codemirror_mode": { 1039 | "name": "ipython", 1040 | "version": 3 1041 | }, 1042 | "file_extension": ".py", 1043 | "mimetype": "text/x-python", 1044 | "name": "python", 1045 | "nbconvert_exporter": "python", 1046 | "pygments_lexer": "ipython3", 1047 | "version": "3.10.9" 1048 | }, 1049 | "vscode": { 1050 | "interpreter": { 1051 | "hash": "5614dd747bc595cf94d4c937a609d8df6c75b545807dd5ca7f02df8b67f4ea7c" 1052 | } 1053 | } 1054 | }, 1055 | "nbformat": 4, 1056 | "nbformat_minor": 0 1057 | } 1058 | -------------------------------------------------------------------------------- /05-mapai-dcswin-regularization.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": { 7 | "id": "1KJnP7zSGQg5" 8 | }, 9 | "source": [ 10 | "## Binary semantic segmentation example using DCSwin" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "metadata": { 17 | "colab": { 18 | "base_uri": "https://localhost:8080/" 19 | }, 20 | "id": "cFxHJWmXlcZk", 21 | "outputId": "b755d1a8-3650-42d9-8718-401b4458f049" 22 | }, 23 | "outputs": [], 24 | "source": [ 25 | "import os\n", 26 | "import glob\n", 27 | "import matplotlib.pyplot as plt\n", 28 | "\n", 29 | "import torch\n", 30 | "import torchvision\n", 31 | "from tqdm import tqdm\n", 32 | "\n", 33 | "print(torch.__version__)\n", 34 | "print(torchvision.__version__)\n", 35 | "\n", 36 | "DEVICE = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n", 37 | "print(DEVICE)\n", 38 | "\n", 39 | "# determine if we will be pinning memory during data loading\n", 40 | "PIN_MEMORY = True if DEVICE == \"cuda\" else False" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": { 46 | "id": "KVfaGZrWG63Q" 47 | }, 48 | "source": [ 49 | "#### CONFIGURE YOUR PATHS AND HYPERPARAMETERS FOR TRAINING BELOW." 
50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": null, 55 | "metadata": { 56 | "colab": { 57 | "base_uri": "https://localhost:8080/", 58 | "height": 235 59 | }, 60 | "id": "OjlBC-raVM2K", 61 | "outputId": "b6950142-9c43-40bd-9edb-e10b3973f745" 62 | }, 63 | "outputs": [], 64 | "source": [ 65 | "GD_PATH = os.getcwd() # get current working directory for the repo\n", 66 | "print(GD_PATH)\n", 67 | "\n", 68 | "# PROVIDE PATH TO DOWNLOADED MAPAI DATASET\n", 69 | "DATASET_PATH = \"/home/shymon/datasets/\"\n", 70 | "\n", 71 | "DATASET_PATH = os.path.join(DATASET_PATH, \"mapai_full\") # create dataset path\n", 72 | "\n", 73 | "print(DATASET_PATH)\n", 74 | "\n", 75 | "TRAIN_IMG_DIR = os.path.join(DATASET_PATH, \"train\", \"images\")\n", 76 | "TRAIN_MASK_DIR = os.path.join(DATASET_PATH, \"train\", \"masks\")\n", 77 | "\n", 78 | "print(TRAIN_IMG_DIR)\n", 79 | "print(TRAIN_MASK_DIR)\n", 80 | "\n", 81 | "VAL_IMG_DIR = os.path.join(DATASET_PATH, \"validation\", \"images\")\n", 82 | "VAL_MASK_DIR = os.path.join(DATASET_PATH, \"validation\", \"masks\")\n", 83 | "\n", 84 | "print(VAL_IMG_DIR)\n", 85 | "print(VAL_MASK_DIR)\n", 86 | "\n", 87 | "TEST_IMG_DIR = os.path.join(DATASET_PATH, \"task1_test\", \"images\")\n", 88 | "TEST_MASK_DIR = os.path.join(DATASET_PATH, \"task1_test\", \"masks\")\n", 89 | "\n", 90 | "print(TEST_IMG_DIR)\n", 91 | "print(TEST_MASK_DIR)\n", 92 | "\n", 93 | "# CONFIGURE MapAI DATASET\n", 94 | "NUM_CHANNELS = 3\n", 95 | "NUM_LEVELS = 3\n", 96 | "NUM_CLASSES = 1\n", 97 | "\n", 98 | "# IMAGE SHAPE\n", 99 | "IMG_WIDTH = 512\n", 100 | "IMG_HEIGHT = 512\n", 101 | "\n", 102 | "#---------------------------------------------------------------------------------------------------#\n", 103 | "\n", 104 | "# CONFIGURE parameters for training\n", 105 | "\n", 106 | "EPOCHS = 25\n", 107 | "init_lr = 1e-4 # learning rate\n", 108 | "BATCH_SIZE = 2\n", 109 | "\n", 110 | "THRESHOLD = 0.5\n", 111 | "base_output = \"out\"\n", 112 | "\n", 113 | "model_name = \"dcswin-25-epochs.pth\" # provide name for model\n", 114 | "training_plot_name = \"dcswin-25-epochs.png\"\n", 115 | "\n", 116 | "#---------------------------------------------------------------------------------------------------#\n", 117 | "\n", 118 | "# OUTPUT PATHS\n", 119 | "\n", 120 | "# Trained model path\n", 121 | "MODEL_PATH = os.path.join(GD_PATH, \"trained_models\", model_name) # change depending on the number of epochs\n", 122 | "print(MODEL_PATH)\n", 123 | "PLOT_PATH = os.path.join(GD_PATH, \"plots\", training_plot_name) # the folder to save future plots\n", 124 | "print(PLOT_PATH)" 125 | ] 126 | }, 127 | { 128 | "cell_type": "markdown", 129 | "metadata": { 130 | "id": "IfSMUZbWWdJn" 131 | }, 132 | "source": [ 133 | "### Load and read the MapAI dataset" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": null, 139 | "metadata": { 140 | "id": "TPiACQ_6VyQP" 141 | }, 142 | "outputs": [], 143 | "source": [ 144 | "import tifffile\n", 145 | "from torch.utils.data import Dataset\n", 146 | "import cv2\n", 147 | "\n", 148 | "\n", 149 | "class mapAIdataset(Dataset):\n", 150 | " def __init__(self, imagePaths, maskPaths, transforms):\n", 151 | " # store the image and mask filepaths, and augmentation\n", 152 | " # transforms\n", 153 | " self.imagePaths = imagePaths\n", 154 | " self.maskPaths = maskPaths\n", 155 | " self.transforms = transforms\n", 156 | " \n", 157 | " def __len__(self):\n", 158 | " # return the number of total samples contained in the dataset\n", 159 | " return len(self.imagePaths)\n", 160 
| " \n", 161 | " def __getitem__(self, idx):\n", 162 | " # grab the image path from the current index\n", 163 | " imagePath = self.imagePaths[idx]\n", 164 | " # load the image from disk, swap its channels from BGR to RGB,\n", 165 | " # and read the associated mask from disk\n", 166 | " image = cv2.imread(imagePath)\n", 167 | " image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)\n", 168 | " mask = tifffile.imread(self.maskPaths[idx])\n", 169 | " # convert the mask to a float32 tensor with values in the range [0, 1]\n", 170 | " mask = mask.astype('float32')\n", 171 | " # check to see if we are applying any transformations\n", 172 | " if self.transforms is not None:\n", 173 | " # apply the transformations to both image and its mask\n", 174 | " image = self.transforms(image)\n", 175 | " mask = self.transforms(mask)\n", 176 | " \n", 177 | " # return a tuple of the image and its mask\n", 178 | " return (image, mask)" 179 | ] 180 | }, 181 | { 182 | "attachments": {}, 183 | "cell_type": "markdown", 184 | "metadata": { 185 | "id": "AKXL9bO8WnNg" 186 | }, 187 | "source": [ 188 | "### Build DCSWIN architecture\n", 189 | "Downloaded from: https://github.com/WangLibo1995/GeoSeg/blob/main/geoseg/models/FTUNetFormer.py code changed for binary semantic segmentation." 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": null, 195 | "metadata": { 196 | "id": "9urE3W1iWp7v" 197 | }, 198 | "outputs": [], 199 | "source": [ 200 | "import sys\n", 201 | "subfolder = os.path.join(GD_PATH, \"models\")\n", 202 | "sys.path.insert(0, subfolder)\n", 203 | "\n", 204 | "import DCSwin_model" 205 | ] 206 | }, 207 | { 208 | "cell_type": "markdown", 209 | "metadata": { 210 | "id": "22hbANvfWxmX" 211 | }, 212 | "source": [ 213 | "### Training the segmentation model\n", 214 | "Below we append the paths for TRAIN/VAL/TEST sets - images/masks." 215 | ] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "execution_count": null, 220 | "metadata": { 221 | "colab": { 222 | "base_uri": "https://localhost:8080/" 223 | }, 224 | "id": "G2Jha-LCW0ir", 225 | "outputId": "fcad4c67-0851-42e6-ea68-6b6dd88d26c2" 226 | }, 227 | "outputs": [], 228 | "source": [ 229 | "from torch.nn import BCEWithLogitsLoss\n", 230 | "from torch.optim import Adam\n", 231 | "from torch.utils.data import DataLoader\n", 232 | "from imutils import paths\n", 233 | "import time\n", 234 | "\n", 235 | "# TRAINING\n", 236 | "train_images = sorted(list(paths.list_images(TRAIN_IMG_DIR)))\n", 237 | "train_masks = sorted(list(paths.list_images(TRAIN_MASK_DIR)))\n", 238 | "\n", 239 | "# VALIDATION\n", 240 | "val_images = sorted(list(paths.list_images(VAL_IMG_DIR)))\n", 241 | "val_masks = sorted(list(paths.list_images(VAL_MASK_DIR)))\n", 242 | "\n", 243 | "\n", 244 | "# TEST\n", 245 | "test_images = sorted(list(paths.list_images(TEST_IMG_DIR)))\n", 246 | "test_masks = sorted(list(paths.list_images(TEST_MASK_DIR)))" 247 | ] 248 | }, 249 | { 250 | "cell_type": "markdown", 251 | "metadata": { 252 | "id": "gtqUNGR1XCa5" 253 | }, 254 | "source": [ 255 | "### Define transformations\n", 256 | "\n", 257 | "I tried out different data augmentation techniques, including Horizontal Flip, Vertical Flip, Contrast, Brightness. They did not improve my results much, the validation and training loss were actually worse than without data augmentation techniques." 
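One plausible reason the random flips did not help is that the `mapAIdataset` class above applies `self.transforms` to the image and the mask in two separate calls, so a random flip can hit one without the other. If augmentation is revisited, the random decision has to be shared by the pair; a minimal sketch using `torchvision.transforms.functional` (a suggested pattern, not part of the current pipeline):

```python
# Sketch: apply the SAME random horizontal flip to image and mask, so the
# pair stays aligned (independently drawn random transforms would desynchronize them).
import random
import torchvision.transforms.functional as TF

def paired_hflip(image, mask, p=0.5):
    """Flip image and mask together with probability p."""
    if random.random() < p:
        image = TF.hflip(image)
        mask = TF.hflip(mask)
    return image, mask
```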
258 | ] 259 | }, 260 | { 261 | "attachments": {}, 262 | "cell_type": "markdown", 263 | "metadata": { 264 | "id": "ghW7Nj0OEQMc" 265 | }, 266 | "source": [ 267 | "https://pytorch.org/vision/stable/auto_examples/plot_transforms.html#sphx-glr-auto-examples-plot-transforms-py\n", 268 | "\n", 269 | "https://albumentations.ai/docs/getting_started/mask_augmentation/" 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": null, 275 | "metadata": { 276 | "colab": { 277 | "base_uri": "https://localhost:8080/" 278 | }, 279 | "id": "WR_dzdpCXCHY", 280 | "outputId": "4e9b1681-2846-489f-edf6-a6240af65563" 281 | }, 282 | "outputs": [], 283 | "source": [ 284 | "import torchvision.transforms as T\n", 285 | "\n", 286 | "# T.RandomHorizontalFlip(p=0.5),\n", 287 | "# T.RandomVerticalFlip(p=0.1),\n", 288 | "\n", 289 | "# Image augmentations applied\n", 290 | "transforms = T.Compose([T.ToPILImage(),\n", 291 | " T.Resize((IMG_HEIGHT,IMG_WIDTH)),\n", 292 | " T.ToTensor()])\n", 293 | "\n", 294 | "# create the train and test datasets\n", 295 | "trainDS = mapAIdataset(imagePaths=train_images,\n", 296 | " maskPaths=train_masks,\n", 297 | " transforms=transforms)\n", 298 | "\n", 299 | "valDS = mapAIdataset(imagePaths=val_images,\n", 300 | " maskPaths=val_masks,\n", 301 | " transforms=transforms)\n", 302 | "\n", 303 | "testDS = mapAIdataset(imagePaths=test_images,\n", 304 | " maskPaths=test_masks,\n", 305 | " transforms=transforms)\n", 306 | "\n", 307 | "print(f\"[INFO] found {len(trainDS)} examples in the TRAINING set...\")\n", 308 | "print(f\"[INFO] found {len(valDS)} examples in the VALIDATION set...\")\n", 309 | "print(f\"[INFO] found {len(testDS)} examples in the TEST set...\")\n", 310 | "\n", 311 | "# create the training and test data loaders\n", 312 | "trainLoader = DataLoader(trainDS,\n", 313 | " shuffle=True,\n", 314 | " batch_size=BATCH_SIZE,\n", 315 | " pin_memory=PIN_MEMORY,\n", 316 | " num_workers=os.cpu_count())\n", 317 | "\n", 318 | "valLoader = DataLoader(valDS,\n", 319 | " shuffle=False,\n", 320 | " batch_size=BATCH_SIZE,\n", 321 | " pin_memory=PIN_MEMORY,\n", 322 | " num_workers=os.cpu_count())\n", 323 | "\n", 324 | "testLoader = DataLoader(testDS,\n", 325 | " shuffle=False,\n", 326 | " batch_size=BATCH_SIZE,\n", 327 | " pin_memory=PIN_MEMORY,\n", 328 | " num_workers=os.cpu_count())" 329 | ] 330 | }, 331 | { 332 | "attachments": {}, 333 | "cell_type": "markdown", 334 | "metadata": { 335 | "id": "tAO9M_R4XG6q" 336 | }, 337 | "source": [ 338 | "### Initialize DCSWIN model for training\n" 339 | ] 340 | }, 341 | { 342 | "cell_type": "code", 343 | "execution_count": null, 344 | "metadata": { 345 | "colab": { 346 | "base_uri": "https://localhost:8080/" 347 | }, 348 | "id": "2IMsYzUaXJW7", 349 | "outputId": "5328f1e2-4a71-4c05-fa9e-23b768971315" 350 | }, 351 | "outputs": [], 352 | "source": [ 353 | "model = DCSwin_model.DCSwin().to(DEVICE)\n", 354 | "\n", 355 | "# loss / optimizer\n", 356 | "lossFunction = BCEWithLogitsLoss()\n", 357 | "opt = Adam(model.parameters(), lr=init_lr)\n", 358 | "\n", 359 | "# calculate steps per epoch for train/val/test\n", 360 | "trainSteps = len(trainDS) // BATCH_SIZE \n", 361 | "valSteps = len(valDS) // BATCH_SIZE\n", 362 | "testSteps = len(testDS) // BATCH_SIZE\n", 363 | "\n", 364 | "print(trainSteps, valSteps, testSteps)\n", 365 | "\n", 366 | "# initialize a dictionary to store training history\n", 367 | "H = {\"train_loss\": [], \"val_loss\": [], \"test_loss\": []}\n", 368 | "H" 369 | ] 370 | }, 371 | { 372 | "cell_type": "code", 373 | 
"execution_count": null, 374 | "metadata": { 375 | "id": "WEP-IVokbWQg" 376 | }, 377 | "outputs": [], 378 | "source": [ 379 | "torch.cuda.empty_cache()" 380 | ] 381 | }, 382 | { 383 | "attachments": {}, 384 | "cell_type": "markdown", 385 | "metadata": { 386 | "id": "xcjuKhMeXLU-" 387 | }, 388 | "source": [ 389 | "### TRAINING THE MODEL\n", 390 | "\n", 391 | "Run this piece of code only if you want to train the model from scratch.\n", 392 | "\n", 393 | "Training locally: BATCH_SIZE = 2 takes 5035 MB of GPU memory.\n", 394 | "\n" 395 | ] 396 | }, 397 | { 398 | "cell_type": "code", 399 | "execution_count": null, 400 | "metadata": { 401 | "colab": { 402 | "base_uri": "https://localhost:8080/" 403 | }, 404 | "id": "vWuUyLUgXPNf", 405 | "outputId": "34c6485b-838b-45cf-99da-601f7cfbc4d1" 406 | }, 407 | "outputs": [], 408 | "source": [ 409 | "# loop over epochs\n", 410 | "print(\"[INFO] training DCSwin ...\")\n", 411 | "startTime = time.time()\n", 412 | "\n", 413 | "for epoch in tqdm(range(EPOCHS)):\n", 414 | " model.train()\n", 415 | "\n", 416 | " # initialize total training and validation loss\n", 417 | " totalTrainLoss = 0\n", 418 | " totalValLoss = 0\n", 419 | "\n", 420 | " # loop over the training set\n", 421 | " for (i, (x, y)) in enumerate(trainLoader):\n", 422 | " # send output to device\n", 423 | " (x, y) = (x.to(DEVICE), y.to(DEVICE))\n", 424 | "\n", 425 | " # perform a forward pass and calculate the training loss\n", 426 | " pred = model(x)\n", 427 | " loss = lossFunction(pred, y)\n", 428 | "\n", 429 | " # kill previously accumulated gradients then\n", 430 | " # perform backpropagation and update model parameters\n", 431 | " opt.zero_grad()\n", 432 | " loss.backward()\n", 433 | " opt.step()\n", 434 | "\n", 435 | " # add the loss to the total training loss\n", 436 | " totalTrainLoss += loss\n", 437 | "\n", 438 | " # switch of autograd\n", 439 | " with torch.no_grad():\n", 440 | " # set the model in evaluation mode\n", 441 | " model.eval()\n", 442 | "\n", 443 | " # loop over the validation set\n", 444 | " for (x, y) in valLoader:\n", 445 | " # send the input to the device\n", 446 | " (x, y) = (x.to(DEVICE), y.to(DEVICE))\n", 447 | "\n", 448 | " # make the predictions and calculate the validation loss\n", 449 | " pred = model(x)\n", 450 | " totalValLoss += lossFunction(pred, y)\n", 451 | "\n", 452 | " # calculate the average training and validation loss\n", 453 | " avgTrainLoss = totalTrainLoss / trainSteps\n", 454 | " avgValLoss = totalValLoss / valSteps\n", 455 | " \n", 456 | " # update our training history\n", 457 | " H[\"train_loss\"].append(avgTrainLoss.cpu().detach().numpy())\n", 458 | " H[\"val_loss\"].append(avgValLoss.cpu().detach().numpy())\n", 459 | "\n", 460 | " # print the model training and validation information\n", 461 | " print(\"[INFO] EPOCH: {}/{}\".format(epoch + 1, EPOCHS))\n", 462 | " print(\"Train loss: {:.6f}, Val loss: {:.4f}\".format(avgTrainLoss, avgValLoss))\n", 463 | " \n", 464 | "# display the total time needed to perform the training\n", 465 | "endTime = time.time()\n", 466 | "print(\"[INFO] total time taken to train the model: {:.2f}s\".format(endTime - startTime))" 467 | ] 468 | }, 469 | { 470 | "attachments": {}, 471 | "cell_type": "markdown", 472 | "metadata": {}, 473 | "source": [ 474 | "Train loss: 0.001194, Val loss: 0.0013\n", 475 | "[INFO] total time taken to train the model: 27115.38s" 476 | ] 477 | }, 478 | { 479 | "cell_type": "code", 480 | "execution_count": null, 481 | "metadata": { 482 | "colab": { 483 | "base_uri": "https://localhost:8080/" 
484 | }, 485 | "id": "CsJoOVn11rs9", 486 | "outputId": "c9a36460-f773-4771-cfd7-fec78711d8cc" 487 | }, 488 | "outputs": [], 489 | "source": [ 490 | "H # show traning/val loss history" 491 | ] 492 | }, 493 | { 494 | "cell_type": "markdown", 495 | "metadata": { 496 | "id": "U6ChLXHuXZHA" 497 | }, 498 | "source": [ 499 | "### Plot the training and validation loss" 500 | ] 501 | }, 502 | { 503 | "cell_type": "code", 504 | "execution_count": null, 505 | "metadata": { 506 | "colab": { 507 | "base_uri": "https://localhost:8080/", 508 | "height": 316 509 | }, 510 | "id": "j04HfubrXYvX", 511 | "outputId": "5416f4a7-2647-40e9-ce25-cac7909dea50" 512 | }, 513 | "outputs": [], 514 | "source": [ 515 | "# plot the training loss\n", 516 | "print(MODEL_PATH)\n", 517 | "print(PLOT_PATH)\n", 518 | "\n", 519 | "plt.style.use(\"ggplot\")\n", 520 | "plt.figure()\n", 521 | "plt.plot(H[\"train_loss\"], label=\"train_loss\")\n", 522 | "plt.plot(H[\"val_loss\"], label=\"val_loss\")\n", 523 | "plt.title(\"Training Loss on Dataset\")\n", 524 | "plt.xlabel(\"Epoch #\")\n", 525 | "plt.ylabel(\"Loss\")\n", 526 | "plt.legend(loc=\"lower left\")\n", 527 | "plt.savefig(PLOT_PATH)\n", 528 | "# serialize the model to disk\n", 529 | "torch.save(model, MODEL_PATH) # saves the model" 530 | ] 531 | }, 532 | { 533 | "attachments": {}, 534 | "cell_type": "markdown", 535 | "metadata": { 536 | "id": "5Y6Fx2oaWr0q" 537 | }, 538 | "source": [ 539 | "### Prediction part\n", 540 | "\n", 541 | "Here the trained model is loaded and use for prediction on test images." 542 | ] 543 | }, 544 | { 545 | "cell_type": "code", 546 | "execution_count": null, 547 | "metadata": { 548 | "colab": { 549 | "base_uri": "https://localhost:8080/" 550 | }, 551 | "id": "qYh4flMu7O-m", 552 | "outputId": "94909220-5b1f-43ad-b52f-d37bbaa270fd" 553 | }, 554 | "outputs": [], 555 | "source": [ 556 | "# Load saved model for prediction\n", 557 | "\n", 558 | "print(MODEL_PATH)\n", 559 | "\n", 560 | "model = torch.load(MODEL_PATH) # add MODEL_PATH after training\n", 561 | "print(\"model loaded for prediction\")\n", 562 | "\n", 563 | "model" 564 | ] 565 | }, 566 | { 567 | "attachments": {}, 568 | "cell_type": "markdown", 569 | "metadata": {}, 570 | "source": [ 571 | "#### Provide test images for MapAI Dataset" 572 | ] 573 | }, 574 | { 575 | "cell_type": "code", 576 | "execution_count": null, 577 | "metadata": {}, 578 | "outputs": [], 579 | "source": [ 580 | "PREDICTIONS_DIR = os.path.join(GD_PATH, \"predictions\")\n", 581 | "PREDICTIONS_DIR" 582 | ] 583 | }, 584 | { 585 | "attachments": {}, 586 | "cell_type": "markdown", 587 | "metadata": {}, 588 | "source": [ 589 | "#### Make predictions on the entire MapAI dataset\n", 590 | "\n", 591 | "Make predictions on test images and save them to the folder named predictions." 
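The prediction loop in the next cell re-reads and preprocesses every test image with OpenCV one at a time. Since a `testLoader` with `shuffle=False` is already defined above, the same masks can also be produced in batches, which is usually faster on a GPU. A minimal sketch (it assumes the loader preserves the order of the sorted `test_images` list, which holds because shuffling is disabled):

```python
# Sketch: batched inference with the existing testLoader; shuffle=False keeps
# the batch order aligned with the sorted test_images list used for file names.
import os
from pathlib import Path
from PIL import Image

model.eval()
idx = 0
with torch.no_grad():
    for (x, _) in testLoader:
        probs = torch.sigmoid(model(x.to(DEVICE)))                      # (B, 1, H, W)
        masks = ((probs > THRESHOLD).to(torch.uint8) * 255).cpu().numpy()
        for m in masks:
            name = Path(test_images[idx]).stem + ".tif"
            Image.fromarray(m.squeeze()).save(os.path.join(PREDICTIONS_DIR, name))
            idx += 1
```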
592 | ] 593 | }, 594 | { 595 | "cell_type": "code", 596 | "execution_count": null, 597 | "metadata": {}, 598 | "outputs": [], 599 | "source": [ 600 | "import random\n", 601 | "import gc\n", 602 | "from pathlib import Path\n", 603 | "import numpy as np\n", 604 | "from PIL import Image\n", 605 | "\n", 606 | "# PLOTTING PREDICTIONS AS SINGLE IMAGES\n", 607 | "\n", 608 | "# Output folder for the predictions\n", 609 | "output_folder = PREDICTIONS_DIR + \"/\" # check for Windows to save predictions inside the folder\n", 610 | "\n", 611 | "# PLOT TEST IMAGES as RGB\n", 612 | "for n in range(len(test_images)):\n", 613 | " gc.collect()\n", 614 | " # Test image number\n", 615 | " testImgName = str(Path(test_images[n]).stem) + '.tif'\n", 616 | " #print('#', testImgName)\n", 617 | "\n", 618 | " # Make predicton on a test image specified with counter n\n", 619 | " test_img = test_images[n]\n", 620 | " test_img_input = np.expand_dims(test_img, 0)\n", 621 | " #print('#', test_img_input[0])\n", 622 | "\n", 623 | " # PyTorch --> works\n", 624 | " model.eval()\n", 625 | " with torch.no_grad():\n", 626 | " image = cv2.imread(test_img_input[0])\n", 627 | " image = cv2.resize(image, dsize = (IMG_WIDTH, IMG_HEIGHT), interpolation=cv2.INTER_CUBIC)\n", 628 | " image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)\n", 629 | " image = image.astype(\"float32\") / 255\n", 630 | " \n", 631 | " # print('SIZE: ', image.shape)\n", 632 | "\n", 633 | " # make the channel axis to be the leading one, add batch dimension\n", 634 | " image = np.transpose(image, (2, 0, 1))\n", 635 | " # create a PyTorch tensor\n", 636 | " image = np.expand_dims(image, 0)\n", 637 | " # flash the tensor to the device\n", 638 | " image = torch.from_numpy(image).to(DEVICE)\n", 639 | "\n", 640 | " # make the prediction\n", 641 | " predMask = model(image).squeeze()\n", 642 | " # pass result through sigmoid\n", 643 | " predMask = torch.sigmoid(predMask)\n", 644 | "\n", 645 | " # convert result to numpy array\n", 646 | " predMask = predMask.cpu().numpy()\n", 647 | "\n", 648 | " # filter out the weak predictions and convert them to integers\n", 649 | " predMask = (predMask > THRESHOLD) * 255\n", 650 | " predMask = predMask.astype(np.uint8)\n", 651 | "\n", 652 | " # generate image from array\n", 653 | " pIMG = Image.fromarray(predMask)\n", 654 | " pIMG.save(str(output_folder + testImgName))\n", 655 | "\n", 656 | " print('Prediction:', testImgName, 'saved to:', output_folder)" 657 | ] 658 | }, 659 | { 660 | "attachments": {}, 661 | "cell_type": "markdown", 662 | "metadata": {}, 663 | "source": [ 664 | "#### Make predictions on single images by choice\n", 665 | "\n", 666 | "Change the parameter n to choose which image to plot." 667 | ] 668 | }, 669 | { 670 | "cell_type": "code", 671 | "execution_count": null, 672 | "metadata": { 673 | "colab": { 674 | "base_uri": "https://localhost:8080/" 675 | }, 676 | "id": "bq7BlbdrcgPB", 677 | "outputId": "6860afd9-da51-4911-d975-5a3ed78e01e1" 678 | }, 679 | "outputs": [], 680 | "source": [ 681 | "# ----------------------------------------------------------------------\n", 682 | "\n", 683 | "output_folder = PREDICTIONS_DIR + \"/\" + \"*.tif\"\n", 684 | "\n", 685 | "predictions = glob.glob(output_folder)\n", 686 | "predictions.sort()\n", 687 | "print(\"# IMAGES for prediction: \", len(predictions))\n", 688 | "print(\"Choosen n can be from 0 o 1367! 
\")\n", 689 | "\n", 690 | "\n", 691 | "# ----------------------------------------------------------------------\n", 692 | "\n", 693 | "n = 900 # change this number depending on which image you want to test\n", 694 | "\n", 695 | "fig = plt.figure(figsize=(18,12))\n", 696 | "ax1 = fig.add_subplot(131)\n", 697 | "\n", 698 | "ax1.set_title('RGB image: ')\n", 699 | "image = cv2.imread(test_images[n])[:,:,::-1]\n", 700 | "ax1.imshow(image)\n", 701 | "ax1.set_axis_off()\n", 702 | "\n", 703 | "ax2 = fig.add_subplot(132)\n", 704 | "ax2.set_title('Ground truth: ')\n", 705 | "image = cv2.imread(test_masks[n])[:,:,::-1]\n", 706 | "image *= 255\n", 707 | "ax2.imshow(image)\n", 708 | "ax2.set_axis_off()\n", 709 | "\n", 710 | "ax3 = fig.add_subplot(133)\n", 711 | "ax3.set_title('Prediction: ')\n", 712 | "image = cv2.imread(predictions[n])[:,:,::-1]\n", 713 | "ax3.imshow(image)\n", 714 | "ax3.set_axis_off()" 715 | ] 716 | }, 717 | { 718 | "attachments": {}, 719 | "cell_type": "markdown", 720 | "metadata": {}, 721 | "source": [ 722 | "### BUILDING FOOTPRINT REGULARIZATION\n", 723 | "\n", 724 | "Used repo: https://github.com/zorzi-s/projectRegularization\n", 725 | "\n", 726 | "git clone the repo to the folder where your notebook is stored. To get curent working directory use os.getcwd().\n", 727 | "\n", 728 | "The pretrained weights need to be downloaded from the provided link and saved into the folder pretrained_weighs that is inside projectRegularization:\n", 729 | "\n", 730 | "https://drive.google.com/drive/folders/1IPrDpvFq9ODW7UtPAJR_T-gGzxDat_uu\n", 731 | "\n", 732 | "Next step is to generate a Python file to locate the necessary pretrained weights from projectRegularization. The code below was only tested on Ubuntu, not on Windows." 733 | ] 734 | }, 735 | { 736 | "cell_type": "code", 737 | "execution_count": null, 738 | "metadata": {}, 739 | "outputs": [], 740 | "source": [ 741 | "# DEFINE NECESSARY PATHS FOR REGULARIZATION PART\n", 742 | "\n", 743 | "projectRegDir = os.path.join(GD_PATH, \"projectRegularization\")\n", 744 | "print(projectRegDir)\n", 745 | "\n", 746 | "ptw = os.path.join(projectRegDir, \"pretrained_weights\") \n", 747 | "print(ptw)\n", 748 | "\n", 749 | "# OUTPUT REGULARIZATIONS DIR\n", 750 | "REGULARIZATION_DIR = os.path.join(GD_PATH, \"regularizations\") + \"/\"\n", 751 | "print(REGULARIZATION_DIR)\n", 752 | "\n", 753 | "# GET THE PATHS FOR TRAINED GAN MODELS\n", 754 | "ENCODER = os.path.join(ptw, \"E140000_e1\")\n", 755 | "GENERATOR = os.path.join(ptw, \"E140000_net\")\n", 756 | "\n", 757 | "print(ENCODER)\n", 758 | "print(GENERATOR)" 759 | ] 760 | }, 761 | { 762 | "cell_type": "code", 763 | "execution_count": null, 764 | "metadata": {}, 765 | "outputs": [], 766 | "source": [ 767 | "# CREATE A NEW variables.py WITH USERS PATHS\n", 768 | "\n", 769 | "with open(projectRegDir + 'variables.py', 'w') as f:\n", 770 | " f.write('# CONFIGURE THE PATHS HERE: \\n\\n')\n", 771 | " f.write('# TRAINING \\n')\n", 772 | " f.write('DATASET_RGB = ' + '\"' + str(TRAIN_IMG_DIR + '*.tif' + '\"') + '\\n')\n", 773 | " f.write('DATASET_GTI = ' + '\"' + str(TRAIN_MASK_DIR + '*.tif' + '\"') + '\\n')\n", 774 | " f.write('DATASET_SEG = ' + '\"' + str(PREDICTIONS_DIR + '*.tif' + '\"') + '\\n')\n", 775 | " f.write('\\n')\n", 776 | " f.write('DEBUG_DIR = ' + '\"' + str('./debug/') + '\"' + '\\n')\n", 777 | " f.write('\\n')\n", 778 | " f.write('# INFERENCE \\n')\n", 779 | " f.write('INF_RGB = ' + '\"' + str(TEST_IMG_DIR + '*.tif' + '\"') + '\\n')\n", 780 | " f.write('INF_SEG = ' + '\"' + str(PREDICTIONS_DIR + 
'*.tif' + '\"') + '\\n')\n", 781 | " f.write('INF_OUT = ' + '\"' + str(REGULARIZATION_DIR + '\"') + '\\n')\n", 782 | " f.write('\\n')\n", 783 | " f.write('MODEL_ENCODER = ' + '\"' + str(ENCODER) + '\"' + '\\n')\n", 784 | " f.write('MODEL_GENERATOR = ' + '\"' + str(GENERATOR) + '\"' + '\\n')\n", 785 | " f.close()\n", 786 | " \n", 787 | "print(\"variables.py created with users paths...\")\n" 788 | ] 789 | }, 790 | { 791 | "attachments": {}, 792 | "cell_type": "markdown", 793 | "metadata": {}, 794 | "source": [ 795 | "#### Run projectRegularization\n", 796 | "\n", 797 | "Takes around 6-8 minutes.\n", 798 | "\n", 799 | "You only need to change the command below and replace it with the absolute path for regularize.py" 800 | ] 801 | }, 802 | { 803 | "cell_type": "code", 804 | "execution_count": null, 805 | "metadata": {}, 806 | "outputs": [], 807 | "source": [ 808 | "!python /home/shymon/Documents/mapAI-regularization/projectRegularization/regularize.py" 809 | ] 810 | }, 811 | { 812 | "attachments": {}, 813 | "cell_type": "markdown", 814 | "metadata": {}, 815 | "source": [ 816 | "### Compare predictions and regularizations on a single image" 817 | ] 818 | }, 819 | { 820 | "cell_type": "code", 821 | "execution_count": null, 822 | "metadata": {}, 823 | "outputs": [], 824 | "source": [ 825 | "# Read Regularizations to plot and compare results\n", 826 | "\n", 827 | "regularizations = glob.glob(REGULARIZATION_DIR + \"*.tif\")\n", 828 | "regularizations.sort()\n", 829 | "\n", 830 | "print(\"# of predicted images: \", len(predictions))\n", 831 | "print(\"# of regularized images: \", len(regularizations))" 832 | ] 833 | }, 834 | { 835 | "attachments": {}, 836 | "cell_type": "markdown", 837 | "metadata": {}, 838 | "source": [ 839 | "Code to plot RGB, GT, PREDICTION and REGULARIZATION in a single plot for comparison.\n", 840 | "\n", 841 | "Change parameter n accordingly." 
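A side note on the `!python .../regularize.py` call a few cells above: the hard-coded absolute path can be avoided by building the command from `projectRegDir`, which also keeps the call inside the active environment. A minimal sketch (the comparison plot continues in the next cell):

```python
# Sketch: run regularize.py without hard-coding an absolute path.
# sys.executable reuses the Python of the active (mapai) environment.
import os
import subprocess
import sys

reg_script = os.path.join(projectRegDir, "regularize.py")
subprocess.run([sys.executable, reg_script], check=True, cwd=projectRegDir)
```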
842 | ] 843 | }, 844 | { 845 | "cell_type": "code", 846 | "execution_count": null, 847 | "metadata": {}, 848 | "outputs": [], 849 | "source": [ 850 | "n = 600\n", 851 | "\n", 852 | "fig = plt.figure(figsize=(18,12))\n", 853 | "ax1 = fig.add_subplot(141)\n", 854 | "\n", 855 | "ax1.set_title('RGB: ')\n", 856 | "image = cv2.imread(test_images[n])[:,:,::-1]\n", 857 | "ax1.imshow(image)\n", 858 | "ax1.set_axis_off()\n", 859 | "\n", 860 | "ax2 = fig.add_subplot(142)\n", 861 | "ax2.set_title('Ground truth: ')\n", 862 | "image = cv2.imread(test_masks[n])[:,:,::-1]\n", 863 | "image *= 255\n", 864 | "ax2.imshow(image)\n", 865 | "ax2.set_axis_off()\n", 866 | "\n", 867 | "ax3 = fig.add_subplot(143)\n", 868 | "ax3.set_title('Prediction: ')\n", 869 | "image = cv2.imread(predictions[n])[:,:,::-1]\n", 870 | "ax3.imshow(image)\n", 871 | "ax3.set_axis_off()\n", 872 | "\n", 873 | "ax4 = fig.add_subplot(144)\n", 874 | "ax4.set_title('Regularization: ')\n", 875 | "image = cv2.imread(regularizations[n])[:,:,::-1]\n", 876 | "ax4.imshow(image)\n", 877 | "ax4.set_axis_off()\n", 878 | "\n", 879 | "# DEFINE PATH FOR PLOTS TO BE SAVED\n", 880 | "figPath = GD_PATH + \"/\" + \"plots\" + \"/\" \"compare-\" + str(n) + \".png\"\n", 881 | "print(figPath)\n", 882 | "\n", 883 | "# Save plot\n", 884 | "fig.savefig(figPath)" 885 | ] 886 | }, 887 | { 888 | "attachments": {}, 889 | "cell_type": "markdown", 890 | "metadata": {}, 891 | "source": [ 892 | "### VECTORIZING THE REGULARIZED BUILDING MASKS with GDAL\n", 893 | "\n", 894 | "GDAL: https://gdal.org/'\n", 895 | "\n", 896 | "GDAL: https://www.youtube.com/watch?v=q3DLdMj5zLA\n", 897 | "\n", 898 | "I do not know if it is possible to install GDAL on WINDOWS inside a conda environment.\n", 899 | "\n", 900 | "On Ubuntu you have to follow these steps:\n", 901 | "\n", 902 | "\n", 903 | "\n", 904 | "Specific process for installation: https://stackoverflow.com/questions/44005694/no-module-named-gdal\n", 905 | "\n", 906 | "- sudo apt-get update && sudo apt upgrade -y && sudo apt autoremove \n", 907 | "- sudo apt-get install -y cdo nco gdal-bin libgdal-dev-\n", 908 | "- python -m pip install --upgrade pip setuptools wheel\n", 909 | "- python -m pip install --upgrade gdal\n", 910 | "- conda install -c conda forge libgdal\n", 911 | "- conda install -c conda-forge libgdal\n", 912 | "- conda install -c conda-forge gdal\n", 913 | "- conda install tiledb=2.2\n", 914 | "- conda install poppler\n", 915 | "\n", 916 | "When you have this you can hopefully vectorize the detected masks quite easily." 
917 | ] 918 | }, 919 | { 920 | "cell_type": "code", 921 | "execution_count": null, 922 | "metadata": {}, 923 | "outputs": [], 924 | "source": [ 925 | "def get_fname_from_path(path):\n", 926 | " \"\"\"\n", 927 | " Given a path, returns the filename after the last frontslash character.\n", 928 | " \"\"\"\n", 929 | " return path.rsplit('/', 1)[-1]\n", 930 | "\n", 931 | "def get_fname_no_extension(path):\n", 932 | " \"\"\"\n", 933 | " Given a path, returns the filename without its extension.\n", 934 | " \"\"\"\n", 935 | " filename, extension = os.path.splitext(path)\n", 936 | " return filename" 937 | ] 938 | }, 939 | { 940 | "cell_type": "code", 941 | "execution_count": null, 942 | "metadata": {}, 943 | "outputs": [], 944 | "source": [ 945 | "import osgeo\n", 946 | "from osgeo import gdal\n", 947 | "from osgeo import ogr\n", 948 | "print('GDAL version: ', osgeo.gdal.__version__)\n", 949 | "\n", 950 | "# Choose which image to vectorize\n", 951 | "n = 0\n", 952 | "\n", 953 | "input = regularizations[n]\n", 954 | "print()\n", 955 | "print(\"INPUT: \", input)\n", 956 | "\n", 957 | "# print(get_fname_no_extension(input))\n", 958 | "\n", 959 | "# out\n", 960 | "output = get_fname_from_path(get_fname_no_extension(input)) + \".gpkg\"\n", 961 | "print(\"OUTPUT: \", output)\n", 962 | "\n", 963 | "# Open image with GDAl driver\n", 964 | "ds = gdal.Open(input)\n", 965 | "# Get the band\n", 966 | "band = ds.GetRasterBand(1)\n", 967 | "\n", 968 | "# Create the output shapefile\n", 969 | "driver = ogr.GetDriverByName(\"GPKG\")\n", 970 | "out_ds = driver.CreateDataSource(output)\n", 971 | "out_layer = out_ds.CreateLayer(output, geom_type=ogr.wkbPolygon)\n", 972 | "\n", 973 | "# Add a field to the layer to store the pixel values\n", 974 | "field_defn = ogr.FieldDefn(\"Pix_Value\", ogr.OFTInteger)\n", 975 | "out_layer.CreateField(field_defn)\n", 976 | "\n", 977 | "# Polygonize the PNG file\n", 978 | "gdal.Polygonize(band, None, out_layer, 0, [], callback=None)\n", 979 | "\n", 980 | "# Close the input and output files\n", 981 | "out_ds = None\n", 982 | "ds = None" 983 | ] 984 | }, 985 | { 986 | "attachments": {}, 987 | "cell_type": "markdown", 988 | "metadata": {}, 989 | "source": [ 990 | "For the builing detection case we need to only keep the vectors with pixel value 255. Easiest solution is to use: Extract by attribute. The Python solution with GDAL can be found below." 
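The polygonize cell above handles one raster at a time. Wrapping those same GDAL calls in a small helper makes it straightforward to vectorize every regularized mask in a single loop; the sketch below only rearranges calls that already appear in the cell above (the layer name is an assumption):

```python
# Sketch: the polygonize steps from the cell above wrapped in a function,
# so all regularized masks can be vectorized in one loop.
from osgeo import gdal, ogr

def vectorize_mask(input_tif, output_gpkg, layer_name="buildings"):
    """Polygonize a binary mask raster into a GeoPackage layer with a Pix_Value field."""
    ds = gdal.Open(input_tif)
    band = ds.GetRasterBand(1)

    driver = ogr.GetDriverByName("GPKG")
    out_ds = driver.CreateDataSource(output_gpkg)
    out_layer = out_ds.CreateLayer(layer_name, geom_type=ogr.wkbPolygon)
    out_layer.CreateField(ogr.FieldDefn("Pix_Value", ogr.OFTInteger))

    gdal.Polygonize(band, None, out_layer, 0, [], callback=None)

    # close the datasets so the GeoPackage is flushed to disk
    out_ds = None
    ds = None

# example usage over all regularized masks:
# for reg in regularizations:
#     vectorize_mask(reg, get_fname_from_path(get_fname_no_extension(reg)) + ".gpkg")
```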
991 | ] 992 | }, 993 | { 994 | "cell_type": "code", 995 | "execution_count": null, 996 | "metadata": {}, 997 | "outputs": [], 998 | "source": [ 999 | "# ogr2ogr -where ID=\"1\" outfile.gpkg infile.\n", 1000 | "\n", 1001 | "# RUN from the command line inside Ubuntu\n", 1002 | "!ogr2ogr -where Pix_Value=\"255\" bergen_-5943_1104B.gpkg bergen_-5943_1104.gpkg" 1003 | ] 1004 | } 1005 | ], 1006 | "metadata": { 1007 | "accelerator": "TPU", 1008 | "colab": { 1009 | "provenance": [] 1010 | }, 1011 | "gpuClass": "premium", 1012 | "kernelspec": { 1013 | "display_name": "torch", 1014 | "language": "python", 1015 | "name": "python3" 1016 | }, 1017 | "language_info": { 1018 | "codemirror_mode": { 1019 | "name": "ipython", 1020 | "version": 3 1021 | }, 1022 | "file_extension": ".py", 1023 | "mimetype": "text/x-python", 1024 | "name": "python", 1025 | "nbconvert_exporter": "python", 1026 | "pygments_lexer": "ipython3", 1027 | "version": "3.10.9" 1028 | }, 1029 | "vscode": { 1030 | "interpreter": { 1031 | "hash": "5614dd747bc595cf94d4c937a609d8df6c75b545807dd5ca7f02df8b67f4ea7c" 1032 | } 1033 | } 1034 | }, 1035 | "nbformat": 4, 1036 | "nbformat_minor": 0 1037 | } 1038 | -------------------------------------------------------------------------------- /06-evaluate.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "### Notebook to evaluate results" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "metadata": {}, 15 | "outputs": [], 16 | "source": [ 17 | "import os\n", 18 | "import cv2\n", 19 | "import matplotlib.pyplot as plt\n", 20 | "from imutils import paths\n", 21 | "import time\n", 22 | "import glob\n", 23 | "\n", 24 | "import tifffile as tiff\n", 25 | "import numpy as np\n", 26 | "from PIL import Image\n", 27 | "\n", 28 | "import numpy as np\n", 29 | "from sklearn.metrics import jaccard_score" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 2, 35 | "metadata": {}, 36 | "outputs": [ 37 | { 38 | "name": "stdout", 39 | "output_type": "stream", 40 | "text": [ 41 | "# TEST IMAGES: 1368\n", 42 | "# PREDICTIONS: 1368\n", 43 | "# REGULARIZATIONS: 1368\n" 44 | ] 45 | } 46 | ], 47 | "source": [ 48 | "# CONFIGURE PATHS\n", 49 | "GD_PATH = os.getcwd() + \"/\"\n", 50 | "PLOT_PATH = GD_PATH + \"plots/\"\n", 51 | "\n", 52 | "# TEST\n", 53 | "# Task 1: \n", 54 | "TEST_IMG_DIR = \"/home/shymon/datasets/mapai_full/task1_test/images/\"\n", 55 | "TEST_MASK_DIR = \"/home/shymon/datasets/mapai_full/task1_test/masks/\"\n", 56 | "\n", 57 | "# TEST\n", 58 | "test_images = sorted(list(paths.list_images(TEST_IMG_DIR)))\n", 59 | "test_masks = sorted(list(paths.list_images(TEST_MASK_DIR)))\n", 60 | "\n", 61 | "PREDICTIONS_DIR = GD_PATH + \"predictions/\"\n", 62 | "REGULARIZATION_DIR = GD_PATH + \"regularizations/\"\n", 63 | "\n", 64 | "# read predictions\n", 65 | "predictions = glob.glob(PREDICTIONS_DIR + \"*.tif\")\n", 66 | "predictions.sort()\n", 67 | "\n", 68 | "# read regularizations\n", 69 | "regularizations = glob.glob(REGULARIZATION_DIR + \"*.tif\")\n", 70 | "regularizations.sort()\n", 71 | "\n", 72 | "print(\"# TEST IMAGES: \", len(test_images))\n", 73 | "print(\"# PREDICTIONS: \", len(predictions))\n", 74 | "print(\"# REGULARIZATIONS: \", len(predictions))\n", 75 | "\n", 76 | "# Project Regularization directory\n", 77 | "projectRegDir = GD_PATH + \"projectRegularization\" + \"/\"\n", 78 | "\n", 79 | "ptw = 
projectRegDir + \"pretrained_weights\" + \"/\"\n", 80 | "\n", 81 | "# GET THE PATHS FOR TRAINED GAN MODELS\n", 82 | "ENCODER = ptw + \"E140000_e1\"\n", 83 | "GENERATOR = ptw + \"E140000_net\"\n", 84 | "\n", 85 | "# print(ENCODER)\n", 86 | "# print(GENERATOR)" 87 | ] 88 | }, 89 | { 90 | "attachments": {}, 91 | "cell_type": "markdown", 92 | "metadata": {}, 93 | "source": [ 94 | "### Calculate Intersection over Union on the test set" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": 3, 100 | "metadata": {}, 101 | "outputs": [], 102 | "source": [ 103 | "def iou(gt_mask, pred_mask):\n", 104 | "\n", 105 | " \"\"\"\n", 106 | " Calculates the intersection over union (BIoU) between two binary semantic segmentation masks.\n", 107 | " \n", 108 | " Arguments:\n", 109 | " mask1 -- a 2D numpy array representing the first mask\n", 110 | " mask2 -- a 2D numpy array representing the second mask\n", 111 | " \n", 112 | " Returns:\n", 113 | " iou -- a float representing the BIoU between the two masks\n", 114 | " \"\"\"\n", 115 | "\n", 116 | " intersection = np.logical_and(gt_mask, pred_mask).sum()\n", 117 | " union = np.logical_or(gt_mask, pred_mask).sum()\n", 118 | " iou_score = intersection / union if union != 0 else np.nan\n", 119 | "\n", 120 | " return iou_score\n", 121 | "\n", 122 | "def biou(segA, segB, boundary_width=1):\n", 123 | " \"\"\"\n", 124 | " Calculate the Boundary Intersection over Union (BIoU) metric between two binary segmentation masks.\n", 125 | "\n", 126 | " Parameters:\n", 127 | " segA (numpy array): A 2-dimensional binary numpy array representing the first segmentation mask.\n", 128 | " segB (numpy array): A 2-dimensional binary numpy array representing the second segmentation mask.\n", 129 | " boundary_width (int): The width of the boundary region to be included in the calculation (default is 1).\n", 130 | "\n", 131 | " Returns:\n", 132 | " float: The BIoU metric between the two segmentation masks.\n", 133 | " \"\"\"\n", 134 | "\n", 135 | " # Compute the boundaries of the segmentation masks\n", 136 | " boundaryA = np.zeros_like(segA)\n", 137 | " boundaryA[:,boundary_width:-boundary_width] = segA[:,boundary_width:-boundary_width] ^ segA[:, :-2*boundary_width] ^ segA[:, 2*boundary_width:]\n", 138 | " boundaryA[boundary_width:-boundary_width,:] = boundaryA[boundary_width:-boundary_width,:] ^ segA[:-2*boundary_width,:] ^ segA[2*boundary_width:,:]\n", 139 | "\n", 140 | " boundaryB = np.zeros_like(segB)\n", 141 | " boundaryB[:,boundary_width:-boundary_width] = segB[:,boundary_width:-boundary_width] ^ segB[:, :-2*boundary_width] ^ segB[:, 2*boundary_width:]\n", 142 | " boundaryB[boundary_width:-boundary_width,:] = boundaryB[boundary_width:-boundary_width,:] ^ segB[:-2*boundary_width,:] ^ segB[2*boundary_width:,:]\n", 143 | "\n", 144 | " # Compute the coordinates of the intersection boundary\n", 145 | " intersection_boundary = boundaryA & boundaryB\n", 146 | "\n", 147 | " # Compute the coordinates of the union boundary\n", 148 | " union_boundary = boundaryA | boundaryB\n", 149 | "\n", 150 | " # Compute the area of intersection boundary\n", 151 | " intersection_boundary_area = np.count_nonzero(intersection_boundary)\n", 152 | "\n", 153 | " # Compute the area of union boundary\n", 154 | " union_boundary_area = np.count_nonzero(union_boundary)\n", 155 | "\n", 156 | " # Compute the intersection and union of the interior regions\n", 157 | " intersection = np.logical_and(segA, segB)\n", 158 | " union = np.logical_or(segA, segB)\n", 159 | "\n", 160 | " # Compute the area of 
intersection and union of the interior regions\n", 161 | " intersection_area = np.count_nonzero(intersection)\n", 162 | " union_area = np.count_nonzero(union)\n", 163 | "\n", 164 | " # Compute the BIoU metric\n", 165 | " biou = (intersection_area + intersection_boundary_area) / (union_area + union_boundary_area + 1e-6)\n", 166 | "\n", 167 | " return biou\n", 168 | "\n", 169 | "# To read the original test images from MapAI\n", 170 | "def test2arr(tif_img):\n", 171 | " img = tiff.imread(tif_img)\n", 172 | " arr = np.array(img)\n", 173 | " return arr\n", 174 | "\n", 175 | "# To read the predictions and regularizations\n", 176 | "def pr2arr(tif_img):\n", 177 | " img = tiff.imread(tif_img)\n", 178 | " img = img / 255\n", 179 | " img = cv2.resize(img, (500, 500))\n", 180 | " arr = np.array(img)\n", 181 | " arr = arr.astype(np.uint8)\n", 182 | " return arr\n" 183 | ] 184 | }, 185 | { 186 | "cell_type": "markdown", 187 | "metadata": {}, 188 | "source": [ 189 | "### Evaluation on single image by choice" 190 | ] 191 | }, 192 | { 193 | "attachments": {}, 194 | "cell_type": "markdown", 195 | "metadata": {}, 196 | "source": [ 197 | "(1) Without regularization" 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": 4, 203 | "metadata": {}, 204 | "outputs": [ 205 | { 206 | "name": "stdout", 207 | "output_type": "stream", 208 | "text": [ 209 | "Jaccard score or IoU with Scikit-learn: 0.7001\n", 210 | "Jaccard score or IoU with manual function: 0.7001\n", 211 | "Boundary Intersection over Union: 0.6959\n" 212 | ] 213 | } 214 | ], 215 | "source": [ 216 | "n = 900\n", 217 | "\n", 218 | "jaccard_sklearn = jaccard_score(test2arr(test_masks[n]), pr2arr(predictions[n]), average='micro')\n", 219 | "print(\"Jaccard score or IoU with Scikit-learn: \", round(jaccard_sklearn, 4))\n", 220 | "\n", 221 | "iou_man = iou(test2arr(test_masks[n]), pr2arr(predictions[n]))\n", 222 | "print(\"Jaccard score or IoU with manual function: \", round(iou_man, 4))\n", 223 | "\n", 224 | "biou_man = biou(test2arr(test_masks[n]), pr2arr(predictions[n]))\n", 225 | "print(\"Boundary Intersection over Union: \", round(biou_man, 4))" 226 | ] 227 | }, 228 | { 229 | "attachments": {}, 230 | "cell_type": "markdown", 231 | "metadata": {}, 232 | "source": [ 233 | "(2) With regularization" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": 5, 239 | "metadata": {}, 240 | "outputs": [ 241 | { 242 | "name": "stdout", 243 | "output_type": "stream", 244 | "text": [ 245 | "Jaccard score or IoU with Scikit-learn: 0.6841\n", 246 | "Jaccard score or IoU with manual function: 0.6841\n", 247 | "Boundary Intersection over Union: 0.6801\n" 248 | ] 249 | } 250 | ], 251 | "source": [ 252 | "n = 900\n", 253 | "\n", 254 | "jaccard_sklearn = jaccard_score(test2arr(test_masks[n]), pr2arr(regularizations[n]), average='micro')\n", 255 | "print(\"Jaccard score or IoU with Scikit-learn: \", round(jaccard_sklearn, 4))\n", 256 | "\n", 257 | "iou_man = iou(test2arr(test_masks[n]), pr2arr(regularizations[n]))\n", 258 | "print(\"Jaccard score or IoU with manual function: \", round(iou_man, 4))\n", 259 | "\n", 260 | "biou_man = biou(test2arr(test_masks[n]), pr2arr(regularizations[n]))\n", 261 | "print(\"Boundary Intersection over Union: \", round(biou_man, 4))" 262 | ] 263 | }, 264 | { 265 | "attachments": {}, 266 | "cell_type": "markdown", 267 | "metadata": {}, 268 | "source": [ 269 | "### Evaluation on entire MapAI dataset" 270 | ] 271 | }, 272 | { 273 | "attachments": {}, 274 | "cell_type": "markdown", 275 | "metadata": {}, 
276 | "source": [ 277 | "(1) Without regularization" 278 | ] 279 | }, 280 | { 281 | "cell_type": "code", 282 | "execution_count": 6, 283 | "metadata": {}, 284 | "outputs": [ 285 | { 286 | "name": "stdout", 287 | "output_type": "stream", 288 | "text": [ 289 | "Evaluation without regularization: \n", 290 | "Mean IoU for Task 1: 0.3995\n", 291 | "Mean BIoU for Task 1: 0.3766\n", 292 | "S metric for Task 1: 0.3881\n" 293 | ] 294 | } 295 | ], 296 | "source": [ 297 | "iou_mapai = np.array([])\n", 298 | "biou_mapai = np.array([])\n", 299 | "\n", 300 | "for n in range(len(test_masks)):\n", 301 | " \n", 302 | " # Calculate metrics\n", 303 | "\n", 304 | " # IoU\n", 305 | " iou_single = iou(test2arr(test_masks[n]), pr2arr(predictions[n]))\n", 306 | "\n", 307 | " # BIoU\n", 308 | " biou_single = biou(test2arr(test_masks[n]), pr2arr(predictions[n]))\n", 309 | "\n", 310 | " # Append to whole array\n", 311 | " iou_mapai = np.append(iou_mapai, iou_single)\n", 312 | " biou_mapai = np.append(biou_mapai, biou_single)\n", 313 | "\n", 314 | "#iou_mapai = iou_mapai[iou_mapai != 0]\n", 315 | "#biou_mapai = biou_mapai[biou_mapai != 0]\n", 316 | "\n", 317 | "print(\"Evaluation without regularization: \")\n", 318 | "print(\"Mean IoU for Task 1: \", round(np.nanmean(iou_mapai), 4))\n", 319 | "print(\"Mean BIoU for Task 1: \", round(np.nanmean(biou_mapai), 4))\n", 320 | "print(\"S metric for Task 1: \", round(((np.nanmean(biou_mapai) + np.nanmean(iou_mapai)) / 2 ), 4))" 321 | ] 322 | }, 323 | { 324 | "attachments": {}, 325 | "cell_type": "markdown", 326 | "metadata": {}, 327 | "source": [ 328 | "(2) With regularization" 329 | ] 330 | }, 331 | { 332 | "cell_type": "code", 333 | "execution_count": 7, 334 | "metadata": {}, 335 | "outputs": [ 336 | { 337 | "name": "stdout", 338 | "output_type": "stream", 339 | "text": [ 340 | "Mean IoU for Task 1: 0.4017\n", 341 | "Mean BIoU for Task 1: 0.378\n", 342 | "S metric for Task 1: 0.3898\n" 343 | ] 344 | } 345 | ], 346 | "source": [ 347 | "iou_mapai = np.array([])\n", 348 | "biou_mapai = np.array([])\n", 349 | "\n", 350 | "for n in range(len(test_masks)):\n", 351 | " \n", 352 | " # Calculate metrics\n", 353 | "\n", 354 | " # IoU\n", 355 | " iou_single = iou(test2arr(test_masks[n]), pr2arr(regularizations[n]))\n", 356 | "\n", 357 | " # BIoU\n", 358 | " biou_single = biou(test2arr(test_masks[n]), pr2arr(regularizations[n]))\n", 359 | "\n", 360 | " # Append to whole array\n", 361 | " iou_mapai = np.append(iou_mapai, iou_single)\n", 362 | " biou_mapai = np.append(biou_mapai, biou_single)\n", 363 | "\n", 364 | "#iou_mapai = iou_mapai[iou_mapai != 0]\n", 365 | "#biou_mapai = biou_mapai[biou_mapai != 0]\n", 366 | "\n", 367 | "print(\"Mean IoU for Task 1: \", round(np.nanmean(iou_mapai), 4))\n", 368 | "print(\"Mean BIoU for Task 1: \", round(np.nanmean(biou_mapai), 4))\n", 369 | "print(\"S metric for Task 1: \", round(((np.nanmean(biou_mapai) + np.nanmean(iou_mapai)) / 2 ), 4))" 370 | ] 371 | }, 372 | { 373 | "cell_type": "code", 374 | "execution_count": null, 375 | "metadata": {}, 376 | "outputs": [], 377 | "source": [] 378 | } 379 | ], 380 | "metadata": { 381 | "kernelspec": { 382 | "display_name": "torch", 383 | "language": "python", 384 | "name": "python3" 385 | }, 386 | "language_info": { 387 | "codemirror_mode": { 388 | "name": "ipython", 389 | "version": 3 390 | }, 391 | "file_extension": ".py", 392 | "mimetype": "text/x-python", 393 | "name": "python", 394 | "nbconvert_exporter": "python", 395 | "pygments_lexer": "ipython3", 396 | "version": "3.10.9" 397 | }, 398 | 
"orig_nbformat": 4, 399 | "vscode": { 400 | "interpreter": { 401 | "hash": "5614dd747bc595cf94d4c937a609d8df6c75b545807dd5ca7f02df8b67f4ea7c" 402 | } 403 | } 404 | }, 405 | "nbformat": 4, 406 | "nbformat_minor": 2 407 | } 408 | -------------------------------------------------------------------------------- /07-vectorize-building-footprint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "### Vectorize the predicted building footprints with GDAL" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": null, 14 | "metadata": {}, 15 | "outputs": [], 16 | "source": [ 17 | "import osgeo\n", 18 | "from osgeo import gdal\n", 19 | "from osgeo import ogr\n", 20 | "print('GDAL version: ', osgeo.gdal.__version__)\n", 21 | "\n", 22 | "# PROVIDE SEGMENTATION PATH\n", 23 | "input = \"INSERT PATH HERE\"\n", 24 | "\n", 25 | "# PROVIDE OUTPUT VECTOR FILE\n", 26 | "output = \"INSERT PATH HERE\"\n", 27 | "\n", 28 | "# Open image with GDAl driver\n", 29 | "ds = gdal.Open(input)\n", 30 | "\n", 31 | "# Get GeoTransform\n", 32 | "ds.SetGeoTransform([0,1,0,0,0,-1])\n", 33 | "\n", 34 | "# Get the band\n", 35 | "band = ds.GetRasterBand(1)\n", 36 | "# Create the output shapefile\n", 37 | "driver = ogr.GetDriverByName(\"GeoPackage\")\n", 38 | "out_ds = driver.CreateDataSource(output)\n", 39 | "out_layer = out_ds.CreateLayer(output, geom_type=ogr.wkbPolygon)\n", 40 | "\n", 41 | "# Add a field to the layer to store the pixel values\n", 42 | "field_defn = ogr.FieldDefn(\"Pix_Value\", ogr.OFTInteger)\n", 43 | "out_layer.CreateField(field_defn)\n", 44 | "\n", 45 | "# Polygonize the PNG file\n", 46 | "gdal.Polygonize(band, None, out_layer, 0, [], callback=None)\n", 47 | "\n", 48 | "# Close the input and output files\n", 49 | "out_ds = None\n", 50 | "ds = None" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": null, 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "# ogr2ogr -where ID=\"1\" OUTFILE.gpkg INFILE.gpkg\n", 60 | "\n", 61 | "# RUN from the command line inside Ubuntu\n", 62 | "# Change name of input and output according to user needs\n", 63 | "\n", 64 | "!ogr2ogr -where Pix_Value=\"255\" bergen_-5943_1104B.gpkg bergen_-5943_1104.gpkg" 65 | ] 66 | } 67 | ], 68 | "metadata": { 69 | "kernelspec": { 70 | "display_name": "Python 3", 71 | "language": "python", 72 | "name": "python3" 73 | }, 74 | "language_info": { 75 | "codemirror_mode": { 76 | "name": "ipython", 77 | "version": 3 78 | }, 79 | "file_extension": ".py", 80 | "mimetype": "text/x-python", 81 | "name": "python", 82 | "nbconvert_exporter": "python", 83 | "pygments_lexer": "ipython3", 84 | "version": "3.10.6" 85 | }, 86 | "orig_nbformat": 4, 87 | "vscode": { 88 | "interpreter": { 89 | "hash": "e7370f93d1d0cde622a1f8e1c04877d8463912d04d973331ad4851f04de6915a" 90 | } 91 | } 92 | }, 93 | "nbformat": 4, 94 | "nbformat_minor": 2 95 | } 96 | -------------------------------------------------------------------------------- /DATASET/info.txt: -------------------------------------------------------------------------------- 1 | STORE THE MAPAI DATASET IN THIS FOLDER. 
2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## mapAI-regularization 2 | 3 | The repository stores the code for our work presented at FOSS4G 2023 with the title: **AN END-TO-END DEEP LEARNING WORKFLOW FOR BUILDING SEGMENTATION, 4 | BOUNDARY REGULARIZATION AND VECTORIZATION OF BUILDING FOOTPRINTS.** 5 | 6 | ## Introduction 7 | 8 | The purpose of our research is to develop an end-to-end workflow for accurate segmentation of building footprints, consisting of three major steps: 9 | - (1) binary semantic segmentation with a CNN, 10 | - (2) building boundary regularization and 11 | - (3) vectorization. 12 | 13 | The dataset used for building segmentation is the NORA MapAI: Precision in Building Segmentation dataset. We have developed an implementation for building footprint segmentation. Our approach extends the segmentation by applying projectRegularization (Zorzi and Fraundorfer, 2019; Zorzi et al., 2021) to the semantic segmentation results. The link to the official repository can be accessed here: https://github.com/zorzi-s/projectRegularization. Note that this is already included in our repository. 14 | 15 | ## MapAI dataset 16 | 17 | The original MapAI: Precision in Building Segmentation dataset can be downloaded manually from Huggingface: https://huggingface.co/datasets/sjyhne/mapai_training_data 18 | 19 | or by running our first notebook. 20 | 21 | ## Installation 22 | 23 | ``` 24 | git clone https://github.com/s1m0nS/mapAI-regularization.git 25 | cd mapAI-regularization 26 | conda create --name mapai python=3.10 27 | conda activate mapai 28 | pip install -r requirements.txt 29 | ``` 30 | Installing GDAL inside a conda environment can be tricky. Follow the steps below according to your OS. 31 | 32 | **Linux:** 33 | 34 | ``` 35 | sudo apt-get update && sudo apt upgrade -y && sudo apt autoremove 36 | sudo apt-get install -y cdo nco gdal-bin libgdal-dev 37 | python -m pip install --upgrade pip setuptools wheel 38 | python -m pip install --upgrade gdal 39 | conda install -c conda-forge libgdal 40 | conda install -c conda-forge libgdal 41 | conda install -c conda-forge gdal 42 | conda install tiledb=2.2 43 | conda install poppler 44 | ``` 45 | 46 | **Windows:** 47 | 48 | Get the appropriate .whl file for your Python version from: https://www.lfd.uci.edu/~gohlke/pythonlibs/#gdal 49 | For Python 3.10 use either: 50 | - GDAL-3.4.3-cp310-cp310-win_amd64.whl or 51 | - GDAL-3.4.3-cp310-cp310-win32.whl. 52 | 53 | Then install the appropriate one as: 54 | ``` 55 | conda activate mapai 56 | python -m pip install C:\Users\...\GDAL-3.4.3-cp310-cp310-win_amd64.whl 57 | ``` 58 | 59 | Run our Jupyter Notebooks and enjoy the process. If you encounter errors, post an issue. 60 | 61 | Feedback and new ideas are welcome.
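As a quick sanity check after installation (a minimal sketch; it assumes the packages from requirements.txt and the GDAL Python bindings are installed in the `mapai` environment, and the printed version numbers will differ on your machine), the following one-liner should print the PyTorch and GDAL versions without raising an ImportError:

```
conda activate mapai
python -c "import torch, torchvision, timm, einops; from osgeo import gdal; print(torch.__version__, gdal.__version__)"
```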
62 | 63 | 64 | 65 | -------------------------------------------------------------------------------- /models/UNetFormer_model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from einops import rearrange, repeat 5 | 6 | from timm.models.layers import DropPath, to_2tuple, trunc_normal_ 7 | import timm 8 | 9 | 10 | class ConvBNReLU(nn.Sequential): 11 | def __init__(self, in_channels, out_channels, kernel_size=3, dilation=1, stride=1, norm_layer=nn.BatchNorm2d, bias=False): 12 | super(ConvBNReLU, self).__init__( 13 | nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, bias=bias, 14 | dilation=dilation, stride=stride, padding=((stride - 1) + dilation * (kernel_size - 1)) // 2), 15 | norm_layer(out_channels), 16 | nn.ReLU6() 17 | ) 18 | 19 | 20 | class ConvBN(nn.Sequential): 21 | def __init__(self, in_channels, out_channels, kernel_size=3, dilation=1, stride=1, norm_layer=nn.BatchNorm2d, bias=False): 22 | super(ConvBN, self).__init__( 23 | nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, bias=bias, 24 | dilation=dilation, stride=stride, padding=((stride - 1) + dilation * (kernel_size - 1)) // 2), 25 | norm_layer(out_channels) 26 | ) 27 | 28 | 29 | class Conv(nn.Sequential): 30 | def __init__(self, in_channels, out_channels, kernel_size=3, dilation=1, stride=1, bias=False): 31 | super(Conv, self).__init__( 32 | nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, bias=bias, 33 | dilation=dilation, stride=stride, padding=((stride - 1) + dilation * (kernel_size - 1)) // 2) 34 | ) 35 | 36 | 37 | class SeparableConvBNReLU(nn.Sequential): 38 | def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, dilation=1, 39 | norm_layer=nn.BatchNorm2d): 40 | super(SeparableConvBNReLU, self).__init__( 41 | nn.Conv2d(in_channels, in_channels, kernel_size, stride=stride, dilation=dilation, 42 | padding=((stride - 1) + dilation * (kernel_size - 1)) // 2, 43 | groups=in_channels, bias=False), 44 | norm_layer(out_channels), 45 | nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False), 46 | nn.ReLU6() 47 | ) 48 | 49 | 50 | class SeparableConvBN(nn.Sequential): 51 | def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, dilation=1, 52 | norm_layer=nn.BatchNorm2d): 53 | super(SeparableConvBN, self).__init__( 54 | nn.Conv2d(in_channels, in_channels, kernel_size, stride=stride, dilation=dilation, 55 | padding=((stride - 1) + dilation * (kernel_size - 1)) // 2, 56 | groups=in_channels, bias=False), 57 | norm_layer(out_channels), 58 | nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False) 59 | ) 60 | 61 | 62 | class SeparableConv(nn.Sequential): 63 | def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, dilation=1): 64 | super(SeparableConv, self).__init__( 65 | nn.Conv2d(in_channels, in_channels, kernel_size, stride=stride, dilation=dilation, 66 | padding=((stride - 1) + dilation * (kernel_size - 1)) // 2, 67 | groups=in_channels, bias=False), 68 | nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False) 69 | ) 70 | 71 | 72 | class Mlp(nn.Module): 73 | def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.ReLU6, drop=0.): 74 | super().__init__() 75 | out_features = out_features or in_features 76 | hidden_features = hidden_features or in_features 77 | self.fc1 = nn.Conv2d(in_features, hidden_features, 1, 1, 0, bias=True) 78 | self.act = act_layer() 79 | self.fc2 = 
nn.Conv2d(hidden_features, out_features, 1, 1, 0, bias=True) 80 | self.drop = nn.Dropout(drop, inplace=True) 81 | 82 | def forward(self, x): 83 | x = self.fc1(x) 84 | x = self.act(x) 85 | x = self.drop(x) 86 | x = self.fc2(x) 87 | x = self.drop(x) 88 | return x 89 | 90 | 91 | class GlobalLocalAttention(nn.Module): 92 | def __init__(self, 93 | dim=256, 94 | num_heads=16, 95 | qkv_bias=False, 96 | window_size=8, 97 | relative_pos_embedding=True 98 | ): 99 | super().__init__() 100 | self.num_heads = num_heads 101 | head_dim = dim // self.num_heads 102 | self.scale = head_dim ** -0.5 103 | self.ws = window_size 104 | 105 | self.qkv = Conv(dim, 3*dim, kernel_size=1, bias=qkv_bias) 106 | self.local1 = ConvBN(dim, dim, kernel_size=3) 107 | self.local2 = ConvBN(dim, dim, kernel_size=1) 108 | self.proj = SeparableConvBN(dim, dim, kernel_size=window_size) 109 | 110 | self.attn_x = nn.AvgPool2d(kernel_size=(window_size, 1), stride=1, padding=(window_size//2 - 1, 0)) 111 | self.attn_y = nn.AvgPool2d(kernel_size=(1, window_size), stride=1, padding=(0, window_size//2 - 1)) 112 | 113 | self.relative_pos_embedding = relative_pos_embedding 114 | 115 | if self.relative_pos_embedding: 116 | # define a parameter table of relative position bias 117 | self.relative_position_bias_table = nn.Parameter( 118 | torch.zeros((2 * window_size - 1) * (2 * window_size - 1), num_heads)) # 2*Wh-1 * 2*Ww-1, nH 119 | 120 | # get pair-wise relative position index for each token inside the window 121 | coords_h = torch.arange(self.ws) 122 | coords_w = torch.arange(self.ws) 123 | coords = torch.stack(torch.meshgrid([coords_h, coords_w])) # 2, Wh, Ww 124 | coords_flatten = torch.flatten(coords, 1) # 2, Wh*Ww 125 | relative_coords = coords_flatten[:, :, None] - coords_flatten[:, None, :] # 2, Wh*Ww, Wh*Ww 126 | relative_coords = relative_coords.permute(1, 2, 0).contiguous() # Wh*Ww, Wh*Ww, 2 127 | relative_coords[:, :, 0] += self.ws - 1 # shift to start from 0 128 | relative_coords[:, :, 1] += self.ws - 1 129 | relative_coords[:, :, 0] *= 2 * self.ws - 1 130 | relative_position_index = relative_coords.sum(-1) # Wh*Ww, Wh*Ww 131 | self.register_buffer("relative_position_index", relative_position_index) 132 | 133 | trunc_normal_(self.relative_position_bias_table, std=.02) 134 | 135 | def pad(self, x, ps): 136 | _, _, H, W = x.size() 137 | if W % ps != 0: 138 | x = F.pad(x, (0, ps - W % ps), mode='reflect') 139 | if H % ps != 0: 140 | x = F.pad(x, (0, 0, 0, ps - H % ps), mode='reflect') 141 | return x 142 | 143 | def pad_out(self, x): 144 | x = F.pad(x, pad=(0, 1, 0, 1), mode='reflect') 145 | return x 146 | 147 | def forward(self, x): 148 | B, C, H, W = x.shape 149 | 150 | local = self.local2(x) + self.local1(x) 151 | 152 | x = self.pad(x, self.ws) 153 | B, C, Hp, Wp = x.shape 154 | qkv = self.qkv(x) 155 | 156 | q, k, v = rearrange(qkv, 'b (qkv h d) (hh ws1) (ww ws2) -> qkv (b hh ww) h (ws1 ws2) d', h=self.num_heads, 157 | d=C//self.num_heads, hh=Hp//self.ws, ww=Wp//self.ws, qkv=3, ws1=self.ws, ws2=self.ws) 158 | 159 | dots = (q @ k.transpose(-2, -1)) * self.scale 160 | 161 | if self.relative_pos_embedding: 162 | relative_position_bias = self.relative_position_bias_table[self.relative_position_index.view(-1)].view( 163 | self.ws * self.ws, self.ws * self.ws, -1) # Wh*Ww,Wh*Ww,nH 164 | relative_position_bias = relative_position_bias.permute(2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww 165 | dots += relative_position_bias.unsqueeze(0) 166 | 167 | attn = dots.softmax(dim=-1) 168 | attn = attn @ v 169 | 170 | attn = rearrange(attn, '(b hh 
ww) h (ws1 ws2) d -> b (h d) (hh ws1) (ww ws2)', h=self.num_heads, 171 | d=C//self.num_heads, hh=Hp//self.ws, ww=Wp//self.ws, ws1=self.ws, ws2=self.ws) 172 | 173 | attn = attn[:, :, :H, :W] 174 | 175 | out = self.attn_x(F.pad(attn, pad=(0, 0, 0, 1), mode='reflect')) + \ 176 | self.attn_y(F.pad(attn, pad=(0, 1, 0, 0), mode='reflect')) 177 | 178 | out = out + local 179 | out = self.pad_out(out) 180 | out = self.proj(out) 181 | # print(out.size()) 182 | out = out[:, :, :H, :W] 183 | 184 | return out 185 | 186 | 187 | class Block(nn.Module): 188 | def __init__(self, dim=256, num_heads=16, mlp_ratio=4., qkv_bias=False, drop=0., attn_drop=0., 189 | drop_path=0., act_layer=nn.ReLU6, norm_layer=nn.BatchNorm2d, window_size=8): 190 | super().__init__() 191 | self.norm1 = norm_layer(dim) 192 | self.attn = GlobalLocalAttention(dim, num_heads=num_heads, qkv_bias=qkv_bias, window_size=window_size) 193 | 194 | self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity() 195 | mlp_hidden_dim = int(dim * mlp_ratio) 196 | self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, out_features=dim, act_layer=act_layer, drop=drop) 197 | self.norm2 = norm_layer(dim) 198 | 199 | def forward(self, x): 200 | 201 | x = x + self.drop_path(self.attn(self.norm1(x))) 202 | x = x + self.drop_path(self.mlp(self.norm2(x))) 203 | 204 | return x 205 | 206 | 207 | class WF(nn.Module): 208 | def __init__(self, in_channels=128, decode_channels=128, eps=1e-8): 209 | super(WF, self).__init__() 210 | self.pre_conv = Conv(in_channels, decode_channels, kernel_size=1) 211 | 212 | self.weights = nn.Parameter(torch.ones(2, dtype=torch.float32), requires_grad=True) 213 | self.eps = eps 214 | self.post_conv = ConvBNReLU(decode_channels, decode_channels, kernel_size=3) 215 | 216 | def forward(self, x, res): 217 | x = F.interpolate(x, scale_factor=2, mode='bilinear', align_corners=False) 218 | weights = nn.ReLU()(self.weights) 219 | fuse_weights = weights / (torch.sum(weights, dim=0) + self.eps) 220 | x = fuse_weights[0] * self.pre_conv(res) + fuse_weights[1] * x 221 | x = self.post_conv(x) 222 | return x 223 | 224 | 225 | class FeatureRefinementHead(nn.Module): 226 | def __init__(self, in_channels=64, decode_channels=64): 227 | super().__init__() 228 | self.pre_conv = Conv(in_channels, decode_channels, kernel_size=1) 229 | 230 | self.weights = nn.Parameter(torch.ones(2, dtype=torch.float32), requires_grad=True) 231 | self.eps = 1e-8 232 | self.post_conv = ConvBNReLU(decode_channels, decode_channels, kernel_size=3) 233 | 234 | self.pa = nn.Sequential(nn.Conv2d(decode_channels, decode_channels, kernel_size=3, padding=1, groups=decode_channels), 235 | nn.Sigmoid()) 236 | self.ca = nn.Sequential(nn.AdaptiveAvgPool2d(1), 237 | Conv(decode_channels, decode_channels//16, kernel_size=1), 238 | nn.ReLU6(), 239 | Conv(decode_channels//16, decode_channels, kernel_size=1), 240 | nn.Sigmoid()) 241 | 242 | self.shortcut = ConvBN(decode_channels, decode_channels, kernel_size=1) 243 | self.proj = SeparableConvBN(decode_channels, decode_channels, kernel_size=3) 244 | self.act = nn.ReLU6() 245 | 246 | def forward(self, x, res): 247 | x = F.interpolate(x, scale_factor=2, mode='bilinear', align_corners=False) 248 | weights = nn.ReLU()(self.weights) 249 | fuse_weights = weights / (torch.sum(weights, dim=0) + self.eps) 250 | x = fuse_weights[0] * self.pre_conv(res) + fuse_weights[1] * x 251 | x = self.post_conv(x) 252 | shortcut = self.shortcut(x) 253 | pa = self.pa(x) * x 254 | ca = self.ca(x) * x 255 | x = pa + ca 256 | x = self.proj(x) + 
shortcut 257 | x = self.act(x) 258 | 259 | return x 260 | 261 | 262 | class AuxHead(nn.Module): 263 | 264 | def __init__(self, in_channels=64, num_classes=8): 265 | super().__init__() 266 | self.conv = ConvBNReLU(in_channels, in_channels) 267 | self.drop = nn.Dropout(0.1) 268 | self.conv_out = Conv(in_channels, num_classes, kernel_size=1) 269 | 270 | def forward(self, x, h, w): 271 | feat = self.conv(x) 272 | feat = self.drop(feat) 273 | feat = self.conv_out(feat) 274 | feat = F.interpolate(feat, size=(h, w), mode='bilinear', align_corners=False) 275 | return feat 276 | 277 | 278 | class Decoder(nn.Module): 279 | def __init__(self, 280 | encoder_channels=(64, 128, 256, 512), 281 | decode_channels=64, 282 | dropout=0.1, 283 | window_size=8, 284 | num_classes=6): 285 | super(Decoder, self).__init__() 286 | 287 | self.pre_conv = ConvBN(encoder_channels[-1], decode_channels, kernel_size=1) 288 | self.b4 = Block(dim=decode_channels, num_heads=8, window_size=window_size) 289 | 290 | self.b3 = Block(dim=decode_channels, num_heads=8, window_size=window_size) 291 | self.p3 = WF(encoder_channels[-2], decode_channels) 292 | 293 | self.b2 = Block(dim=decode_channels, num_heads=8, window_size=window_size) 294 | self.p2 = WF(encoder_channels[-3], decode_channels) 295 | 296 | if self.training: 297 | self.up4 = nn.UpsamplingBilinear2d(scale_factor=4) 298 | self.up3 = nn.UpsamplingBilinear2d(scale_factor=2) 299 | self.aux_head = AuxHead(decode_channels, num_classes) 300 | 301 | self.p1 = FeatureRefinementHead(encoder_channels[-4], decode_channels) 302 | 303 | self.segmentation_head = nn.Sequential(ConvBNReLU(decode_channels, decode_channels), 304 | nn.Dropout2d(p=dropout, inplace=True), 305 | Conv(decode_channels, num_classes, kernel_size=1)) 306 | self.init_weight() 307 | 308 | def forward(self, res1, res2, res3, res4, h, w): 309 | if self.training: 310 | x = self.b4(self.pre_conv(res4)) 311 | h4 = self.up4(x) 312 | 313 | x = self.p3(x, res3) 314 | x = self.b3(x) 315 | h3 = self.up3(x) 316 | 317 | x = self.p2(x, res2) 318 | x = self.b2(x) 319 | h2 = x 320 | x = self.p1(x, res1) 321 | x = self.segmentation_head(x) 322 | x = F.interpolate(x, size=(h, w), mode='bilinear', align_corners=False) 323 | 324 | ah = h4 + h3 + h2 325 | ah = self.aux_head(ah, h, w) 326 | 327 | return x, ah 328 | else: 329 | x = self.b4(self.pre_conv(res4)) 330 | x = self.p3(x, res3) 331 | x = self.b3(x) 332 | 333 | x = self.p2(x, res2) 334 | x = self.b2(x) 335 | 336 | x = self.p1(x, res1) 337 | 338 | x = self.segmentation_head(x) 339 | x = F.interpolate(x, size=(h, w), mode='bilinear', align_corners=False) 340 | 341 | return x 342 | 343 | def init_weight(self): 344 | for m in self.children(): 345 | if isinstance(m, nn.Conv2d): 346 | nn.init.kaiming_normal_(m.weight, a=1) 347 | if m.bias is not None: 348 | nn.init.constant_(m.bias, 0) 349 | 350 | 351 | class UNetFormer(nn.Module): 352 | def __init__(self, 353 | decode_channels=64, 354 | dropout=0.5, 355 | backbone_name='swsl_resnet18', 356 | pretrained=True, 357 | window_size=8, 358 | num_classes=1 359 | ): 360 | super().__init__() 361 | 362 | self.backbone = timm.create_model(backbone_name, features_only=True, output_stride=32, 363 | out_indices=(1, 2, 3, 4), pretrained=pretrained) 364 | encoder_channels = self.backbone.feature_info.channels() 365 | 366 | self.decoder = Decoder(encoder_channels, decode_channels, dropout, window_size, num_classes) 367 | 368 | def forward(self, x): 369 | h, w = x.size()[-2:] 370 | res1, res2, res3, res4 = self.backbone(x) 371 | if self.training: 372 | 
x, ah = self.decoder(res1, res2, res3, res4, h, w) 373 | return x 374 | else: 375 | x = self.decoder(res1, res2, res3, res4, h, w) 376 | return x -------------------------------------------------------------------------------- /models/__pycache__/DCSwin_model.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/models/__pycache__/DCSwin_model.cpython-310.pyc -------------------------------------------------------------------------------- /models/__pycache__/FTUNetFormer_model.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/models/__pycache__/FTUNetFormer_model.cpython-310.pyc -------------------------------------------------------------------------------- /models/__pycache__/UNetFormer_model.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/models/__pycache__/UNetFormer_model.cpython-310.pyc -------------------------------------------------------------------------------- /plots/compare-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/plots/compare-1.png -------------------------------------------------------------------------------- /plots/compare-10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/plots/compare-10.png -------------------------------------------------------------------------------- /plots/compare-100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/plots/compare-100.png -------------------------------------------------------------------------------- /plots/compare-1000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/plots/compare-1000.png -------------------------------------------------------------------------------- /plots/compare-1100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/plots/compare-1100.png -------------------------------------------------------------------------------- /plots/compare-1200.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/plots/compare-1200.png -------------------------------------------------------------------------------- /plots/compare-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/plots/compare-2.png -------------------------------------------------------------------------------- /plots/compare-22.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/plots/compare-22.png -------------------------------------------------------------------------------- /plots/compare-250.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/plots/compare-250.png -------------------------------------------------------------------------------- /plots/compare-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/plots/compare-3.png -------------------------------------------------------------------------------- /plots/compare-33.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/plots/compare-33.png -------------------------------------------------------------------------------- /plots/compare-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/plots/compare-4.png -------------------------------------------------------------------------------- /plots/compare-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/plots/compare-5.png -------------------------------------------------------------------------------- /plots/compare-500.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/plots/compare-500.png -------------------------------------------------------------------------------- /plots/compare-600.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/plots/compare-600.png -------------------------------------------------------------------------------- /plots/compare-750.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/plots/compare-750.png -------------------------------------------------------------------------------- /plots/compare-800.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/plots/compare-800.png -------------------------------------------------------------------------------- /plots/compare-900.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/plots/compare-900.png -------------------------------------------------------------------------------- /plots/dcswin-25-epochs.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/plots/dcswin-25-epochs.png -------------------------------------------------------------------------------- /plots/ft-unet-former-25-epochs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/plots/ft-unet-former-25-epochs.png -------------------------------------------------------------------------------- /plots/unet-25-epochs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/plots/unet-25-epochs.png -------------------------------------------------------------------------------- /plots/unet-former-25-epochs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/plots/unet-former-25-epochs.png -------------------------------------------------------------------------------- /predictions/bergen_-5943_1104.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/predictions/bergen_-5943_1104.tif -------------------------------------------------------------------------------- /predictions/bergen_-5944_1104.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/predictions/bergen_-5944_1104.tif -------------------------------------------------------------------------------- /predictions/bergen_-5948_1107.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/predictions/bergen_-5948_1107.tif -------------------------------------------------------------------------------- /predictions/kristiansand_-4712_-1562.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/predictions/kristiansand_-4712_-1562.tif -------------------------------------------------------------------------------- /predictions/kristiansand_-4712_-1563.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/predictions/kristiansand_-4712_-1563.tif -------------------------------------------------------------------------------- /predictions/kristiansand_-4712_-1568.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/predictions/kristiansand_-4712_-1568.tif -------------------------------------------------------------------------------- /predictions/oslo_-3133_244.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/predictions/oslo_-3133_244.tif 
-------------------------------------------------------------------------------- /predictions/tromso_923_11083.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/predictions/tromso_923_11083.tif -------------------------------------------------------------------------------- /predictions/tromso_923_11084.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/predictions/tromso_923_11084.tif -------------------------------------------------------------------------------- /predictions/tromso_923_11086.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/predictions/tromso_923_11086.tif -------------------------------------------------------------------------------- /projectRegularization/INFO.txt: -------------------------------------------------------------------------------- 1 | projectRegularization was downloaded from: 2 | https://github.com/zorzi-s/projectRegularization 3 | 4 | Download and the pretrained_weights from: 5 | https://drive.google.com/drive/folders/1IPrDpvFq9ODW7UtPAJR_T-gGzxDat_uu 6 | 7 | Save them to folder: pretrained_weights 8 | -------------------------------------------------------------------------------- /projectRegularization/LICENSE: -------------------------------------------------------------------------------- 1 | 2 | SOFTWARE LICENSE AGREEMENT 3 | 4 | ICG Software – 2021, all rights reserved, hereinafter "the Software". 5 | 6 | This software has been developed by researchers of ICG (Institute of Computer Graphics and Vision). 7 | 8 | Institute of Computer Graphics and Vision (ICG), Inffeldgasse 16/II, 9 | 8010 Graz, Austria 10 | 11 | ICG holds all the ownership rights on the Software. 12 | 13 | The Software is still being currently developed. It is the ICG’s aim for the Software 14 | to be used by the scientific community so as to test it and, evaluate it so that ICG may improve it. 15 | 16 | For these reasons ICG has decided to distribute the Software. 17 | 18 | ICG grants to the academic user, a free of charge, without right to sub license non-exclusive right 19 | to use the Software for research purposes for a period of one (1) year from the date of the download 20 | of the source code. Any other use without of prior consent of ICG is prohibited. 21 | 22 | The academic user explicitly acknowledges having received from ICG all information allowing him 23 | to appreciate the adequacy between of the Software and his needs and to undertake all necessary 24 | precautions for his execution and use. 25 | 26 | The Software is provided only as a source. 
27 | 28 | In case of using the Software for a publication or other results obtained through the use of the Software, 29 | user should cite the Software as follows : 30 | 31 | @inproceedings{zorzi2021machine, 32 | title={Machine-learned regularization and polygonization of building segmentation masks}, 33 | author={Zorzi, Stefano and Bittner, Ksenia and Fraundorfer, Friedrich}, 34 | booktitle={2020 25th International Conference on Pattern Recognition (ICPR)}, 35 | pages={3098--3105}, 36 | year={2021}, 37 | organization={IEEE} 38 | } 39 | 40 | Every user of the Software could communicate to the developers [stefano.zorzi@icg.tugraz.at] 41 | his or her remarks as to the use of the Software. 42 | 43 | THE USER CANNOT USE, EXPLOIT OR COMMERCIALLY DISTRIBUTE THE SOFTWARE WITHOUT PRIOR AND EXPLICIT CONSENT 44 | OF ICG (fraundorfer@icg.tugraz.at). ANY SUCH ACTION WILL CONSTITUTE A FORGERY. 45 | 46 | THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT ANY WARRANTIES OF ANY NATURE AND ANY EXPRESS OR IMPLIED WARRANTIES, 47 | WITH REGARDS TO COMMERCIAL USE, PROFESSIONAL USE, LEGAL OR NOT, OR OTHER, OR COMMERCIALIZATION OR ADAPTATION. 48 | 49 | UNLESS EXPLICITLY PROVIDED BY LAW, IN NO EVENT, SHALL ICG OR THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 50 | INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE 51 | GOODS OR SERVICES, LOSS OF USE, DATA, OR PROFITS OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 52 | WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING FROM, 53 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 54 | -------------------------------------------------------------------------------- /projectRegularization/README.md: -------------------------------------------------------------------------------- 1 | # Regularization of Building Boundaries in Satellite and Aerial Images 2 | This repository contains the implementation for our publication "Machine-learned regularization and polygonization of building segmentation masks", ICPR 2021. 3 | If you use this implementation please cite the following publication: 4 | 5 | ~~~ 6 | @inproceedings{zorzi2021machine, 7 | title={Machine-learned regularization and polygonization of building segmentation masks}, 8 | author={Zorzi, Stefano and Bittner, Ksenia and Fraundorfer, Friedrich}, 9 | booktitle={2020 25th International Conference on Pattern Recognition (ICPR)}, 10 | pages={3098--3105}, 11 | year={2021}, 12 | organization={IEEE} 13 | } 14 | ~~~ 15 | and 16 | ~~~ 17 | @inproceedings{zorzi2019regularization, 18 | title={Regularization of building boundaries in satellite images using adversarial and regularized losses}, 19 | author={Zorzi, Stefano and Fraundorfer, Friedrich}, 20 | booktitle={IGARSS 2019-2019 IEEE International Geoscience and Remote Sensing Symposium}, 21 | pages={5140--5143}, 22 | year={2019}, 23 | organization={IEEE} 24 | } 25 | ~~~ 26 | 27 |

28 | 29 | Explanatory video of the approach: 30 | 31 | [![Watch the video](https://img.youtube.com/vi/07YQOlwIOMs/0.jpg)](https://www.youtube.com/watch?v=07YQOlwIOMs) 32 | 33 | # Dependencies 34 | 35 | * cuda 10.2 36 | * pytorch >= 1.3 37 | * opencv 38 | * gdal 39 | 40 | # Running the implementation 41 | After installing all of the required dependencies above you can download the pretrained weights from [here](https://drive.google.com/drive/folders/1IPrDpvFq9ODW7UtPAJR_T-gGzxDat_uu?usp=sharing). 42 | 43 | Unzip the archive and place *saved_models_gan* folder in the main *projectRegularization* directory. 44 | 45 | Please note that the polygonization step is not yet available! 46 | 47 | ## Evaluation 48 | Modify *variables.py* accordingly, then run the prediction issuing the command 49 | 50 | ~~~ 51 | python regularize.py 52 | ~~~ 53 | 54 | ## Training 55 | Modify *variables.py* accordingly, then run the training issuing the command 56 | 57 | ~~~ 58 | python train_gan_net.py 59 | ~~~ 60 | -------------------------------------------------------------------------------- /projectRegularization/README.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/projectRegularization/README.png -------------------------------------------------------------------------------- /projectRegularization/__pycache__/crf_loss.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/projectRegularization/__pycache__/crf_loss.cpython-310.pyc -------------------------------------------------------------------------------- /projectRegularization/__pycache__/data_loader_gan.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/projectRegularization/__pycache__/data_loader_gan.cpython-310.pyc -------------------------------------------------------------------------------- /projectRegularization/__pycache__/models.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/projectRegularization/__pycache__/models.cpython-310.pyc -------------------------------------------------------------------------------- /projectRegularization/__pycache__/training_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/projectRegularization/__pycache__/training_utils.cpython-310.pyc -------------------------------------------------------------------------------- /projectRegularization/__pycache__/variables.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/projectRegularization/__pycache__/variables.cpython-310.pyc -------------------------------------------------------------------------------- /projectRegularization/crf_loss.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import numpy as np 4 | import 
math 5 | import itertools 6 | import time 7 | import datetime 8 | import sys 9 | from math import exp 10 | import random 11 | 12 | #from torchvision.utils import save_image 13 | #from torchvision import datasets 14 | 15 | from torch.utils.data import DataLoader 16 | from torch.autograd import Variable 17 | 18 | import torch.nn as nn 19 | import torch.nn.functional as F 20 | import torch 21 | 22 | kernel_size = 9 #gaussian kernel dimension 23 | dilation = 1 #cheating :) The "real" dimension of the gaussian kernel is kernel size, but the "effective" dimension is (kernel_size*dilation + 1) 24 | padding = (kernel_size // 2) * dilation #do not touch this 25 | bs = 4 #batch size 26 | win = 256 #window size 27 | 28 | sigma_X = 3.0 #for distance gaussian 29 | sigma_I = 0.1 #for RGB/grayscale gaussian 30 | 31 | sample_interval = 20 # sample image every 32 | 33 | class kernel_loss(torch.nn.Module): 34 | 35 | def sub_kernel(self): 36 | filters = kernel_size * kernel_size 37 | middle = kernel_size // 2 38 | kernel = Variable(torch.zeros((filters, 1, kernel_size, kernel_size))).cuda() 39 | for i in range(kernel_size): 40 | for j in range(kernel_size): 41 | kernel[i*kernel_size+j, 0, i, j] = -1 42 | kernel[i*kernel_size+j, 0, middle, middle] = kernel[i*kernel_size+j, 0, middle, middle] + 1 43 | return kernel 44 | 45 | def dist_kernel(self): 46 | filters = kernel_size * kernel_size 47 | middle = kernel_size // 2 48 | kernel = Variable(torch.zeros((bs, filters, 1, 1))).cuda() 49 | 50 | for i in range(kernel_size): 51 | for j in range(kernel_size): 52 | ii = i - middle 53 | jj = j - middle 54 | distance = pow(ii,2) + pow(jj,2) 55 | kernel[:, i*kernel_size+j, 0, 0] = exp(-distance / pow(sigma_X,2)) 56 | #print(kernel.view(4,1,kernel_size,kernel_size)) 57 | return kernel 58 | 59 | def central_kernel(self): 60 | filters = kernel_size * kernel_size 61 | middle = kernel_size // 2 62 | kernel = Variable(torch.zeros((filters, 1, kernel_size, kernel_size))).cuda() 63 | for i in range(kernel_size): 64 | for j in range(kernel_size): 65 | kernel[i*kernel_size+j, 0, middle, middle] = 1 66 | return kernel 67 | 68 | def select_kernel(self): 69 | filters = kernel_size * kernel_size 70 | middle = kernel_size // 2 71 | kernel = Variable(torch.zeros((filters, 1, kernel_size, kernel_size))).cuda() 72 | for i in range(kernel_size): 73 | for j in range(kernel_size): 74 | kernel[i*kernel_size+j, 0, i, j] = 1 75 | return kernel 76 | 77 | def color_tensor(self, x): 78 | result = Variable(torch.zeros((bs, kernel_size*kernel_size, win-2*padding, win-2*padding))).cuda() 79 | 80 | for i in range(x.shape[1]): 81 | channel = x[:,i,:,:].unsqueeze(1) 82 | sub = nn.Conv2d(in_channels=1, out_channels=kernel_size*kernel_size, kernel_size=kernel_size, bias=False, padding=0, dilation=dilation) 83 | sub.weight.data = self.sub_matrix 84 | color = sub(channel) 85 | color = torch.pow(color,2) 86 | result = result + color 87 | 88 | result = torch.exp(-result / pow(sigma_I,2)) 89 | return result 90 | 91 | def probability_tensor(self, y): 92 | conv = nn.Conv2d(in_channels=1, out_channels=kernel_size*kernel_size, kernel_size=kernel_size, bias=False, padding=0, dilation=dilation) 93 | conv.weight.data = self.select_matrix 94 | prob = conv(y) 95 | return prob 96 | 97 | #def probability_central(self, y): 98 | # conv = nn.Conv2d(in_channels=1, out_channels=kernel_size*kernel_size, kernel_size=kernel_size, bias=False, padding=padding) 99 | # conv.weight.data = self.one_matrix 100 | # prob = conv(y) 101 | # return prob 102 | 103 | def __init__(self): 104 | 
super(kernel_loss,self).__init__() 105 | #self.softmax = nn.Softmax(dim=1) 106 | self.dist_tensor = self.dist_kernel() 107 | #self.one_matrix = self.central_kernel() 108 | self.select_matrix = self.select_kernel() 109 | self.sub_matrix = self.sub_kernel() #shape: [filters, 1, h, w] 110 | 111 | 112 | def forward(self,x,y): 113 | """ 114 | x --> Image. It can also have just 1 channel (grayscale). Values between 0 and 1 115 | y --> Mask. Values between 0 and 1 116 | """ 117 | #y = self.softmax(y) 118 | y0 = y[:,0,:,:].unsqueeze(1) #build: 0, background: 1, default 1 119 | y1 = y[:,1,:,:].unsqueeze(1) #build: 1, background: 0, default 0 120 | y0p = y0[:,:,padding:-padding,padding:-padding] 121 | y1p = y1[:,:,padding:-padding,padding:-padding] 122 | 123 | W = self.color_tensor(x) 124 | W = (W * self.dist_tensor.expand_as(W)) 125 | 126 | potts_loss_0 = y0p.expand_as(W) * W * self.probability_tensor(y1) 127 | potts_loss_1 = y1p.expand_as(W) * W * self.probability_tensor(y0) 128 | 129 | numel = potts_loss_0.numel() 130 | #ncut_loss_0 = (potts_loss_0 / (self.probability_tensor(y0) * W)).mean() 131 | #ncut_loss_1 = (potts_loss_1 / (self.probability_tensor(y1) * W)).mean() 132 | 133 | """ 134 | if random.randint(0,sample_interval) == 0: 135 | r = random.randint(0,20) 136 | 137 | img = torch.mean(W, dim=1).unsqueeze(1) 138 | #amin = torch.min(img) 139 | #amax = torch.max(img) 140 | #img = (img - amin) / (amax - amin) 141 | save_image(img, "./debug/%d_img.png" % r, nrow=2) 142 | 143 | #img2 = torch.mean(potts_loss_0, dim=1).unsqueeze(1) 144 | #amin = torch.min(img2) 145 | #amax = torch.max(img2) 146 | #img2 = (img2 - amin) / (amax - amin) 147 | #save_image(img2, "./debug/%d_b.png" % r, nrow=2) 148 | 149 | img3 = torch.mean(potts_loss_0, dim=1).unsqueeze(1) 150 | #amin = torch.min(img3) 151 | #amax = torch.max(img3) 152 | #img3 = (img3 - amin) / (amax - amin) 153 | save_image(img3, "./debug/%d_loss.png" % r, nrow=2) 154 | 155 | #img4 = torch.mean(loss_matrix, dim=1).unsqueeze(1) 156 | ##amin = torch.min(img4) 157 | ##amax = torch.max(img4) 158 | ##img4 = (img4 - amin) / (amax - amin) 159 | #save_image(img4, "./debug/%d_d.png" % r, nrow=2) 160 | save_image(x, "./debug/%d_map.png" % r, nrow=2) 161 | """ 162 | 163 | potts_loss_0 = (potts_loss_0).mean() 164 | potts_loss_1 = (potts_loss_1).mean() 165 | potts_loss = potts_loss_0 + potts_loss_1 166 | 167 | return potts_loss 168 | 169 | """ 170 | #ncut_loss_0 = potts_loss_0 / (self.probability_tensor(y0) * W).mean() 171 | #ncut_loss_1 = potts_loss_1 / (self.probability_tensor(y1) * W).mean() 172 | ncut_loss_0 = potts_loss_0 / (y0p.expand_as(W) * W).mean() 173 | ncut_loss_1 = potts_loss_1 / (y1p.expand_as(W) * W).mean() 174 | 175 | #ncut_loss_0 = ncut_loss_0.mean() 176 | #ncut_loss_1 = ncut_loss_1.mean() 177 | ncut_loss = ncut_loss_0 + ncut_loss_1 178 | 179 | #potts_loss = potts_loss_0 + potts_loss_1 180 | #ncut_loss = ncut_loss_0 + ncut_loss_1 181 | 182 | return (potts_loss, ncut_loss, numel) 183 | """ 184 | 185 | -------------------------------------------------------------------------------- /projectRegularization/data_loader_gan.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | from glob import glob 4 | from tqdm import tqdm 5 | import random 6 | from skimage import io 7 | from skimage.segmentation import mark_boundaries 8 | from skimage.transform import rotate 9 | import variables as var 10 | 11 | TEST = False 12 | 13 | def to_categorical(y, num_classes=None, dtype='float32'): 14 | 
15 | y = np.array(y, dtype='int') 16 | input_shape = y.shape 17 | if input_shape and input_shape[-1] == 1 and len(input_shape) > 1: 18 | input_shape = tuple(input_shape[:-1]) 19 | y = y.ravel() 20 | if not num_classes: 21 | num_classes = np.max(y) + 1 22 | n = y.shape[0] 23 | categorical = np.zeros((n, num_classes), dtype=dtype) 24 | categorical[np.arange(n), y] = 1 25 | output_shape = input_shape + (num_classes,) 26 | categorical = np.reshape(categorical, output_shape) 27 | return categorical 28 | 29 | class DataLoader(): 30 | 31 | def __init__(self, ws=512, nb=10000, bs=8): 32 | self.nb = nb 33 | self.bs = bs 34 | self.ws = ws 35 | 36 | #self.rgb_files = self.rgb_files[:10] 37 | #self.dsm_files = self.dsm_files[:10] 38 | #self.gti_files = self.gti_files[:10] 39 | 40 | self.load_data() 41 | self.num_tiles = len(self.rgb_imgs) 42 | self.sliding_index = 0 43 | 44 | def generator(self): 45 | for _ in range(self.nb): 46 | batch_rgb = [] 47 | batch_gti = [] 48 | batch_seg = [] 49 | for _ in range(self.bs): 50 | rgb, gti, seg = self.extract_image() 51 | 52 | batch_rgb.append(rgb) 53 | 54 | # the ground truth is categorized 55 | gti = to_categorical(gti != 0, 2) 56 | batch_gti.append(gti) 57 | 58 | # the segmentation is categorized 59 | seg = to_categorical(seg != 0, 2) 60 | batch_seg.append(seg) 61 | 62 | batch_rgb = np.asarray(batch_rgb) 63 | batch_gti = np.asarray(batch_gti) 64 | batch_seg = np.asarray(batch_seg) 65 | batch_rgb = batch_rgb / 255.0 66 | 67 | #batch_gti = batch_gti[:,:,:,np.newaxis] / 255.0 68 | 69 | yield (batch_rgb, batch_gti, batch_seg) 70 | 71 | 72 | def test_shape(self, a): 73 | ri = a.shape[0] % self.ws 74 | rj = a.shape[1] % self.ws 75 | return a[:-ri,:-rj] 76 | 77 | 78 | def random_hsv(self, img, value_h=30, value_s=30, value_v=30): 79 | hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV) 80 | h, s, v = cv2.split(hsv) 81 | 82 | h = np.int16(h) 83 | s = np.int16(s) 84 | v = np.int16(v) 85 | 86 | h += value_h 87 | h[h < 0] = 0 88 | h[h > 255] = 255 89 | 90 | s += value_s 91 | s[s < 0] = 0 92 | s[s > 255] = 255 93 | 94 | v += value_v 95 | v[v < 0] = 0 96 | v[v > 255] = 255 97 | 98 | h = np.uint8(h) 99 | s = np.uint8(s) 100 | v = np.uint8(v) 101 | 102 | final_hsv = cv2.merge((h, s, v)) 103 | img = cv2.cvtColor(final_hsv, cv2.COLOR_HSV2BGR) 104 | return img 105 | 106 | 107 | def extract_image(self, mode="sequential"): 108 | if mode == "random": 109 | rand_t = random.randint(0, self.num_tiles-1) 110 | else: 111 | if self.sliding_index < self.num_tiles: 112 | rand_t = self.sliding_index 113 | self.sliding_index = self.sliding_index + 1 114 | else: 115 | rand_t = 0 116 | self.sliding_index = 0 117 | 118 | rgb = self.rgb_imgs[rand_t].copy() 119 | gti = self.gti_imgs[rand_t].copy() 120 | seg = self.seg_imgs[rand_t].copy() 121 | 122 | h = rgb.shape[1] 123 | w = rgb.shape[0] 124 | 125 | void = True 126 | while void: 127 | rot = random.randint(0,90) 128 | ri = random.randint(0, int(h-self.ws*2)) 129 | rj = random.randint(0, int(w-self.ws*2)) 130 | win_rgb = rgb[ri:ri+int(self.ws*2), rj:rj+int(self.ws*2)] 131 | win_gti = gti[ri:ri+int(self.ws*2), rj:rj+int(self.ws*2)] 132 | win_seg = seg[ri:ri+int(self.ws*2), rj:rj+int(self.ws*2)] 133 | 134 | win_rgb = np.uint8(rotate(win_rgb, rot, resize=False, preserve_range=True)) 135 | win_gti = np.uint8(rotate(win_gti, rot, resize=False, preserve_range=True)) 136 | win_seg = np.uint8(rotate(win_seg, rot, resize=False, preserve_range=True)) 137 | 138 | win_rgb = win_rgb[self.ws//2:-self.ws//2, self.ws//2:-self.ws//2] 139 | win_gti = 
win_gti[self.ws//2:-self.ws//2, self.ws//2:-self.ws//2] 140 | win_seg = win_seg[self.ws//2:-self.ws//2, self.ws//2:-self.ws//2] 141 | 142 | if np.count_nonzero(win_seg): 143 | void = False 144 | 145 | # Perform some data augmentation 146 | rot = random.randint(0,3) 147 | win_rgb = np.rot90(win_rgb, k=rot) 148 | win_gti = np.rot90(win_gti, k=rot) 149 | win_seg = np.rot90(win_seg, k=rot) 150 | if random.randint(0,1) == 1: 151 | win_rgb = np.fliplr(win_rgb) 152 | win_gti = np.fliplr(win_gti) 153 | win_seg = np.fliplr(win_seg) 154 | 155 | r_h = random.randint(-20,20) 156 | r_s = random.randint(-20,20) 157 | r_v = random.randint(-20,20) 158 | win_rgb = self.random_hsv(win_rgb, r_h, r_s, r_v) 159 | 160 | win_rgb = win_rgb.astype(np.float32) 161 | win_gti = win_gti.astype(np.float32) 162 | win_seg = win_seg.astype(np.float32) 163 | return (win_rgb, win_gti, win_seg) 164 | 165 | 166 | def load_data(self): 167 | self.rgb_imgs = [] 168 | self.gti_imgs = [] 169 | self.seg_imgs = [] 170 | 171 | rgb_files = glob(var.DATASET_RGB) 172 | gti_files = glob(var.DATASET_GTI) 173 | seg_files = glob(var.DATASET_SEG) 174 | 175 | rgb_files.sort() 176 | gti_files.sort() 177 | seg_files.sort() 178 | 179 | combined = list(zip(rgb_files, gti_files, seg_files)) 180 | random.shuffle(combined) 181 | 182 | rgb_files[:], gti_files[:], seg_files[:] = zip(*combined) 183 | 184 | if TEST: 185 | rgb_files = rgb_files[:4] 186 | gti_files = gti_files[:4] 187 | seg_files = seg_files[:4] 188 | 189 | for rgb_name, gti_name, seg_name in tqdm(zip(rgb_files, gti_files, seg_files), total=len(rgb_files), desc="Loading dataset into RAM"): 190 | 191 | tmp = io.imread(rgb_name) 192 | tmp = tmp.astype(np.uint8) 193 | self.rgb_imgs.append(tmp) 194 | 195 | tmp = io.imread(gti_name) 196 | tmp = tmp.astype(np.uint8) 197 | self.gti_imgs.append(tmp) 198 | 199 | tmp = io.imread(seg_name) 200 | tmp = tmp.astype(np.uint8) 201 | self.seg_imgs.append(tmp) 202 | 203 | -------------------------------------------------------------------------------- /projectRegularization/gdal: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/projectRegularization/gdal -------------------------------------------------------------------------------- /projectRegularization/models.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | import torch 4 | 5 | 6 | def weights_init_normal(m): 7 | classname = m.__class__.__name__ 8 | if classname.find("Conv") != -1: 9 | torch.nn.init.normal_(m.weight.data, 0.0, 0.02) 10 | if hasattr(m, "bias") and m.bias is not None: 11 | torch.nn.init.constant_(m.bias.data, 0.0) 12 | elif classname.find("BatchNorm2d") != -1: 13 | torch.nn.init.normal_(m.weight.data, 1.0, 0.02) 14 | torch.nn.init.constant_(m.bias.data, 0.0) 15 | 16 | 17 | 18 | class ResidualBlock(nn.Module): 19 | def __init__(self, in_features): 20 | super(ResidualBlock, self).__init__() 21 | 22 | self.block = nn.Sequential( 23 | #nn.ReflectionPad2d(1), 24 | nn.Conv2d(in_features, in_features, 3, stride=1, padding=1), 25 | nn.InstanceNorm2d(in_features), 26 | nn.ReLU(inplace=True), 27 | #nn.ReflectionPad2d(1), 28 | nn.Conv2d(in_features, in_features, 3, stride=1, padding=1), 29 | nn.InstanceNorm2d(in_features), 30 | nn.ReLU(inplace=True), 31 | ) 32 | 33 | def forward(self, x): 34 | return x + self.block(x) 35 | 36 | 37 | 38 | class 
GeneratorResNet(nn.Module): 39 | def __init__(self, num_residual_blocks=8, in_features=256): 40 | super(GeneratorResNet, self).__init__() 41 | 42 | out_features = in_features 43 | 44 | model = [] 45 | 46 | # Residual blocks 47 | for _ in range(num_residual_blocks): 48 | model += [ResidualBlock(out_features)] 49 | 50 | # Upsampling 51 | for _ in range(2): 52 | out_features //= 2 53 | model += [ 54 | nn.Upsample(scale_factor=2), 55 | nn.Conv2d(in_features, out_features, 3, stride=1, padding=1), 56 | nn.InstanceNorm2d(out_features), 57 | nn.ReLU(inplace=True), 58 | nn.Conv2d(out_features, out_features, 3, stride=1, padding=1), 59 | nn.InstanceNorm2d(out_features), 60 | nn.ReLU(inplace=True), 61 | nn.Conv2d(out_features, out_features, 3, stride=1, padding=1), 62 | nn.InstanceNorm2d(out_features), 63 | nn.ReLU(inplace=True), 64 | ] 65 | in_features = out_features 66 | 67 | # Output layer 68 | #model += [nn.ReflectionPad2d(2), nn.Conv2d(out_features, 2, 7), nn.Softmax()] 69 | model += [nn.Conv2d(out_features, 2, 7, stride=1, padding=3), nn.Sigmoid()] 70 | 71 | self.model = nn.Sequential(*model) 72 | 73 | def forward(self, feature_map): 74 | x = self.model(feature_map) 75 | return x 76 | 77 | 78 | class Encoder(nn.Module): 79 | def __init__(self, channels=3+2): 80 | super(Encoder, self).__init__() 81 | 82 | # Initial convolution block 83 | out_features = 64 84 | model = [ 85 | nn.Conv2d(channels, out_features, 7, stride=1, padding=3), 86 | nn.InstanceNorm2d(out_features), 87 | nn.ReLU(inplace=True), 88 | ] 89 | in_features = out_features 90 | 91 | # Downsampling 92 | for _ in range(2): 93 | out_features *= 2 94 | model += [ 95 | nn.Conv2d(in_features, out_features, 3, stride=1, padding=1), 96 | nn.InstanceNorm2d(out_features), 97 | nn.ReLU(inplace=True), 98 | nn.Conv2d(out_features, out_features, 3, stride=1, padding=1), 99 | nn.InstanceNorm2d(out_features), 100 | nn.ReLU(inplace=True), 101 | nn.MaxPool2d(2, stride=2), 102 | ] 103 | in_features = out_features 104 | 105 | self.model = nn.Sequential(*model) 106 | 107 | def forward(self, arguments): 108 | x = torch.cat(arguments, dim=1) 109 | x = self.model(x) 110 | return x 111 | 112 | 113 | class Discriminator(nn.Module): 114 | def __init__(self): 115 | super(Discriminator, self).__init__() 116 | 117 | channels = 2 118 | out_channels = 2 119 | 120 | def discriminator_block(in_filters, out_filters, normalize=True): 121 | """Returns downsampling layers of each discriminator block""" 122 | layers = [nn.Conv2d(in_filters, out_filters, 3, stride=1, padding=1)] 123 | if normalize: 124 | layers.append(nn.InstanceNorm2d(out_filters)) 125 | layers.append(nn.ReLU()) 126 | 127 | layers.append(nn.Conv2d(out_filters, out_filters, 3, stride=1, padding=1)) 128 | if normalize: 129 | layers.append(nn.InstanceNorm2d(out_filters)) 130 | layers.append(nn.ReLU()) 131 | layers.append(nn.MaxPool2d(2, stride=2)) 132 | return layers 133 | 134 | self.model = nn.Sequential( 135 | *discriminator_block(channels, 64, normalize=False), 136 | *discriminator_block(64, 128), 137 | *discriminator_block(128, 256), 138 | *discriminator_block(256, 512), 139 | nn.Conv2d(512, out_channels, 3, padding=1), 140 | nn.Sigmoid() 141 | ) 142 | 143 | def forward(self, img): 144 | #img = torch.cat((rgb, mask), dim=1) 145 | img = self.model(img) 146 | return img 147 | -------------------------------------------------------------------------------- /projectRegularization/regularize.py: -------------------------------------------------------------------------------- 1 | import random 2 | from 
skimage import io 3 | from skimage.transform import rotate 4 | import numpy as np 5 | import torch 6 | from tqdm import tqdm 7 | from osgeo import gdal 8 | import os 9 | import glob 10 | from skimage.segmentation import mark_boundaries 11 | from PIL import Image, ImageDraw, ImageFont 12 | from numpy.linalg import svd 13 | import cv2 14 | from skimage import measure 15 | 16 | from models import GeneratorResNet, Encoder 17 | from skimage.transform import rescale 18 | import variables as var 19 | 20 | 21 | 22 | 23 | def compute_IoU(mask, pred): 24 | mask = mask!=0 25 | pred = pred!=0 26 | 27 | m1 = np.logical_and(mask, pred) 28 | m2 = np.logical_and(np.logical_not(mask), np.logical_not(pred)) 29 | m3 = np.logical_and(mask==0, pred==1) 30 | m4 = np.logical_and(mask==1, pred==0) 31 | m5 = np.logical_or(mask, pred) 32 | 33 | tp = np.count_nonzero(m1) 34 | fp = np.count_nonzero(m3) 35 | fn = np.count_nonzero(m4) 36 | 37 | IoU = tp/(tp+(fn+fp)) 38 | return IoU 39 | 40 | 41 | def to_categorical(y, num_classes=None, dtype='float32'): 42 | y = np.array(y, dtype='int') 43 | input_shape = y.shape 44 | if input_shape and input_shape[-1] == 1 and len(input_shape) > 1: 45 | input_shape = tuple(input_shape[:-1]) 46 | y = y.ravel() 47 | if not num_classes: 48 | num_classes = np.max(y) + 1 49 | n = y.shape[0] 50 | categorical = np.zeros((n, num_classes), dtype=dtype) 51 | categorical[np.arange(n), y] = 1 52 | output_shape = input_shape + (num_classes,) 53 | categorical = np.reshape(categorical, output_shape) 54 | return categorical 55 | 56 | 57 | def predict_building(rgb, mask, model): 58 | Tensor = torch.cuda.FloatTensor 59 | 60 | mask = to_categorical(mask, 2) 61 | 62 | rgb = rgb[np.newaxis, :, :, :] 63 | mask = mask[np.newaxis, :, :, :] 64 | 65 | E, G = model 66 | 67 | rgb = Tensor(rgb) 68 | mask = Tensor(mask) 69 | rgb = rgb.permute(0,3,1,2) 70 | mask = mask.permute(0,3,1,2) 71 | 72 | rgb = rgb / 255.0 73 | 74 | # PREDICTION 75 | pred = G(E([rgb, mask])) 76 | pred = pred.permute(0,2,3,1) 77 | 78 | pred = pred.detach().cpu().numpy() 79 | 80 | pred = np.argmax(pred[0,:,:,:], axis=-1) 81 | return pred 82 | 83 | 84 | 85 | def fix_limits(i_min, i_max, j_min, j_max, min_image_size=256): 86 | 87 | def closest_divisible_size(size, factor=4): 88 | while size % factor: 89 | size += 1 90 | return size 91 | 92 | height = i_max - i_min 93 | width = j_max - j_min 94 | 95 | # pad the rows 96 | if height < min_image_size: 97 | diff = min_image_size - height 98 | else: 99 | diff = closest_divisible_size(height) - height + 16 100 | 101 | i_min -= (diff // 2) 102 | i_max += (diff // 2 + diff % 2) 103 | 104 | # pad the columns 105 | if width < min_image_size: 106 | diff = min_image_size - width 107 | else: 108 | diff = closest_divisible_size(width) - width + 16 109 | 110 | j_min -= (diff // 2) 111 | j_max += (diff // 2 + diff % 2) 112 | 113 | return i_min, i_max, j_min, j_max 114 | 115 | 116 | 117 | def regularization(rgb, ins_segmentation, model, in_mode="semantic", out_mode="semantic", min_size=10): 118 | assert in_mode == "instance" or in_mode == "semantic" 119 | assert out_mode == "instance" or out_mode == "semantic" 120 | 121 | if in_mode == "semantic": 122 | ins_segmentation = np.uint16(measure.label(ins_segmentation, background=0)) 123 | 124 | max_instance = np.amax(ins_segmentation) 125 | border = 256 126 | 127 | ins_segmentation = np.uint16(cv2.copyMakeBorder(ins_segmentation,border,border,border,border,cv2.BORDER_CONSTANT,value=0)) 128 | rgb = 
np.uint8(cv2.copyMakeBorder(rgb,border,border,border,border,cv2.BORDER_CONSTANT,value=(0,0,0))) 129 | 130 | regularization = np.zeros(ins_segmentation.shape, dtype=np.uint16) 131 | 132 | for ins in tqdm(range(1, max_instance+1), desc="Regularization"): 133 | indices = np.argwhere(ins_segmentation==ins) 134 | building_size = indices.shape[0] 135 | if building_size > min_size: 136 | i_min = np.amin(indices[:,0]) 137 | i_max = np.amax(indices[:,0]) 138 | j_min = np.amin(indices[:,1]) 139 | j_max = np.amax(indices[:,1]) 140 | 141 | i_min, i_max, j_min, j_max = fix_limits(i_min, i_max, j_min, j_max) 142 | 143 | mask = np.copy(ins_segmentation[i_min:i_max, j_min:j_max] == ins) 144 | rgb_mask = np.copy(rgb[i_min:i_max, j_min:j_max, :]) 145 | 146 | 147 | 148 | max_building_size = 1024 149 | rescaled = False 150 | if mask.shape[0] > max_building_size and mask.shape[0] >= mask.shape[1]: 151 | f = max_building_size / mask.shape[0] 152 | mask = rescale(mask, f, anti_aliasing=False, preserve_range=True) 153 | rgb_mask = rescale(rgb_mask, f, anti_aliasing=False) 154 | rescaled = True 155 | elif mask.shape[1] > max_building_size and mask.shape[1] >= mask.shape[0]: 156 | f = max_building_size / mask.shape[1] 157 | mask = rescale(mask, f, anti_aliasing=False) 158 | rgb_mask = rescale(rgb_mask, f, anti_aliasing=False, preserve_range=True) 159 | rescaled = True 160 | 161 | pred = predict_building(rgb_mask, mask, model) 162 | 163 | if rescaled: 164 | pred = rescale(pred, 1/f, anti_aliasing=False, preserve_range=True) 165 | 166 | 167 | 168 | pred_indices = np.argwhere(pred != 0) 169 | 170 | if pred_indices.shape[0] > 0: 171 | pred_indices[:,0] = pred_indices[:,0] + i_min 172 | pred_indices[:,1] = pred_indices[:,1] + j_min 173 | x, y = zip(*pred_indices) 174 | if out_mode == "semantic": 175 | regularization[x,y] = 1 176 | else: 177 | regularization[x,y] = ins 178 | 179 | return regularization[border:-border, border:-border] 180 | 181 | 182 | 183 | def copyGeoreference(inp, output): 184 | dataset = gdal.Open(inp) 185 | if dataset is None: 186 | print('Unable to open', inp, 'for reading') 187 | sys.exit(1) 188 | 189 | projection = dataset.GetProjection() 190 | geotransform = dataset.GetGeoTransform() 191 | 192 | if projection is None and geotransform is None: 193 | print('No projection or geotransform found on file' + input) 194 | sys.exit(1) 195 | 196 | dataset2 = gdal.Open(output, gdal.GA_Update) 197 | 198 | if dataset2 is None: 199 | print('Unable to open', output, 'for writing') 200 | sys.exit(1) 201 | 202 | if geotransform is not None and geotransform != (0, 1, 0, 0, 0, 1): 203 | dataset2.SetGeoTransform(geotransform) 204 | 205 | if projection is not None and projection != '': 206 | dataset2.SetProjection(projection) 207 | 208 | gcp_count = dataset.GetGCPCount() 209 | if gcp_count != 0: 210 | dataset2.SetGCPs(dataset.GetGCPs(), dataset.GetGCPProjection()) 211 | 212 | dataset = None 213 | dataset2 = None 214 | 215 | 216 | 217 | def regularize_segmentations(img_folder, seg_folder, out_folder, in_mode="semantic", out_mode="semantic", samples=None): 218 | """ 219 | BUILDING REGULARIZATION 220 | Inputs: 221 | - satellite image (3 channels) 222 | - building segmentation (1 channel) 223 | Output: 224 | - regularized mask 225 | """ 226 | 227 | img_files = glob.glob(img_folder) 228 | seg_files = glob.glob(seg_folder) 229 | 230 | img_files.sort() 231 | seg_files.sort() 232 | 233 | for num, (satellite_image_file, building_segmentation_file) in enumerate(zip(img_files, seg_files)): 234 | print(satellite_image_file, 
building_segmentation_file) 235 | _, rgb_name = os.path.split(satellite_image_file) 236 | _, seg_name = os.path.split(building_segmentation_file) 237 | assert rgb_name == seg_name 238 | 239 | output_file = out_folder + seg_name 240 | 241 | E1 = Encoder() 242 | G = GeneratorResNet() 243 | G.load_state_dict(torch.load(var.MODEL_GENERATOR)) 244 | E1.load_state_dict(torch.load(var.MODEL_ENCODER)) 245 | E1 = E1.cuda() 246 | G = G.cuda() 247 | 248 | model = [E1,G] 249 | 250 | M = io.imread(building_segmentation_file) 251 | M = np.uint16(M) 252 | P = io.imread(satellite_image_file) 253 | P = np.uint8(P) 254 | 255 | R = regularization(P, M, model, in_mode=in_mode, out_mode=out_mode) 256 | 257 | if out_mode == "instance": 258 | io.imsave(output_file, np.uint16(R)) 259 | else: 260 | io.imsave(output_file, np.uint8(R*255)) 261 | 262 | if samples is not None: 263 | i = 1000 264 | j = 1000 265 | h, w = 1080, 1920 266 | P = P[i:i+h, j:j+w] 267 | R = R[i:i+h, j:j+w] 268 | M = M[i:i+h, j:j+w] 269 | 270 | R = mark_boundaries(P, R, mode="thick") 271 | M = mark_boundaries(P, M, mode="thick") 272 | 273 | R = np.uint8(R*255) 274 | M = np.uint8(M*255) 275 | 276 | font = cv2.FONT_HERSHEY_SIMPLEX 277 | bottomLeftCornerOfText = (20,1060) 278 | fontScale = 1 279 | fontColor = (255,255,0) 280 | lineType = 2 281 | 282 | cv2.putText(R, "INRIA dataset, " + rgb_name + ", regularization", 283 | bottomLeftCornerOfText, 284 | font, 285 | fontScale, 286 | fontColor, 287 | lineType) 288 | 289 | cv2.putText(M, "INRIA dataset, " + rgb_name + ", segmentation", 290 | bottomLeftCornerOfText, 291 | font, 292 | fontScale, 293 | fontColor, 294 | lineType) 295 | 296 | io.imsave(samples + "./%d_2reg.png" % num, np.uint8(R)) 297 | io.imsave(samples + "./%d_1seg.png" % num, np.uint8(M)) 298 | 299 | copyGeoreference(satellite_image_file, output_file) 300 | copyGeoreference(satellite_image_file, building_segmentation_file) 301 | 302 | 303 | regularize_segmentations(img_folder=var.INF_RGB, seg_folder=var.INF_SEG, out_folder=var.INF_OUT, in_mode="semantic", out_mode="semantic", samples=None) 304 | -------------------------------------------------------------------------------- /projectRegularization/train_gan_net.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | 4 | import torch 5 | from torch import nn 6 | from torch import optim 7 | from torch.optim.lr_scheduler import MultiStepLR 8 | from torch.autograd import Variable 9 | from torch.utils.data import DataLoader 10 | 11 | from tqdm import tqdm 12 | import click 13 | import numpy as np 14 | import cv2 15 | from skimage.segmentation import mark_boundaries 16 | from skimage import io 17 | import itertools 18 | 19 | from models import GeneratorResNet, Encoder, Discriminator 20 | from data_loader_gan import DataLoader 21 | from training_utils import sample_images, LossBuffer, LambdaLR 22 | import variables as var 23 | from crf_loss import kernel_loss 24 | 25 | 26 | 27 | def crf_factor(batch_index, start_crf_batch, end_crf_batch, crf_initial_factor, crf_final_factor): 28 | if batch_index <= start_crf_batch: 29 | return 0.0 30 | elif start_crf_batch < batch_index < end_crf_batch: 31 | return crf_initial_factor + ((crf_final_factor - crf_initial_factor) * (batch_index - start_crf_batch) / (end_crf_batch - start_crf_batch)) 32 | else: 33 | return crf_final_factor 34 | 35 | 36 | def train( 37 | models_path='./saved_models_gan/', \ 38 | restore=False, \ 39 | batch_size=4, \ 40 | start_batch=0, n_batches=140000, \ 41 | 
start_crf_batch=60000, end_crf_batch=120000, crf_initial_factor=0.0, crf_final_factor=175.0, \ 42 | start_lr_decay=120000, \ 43 | start_lr=0.00004, win_size=256, sample_interval=20, backup_interval=5000): 44 | 45 | patch_size = int(win_size / pow(2, 4)) 46 | 47 | Tensor = torch.cuda.FloatTensor 48 | 49 | e1 = Encoder(channels=3+2) 50 | e2 = Encoder(channels=2) 51 | net = GeneratorResNet() 52 | disc = Discriminator() 53 | 54 | if restore: 55 | print("Restoring model number %d" % start_batch) 56 | e1.load_state_dict(torch.load(models_path + "E%d_e1" % start_batch)) 57 | e2.load_state_dict(torch.load(models_path + "E%d_e2" % start_batch)) 58 | net.load_state_dict(torch.load(models_path + "E%d_net" % start_batch)) 59 | disc.load_state_dict(torch.load(models_path + "E%d_disc" % start_batch)) 60 | 61 | e1 = e1.cuda() 62 | e2 = e2.cuda() 63 | net = net.cuda() 64 | disc = disc.cuda() 65 | 66 | os.makedirs(models_path, exist_ok=True) 67 | 68 | loss_0_buffer = LossBuffer() 69 | loss_1_buffer = LossBuffer() 70 | loss_2_buffer = LossBuffer() 71 | loss_3_buffer = LossBuffer() 72 | loss_4_buffer = LossBuffer() 73 | loss_5_buffer = LossBuffer() 74 | 75 | gen_obj = DataLoader(bs=batch_size, nb=n_batches, ws=win_size) 76 | 77 | # Optimizers 78 | optimizer_G = torch.optim.Adam(itertools.chain(net.parameters(), e1.parameters(), e2.parameters()), lr=start_lr) 79 | optimizer_D = torch.optim.Adam(disc.parameters(), lr=start_lr) 80 | 81 | # Learning rate update schedulers 82 | lr_scheduler_G = torch.optim.lr_scheduler.LambdaLR(optimizer_G, lr_lambda=LambdaLR(n_batches, start_lr_decay).step) 83 | lr_scheduler_D = torch.optim.lr_scheduler.LambdaLR(optimizer_D, lr_lambda=LambdaLR(n_batches, start_lr_decay).step) 84 | 85 | bce_criterion = nn.BCELoss() 86 | bce_criterion = bce_criterion.cuda() 87 | 88 | densecrflosslayer = kernel_loss() 89 | densecrflosslayer = densecrflosslayer.cuda() 90 | 91 | loader = gen_obj.generator() 92 | train_iterator = tqdm(loader, total=(n_batches + 1 - start_batch)) 93 | img_index = 0 94 | 95 | for batch_index, (rgb, gti, seg) in enumerate(train_iterator): 96 | 97 | batch_index = batch_index + start_batch 98 | 99 | rgb = Variable(Tensor(rgb)) 100 | gti = Variable(Tensor(gti)) 101 | seg = Variable(Tensor(seg)) 102 | 103 | rgb = rgb.permute(0,3,1,2) 104 | gti = gti.permute(0,3,1,2) 105 | seg = seg.permute(0,3,1,2) 106 | 107 | # Adversarial ground truths 108 | ones = Variable(Tensor(np.ones((batch_size, 1, patch_size, patch_size))), requires_grad=False) 109 | zeros = Variable(Tensor(np.zeros((batch_size, 1, patch_size, patch_size))), requires_grad=False) 110 | valid = torch.cat((ones, zeros), dim=1) 111 | fake = torch.cat((zeros, ones), dim=1) 112 | 113 | # ------------------ 114 | # Train Generators 115 | # ------------------ 116 | 117 | #e1.train() 118 | #e2.train() 119 | #net.train() 120 | 121 | optimizer_G.zero_grad() 122 | 123 | reg = net(e1([rgb, seg])) 124 | rec = net(e2([gti])) 125 | 126 | # Identity loss (reconstruction loss) 127 | loss_rec_1 = bce_criterion(reg, seg) 128 | loss_rec_2 = bce_criterion(rec, gti) 129 | 130 | # GAN loss 131 | loss_GAN = bce_criterion(disc(reg), valid) 132 | 133 | # CRF loss 134 | pot_multiplier = crf_factor(batch_index, start_crf_batch, end_crf_batch, crf_initial_factor, crf_final_factor) 135 | loss_pot = densecrflosslayer(rgb, reg) 136 | loss_pot = loss_pot.cuda() 137 | 138 | # Total loss 139 | loss_G = 3 * loss_GAN + 1 * loss_rec_1 + 3 * loss_rec_2 + pot_multiplier * loss_pot 140 | 141 | loss_G.backward() 142 | optimizer_G.step() 143 | 144 | 145 | # 
----------------------- 146 | # Train Discriminator A 147 | # ----------------------- 148 | 149 | #disc.train() 150 | 151 | optimizer_D.zero_grad() 152 | 153 | loss_real = bce_criterion(disc(rec.detach()), valid) 154 | loss_fake = bce_criterion(disc(reg.detach()), fake) 155 | 156 | # Total loss 157 | loss_D = (loss_real + loss_fake) / 2 158 | 159 | loss_D.backward() 160 | optimizer_D.step() 161 | 162 | # -------------- 163 | # Update LR 164 | # -------------- 165 | 166 | lr_scheduler_G.step(batch_index) 167 | lr_scheduler_D.step(batch_index) 168 | 169 | for g in optimizer_D.param_groups: 170 | current_lr = g['lr'] 171 | 172 | # -------------- 173 | # Log Progress 174 | # -------------- 175 | 176 | status = "[Batch %d][D loss: %f][G loss: %f, adv: %f, rec1: %f, rec2: %f][pot: %f, pot_mul: %f][lr: %f]" % \ 177 | (batch_index, \ 178 | loss_0_buffer.push(loss_D.item()), \ 179 | loss_1_buffer.push(loss_G.item()), loss_2_buffer.push(loss_GAN.item()), loss_3_buffer.push(loss_rec_1.item()), loss_4_buffer.push(loss_rec_2.item()), 180 | loss_5_buffer.push(loss_pot.item()), pot_multiplier, current_lr, ) 181 | 182 | train_iterator.set_description(status) 183 | 184 | if (batch_index % sample_interval == 0): 185 | img_index += 1 186 | void_mask = torch.zeros(gti.shape).cuda() 187 | sample_images(img_index, rgb, [void_mask, gti, rec, seg, reg]) 188 | if img_index >= 100: 189 | img_index = 0 190 | 191 | if (batch_index % backup_interval == 0): 192 | torch.save(e1.state_dict(), models_path + "E" + str(batch_index) + "_e1") 193 | torch.save(e2.state_dict(), models_path + "E" + str(batch_index) + "_e2") 194 | torch.save(net.state_dict(), models_path + "E" + str(batch_index) + "_net") 195 | torch.save(disc.state_dict(), models_path + "E" + str(batch_index) + "_disc") 196 | 197 | 198 | if __name__ == '__main__': 199 | train() 200 | -------------------------------------------------------------------------------- /projectRegularization/training_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import glob 4 | from tqdm import tqdm 5 | import random 6 | from skimage import io 7 | from skimage.segmentation import mark_boundaries 8 | 9 | import random 10 | import time 11 | import datetime 12 | import sys 13 | 14 | from torch.autograd import Variable 15 | import torch 16 | import numpy as np 17 | 18 | from osgeo import gdal 19 | 20 | import variables as var 21 | 22 | 23 | def sample_images(sample_index, img, masks): 24 | batch = img.shape[0] 25 | 26 | img = img.permute(0,2,3,1) 27 | 28 | for i in range(len(masks)): 29 | masks[i] = masks[i].permute(0,2,3,1) 30 | 31 | img = img.cpu().numpy() 32 | ip = np.uint8(img * 255) 33 | for i in range(len(masks)): 34 | masks[i] = masks[i].detach().cpu().numpy() 35 | masks[i] = np.argmax(masks[i], axis=-1) 36 | masks[i] = np.uint8(masks[i] * 255) 37 | 38 | line_mode = "inner" 39 | 40 | for i in range(len(masks)): 41 | row = np.copy(ip[0,:,:,:]) 42 | line = cv2.Canny(masks[i][0,:,:], 0, 255) 43 | row = mark_boundaries(row, line, color=(1,1,0), mode=line_mode) * 255#, outline_color=(self.red,self.greed,0)) 44 | for b in range(1,batch): 45 | pic = np.copy(ip[b,:,:,:]) 46 | line = cv2.Canny(masks[i][b,:,:], 0, 255) 47 | pic = mark_boundaries(pic, line, color=(1,1,0), mode=line_mode) * 255#, outline_color=(self.red,self.greed,0)) 48 | row = np.concatenate((row, pic), 1) 49 | masks[i] = row 50 | 51 | img = np.concatenate(masks, 0) 52 | img = np.uint8(img) 53 | io.imsave(var.DEBUG_DIR + "debug_%s.png" % 
str(sample_index), img) 54 | 55 | 56 | class LossBuffer(): 57 | def __init__(self, max_size=100): 58 | self.data = [] 59 | self.max_size = max_size 60 | 61 | def push(self, data): 62 | self.data.append(data) 63 | if len(self.data) > self.max_size: 64 | self.data = self.data[1:] 65 | return sum(self.data) / len(self.data) 66 | 67 | 68 | class LambdaLR(): 69 | def __init__(self, n_batches, decay_start_batch): 70 | assert ((n_batches - decay_start_batch) > 0), "Decay must start before the training session ends!" 71 | self.n_batches = n_batches 72 | self.decay_start_batch = decay_start_batch 73 | 74 | def step(self, batch): 75 | if batch > self.decay_start_batch: 76 | factor = 1.0 - (batch - self.decay_start_batch) / (self.n_batches - self.decay_start_batch) 77 | if factor > 0: 78 | return factor 79 | else: 80 | return 0.0 81 | else: 82 | return 1.0 83 | -------------------------------------------------------------------------------- /projectRegularization/variables.py: -------------------------------------------------------------------------------- 1 | # CONFIGURE THE PATHS HERE: 2 | 3 | # TRAINING 4 | DATASET_RGB = "/home/shymon/datasets/mapai_full/train/images/*.tif" 5 | DATASET_GTI = "/home/shymon/datasets/mapai_full/train/masks/*.tif" 6 | DATASET_SEG = "/home/shymon/Documents/mapAI-regularization/predictions/*.tif" 7 | 8 | DEBUG_DIR = "./debug/" 9 | 10 | # INFERENCE 11 | INF_RGB = "/home/shymon/datasets/mapai_full/task1_test/images/*.tif" 12 | INF_SEG = "/home/shymon/Documents/mapAI-regularization/predictions/*.tif" 13 | INF_OUT = "/home/shymon/Documents/mapAI-regularization/regularizations/" 14 | 15 | MODEL_ENCODER = "/home/shymon/Documents/mapAI-regularization/projectRegularization/pretrained_weights/E140000_e1" 16 | MODEL_GENERATOR = "/home/shymon/Documents/mapAI-regularization/projectRegularization/pretrained_weights/E140000_net" 17 | -------------------------------------------------------------------------------- /regularizations/bergen_-5943_1104.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/regularizations/bergen_-5943_1104.tif -------------------------------------------------------------------------------- /regularizations/bergen_-5944_1104.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/regularizations/bergen_-5944_1104.tif -------------------------------------------------------------------------------- /regularizations/bergen_-5948_1107.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/regularizations/bergen_-5948_1107.tif -------------------------------------------------------------------------------- /regularizations/kristiansand_-4712_-1562.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/regularizations/kristiansand_-4712_-1562.tif -------------------------------------------------------------------------------- /regularizations/kristiansand_-4712_-1563.tif: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/regularizations/kristiansand_-4712_-1563.tif -------------------------------------------------------------------------------- /regularizations/kristiansand_-4712_-1568.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/regularizations/kristiansand_-4712_-1568.tif -------------------------------------------------------------------------------- /regularizations/oslo_-3133_244.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/regularizations/oslo_-3133_244.tif -------------------------------------------------------------------------------- /regularizations/tromso_923_11083.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/regularizations/tromso_923_11083.tif -------------------------------------------------------------------------------- /regularizations/tromso_923_11084.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/regularizations/tromso_923_11084.tif -------------------------------------------------------------------------------- /regularizations/tromso_923_11086.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s1m0nS/mapAI-regularization/b2f5a9202a2c820e42023c53e5c95ce816eaacdc/regularizations/tromso_923_11086.tif -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | aiohttp==3.8.4 2 | aiosignal==1.3.1 3 | anyio==3.6.2 4 | argon2-cffi==21.3.0 5 | argon2-cffi-bindings==21.2.0 6 | asttokens==2.2.1 7 | async-timeout==4.0.2 8 | attrs==22.2.0 9 | backcall==0.2.0 10 | beautifulsoup4==4.11.2 11 | bleach==6.0.0 12 | brotlipy==0.7.0 13 | certifi==2022.12.7 14 | cffi==1.15.1 15 | charset-normalizer==2.0.4 16 | click==8.1.3 17 | click-plugins==1.1.1 18 | cligj==0.7.2 19 | comm==0.1.3 20 | contourpy==1.0.7 21 | cryptography==39.0.1 22 | cycler==0.11.0 23 | datasets==2.10.1 24 | debugpy==1.6.7 25 | decorator==5.1.1 26 | defusedxml==0.7.1 27 | dill==0.3.6 28 | efficientnet-pytorch==0.7.1 29 | einops==0.6.0 30 | entrypoints==0.4 31 | executing==1.2.0 32 | fastjsonschema==2.16.3 33 | filelock==3.9.0 34 | Fiona==1.9.1 35 | flit_core==3.6.0 36 | fonttools==4.39.0 37 | frozenlist==1.3.3 38 | fsspec==2023.3.0 39 | GDAL==3.6.3 40 | geopandas==0.12.2 41 | huggingface-hub==0.12.1 42 | idna==3.4 43 | imagecodecs==2023.3.16 44 | imageio==2.26.0 45 | importlib-metadata==6.0.0 46 | importlib-resources==5.12.0 47 | imutils==0.5.4 48 | ipykernel==6.22.0 49 | ipython==8.12.0 50 | ipython-genutils==0.2.0 51 | jedi==0.18.2 52 | Jinja2==3.1.2 53 | joblib==1.2.0 54 | jsonschema==4.17.3 55 | jupyter_client==8.1.0 56 | jupyter_core==5.3.0 57 | jupyter-server==1.23.6 58 | jupyterlab-pygments==0.2.2 59 | kiwisolver==1.4.4 60 | lazy_loader==0.1 61 | MarkupSafe==2.1.1 62 | matplotlib==3.7.1 63 | matplotlib-inline==0.1.6 64 | mistune==2.0.5 65 | mkl-fft==1.3.1 66 | mkl-random==1.2.2 
67 | mkl-service==2.4.0 68 | multidict==6.0.4 69 | multiprocess==0.70.14 70 | munch==2.5.0 71 | nbclassic==0.5.2 72 | nbclient==0.7.2 73 | nbconvert==7.2.9 74 | nbformat==5.7.3 75 | nest-asyncio==1.5.6 76 | networkx==3.0 77 | notebook==6.5.3 78 | notebook_shim==0.2.2 79 | numpy==1.23.5 80 | opencv-python==4.7.0.72 81 | packaging==23.0 82 | pandas==1.5.3 83 | pandocfilters==1.5.0 84 | parso==0.8.3 85 | pexpect==4.8.0 86 | pickleshare==0.7.5 87 | Pillow==9.4.0 88 | pip==23.0.1 89 | pkgutil_resolve_name==1.3.10 90 | platformdirs==3.2.0 91 | potrace==0.3 92 | pretrainedmodels==0.7.4 93 | prometheus-client==0.16.0 94 | prompt-toolkit==3.0.38 95 | psutil==5.9.0 96 | ptyprocess==0.7.0 97 | pure-eval==0.2.2 98 | pyarrow==11.0.0 99 | pycparser==2.21 100 | Pygments==2.11.2 101 | pylsd-nova==1.2.0 102 | pyOpenSSL==23.0.0 103 | pyparsing==3.0.9 104 | pyproj==3.4.1 105 | pyrsistent==0.18.0 106 | PySocks==1.7.1 107 | python-dateutil==2.8.2 108 | pytz==2022.7.1 109 | PyWavelets==1.4.1 110 | PyYAML==6.0 111 | pyzmq==25.0.2 112 | requests==2.28.1 113 | responses==0.18.0 114 | scikit-image==0.20.0 115 | scikit-learn==1.2.2 116 | scipy==1.10.1 117 | segmentation-models-pytorch==0.3.2 118 | Send2Trash==1.8.0 119 | setuptools==67.6.0 120 | shapely==2.0.1 121 | six==1.16.0 122 | sniffio==1.3.0 123 | soupsieve==2.3.2.post1 124 | stack-data==0.6.2 125 | terminado==0.17.1 126 | threadpoolctl==3.1.0 127 | tifffile==2023.2.28 128 | timm==0.6.12 129 | tinycss2==1.2.1 130 | torch==1.13.1 131 | torchaudio==0.13.1 132 | torchvision==0.14.1 133 | tornado==6.2 134 | tqdm==4.65.0 135 | traitlets==5.9.0 136 | typing_extensions==4.4.0 137 | urllib3==1.26.14 138 | wcwidth==0.2.6 139 | webencodings==0.5.1 140 | websocket-client==1.5.1 141 | wheel==0.38.4 142 | xxhash==3.2.0 143 | yarl==1.8.2 144 | zipp==3.15.0 145 | -------------------------------------------------------------------------------- /trained_models/Link_to_download_trained_models.txt: -------------------------------------------------------------------------------- 1 | DOWNLOAD TRAINED MODELS FROM: 2 | https://drive.google.com/drive/folders/1P6xTVq-o_mChcaDKBYugEnAX7Q2ZAzuM?usp=sharing 3 | 4 | Have a nice day. --------------------------------------------------------------------------------